0
|
1 |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dictre.h"
|
|
4 |
|
|
5 |
enum
|
|
6 |
{
|
|
7 |
MAX=500000
|
|
8 |
};
|
|
9 |
|
|
10 |
struct Word words[MAX];
|
|
11 |
int nwords;
|
|
12 |
struct Def defs[MAX];
|
|
13 |
int ndefs;
|
|
14 |
int dont_touch[20];
|
|
15 |
int ndont_touch;
|
|
16 |
|
5
|
17 |
void init_load()
|
0
|
18 |
{
|
|
19 |
ndefs = 0;
|
|
20 |
nwords = 0;
|
|
21 |
ndont_touch = 0;
|
|
22 |
}
|
|
23 |
|
|
24 |
static void new_word(struct Word *from)
|
|
25 |
{
|
|
26 |
memcpy(&words[nwords], from, sizeof(*from));
|
|
27 |
nwords++;
|
|
28 |
}
|
|
29 |
|
|
30 |
static void new_dont_touch(int n)
|
|
31 |
{
|
|
32 |
dont_touch[ndont_touch++] = n;
|
|
33 |
}
|
|
34 |
|
|
35 |
static int new_def(char *def, int offset, int length)
|
|
36 |
{
|
|
37 |
defs[ndefs].d = def;
|
|
38 |
defs[ndefs].offset = offset;
|
|
39 |
defs[ndefs].length = length;
|
|
40 |
return ndefs++;
|
|
41 |
}
|
|
42 |
|
|
43 |
static int search_def(int offset, int length)
|
|
44 |
{
|
|
45 |
int i;
|
|
46 |
|
|
47 |
for(i=0; i < ndefs; ++i)
|
|
48 |
{
|
|
49 |
if (defs[i].offset == offset &&
|
|
50 |
defs[i].length == length)
|
|
51 |
return i;
|
|
52 |
}
|
|
53 |
return -1;
|
|
54 |
}
|
|
55 |
|
|
56 |
static void print_word(struct Word *w)
|
|
57 |
{
|
|
58 |
printf("%s\t%i\n", w->w, w->def);
|
|
59 |
}
|
|
60 |
|
|
61 |
void load_dictionary(FILE *index, FILE *fdefs)
|
|
62 |
{
|
|
63 |
struct Word w;
|
|
64 |
int last_offset = 0;
|
5
|
65 |
int def_avoided = 0;
|
6
|
66 |
int numword = 0;;
|
|
67 |
static int dispnwords = 0;
|
0
|
68 |
|
|
69 |
do {
|
|
70 |
int offset, length;
|
|
71 |
char *defstr;
|
|
72 |
w.w = get_word(index);
|
6
|
73 |
/*numword++;
|
|
74 |
printf("words: %i\n", numword);*/
|
0
|
75 |
if (w.w == 0)
|
|
76 |
break;
|
5
|
77 |
/*printf("Word: %s\n", w.w);*/
|
0
|
78 |
offset = get_int(index);
|
|
79 |
length = get_int(index);
|
|
80 |
if (offset > last_offset)
|
|
81 |
{
|
|
82 |
w.def = -1;
|
|
83 |
last_offset = offset;
|
|
84 |
}
|
|
85 |
else
|
|
86 |
w.def = search_def(offset, length);
|
5
|
87 |
if (w.def == -1)
|
0
|
88 |
{
|
5
|
89 |
/* New definition */
|
|
90 |
int newindex, repindex;
|
0
|
91 |
defstr = get_def(fdefs, offset, length);
|
5
|
92 |
newindex = new_def(defstr, offset, length);
|
6
|
93 |
/*
|
|
94 |
printf("Length %i (%s): %i\n", newindex, w.w, length);
|
|
95 |
*/
|
5
|
96 |
|
|
97 |
/* Store it in the hash for repeated defs */
|
|
98 |
repindex = def_repeated(&defs[newindex]);
|
|
99 |
if (repindex != -1)
|
|
100 |
{
|
|
101 |
def_avoided += 1;
|
6
|
102 |
/*
|
|
103 |
printf("Repeated def avoided %i (for def %i)"
|
|
104 |
" (%s)\n%i %s\n%i %s\n",
|
|
105 |
def_avoided, repindex, w.w,
|
|
106 |
length, defstr,
|
|
107 |
defs[repindex].length, defs[repindex].d);
|
|
108 |
*/
|
5
|
109 |
remove_def(newindex);
|
|
110 |
newindex = repindex;
|
|
111 |
} else
|
|
112 |
new_hashdef(&defs[newindex], newindex);
|
|
113 |
|
|
114 |
/* Store the final index */
|
|
115 |
w.def = newindex;
|
0
|
116 |
}
|
|
117 |
/* sizeof -1 instead of strlen() */
|
|
118 |
if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0)
|
|
119 |
new_dont_touch(w.def);
|
|
120 |
new_word(&w);
|
6
|
121 |
|
|
122 |
/* stdout Display */
|
|
123 |
dispnwords++;
|
|
124 |
if (dispnwords >= 1000)
|
|
125 |
{
|
|
126 |
dispnwords = 0;
|
|
127 |
printf("Loaded: %i Repeated definitions avoided: %i\n", nwords,
|
|
128 |
def_avoided);
|
|
129 |
}
|
|
130 |
|
0
|
131 |
} while(1);
|
|
132 |
}
|
|
133 |
|
|
134 |
void print_words()
|
|
135 |
{
|
|
136 |
int i;
|
|
137 |
|
|
138 |
for(i=0; i < nwords; ++i)
|
|
139 |
print_word(&words[i]);
|
|
140 |
}
|