0
|
1 |
#include <stdio.h>
#include <stdlib.h>

#include "dictre.h"
|
|
4 |
|
|
5 |
enum
|
|
6 |
{
|
|
7 |
MAX=500000
|
|
8 |
};
|
|
9 |
|
|
10 |
struct Word words[MAX];
|
|
11 |
int nwords;
|
|
12 |
struct Def defs[MAX];
|
|
13 |
int ndefs;
|
|
14 |
int dont_touch[20];
|
|
15 |
int ndont_touch;
|
|
16 |
|
5
|
17 |
void init_load()
|
0
|
18 |
{
|
|
19 |
ndefs = 0;
|
|
20 |
nwords = 0;
|
|
21 |
ndont_touch = 0;
|
|
22 |
}
|
|
23 |
|
|
24 |
static void new_word(struct Word *from)
|
|
25 |
{
|
2
|
26 |
static int dispnwords = 0;
|
0
|
27 |
memcpy(&words[nwords], from, sizeof(*from));
|
|
28 |
nwords++;
|
2
|
29 |
dispnwords++;
|
|
30 |
if (dispnwords >= 1000)
|
|
31 |
{
|
|
32 |
dispnwords = 0;
|
|
33 |
printf("Loaded: %i\n", nwords);
|
|
34 |
}
|
0
|
35 |
}
|
|
36 |
|
|
37 |
static void new_dont_touch(int n)
|
|
38 |
{
|
|
39 |
dont_touch[ndont_touch++] = n;
|
|
40 |
}
|
|
41 |
|
|
42 |
static int new_def(char *def, int offset, int length)
|
|
43 |
{
|
|
44 |
defs[ndefs].d = def;
|
|
45 |
defs[ndefs].offset = offset;
|
|
46 |
defs[ndefs].length = length;
|
|
47 |
return ndefs++;
|
|
48 |
}
|
|
49 |
|
|
50 |
static int search_def(int offset, int length)
|
|
51 |
{
|
|
52 |
int i;
|
|
53 |
|
|
54 |
for(i=0; i < ndefs; ++i)
|
|
55 |
{
|
|
56 |
if (defs[i].offset == offset &&
|
|
57 |
defs[i].length == length)
|
|
58 |
return i;
|
|
59 |
}
|
|
60 |
return -1;
|
|
61 |
}
|
|
62 |
|
|
63 |
static void print_word(struct Word *w)
|
|
64 |
{
|
|
65 |
printf("%s\t%i\n", w->w, w->def);
|
|
66 |
}
|
|
67 |
|
|
68 |
void load_dictionary(FILE *index, FILE *fdefs)
|
|
69 |
{
|
|
70 |
struct Word w;
|
|
71 |
int last_offset = 0;
|
5
|
72 |
int def_avoided = 0;
|
0
|
73 |
|
|
74 |
do {
|
|
75 |
int offset, length;
|
|
76 |
char *defstr;
|
|
77 |
w.w = get_word(index);
|
|
78 |
if (w.w == 0)
|
|
79 |
break;
|
5
|
80 |
/*printf("Word: %s\n", w.w);*/
|
0
|
81 |
offset = get_int(index);
|
|
82 |
length = get_int(index);
|
|
83 |
if (offset > last_offset)
|
|
84 |
{
|
|
85 |
w.def = -1;
|
|
86 |
last_offset = offset;
|
|
87 |
}
|
|
88 |
else
|
|
89 |
w.def = search_def(offset, length);
|
5
|
90 |
if (w.def == -1)
|
0
|
91 |
{
|
5
|
92 |
/* New definition */
|
|
93 |
int newindex, repindex;
|
0
|
94 |
defstr = get_def(fdefs, offset, length);
|
5
|
95 |
newindex = new_def(defstr, offset, length);
|
|
96 |
|
|
97 |
/* Store it in the hash for repeated defs */
|
|
98 |
repindex = def_repeated(&defs[newindex]);
|
|
99 |
if (repindex != -1)
|
|
100 |
{
|
|
101 |
def_avoided += 1;
|
|
102 |
printf("Repeated def avoided %i (word %s)\n", def_avoided, w.w);
|
|
103 |
remove_def(newindex);
|
|
104 |
newindex = repindex;
|
|
105 |
} else
|
|
106 |
new_hashdef(&defs[newindex], newindex);
|
|
107 |
|
|
108 |
/* Store the final index */
|
|
109 |
w.def = newindex;
|
0
|
110 |
}
|
|
111 |
/* sizeof -1 instead of strlen() */
|
|
112 |
if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0)
|
|
113 |
new_dont_touch(w.def);
|
|
114 |
new_word(&w);
|
|
115 |
} while(1);
|
|
116 |
}
|
|
117 |
|
|
118 |
void print_words()
|
|
119 |
{
|
|
120 |
int i;
|
|
121 |
|
|
122 |
for(i=0; i < nwords; ++i)
|
|
123 |
print_word(&words[i]);
|
|
124 |
}
|