|
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dictre.h"
|
4 |
|
/* Number of slots in the wordlist hash table: 2^16. */
enum
{
    MAXHASH = 1 << 16
};
|
9 |
|
/* Node of a singly linked list of strings. */
struct BareWord
{
    struct BareWord *next;  /* following node, or 0 at the end of the list */
    char *str;              /* text carried by this node */
};
|
15 |
|
/* One entry of a hash bucket; entries in the same bucket are chained by next. */
struct WordEntry
{
    struct WordEntry *next;     /* following entry in the bucket, or 0 */
    char *str;                  /* lookup key; 0 marks a placeholder entry */
    struct BareWord *accented;  /* accented spelling(s) of the key */
    struct BareWord *unflexed;  /* list of unflexed forms */
};
|
23 |
|
24 static struct WordEntry * wordlist[MAXHASH]; |
|
25 |
|
26 struct WordEntry * new_WordEntry() |
|
27 { |
|
28 struct WordEntry *tmp; |
|
29 tmp = (struct WordEntry *) malloc(sizeof(*tmp)); |
|
30 assert(tmp != 0); |
|
31 return tmp; |
|
32 } |
|
33 |
|
34 struct BareWord * new_BareWord() |
|
35 { |
|
36 struct BareWord *tmp; |
|
37 tmp = (struct BareWord *) malloc(sizeof(*tmp)); |
|
38 assert(tmp != 0); |
|
39 return tmp; |
|
40 } |
|
41 |
|
42 void init_wordlist() |
|
43 { |
|
44 int i; |
|
45 for(i=0; i < MAXHASH; ++i) |
|
46 { |
|
47 struct WordEntry *nodata; |
|
48 nodata = new_WordEntry(); |
|
49 assert(nodata != 0); |
|
50 nodata->str = 0; |
|
51 nodata->accented = 0; |
|
52 nodata->unflexed = 0; |
|
53 nodata->next = 0; |
|
54 wordlist[i] = nodata; |
|
55 } |
|
56 } |
|
57 |
|
/*
 * Compute the bucket index for a NUL-terminated byte string.
 *
 * The hash is built from the bytes at index 1 (high byte) and index 3 (low
 * byte).  Presumably the input is made of two-byte UTF-8 sequences, whose
 * second bytes are the ones that distinguish characters -- TODO confirm
 * against the callers.
 *
 * No byte past the terminating NUL is ever read: a string too short to have
 * a byte at index 1 hashes to 0, and one too short to have a byte at index 3
 * contributes 0 as its low byte.  The bytes are handled as unsigned values,
 * so the result is always in the range 0 .. 0xFFFF, i.e. below MAXHASH
 * (1 << 16).
 */
static unsigned int hash_func(const unsigned char *str)
{
    unsigned int high = 0;
    unsigned int low = 0;

    if (str[0] != 0 && str[1] != 0)
    {
        high = str[1];
        /* str[3] may be the terminator itself, which is still in bounds. */
        if (str[2] != 0)
            low = str[3];
    }

    return (high << 8) | low;
}
|
73 |
|
74 /* Word without accent */ |
|
75 struct WordEntry * does_word_exist(int hash, const char *word) |
|
76 { |
|
77 struct WordEntry *tmp; |
|
78 |
|
79 for(tmp = wordlist[hash]; tmp != 0; tmp = tmp->next) |
|
80 { |
|
81 if (tmp->str) /* The last item in the linked list will have str=0 */ |
|
82 if (strcmp(word, tmp->str) == 0) |
|
83 return tmp; |
|
84 } |
|
85 return 0; |
|
86 } |
|
87 |
|
88 void add_to_unflexed(struct WordEntry *pos, const char *word) |
|
89 { |
|
90 struct BareWord *tmp; |
|
91 |
|
92 if (pos->unflexed == 0) |
|
93 { |
|
94 pos->unflexed = new_BareWord(); |
|
95 tmp = pos->unflexed; |
|
96 tmp->str = strdup(word); |
|
97 tmp->next = 0; |
|
98 } else |
|
99 { |
|
100 /* Look for the same word */ |
|
101 for(tmp = pos->unflexed; tmp != 0; tmp = tmp->next) |
|
102 { |
|
103 if (strcmp(word, pos->str) == 0) |
|
104 break; |
|
105 } |
|
106 if (tmp == 0) |
|
107 { |
|
108 tmp = new_BareWord(); |
|
109 } else |
|
110 { |
|
111 struct BareWord *new; |
|
112 new = new_BareWord(); |
|
113 } |
|
114 } |
|
115 } |
|
116 |
|
117 void set_accented(struct WordEntry *pos, const char *word) |
|
118 { |
|
119 if (pos->accented) |
|
120 /* Will free the first parameter */ |
|
121 pos->accented->str = mix_accents(pos->accented->str, word); |
|
122 else |
|
123 { |
|
124 pos->accented = new_BareWord(); |
|
125 pos->accented->str = strdup(word); |
|
126 pos->accented->next = 0; |
|
127 } |
|
128 } |
|
129 |
|
130 void insert_word(const char *word, const char *unflexed) |
|
131 { |
|
132 int hash; |
|
133 unsigned char word_no_accent[MAXWORD]; |
|
134 struct WordEntry *found; |
|
135 unsigned int hash_num; |
|
136 |
|
137 remove_accent(word_no_accent, word); |
|
138 |
|
139 hash_num = hash_func(word_no_accent); |
|
140 |
|
141 /* Where to insert */ |
|
142 found = does_word_exist(hash_num, word_no_accent); |
|
143 if (found) |
|
144 { |
|
145 set_accented(found, word); |
|
146 /* TODO process word_no_accent */ |
|
147 } else /* Does not exist */ |
|
148 { |
|
149 /* new word */ |
|
150 struct WordEntry *new; |
|
151 |
|
152 new = new_WordEntry(); |
|
153 new->str = strdup(word_no_accent); |
|
154 new->unflexed = 0; |
|
155 add_to_unflexed(new, unflexed); |
|
156 new->accented = 0; |
|
157 set_accented(new, word); |
|
158 /* Put it on the head of the hash list */ |
|
159 new->next = wordlist[hash_num]; |
|
160 wordlist[hash_num] = new; |
|
161 } |
|
162 } |
|
163 |
|
164 static void dump_word(struct WordEntry *word) |
|
165 { |
|
166 printf("%s:%s\n", word->str, word->accented->str); |
|
167 } |
|
168 |
|
169 void dump_wordlist() |
|
170 { |
|
171 int i; |
|
172 for(i=0; i < MAXHASH; ++i) |
|
173 { |
|
174 struct WordEntry *word; |
|
175 word = wordlist[i]; |
|
176 while (word != 0) |
|
177 { |
|
178 if (word->str) |
|
179 dump_word(word); |
|
180 word = word->next; |
|
181 } |
|
182 } |
|
183 } |