author | viric@llimona |
Tue, 28 Aug 2007 01:03:24 +0200 | |
changeset 11 | 68ea18fe402c |
child 13 | f71e89074c62 |
permissions | -rw-r--r-- |
11
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
1 |
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dictre.h"
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
4 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
5 |
/* Number of buckets in the word hash table (2^16). */
enum { MAXHASH = 1 << 16 };
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
9 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
10 |
/* Node of a singly linked list of strings. */
struct BareWord
{
    struct BareWord *next; /* Following node, or 0 at the end of the list */
    char *str;             /* String held by this node */
};
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
15 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
16 |
/* One word of the hash table, chained with the other words of its bucket. */
struct WordEntry
{
    struct WordEntry *next;    /* Next entry of the same bucket, or 0 */
    char *str;                 /* Word without accent; 0 in the end-of-bucket item */
    struct BareWord *accented; /* Accented spelling, filled in by set_accented() */
    struct BareWord *unflexed; /* Words added through add_to_unflexed() */
};
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
23 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
24 |
/* The hash table: one chain of WordEntry items per bucket. */
static struct WordEntry *wordlist[MAXHASH];
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
25 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
26 |
struct WordEntry * new_WordEntry() |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
27 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
28 |
struct WordEntry *tmp; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
29 |
tmp = (struct WordEntry *) malloc(sizeof(*tmp)); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
30 |
assert(tmp != 0); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
31 |
return tmp; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
32 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
33 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
34 |
struct BareWord * new_BareWord() |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
35 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
36 |
struct BareWord *tmp; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
37 |
tmp = (struct BareWord *) malloc(sizeof(*tmp)); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
38 |
assert(tmp != 0); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
39 |
return tmp; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
40 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
41 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
42 |
void init_wordlist() |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
43 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
44 |
int i; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
45 |
for(i=0; i < MAXHASH; ++i) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
46 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
47 |
struct WordEntry *nodata; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
48 |
nodata = new_WordEntry(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
49 |
assert(nodata != 0); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
50 |
nodata->str = 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
51 |
nodata->accented = 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
52 |
nodata->unflexed = 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
53 |
nodata->next = 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
54 |
wordlist[i] = nodata; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
55 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
56 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
57 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
58 |
/*
 * Compute the bucket index for a word.
 *
 * The index is built from the second and the fourth byte of the string:
 * (str[1] << 8) | str[3].  NOTE(review): presumably the input is UTF-8
 * text made of two-byte characters, so these are the distinguishing bytes
 * of the first two characters -- confirm against the callers' data.
 *
 * Bytes are read only up to and including the terminating NUL; a byte that
 * would lie beyond the end of the string counts as 0.  Both bytes are
 * treated as unsigned values, so with 8-bit bytes the result is always in
 * the range 0 .. 0xFFFF.
 *
 * str: NUL-terminated byte string; must not be a null pointer.
 */
static unsigned int hash_func(const unsigned char *str)
{
    unsigned int second = 0;
    unsigned int fourth = 0;

    if (str[0] != 0 && str[1] != 0)
    {
        second = str[1];
        /* str[2] exists because str[1] is not the terminator. */
        if (str[2] != 0)
            fourth = str[3];
    }

    return (second << 8) | fourth;
}
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
73 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
74 |
/* Word without accent */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
75 |
struct WordEntry * does_word_exist(int hash, const char *word) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
76 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
77 |
struct WordEntry *tmp; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
78 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
79 |
for(tmp = wordlist[hash]; tmp != 0; tmp = tmp->next) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
80 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
81 |
if (tmp->str) /* The last item in the linked list will have str=0 */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
82 |
if (strcmp(word, tmp->str) == 0) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
83 |
return tmp; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
84 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
85 |
return 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
86 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
87 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
88 |
void add_to_unflexed(struct WordEntry *pos, const char *word) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
89 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
90 |
struct BareWord *tmp; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
91 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
92 |
if (pos->unflexed == 0) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
93 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
94 |
pos->unflexed = new_BareWord(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
95 |
tmp = pos->unflexed; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
96 |
tmp->str = strdup(word); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
97 |
tmp->next = 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
98 |
} else |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
99 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
100 |
/* Look for the same word */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
101 |
for(tmp = pos->unflexed; tmp != 0; tmp = tmp->next) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
102 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
103 |
if (strcmp(word, pos->str) == 0) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
104 |
break; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
105 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
106 |
if (tmp == 0) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
107 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
108 |
tmp = new_BareWord(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
109 |
} else |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
110 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
111 |
struct BareWord *new; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
112 |
new = new_BareWord(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
113 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
114 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
115 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
116 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
117 |
void set_accented(struct WordEntry *pos, const char *word) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
118 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
119 |
if (pos->accented) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
120 |
/* Will free the first parameter */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
121 |
pos->accented->str = mix_accents(pos->accented->str, word); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
122 |
else |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
123 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
124 |
pos->accented = new_BareWord(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
125 |
pos->accented->str = strdup(word); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
126 |
pos->accented->next = 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
127 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
128 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
129 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
130 |
void insert_word(const char *word, const char *unflexed) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
131 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
132 |
int hash; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
133 |
unsigned char word_no_accent[MAXWORD]; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
134 |
struct WordEntry *found; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
135 |
unsigned int hash_num; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
136 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
137 |
remove_accent(word_no_accent, word); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
138 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
139 |
hash_num = hash_func(word_no_accent); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
140 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
141 |
/* Where to insert */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
142 |
found = does_word_exist(hash_num, word_no_accent); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
143 |
if (found) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
144 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
145 |
set_accented(found, word); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
146 |
/* TODO process word_no_accent */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
147 |
} else /* Does not exist */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
148 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
149 |
/* new word */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
150 |
struct WordEntry *new; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
151 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
152 |
new = new_WordEntry(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
153 |
new->str = strdup(word_no_accent); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
154 |
new->unflexed = 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
155 |
add_to_unflexed(new, unflexed); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
156 |
new->accented = 0; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
157 |
set_accented(new, word); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
158 |
/* Put it on the head of the hash list */ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
159 |
new->next = wordlist[hash_num]; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
160 |
wordlist[hash_num] = new; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
161 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
162 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
163 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
164 |
static void dump_word(struct WordEntry *word) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
165 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
166 |
printf("%s:%s\n", word->str, word->accented->str); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
167 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
168 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
169 |
void dump_wordlist() |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
170 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
171 |
int i; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
172 |
for(i=0; i < MAXHASH; ++i) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
173 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
174 |
struct WordEntry *word; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
175 |
word = wordlist[i]; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
176 |
while (word != 0) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
177 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
178 |
if (word->str) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
179 |
dump_word(word); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
180 |
word = word->next; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
181 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
182 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
183 |
} |