author | viric@llimona |
Wed, 29 Aug 2007 00:19:14 +0200 | |
changeset 14 | a961bb8806b9 |
parent 12 | c755c945a96a |
child 15 | 17a66ceb774a |
permissions | -rw-r--r-- |
11
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
1 |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dictre.h"
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
3 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
4 |
/*
 * Returns 1 when the two bytes at tmp are 0xCC 0x81 (the UTF-8 encoding of
 * U+0301, COMBINING ACUTE ACCENT), 0 otherwise.  The second byte is only
 * read when the first one matches, so pointing tmp at a string's NUL
 * terminator is safe.
 */
static int closed_accent(const unsigned char *tmp)
{
    return tmp[0] == 0xcc && tmp[1] == 0x81;
}

/*
 * Returns 1 when the byte at tmp is 0x60 (ASCII grave accent, '`'),
 * 0 otherwise.
 */
static int open_accent(const unsigned char *tmp)
{
    return tmp[0] == 0x60;
}

/*
 * Merges the accent marks of two spellings of a word into one new string.
 *
 * Both inputs are walked in parallel.  Accent marks (0xCC 0x81 or '`')
 * found in either string are copied to the output; when both strings carry
 * the same kind of mark at the current position it is emitted once.  Every
 * other byte is taken from `a`, and the corresponding byte of `b` is
 * skipped without being compared.  Once `a` is exhausted, further plain
 * bytes of `b` end the result.
 *
 * Ownership: `a` must be heap-allocated; it is ALWAYS freed here.  The
 * returned string is heap-allocated and must be freed by the caller.
 * Returns NULL if the allocation fails (`a` is still freed).
 */
char *mix_accents(char *a, const char *b)
{
    /* Compare bytes as unsigned so values >= 0x80 match on any platform. */
    const unsigned char *ua = (const unsigned char *)a;
    const unsigned char *ub = (const unsigned char *)b;
    size_t ia = 0;
    size_t ib = 0;
    size_t o = 0;
    char *out;

    /*
     * Each loop iteration writes no more bytes than it consumes from the
     * inputs, so strlen(a) + strlen(b) bytes plus the terminator is always
     * enough.  This replaces a fixed-size stack buffer that long words
     * could overflow.
     */
    out = malloc(strlen(a) + strlen(b) + 1);
    if (out == NULL)
    {
        free(a);
        return NULL;
    }

    while (ua[ia] != 0 || ub[ib] != 0)
    {
        if (closed_accent(&ua[ia]))
        {
            out[o++] = a[ia];
            out[o++] = a[ia + 1];
            ia += 2;
            /* Same mark in both strings: emit it only once. */
            if (closed_accent(&ub[ib]))
                ib += 2;
        }
        else if (closed_accent(&ub[ib]))
        {
            out[o++] = b[ib];
            out[o++] = b[ib + 1];
            ib += 2;
        }
        else if (open_accent(&ua[ia]))
        {
            out[o++] = a[ia];
            ia += 1;
            if (open_accent(&ub[ib]))
                ib += 1;
        }
        else if (open_accent(&ub[ib]))
        {
            out[o++] = b[ib];
            ib += 1;
        }
        else
        {
            /*
             * Plain letter byte.  If `a` has run out, only `b` has letters
             * left and the result ends here.
             */
            if (ua[ia] == 0)
                break;
            out[o++] = a[ia];
            ++ia;
            if (ub[ib] != 0)
                ++ib;
        }
    }
    out[o] = 0;

    free(a);
    return out;
}
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
77 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
78 |
/*
 * Copies `from` into `dest` with all accent marks removed.
 *
 * Dropped sequences: the byte pair 0xCC 0x81 (UTF-8 for U+0301, COMBINING
 * ACUTE ACCENT) and the single byte 0x60 ('`').  Every other byte is
 * copied unchanged, and `dest` is always NUL-terminated.
 *
 * `dest` must have room for strlen(from) + 1 bytes.  The write position
 * never runs ahead of the read position, so `dest` may be the same buffer
 * as `from`.
 */
void remove_accent(unsigned char *dest, const unsigned char *from)
{
    size_t in = 0;
    size_t out = 0;

    while (from[in] != 0)
    {
        /* from[in] is non-NUL here, so from[in + 1] is still in the string. */
        if (from[in] == 0xcc && from[in + 1] == 0x81)
        {
            in += 2;
        }
        else if (from[in] == 0x60)
        {
            in += 1;
        }
        else
        {
            dest[out] = from[in];
            ++out;
            ++in;
        }
    }
    dest[out] = 0;
}
14 | 99 |
|
100 |
/*
 * Advances *index forward through `str` until it rests on a '\n' or on the
 * terminating NUL.
 *
 * Returns the index of the '\n' when one is found, or -1 when the end of
 * the string is reached first.  Note that *index is left ON the newline,
 * not after it; a caller that wants the next line must step past it.
 */
int skip_newline(const char *str, int *index)
{
    int pos = *index;

    while (str[pos] != 0 && str[pos] != '\n')
    {
        ++pos;
    }
    *index = pos;

    if (str[pos] == '\n')
        return pos;

    return -1;
}
|
112 |
||
113 |
/*
 * Advances *index forward through `str` until it rests on a word
 * delimiter -- space, '\n', '\r' or ',' -- or on the terminating NUL.
 *
 * Returns the index of the delimiter when one is found, or -1 when the end
 * of the string is reached first.  *index is left on the delimiter (or on
 * the NUL).
 */
int until_noword(const char *str, int *index)
{
    int pos = *index;

    while (str[pos] != 0 &&
           str[pos] != ' ' &&
           str[pos] != '\n' &&
           str[pos] != '\r' &&
           str[pos] != ',')
    {
        ++pos;
    }
    *index = pos;

    if (str[pos] != 0)
        return pos;

    return -1;
}
|
129 |
||
130 |
/*
 * Returns 1 when `c` is in the 7-bit ASCII range (0..127), 0 otherwise.
 */
int is_ASCII(unsigned char c)
{
    return c < 128;
}

/*
 * Advances *index forward through `str` over ASCII bytes until it rests on
 * the first byte >= 128 or on the terminating NUL.
 *
 * Returns the index of that non-ASCII byte, or -1 when the end of the
 * string is reached first.
 * NOTE(review): presumably the non-ASCII byte marks the start of the next
 * (UTF-8 encoded) word -- confirm against callers.
 */
int until_newword(const unsigned char *str, int *index)
{
    int pos = *index;

    while (str[pos] != 0 && is_ASCII(str[pos]))
    {
        ++pos;
    }
    *index = pos;

    /*
     * The original had a stray ';' after this condition, which made the
     * return below unconditional and the -1 result unreachable.
     */
    if (str[pos] != 0)
        return pos;

    return -1;
}