|
/*
 * NOTE(review): <string.h> is intentionally not added here even though
 * strlen() is used below. On several C libraries it also declares the legacy
 * index() function, which would clash with the file-scope `index` variable.
 * Confirm where the strlen() prototype is expected to come from.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "dictre.h"
|
7 |
|
8 static unsigned char *index; |
|
9 static int indexfd; |
|
10 static int indexsize; |
|
11 static FILE *defs; |
|
12 const static char indexname[] = "akcentiga.index"; |
|
13 const static char dictname[] = "akcentiga.dict"; |
|
14 |
|
15 int get_filesize(const char *fname) |
|
16 { |
|
17 struct stat st; |
|
18 int res; |
|
19 res = stat(fname, &st); |
|
20 if (res == -1) |
|
21 { |
|
22 fprintf(stderr, "Problem stating the file %s\n", fname); |
|
23 perror("Error:"); |
|
24 exit(-1); |
|
25 } |
|
26 |
|
27 return st.st_size; |
|
28 } |
|
29 |
|
30 void init_dictionary() |
|
31 { |
|
32 indexsize = get_filesize(indexname); |
|
33 indexfd = open(indexname, O_RDONLY); |
|
34 if (indexfd == -1) |
|
35 { |
|
36 fprintf(stderr, "Problem opening the file %s\n", indexname); |
|
37 perror("Error:"); |
|
38 exit(-1); |
|
39 } |
|
40 index = (unsigned char *) mmap(0, indexsize, PROT_READ, MAP_SHARED, |
|
41 indexfd, 0); |
|
42 |
|
43 defs = fopen(dictname, "r"); |
|
44 if (defs == 0) |
|
45 { |
|
46 fprintf(stderr, "Problem opening the file %s\n", dictname); |
|
47 perror("Error:"); |
|
48 exit(-1); |
|
49 } |
|
50 } |
|
51 |
|
52 void end_dictionary() |
|
53 { |
|
54 munmap(index, indexsize); |
|
55 close(indexfd); |
|
56 fclose(defs); |
|
57 } |
|
58 |
|
59 static void fill_def(int offset, int length, char * def) |
|
60 { |
|
61 fseek(defs, offset, SEEK_SET); |
|
62 fread(def, 1, length, defs); |
|
63 } |
|
64 |
|
65 static int pointer_at_end(unsigned char *ptr) |
|
66 { |
|
67 if (ptr >= (index + indexsize)) |
|
68 return 1; |
|
69 return 0; |
|
70 } |
|
71 |
|
72 static char * skip_until_newline(char *from) |
|
73 { |
|
74 if (pointer_at_end(from)) |
|
75 return 0; |
|
76 while(*from != '\n' && *from != 0) |
|
77 { |
|
78 ++from; |
|
79 if(pointer_at_end(from)) |
|
80 return 0; |
|
81 } |
|
82 return from; |
|
83 } |
|
84 |
|
85 static int compare(const unsigned char *word, const unsigned char *test) |
|
86 { |
|
87 int i; |
|
88 |
|
89 /*printf("Comparing %s to %.20s\n", word, test);*/ |
|
90 for(i=0; word[i] != 0 && test[i] != 0; ++i) |
|
91 { |
|
92 if (word[i] != test[i]) |
|
93 { |
|
94 break; |
|
95 } |
|
96 } |
|
97 if (word[i] == 0 && test[i] == '\t') |
|
98 return 0; |
|
99 else if (word[i] == 0) |
|
100 return -1; |
|
101 else if (test[i] == '\t') |
|
102 return 1; |
|
103 else if (word[i] > test[i]) |
|
104 return 1; |
|
105 else if (word[i] < test[i]) |
|
106 return -1; |
|
107 |
|
108 /* It should never reach this. */ |
|
109 return -1; |
|
110 } |
|
111 |
|
112 static char * bin_search(const char *word) |
|
113 { |
|
114 int step, pivot; |
|
115 |
|
116 pivot = indexsize / 2; |
|
117 step = indexsize / 2; |
|
118 |
|
119 do |
|
120 { |
|
121 char *test; |
|
122 int comparision; |
|
123 test = index + pivot; |
|
124 test = skip_until_newline(test); |
|
125 if (test == 0) |
|
126 return 0; |
|
127 test += 1; /* skip exactly the new line */ |
|
128 |
|
129 comparision = compare(word, test); |
|
130 if (comparision == 0) |
|
131 { |
|
132 return test + strlen(word) + 1; /* skip word and \n */ |
|
133 } else if (comparision < 0) |
|
134 { |
|
135 step = step / 2; |
|
136 pivot = pivot - step; |
|
137 } else if (comparision > 0) |
|
138 { |
|
139 step = step / 2; |
|
140 pivot = pivot + step; |
|
141 } |
|
142 } while(step > 0); |
|
143 return 0; |
|
144 } |
|
145 |
|
146 static int my_get_int(char **pos) |
|
147 { |
|
148 int i; |
|
149 char *start; |
|
150 int val; |
|
151 |
|
152 start = *pos; |
|
153 for(i=0; start[i] != '\t' && start[i] != '\n'; ++i) |
|
154 ; |
|
155 val = str2int_len(start, i); |
|
156 *pos += i + 1; |
|
157 return val; |
|
158 } |
|
159 |
|
160 void find_def(const char *word, char * def) |
|
161 { |
|
162 int offset, len; |
|
163 char *pos; |
|
164 |
|
165 pos = bin_search(word); /* pos points to the offset already. */ |
|
166 if (pos == 0) |
|
167 { |
|
168 def[0] = 0; |
|
169 /*fprintf(stderr, "Cannot find %s\n", word);*/ |
|
170 return; |
|
171 } |
|
172 offset = my_get_int(&pos); /* increments pos */ |
|
173 len = my_get_int(&pos); /* increments pos */ |
|
174 fill_def(offset, len, def); |
|
175 } |