author | viric@llimona |
Sat, 18 Aug 2007 23:18:41 +0200 | |
changeset 43 | 988a367ae8d2 |
parent 38 | 45c0a27c902f |
permissions | -rw-r--r-- |
0 | 1 |
package jdict; |
2 |
||
3 |
import java.io.*; |
|
4 |
import java.util.Vector; |
|
5 |
||
6 |
public class DictIndex |
|
7 |
{ |
|
2 | 8 |
BlockFile in; |
0 | 9 |
|
10 |
String index_basename; |
|
11 |
String defs_basename; |
|
12 |
int index_id; |
|
13 |
||
37 | 14 |
int top_index; /* File size of the Index file */ |
15 |
||
16 |
Vector first_words; |
|
11 | 17 |
|
0 | 18 |
public DictIndex(String dictname) |
19 |
{ |
|
20 |
index_basename = "/dicts/x-" + dictname + ".index"; |
|
2 | 21 |
in = new BlockFile(index_basename); |
11 | 22 |
get_top_index(); |
38 | 23 |
/* |
24 |
System.gc(); |
|
25 |
Main.showMem(); |
|
26 |
*/ |
|
37 | 27 |
prepare_first_words(); |
38 | 28 |
in.enableMarks(); |
11 | 29 |
} |
30 |
||
37 | 31 |
public void prepare_first_words() |
32 |
{ |
|
33 |
int bsize = in.get_block_size(); |
|
34 |
int max_blocks = top_index / bsize; |
|
35 |
int i; |
|
36 |
String str; |
|
37 |
||
38 | 38 |
if (max_blocks * bsize < top_index) |
39 |
max_blocks++; |
|
40 |
||
37 | 41 |
first_words = new Vector(); |
42 |
||
43 |
/* We assume offset 0 */ |
|
44 |
||
45 |
/* First word appart */ |
|
46 |
str = getName(); |
|
47 |
first_words.addElement(new FirstWord(str, 0)); |
|
48 |
||
49 |
/* Next words */ |
|
50 |
for(i=1; i<max_blocks; ++i) |
|
51 |
{ |
|
52 |
int offset = bsize * i; |
|
53 |
int pos; |
|
54 |
||
55 |
try { |
|
56 |
in.seekFromStart(offset); |
|
57 |
} catch (IOException e) |
|
58 |
{ |
|
59 |
System.out.println("Cannot go back to start in First Words."); |
|
60 |
break; |
|
61 |
} |
|
62 |
readUntilNewLine(); |
|
63 |
pos = in.getAbsPos(); |
|
64 |
str = getName(); |
|
65 |
first_words.addElement(new FirstWord(str, pos)); |
|
66 |
System.out.println("New first word: " + |
|
67 |
str + " - " + new Integer(pos).toString()); |
|
68 |
} |
|
69 |
||
70 |
try { |
|
71 |
in.seekFromStart(0); |
|
72 |
} catch (IOException e) |
|
73 |
{ |
|
74 |
System.out.println("Cannot go back to start in First Words."); |
|
75 |
return; |
|
76 |
} |
|
77 |
} |
|
78 |
||
79 |
public int [] getBlockBounds(String word) |
|
80 |
{ |
|
81 |
int i; |
|
82 |
int res[] = new int[2]; |
|
83 |
||
84 |
res[0] = 0; |
|
85 |
res[1] = top_index; |
|
86 |
||
87 |
for(i=0; i < first_words.size(); ++i) |
|
88 |
{ |
|
89 |
FirstWord tmp; |
|
90 |
int cmp; |
|
91 |
int pos; |
|
92 |
tmp = (FirstWord) first_words.elementAt(i); |
|
93 |
pos = tmp.getPos(); |
|
94 |
cmp = word.compareTo(tmp.getString()); |
|
95 |
if (cmp <= 0) |
|
96 |
{ |
|
97 |
res[1] = pos; |
|
98 |
break; |
|
99 |
} |
|
100 |
||
101 |
res[0] = pos; |
|
102 |
} |
|
103 |
||
104 |
return res; |
|
105 |
} |
|
106 |
||
107 |
private void get_top_index() |
|
11 | 108 |
{ |
109 |
InputStream bfile = getClass().getResourceAsStream("/dicts/TOPINDEX"); |
|
110 |
/* 10 bytes is enough for a simple int */ |
|
111 |
byte array[] = new byte[10]; |
|
112 |
int total; |
|
113 |
try { |
|
114 |
total = bfile.read(array, 0, 10); |
|
115 |
} catch (IOException e) |
|
116 |
{ |
|
117 |
System.out.println("Cannot open TOPINDEX."); |
|
118 |
top_index = 0; |
|
119 |
return; |
|
120 |
} |
|
121 |
/* This will have '\n' */ |
|
122 |
String str = new String(array, 0, total - 1 /* - \n */); |
|
123 |
top_index = Integer.parseInt(str); |
|
124 |
System.out.println("Top Index: " + |
|
125 |
new Integer(top_index).toString()); |
|
0 | 126 |
} |
127 |
||
36 | 128 |
public String getName() |
0 | 129 |
{ |
25 | 130 |
byte tmp[] = new byte[100]; |
0 | 131 |
int i; |
132 |
||
133 |
i = 0; |
|
134 |
do |
|
135 |
{ |
|
136 |
try { |
|
137 |
int c = in.read(); |
|
138 |
if (c == -1) |
|
139 |
{ |
|
2 | 140 |
System.out.println("EOF in getName"); |
141 |
break; |
|
0 | 142 |
} |
143 |
tmp[i] = (byte) c; |
|
144 |
} catch (IOException e) |
|
145 |
{ |
|
146 |
System.out.println("IO Exception: " + e.getMessage()); |
|
2 | 147 |
break; |
0 | 148 |
} |
149 |
i += 1; |
|
25 | 150 |
} while (tmp[i-1] != '\t' /* tab */ && i < 100); |
0 | 151 |
|
2 | 152 |
if (i == 51 || i == 0) |
0 | 153 |
return null; |
154 |
||
8 | 155 |
String result; |
156 |
try { |
|
157 |
result = new String(tmp, 0, i-1, "UTF-8"); |
|
158 |
} catch (UnsupportedEncodingException e) |
|
159 |
{ |
|
160 |
System.out.println("Unsupported encoding."); |
|
161 |
return null; |
|
162 |
} |
|
36 | 163 |
|
8 | 164 |
return result; |
0 | 165 |
} |
166 |
||
36 | 167 |
public byte[] getNameBytes() |
168 |
{ |
|
169 |
byte tmp[] = new byte[100]; |
|
170 |
int i; |
|
171 |
||
172 |
i = 0; |
|
173 |
do |
|
174 |
{ |
|
175 |
try { |
|
176 |
int c = in.read(); |
|
177 |
if (c == -1) |
|
178 |
{ |
|
179 |
System.out.println("EOF in getName"); |
|
180 |
break; |
|
181 |
} |
|
182 |
tmp[i] = (byte) c; |
|
183 |
} catch (IOException e) |
|
184 |
{ |
|
185 |
System.out.println("IO Exception: " + e.getMessage()); |
|
186 |
break; |
|
187 |
} |
|
188 |
i += 1; |
|
189 |
} while (tmp[i-1] != '\t' /* tab */ && i < 100); |
|
190 |
||
191 |
if (i == 51 || i == 0) |
|
192 |
return null; |
|
193 |
||
194 |
return tmp; |
|
195 |
} |
|
196 |
||
0 | 197 |
public int getLength() |
198 |
{ |
|
199 |
/* We reuse getOffset, as it breaks on \n too */ |
|
200 |
return getOffset(); |
|
201 |
} |
|
202 |
||
203 |
public int getOffset() |
|
204 |
{ |
|
25 | 205 |
byte tmp[] = new byte[100]; |
0 | 206 |
int i; |
207 |
||
208 |
i = 0; |
|
209 |
do |
|
210 |
{ |
|
211 |
try { |
|
212 |
int c = in.read(); |
|
213 |
if (c == -1) |
|
214 |
{ |
|
2 | 215 |
System.out.println("EOF in getOffest"); |
0 | 216 |
return -1; |
217 |
} |
|
218 |
tmp[i] = (byte) c; |
|
219 |
} catch (IOException e) |
|
220 |
{ |
|
221 |
System.out.println("IO Exception: " + e.getMessage()); |
|
222 |
return -1; |
|
223 |
} |
|
224 |
i += 1; |
|
25 | 225 |
} while (tmp[i-1] != '\t' /* tab */ && tmp[i-1] != '\n' && i < 100); |
0 | 226 |
|
227 |
if (i == 51) |
|
228 |
return -1; |
|
229 |
||
230 |
return IA5toNumber(tmp, i-1); |
|
231 |
} |
|
232 |
||
233 |
public void readUntilNewLine() |
|
234 |
{ |
|
235 |
byte tmp; |
|
236 |
||
237 |
do |
|
238 |
{ |
|
239 |
try { |
|
240 |
int c = in.read(); |
|
241 |
if (c == -1) |
|
242 |
{ |
|
2 | 243 |
System.out.println("EOF in readUntilNewLine"); |
0 | 244 |
return; |
245 |
} |
|
246 |
tmp = (byte) c; |
|
247 |
} catch (IOException e) |
|
248 |
{ |
|
249 |
System.out.println("IO Exception: " + e.getMessage()); |
|
250 |
return; |
|
251 |
} |
|
252 |
} while (tmp != '\n'); |
|
253 |
||
254 |
} |
|
255 |
||
256 |
public int IA5value(byte letter) |
|
257 |
{ |
|
258 |
switch(letter) |
|
259 |
{ |
|
260 |
case 'A': return 0; |
|
261 |
case 'B': return 1; |
|
262 |
case 'C': return 2; |
|
263 |
case 'D': return 3; |
|
264 |
case 'E': return 4; |
|
265 |
case 'F': return 5; |
|
266 |
case 'G': return 6; |
|
267 |
case 'H': return 7; |
|
268 |
case 'I': return 8; |
|
269 |
case 'J': return 9; |
|
270 |
case 'K': return 10; |
|
271 |
case 'L': return 11; |
|
272 |
case 'M': return 12; |
|
273 |
case 'N': return 13; |
|
274 |
case 'O': return 14; |
|
275 |
case 'P': return 15; |
|
276 |
case 'Q': return 16; |
|
277 |
case 'R': return 17; |
|
278 |
case 'S': return 18; |
|
279 |
case 'T': return 19; |
|
280 |
case 'U': return 20; |
|
281 |
case 'V': return 21; |
|
282 |
case 'W': return 22; |
|
283 |
case 'X': return 23; |
|
284 |
case 'Y': return 24; |
|
285 |
case 'Z': return 25; |
|
286 |
case 'a': return 26; |
|
287 |
case 'b': return 27; |
|
288 |
case 'c': return 28; |
|
289 |
case 'd': return 29; |
|
290 |
case 'e': return 30; |
|
291 |
case 'f': return 31; |
|
292 |
case 'g': return 32; |
|
293 |
case 'h': return 33; |
|
294 |
case 'i': return 34; |
|
295 |
case 'j': return 35; |
|
296 |
case 'k': return 36; |
|
297 |
case 'l': return 37; |
|
298 |
case 'm': return 38; |
|
299 |
case 'n': return 39; |
|
300 |
case 'o': return 40; |
|
301 |
case 'p': return 41; |
|
302 |
case 'q': return 42; |
|
303 |
case 'r': return 43; |
|
304 |
case 's': return 44; |
|
305 |
case 't': return 45; |
|
306 |
case 'u': return 46; |
|
307 |
case 'v': return 47; |
|
308 |
case 'w': return 48; |
|
309 |
case 'x': return 49; |
|
310 |
case 'y': return 50; |
|
311 |
case 'z': return 51; |
|
312 |
case '0': return 52; |
|
313 |
case '1': return 53; |
|
314 |
case '2': return 54; |
|
315 |
case '3': return 55; |
|
316 |
case '4': return 56; |
|
317 |
case '5': return 57; |
|
318 |
case '6': return 58; |
|
319 |
case '7': return 59; |
|
320 |
case '8': return 60; |
|
321 |
case '9': return 61; |
|
322 |
case '+': return 62; |
|
323 |
case '/': return 63; |
|
324 |
default: |
|
325 |
return 0; |
|
326 |
} |
|
327 |
} |
|
328 |
||
329 |
public int IA5toNumber(byte array[], int length) |
|
330 |
{ |
|
331 |
int i = 0; |
|
332 |
int value = 0; |
|
333 |
||
334 |
while (i < length) |
|
335 |
{ |
|
15 | 336 |
/* DEBUG |
0 | 337 |
System.out.println("Value1: " + new Integer(value).toString()); |
338 |
System.out.println("Array[i]: " + (char) array[i]); |
|
339 |
System.out.println("IA5Value: " + |
|
340 |
new Integer(IA5value(array[i])).toString()); |
|
15 | 341 |
*/ |
0 | 342 |
|
343 |
value = IA5value(array[i]) + value * 64; |
|
344 |
||
32 | 345 |
/*System.out.println("Value2: " + new Integer(value).toString()); |
346 |
* */ |
|
0 | 347 |
i += 1; |
348 |
} |
|
349 |
return value; |
|
350 |
} |
|
351 |
||
352 |
public String EntryToString() |
|
353 |
{ |
|
354 |
String name = getName(); |
|
355 |
int offset = getOffset(); |
|
356 |
int length = getLength(); |
|
357 |
||
358 |
return new String(name + " " + new Integer(offset).toString() + |
|
359 |
" " + new Integer(length).toString()); |
|
360 |
} |
|
361 |
||
362 |
public boolean WordMatches(String w1, String w2) |
|
363 |
{ |
|
2 | 364 |
/* System.out.println("Comparing " + w1 + " to " + w2);*/ |
0 | 365 |
if (w1.equals(w2)) |
366 |
return true; |
|
367 |
return false; |
|
368 |
} |
|
369 |
||
370 |
public Vector SearchDefinition(String word) |
|
371 |
{ |
|
14 | 372 |
return SearchDefinition(word, -1); |
373 |
} |
|
374 |
||
375 |
/* if max >0, limit the search. */ |
|
376 |
public Vector SearchDefinition(String word, int max) |
|
377 |
{ |
|
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
378 |
try { |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
379 |
in.seekFromStart(0); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
380 |
} catch (IOException e) |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
381 |
{ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
382 |
System.out.println("Cannot go back to start in search def."); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
383 |
} |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
384 |
|
0 | 385 |
Vector results = new Vector(); |
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
386 |
|
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
387 |
return SearchNextDefinition(results, word, max); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
388 |
} |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
389 |
|
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
390 |
/* if max >0, limit the search. */ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
391 |
public Vector SearchNextDefinition(Vector results, String word, int max) |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
392 |
{ |
14 | 393 |
int count = 0; |
394 |
System.out.println("Searching " + word); |
|
0 | 395 |
|
396 |
String test; |
|
397 |
do |
|
398 |
{ |
|
14 | 399 |
if (max > 0 && count > max) |
400 |
{ |
|
401 |
/* Void results */ |
|
402 |
break; |
|
403 |
} |
|
404 |
test = getName(); |
|
0 | 405 |
|
406 |
if (test == null) |
|
407 |
break; |
|
408 |
if (WordMatches(word,test)) |
|
409 |
{ |
|
410 |
int offset = getOffset(); |
|
411 |
int length = getLength(); |
|
412 |
System.out.println("Definition for " + word + " at " + |
|
413 |
new Integer(offset).toString() + " length " + |
|
414 |
new Integer(length).toString()); |
|
19 | 415 |
/*String definition = defs.getDefinition(offset, length);*/ |
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
416 |
Vorto vorto = new Vorto(test, offset, length); |
0 | 417 |
|
19 | 418 |
results.addElement(vorto); |
0 | 419 |
} else |
420 |
{ |
|
421 |
readUntilNewLine(); |
|
422 |
} |
|
14 | 423 |
++count; |
0 | 424 |
} while (test != null); |
425 |
||
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
426 |
return results; |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
427 |
} |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
428 |
|
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
429 |
/* if max >0, limit the search. */ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
430 |
public Vector SearchNextPrefixes(Vector results, String word, int max) |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
431 |
{ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
432 |
int count = 0; |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
433 |
System.out.println("Searching " + word); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
434 |
|
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
435 |
String test; |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
436 |
do |
14 | 437 |
{ |
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
438 |
if (max > 0 && count > max) |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
439 |
{ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
440 |
/* Void results */ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
441 |
break; |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
442 |
} |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
443 |
test = getName(); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
444 |
|
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
445 |
if (test == null) |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
446 |
break; |
43
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
447 |
boolean does_start_with; |
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
448 |
does_start_with = test.startsWith(word); |
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
449 |
System.out.println("Prefix linear comparing to " + test + |
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
450 |
": " + does_start_with); |
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
451 |
if (does_start_with) |
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
452 |
{ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
453 |
int offset = getOffset(); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
454 |
int length = getLength(); |
28 | 455 |
System.out.println("Definition for " + word + "(" |
456 |
+ test + ") at " + |
|
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
457 |
new Integer(offset).toString() + " length " + |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
458 |
new Integer(length).toString()); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
459 |
/*String definition = defs.getDefinition(offset, length);*/ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
460 |
Vorto vorto = new Vorto(test, offset, length); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
461 |
|
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
462 |
results.addElement(vorto); |
32 | 463 |
/* |
28 | 464 |
System.out.println("Until now: " + |
32 | 465 |
new Integer(results.size()).toString()); */ |
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
466 |
} else |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
467 |
{ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
468 |
break; |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
469 |
} |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
470 |
++count; |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
471 |
} while (test != null); |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
472 |
|
0 | 473 |
return results; |
474 |
} |
|
11 | 475 |
|
36 | 476 |
private boolean byteStartsWith(byte a1[], byte a2[]) |
477 |
{ |
|
478 |
int i; |
|
479 |
||
480 |
i = 0; |
|
481 |
do |
|
482 |
{ |
|
483 |
if (a1[i] != a2[i]) |
|
484 |
return false; |
|
485 |
++i; |
|
486 |
} |
|
487 |
while (i < a1.length); |
|
488 |
||
489 |
return true; |
|
490 |
} |
|
491 |
||
492 |
private int byteCompare(byte a1[], byte a2[]) |
|
493 |
{ |
|
494 |
int i; |
|
495 |
int min; |
|
496 |
||
497 |
min = a1.length; |
|
498 |
if (a2.length < min) |
|
499 |
min = a2.length; |
|
500 |
||
501 |
i = 0; |
|
502 |
do |
|
503 |
{ |
|
504 |
if (a1[i] < a2[i]) |
|
505 |
return -1; |
|
506 |
else if (a1[i] > a2[i]) |
|
507 |
return 1; |
|
508 |
++i; |
|
509 |
} |
|
510 |
while (i < min); |
|
511 |
||
512 |
if (i < a1.length) |
|
513 |
return 1; |
|
514 |
else if (i < a2.length) |
|
515 |
return -1; |
|
516 |
||
517 |
return 0; |
|
518 |
} |
|
519 |
||
520 |
public String bytesToString(byte a[]) |
|
521 |
{ |
|
522 |
String result; |
|
523 |
try { |
|
524 |
result = new String(a, 0, a.length, "UTF-8"); |
|
525 |
} catch (UnsupportedEncodingException e) |
|
526 |
{ |
|
527 |
System.out.println("Unsupported encoding."); |
|
528 |
result = null; |
|
529 |
} |
|
530 |
||
531 |
return result; |
|
532 |
} |
|
533 |
||
534 |
public byte[] stringToBytes(String s) |
|
535 |
{ |
|
536 |
byte res[] = {}; |
|
537 |
||
538 |
try { |
|
539 |
res = s.getBytes("UTF-8"); |
|
540 |
} catch (UnsupportedEncodingException e) |
|
541 |
{ |
|
542 |
System.out.println("Unsupported encoding."); |
|
543 |
} |
|
544 |
||
545 |
return res; |
|
546 |
} |
|
547 |
||
11 | 548 |
public Vector BinarySearchDefinition(String word) |
549 |
{ |
|
550 |
Vector results = new Vector(); |
|
37 | 551 |
int bounds[]; |
11 | 552 |
int pivot = top_index / 2; |
553 |
int step = top_index / 2; |
|
31 | 554 |
boolean found = false; |
43
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
555 |
boolean found_laste = false; |
38 | 556 |
String test; |
11 | 557 |
|
38 | 558 |
/* |
559 |
System.gc(); |
|
560 |
*/ |
|
561 |
Main.showMem(); |
|
36 | 562 |
|
37 | 563 |
bounds = getBlockBounds(word); |
564 |
final int min = bounds[0]; |
|
565 |
final int max = bounds[1]; |
|
566 |
pivot = min + ((max - min) / 2); |
|
567 |
step = (max - min) / 2; |
|
568 |
||
569 |
System.out.println("Block bounds: " + |
|
570 |
new Integer(min).toString() + " to " + |
|
571 |
new Integer(max).toString()); |
|
572 |
||
38 | 573 |
/* |
36 | 574 |
targetword = stringToBytes(word); |
575 |
if (targetword.length == 0) |
|
576 |
return null; |
|
38 | 577 |
*/ |
36 | 578 |
|
11 | 579 |
do |
580 |
{ |
|
28 | 581 |
/* |
582 |
* System.out.println("Pivoting to " + |
|
11 | 583 |
new Integer(pivot).toString()); |
28 | 584 |
*/ |
11 | 585 |
try { |
586 |
in.seekFromStart(pivot); |
|
587 |
} catch (IOException e) |
|
588 |
{ |
|
589 |
System.out.println("Seek from start error"); |
|
590 |
return results; |
|
591 |
} |
|
38 | 592 |
/*System.out.println("Searching new line");*/ |
11 | 593 |
readUntilNewLine(); |
38 | 594 |
/*System.out.println("Getting name bytes");*/ |
595 |
test = getName(); |
|
31 | 596 |
if (test == null) /* EOF probably */ |
597 |
{ |
|
598 |
/* if EOF, go back */ |
|
599 |
step = step / 2; |
|
600 |
pivot = pivot - step; |
|
601 |
continue; |
|
602 |
} |
|
38 | 603 |
int comparision = word.compareTo(test); |
43
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
604 |
System.out.println("Binary comparing to " + test + |
38 | 605 |
": " + new Integer(comparision).toString()); |
28 | 606 |
if (comparision <= 0) |
11 | 607 |
{ |
31 | 608 |
/* If == 0, then we don't know that it is the |
609 |
* FIRST match possible in the dictionary. |
|
610 |
* There may be more than one entry for the same word, and |
|
611 |
* we may not have found still the first. */ |
|
11 | 612 |
step = step / 2; |
613 |
pivot = pivot - step; |
|
614 |
} else if (comparision > 0) |
|
615 |
{ |
|
616 |
step = step / 2; |
|
617 |
pivot = pivot + step; |
|
618 |
} |
|
619 |
} while (step > 0); |
|
620 |
||
31 | 621 |
/* If we found the exact word in a non-last comparision, |
622 |
* it's possible that the final binary search points us |
|
623 |
* to the word PREVIOUS to the good match. */ |
|
43
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
624 |
if (!test.startsWith(word)) |
31 | 625 |
{ |
43
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
626 |
System.out.println("The word " + test + |
988a367ae8d2
Fixing a bug on the threshold between Binary Search and Prefix Sequence search
viric@llimona
parents:
38
diff
changeset
|
627 |
" doesn't start with " + word); |
31 | 628 |
readUntilNewLine(); |
629 |
} |
|
630 |
else |
|
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
631 |
{ |
21
0739404e26dc
Fixed prefix results when the prefix matches a word.
viric@mandarina
parents:
20
diff
changeset
|
632 |
try { |
0739404e26dc
Fixed prefix results when the prefix matches a word.
viric@mandarina
parents:
20
diff
changeset
|
633 |
in.seekFromStart(pivot); |
0739404e26dc
Fixed prefix results when the prefix matches a word.
viric@mandarina
parents:
20
diff
changeset
|
634 |
} catch (IOException e) |
0739404e26dc
Fixed prefix results when the prefix matches a word.
viric@mandarina
parents:
20
diff
changeset
|
635 |
{ |
0739404e26dc
Fixed prefix results when the prefix matches a word.
viric@mandarina
parents:
20
diff
changeset
|
636 |
System.out.println("Seek from start error"); |
0739404e26dc
Fixed prefix results when the prefix matches a word.
viric@mandarina
parents:
20
diff
changeset
|
637 |
return results; |
0739404e26dc
Fixed prefix results when the prefix matches a word.
viric@mandarina
parents:
20
diff
changeset
|
638 |
} |
35
d5425eaec624
There was still a bug searching. One more fixed.
viric@llimona
parents:
32
diff
changeset
|
639 |
readUntilNewLine(); |
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
640 |
} |
21
0739404e26dc
Fixed prefix results when the prefix matches a word.
viric@mandarina
parents:
20
diff
changeset
|
641 |
|
20
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
642 |
|
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
643 |
/* Add the prefixes */ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
644 |
SearchNextPrefixes(results, word, 49); /* Max 50 prefixes. Match+nexts*/ |
2f815ca5cb8c
Added prefix search and 'Transliterigo' saving in record.
viric@mandarina
parents:
19
diff
changeset
|
645 |
|
11 | 646 |
return results; |
647 |
} |
|
0 | 648 |
} |