package jdict;
import java.io.*;
import java.util.Vector;
public class DictIndex
{
BlockFile in;
String index_basename;
String defs_basename;
int index_id;
int top_index; /* File size of the Index file */
Vector first_words;
public DictIndex(String dictname)
{
index_basename = "/dicts/x-" + dictname + ".index";
in = new BlockFile(index_basename);
get_top_index();
/*
System.gc();
Main.showMem();
*/
prepare_first_words();
in.enableMarks();
}
public void prepare_first_words()
{
int bsize = in.get_block_size();
int max_blocks = top_index / bsize;
int i;
String str;
if (max_blocks * bsize < top_index)
max_blocks++;
first_words = new Vector();
/* We assume offset 0 */
/* First word appart */
str = getName();
first_words.addElement(new FirstWord(str, 0));
/* Next words */
for(i=1; i<max_blocks; ++i)
{
int offset = bsize * i;
int pos;
try {
in.seekFromStart(offset);
} catch (IOException e)
{
System.out.println("Cannot go back to start in First Words.");
break;
}
readUntilNewLine();
pos = in.getAbsPos();
str = getName();
first_words.addElement(new FirstWord(str, pos));
System.out.println("New first word: " +
str + " - " + new Integer(pos).toString());
}
try {
in.seekFromStart(0);
} catch (IOException e)
{
System.out.println("Cannot go back to start in First Words.");
return;
}
}
public int [] getBlockBounds(String word)
{
int i;
int res[] = new int[2];
res[0] = 0;
res[1] = top_index;
for(i=0; i < first_words.size(); ++i)
{
FirstWord tmp;
int cmp;
int pos;
tmp = (FirstWord) first_words.elementAt(i);
pos = tmp.getPos();
cmp = word.compareTo(tmp.getString());
if (cmp <= 0)
{
res[1] = pos;
break;
}
res[0] = pos;
}
return res;
}
private void get_top_index()
{
InputStream bfile = getClass().getResourceAsStream("/dicts/TOPINDEX");
/* 10 bytes is enough for a simple int */
byte array[] = new byte[10];
int total;
try {
total = bfile.read(array, 0, 10);
} catch (IOException e)
{
System.out.println("Cannot open TOPINDEX.");
top_index = 0;
return;
}
/* This will have '\n' */
String str = new String(array, 0, total - 1 /* - \n */);
top_index = Integer.parseInt(str);
System.out.println("Top Index: " +
new Integer(top_index).toString());
}
public String getName()
{
byte tmp[] = new byte[100];
int i;
i = 0;
do
{
try {
int c = in.read();
if (c == -1)
{
System.out.println("EOF in getName");
break;
}
tmp[i] = (byte) c;
} catch (IOException e)
{
System.out.println("IO Exception: " + e.getMessage());
break;
}
i += 1;
} while (tmp[i-1] != '\t' /* tab */ && i < 100);
if (i == 51 || i == 0)
return null;
String result;
try {
result = new String(tmp, 0, i-1, "UTF-8");
} catch (UnsupportedEncodingException e)
{
System.out.println("Unsupported encoding.");
return null;
}
return result;
}
public byte[] getNameBytes()
{
byte tmp[] = new byte[100];
int i;
i = 0;
do
{
try {
int c = in.read();
if (c == -1)
{
System.out.println("EOF in getName");
break;
}
tmp[i] = (byte) c;
} catch (IOException e)
{
System.out.println("IO Exception: " + e.getMessage());
break;
}
i += 1;
} while (tmp[i-1] != '\t' /* tab */ && i < 100);
if (i == 51 || i == 0)
return null;
return tmp;
}
public int getLength()
{
/* We reuse getOffset, as it breaks on \n too */
return getOffset();
}
public int getOffset()
{
byte tmp[] = new byte[100];
int i;
i = 0;
do
{
try {
int c = in.read();
if (c == -1)
{
System.out.println("EOF in getOffest");
return -1;
}
tmp[i] = (byte) c;
} catch (IOException e)
{
System.out.println("IO Exception: " + e.getMessage());
return -1;
}
i += 1;
} while (tmp[i-1] != '\t' /* tab */ && tmp[i-1] != '\n' && i < 100);
if (i == 51)
return -1;
return IA5toNumber(tmp, i-1);
}
public void readUntilNewLine()
{
byte tmp;
do
{
try {
int c = in.read();
if (c == -1)
{
System.out.println("EOF in readUntilNewLine");
return;
}
tmp = (byte) c;
} catch (IOException e)
{
System.out.println("IO Exception: " + e.getMessage());
return;
}
} while (tmp != '\n');
}
public int IA5value(byte letter)
{
switch(letter)
{
case 'A': return 0;
case 'B': return 1;
case 'C': return 2;
case 'D': return 3;
case 'E': return 4;
case 'F': return 5;
case 'G': return 6;
case 'H': return 7;
case 'I': return 8;
case 'J': return 9;
case 'K': return 10;
case 'L': return 11;
case 'M': return 12;
case 'N': return 13;
case 'O': return 14;
case 'P': return 15;
case 'Q': return 16;
case 'R': return 17;
case 'S': return 18;
case 'T': return 19;
case 'U': return 20;
case 'V': return 21;
case 'W': return 22;
case 'X': return 23;
case 'Y': return 24;
case 'Z': return 25;
case 'a': return 26;
case 'b': return 27;
case 'c': return 28;
case 'd': return 29;
case 'e': return 30;
case 'f': return 31;
case 'g': return 32;
case 'h': return 33;
case 'i': return 34;
case 'j': return 35;
case 'k': return 36;
case 'l': return 37;
case 'm': return 38;
case 'n': return 39;
case 'o': return 40;
case 'p': return 41;
case 'q': return 42;
case 'r': return 43;
case 's': return 44;
case 't': return 45;
case 'u': return 46;
case 'v': return 47;
case 'w': return 48;
case 'x': return 49;
case 'y': return 50;
case 'z': return 51;
case '0': return 52;
case '1': return 53;
case '2': return 54;
case '3': return 55;
case '4': return 56;
case '5': return 57;
case '6': return 58;
case '7': return 59;
case '8': return 60;
case '9': return 61;
case '+': return 62;
case '/': return 63;
default:
return 0;
}
}
public int IA5toNumber(byte array[], int length)
{
int i = 0;
int value = 0;
while (i < length)
{
/* DEBUG
System.out.println("Value1: " + new Integer(value).toString());
System.out.println("Array[i]: " + (char) array[i]);
System.out.println("IA5Value: " +
new Integer(IA5value(array[i])).toString());
*/
value = IA5value(array[i]) + value * 64;
/*System.out.println("Value2: " + new Integer(value).toString());
* */
i += 1;
}
return value;
}
public String EntryToString()
{
String name = getName();
int offset = getOffset();
int length = getLength();
return new String(name + " " + new Integer(offset).toString() +
" " + new Integer(length).toString());
}
public boolean WordMatches(String w1, String w2)
{
/* System.out.println("Comparing " + w1 + " to " + w2);*/
if (w1.equals(w2))
return true;
return false;
}
public Vector SearchDefinition(String word)
{
return SearchDefinition(word, -1);
}
/* if max >0, limit the search. */
public Vector SearchDefinition(String word, int max)
{
try {
in.seekFromStart(0);
} catch (IOException e)
{
System.out.println("Cannot go back to start in search def.");
}
Vector results = new Vector();
return SearchNextDefinition(results, word, max);
}
/* if max >0, limit the search. */
public Vector SearchNextDefinition(Vector results, String word, int max)
{
int count = 0;
System.out.println("Searching " + word);
String test;
do
{
if (max > 0 && count > max)
{
/* Void results */
break;
}
test = getName();
if (test == null)
break;
if (WordMatches(word,test))
{
int offset = getOffset();
int length = getLength();
System.out.println("Definition for " + word + " at " +
new Integer(offset).toString() + " length " +
new Integer(length).toString());
/*String definition = defs.getDefinition(offset, length);*/
Vorto vorto = new Vorto(test, offset, length);
results.addElement(vorto);
} else
{
readUntilNewLine();
}
++count;
} while (test != null);
return results;
}
/* if max >0, limit the search. */
public Vector SearchNextPrefixes(Vector results, String word, int max)
{
int count = 0;
System.out.println("Searching " + word);
String test;
do
{
if (max > 0 && count > max)
{
/* Void results */
break;
}
test = getName();
if (test == null)
break;
boolean does_start_with;
does_start_with = test.startsWith(word);
System.out.println("Prefix linear comparing to " + test +
": " + does_start_with);
if (does_start_with)
{
int offset = getOffset();
int length = getLength();
System.out.println("Definition for " + word + "("
+ test + ") at " +
new Integer(offset).toString() + " length " +
new Integer(length).toString());
/*String definition = defs.getDefinition(offset, length);*/
Vorto vorto = new Vorto(test, offset, length);
results.addElement(vorto);
/*
System.out.println("Until now: " +
new Integer(results.size()).toString()); */
} else
{
break;
}
++count;
} while (test != null);
return results;
}
private boolean byteStartsWith(byte a1[], byte a2[])
{
int i;
i = 0;
do
{
if (a1[i] != a2[i])
return false;
++i;
}
while (i < a1.length);
return true;
}
private int byteCompare(byte a1[], byte a2[])
{
int i;
int min;
min = a1.length;
if (a2.length < min)
min = a2.length;
i = 0;
do
{
if (a1[i] < a2[i])
return -1;
else if (a1[i] > a2[i])
return 1;
++i;
}
while (i < min);
if (i < a1.length)
return 1;
else if (i < a2.length)
return -1;
return 0;
}
public String bytesToString(byte a[])
{
String result;
try {
result = new String(a, 0, a.length, "UTF-8");
} catch (UnsupportedEncodingException e)
{
System.out.println("Unsupported encoding.");
result = null;
}
return result;
}
public byte[] stringToBytes(String s)
{
byte res[] = {};
try {
res = s.getBytes("UTF-8");
} catch (UnsupportedEncodingException e)
{
System.out.println("Unsupported encoding.");
}
return res;
}
public Vector BinarySearchDefinition(String word)
{
Vector results = new Vector();
int bounds[];
int pivot = top_index / 2;
int step = top_index / 2;
boolean found = false;
boolean found_laste = false;
String test;
/*
System.gc();
*/
Main.showMem();
bounds = getBlockBounds(word);
final int min = bounds[0];
final int max = bounds[1];
pivot = min + ((max - min) / 2);
step = (max - min) / 2;
System.out.println("Block bounds: " +
new Integer(min).toString() + " to " +
new Integer(max).toString());
/*
targetword = stringToBytes(word);
if (targetword.length == 0)
return null;
*/
do
{
/*
* System.out.println("Pivoting to " +
new Integer(pivot).toString());
*/
try {
in.seekFromStart(pivot);
} catch (IOException e)
{
System.out.println("Seek from start error");
return results;
}
/*System.out.println("Searching new line");*/
readUntilNewLine();
/*System.out.println("Getting name bytes");*/
test = getName();
if (test == null) /* EOF probably */
{
/* if EOF, go back */
step = step / 2;
pivot = pivot - step;
continue;
}
int comparision = word.compareTo(test);
System.out.println("Binary comparing to " + test +
": " + new Integer(comparision).toString());
if (comparision <= 0)
{
/* If == 0, then we don't know that it is the
* FIRST match possible in the dictionary.
* There may be more than one entry for the same word, and
* we may not have found still the first. */
step = step / 2;
pivot = pivot - step;
} else if (comparision > 0)
{
step = step / 2;
pivot = pivot + step;
}
} while (step > 0);
/* If we found the exact word in a non-last comparision,
* it's possible that the final binary search points us
* to the word PREVIOUS to the good match. */
if (!test.startsWith(word))
{
System.out.println("The word " + test +
" doesn't start with " + word);
readUntilNewLine();
}
else
{
try {
in.seekFromStart(pivot);
} catch (IOException e)
{
System.out.println("Seek from start error");
return results;
}
readUntilNewLine();
}
/* Add the prefixes */
SearchNextPrefixes(results, word, 49); /* Max 50 prefixes. Match+nexts*/
return results;
}
}