Byte-array comparision in binary search.
--- a/src/jdict/DictIndex.java Wed Aug 08 23:09:44 2007 +0200
+++ b/src/jdict/DictIndex.java Wed Aug 08 23:48:41 2007 +0200
@@ -41,7 +41,7 @@
new Integer(top_index).toString());
}
- public String getName()
+ public String getName()
{
byte tmp[] = new byte[100];
int i;
@@ -76,9 +76,40 @@
System.out.println("Unsupported encoding.");
return null;
}
+
return result;
}
+ public byte[] getNameBytes()
+ {
+ byte tmp[] = new byte[100];
+ int i;
+
+ i = 0;
+ do
+ {
+ try {
+ int c = in.read();
+ if (c == -1)
+ {
+ System.out.println("EOF in getName");
+ break;
+ }
+ tmp[i] = (byte) c;
+ } catch (IOException e)
+ {
+ System.out.println("IO Exception: " + e.getMessage());
+ break;
+ }
+ i += 1;
+ } while (tmp[i-1] != '\t' /* tab */ && i < 100);
+
+ if (i == 51 || i == 0)
+ return null;
+
+ return tmp;
+ }
+
public int getLength()
{
/* We reuse getOffset, as it breaks on \n too */
@@ -354,14 +385,92 @@
return results;
}
+ private boolean byteStartsWith(byte a1[], byte a2[])
+ {
+ int i;
+
+ i = 0;
+ do
+ {
+ if (a1[i] != a2[i])
+ return false;
+ ++i;
+ }
+ while (i < a1.length);
+
+ return true;
+ }
+
+ private int byteCompare(byte a1[], byte a2[])
+ {
+ int i;
+ int min;
+
+ min = a1.length;
+ if (a2.length < min)
+ min = a2.length;
+
+ i = 0;
+ do
+ {
+ if (a1[i] < a2[i])
+ return -1;
+ else if (a1[i] > a2[i])
+ return 1;
+ ++i;
+ }
+ while (i < min);
+
+ if (i < a1.length)
+ return 1;
+ else if (i < a2.length)
+ return -1;
+
+ return 0;
+ }
+
+ public String bytesToString(byte a[])
+ {
+ String result;
+ try {
+ result = new String(a, 0, a.length, "UTF-8");
+ } catch (UnsupportedEncodingException e)
+ {
+ System.out.println("Unsupported encoding.");
+ result = null;
+ }
+
+ return result;
+ }
+
+ public byte[] stringToBytes(String s)
+ {
+ byte res[] = {};
+
+ try {
+ res = s.getBytes("UTF-8");
+ } catch (UnsupportedEncodingException e)
+ {
+ System.out.println("Unsupported encoding.");
+ }
+
+ return res;
+ }
+
public Vector BinarySearchDefinition(String word)
{
Vector results = new Vector();
int pivot = top_index / 2;
int step = top_index / 2;
boolean found = false;
+ byte test[];
- String test;
+ byte targetword[] = {};
+
+ targetword = stringToBytes(word);
+ if (targetword.length == 0)
+ return null;
+
do
{
/*
@@ -376,7 +485,7 @@
return results;
}
readUntilNewLine();
- test = getName();
+ test = getNameBytes();
if (test == null) /* EOF probably */
{
/* if EOF, go back */
@@ -384,8 +493,8 @@
pivot = pivot - step;
continue;
}
- int comparision = word.compareTo(test);
- System.out.println("Comparing to " + test);
+ int comparision = byteCompare(targetword, test);
+ System.out.println("Comparing to " + bytesToString(test));
if (comparision <= 0)
{
/* If == 0, then we don't know that it is the
@@ -404,7 +513,7 @@
/* If we found the exact word in a non-last comparision,
* it's possible that the final binary search points us
* to the word PREVIOUS to the good match. */
- if (!test.startsWith(word))
+ if (!byteStartsWith(targetword, test))
{
readUntilNewLine();
}