Byte-array comparision in binary search.
authorviric@llimona
Wed, 08 Aug 2007 23:48:41 +0200
changeset 36 9d677f961a96
parent 35 d5425eaec624
child 37 509497421b36
Byte-array comparision in binary search.
src/jdict/DictIndex.java
--- a/src/jdict/DictIndex.java	Wed Aug 08 23:09:44 2007 +0200
+++ b/src/jdict/DictIndex.java	Wed Aug 08 23:48:41 2007 +0200
@@ -41,7 +41,7 @@
                 new Integer(top_index).toString());
     }
 
-    public String getName()
+    public String  getName()
     {
         byte tmp[] = new byte[100];
         int i;
@@ -76,9 +76,40 @@
             System.out.println("Unsupported encoding.");
             return null;
         }
+
         return result;
     }
 
+    public byte[]  getNameBytes()
+    {
+        byte tmp[] = new byte[100];
+        int i;
+
+        i = 0;
+        do
+        {
+            try {
+                int c = in.read();
+                if (c == -1)
+                {
+                    System.out.println("EOF in getName");
+                    break;
+                }
+                tmp[i] = (byte) c;
+            } catch (IOException e)
+            {
+                System.out.println("IO Exception: " +  e.getMessage());
+                break;
+            }
+            i += 1;
+        } while (tmp[i-1] != '\t' /* tab */ && i < 100);
+
+        if (i == 51 || i == 0)
+            return null;
+
+        return tmp;
+    }
+
     public int getLength()
     {
         /* We reuse getOffset, as it breaks on \n too */
@@ -354,14 +385,92 @@
         return results;
     }
 
+    private boolean byteStartsWith(byte a1[], byte a2[])
+    {
+        int i;
+
+        i = 0;
+        do
+        {
+            if (a1[i] != a2[i])
+                return false;
+            ++i;
+        }
+        while (i < a1.length);
+
+        return true;
+    }
+
+    private int byteCompare(byte a1[], byte a2[])
+    {
+        int i;
+        int min;
+
+        min = a1.length;
+        if (a2.length < min)
+            min = a2.length;
+
+        i = 0;
+        do
+        {
+            if (a1[i] < a2[i])
+                return -1;
+            else if (a1[i] > a2[i])
+                return 1;
+            ++i;
+        }
+        while (i < min);
+
+        if (i < a1.length)
+            return 1;
+        else if (i < a2.length)
+            return -1;
+
+        return 0;
+    }
+
+    public String bytesToString(byte a[])
+    {
+        String result;
+        try {
+            result = new String(a, 0, a.length, "UTF-8");
+        } catch (UnsupportedEncodingException e)
+        {
+            System.out.println("Unsupported encoding.");
+            result = null;
+        }
+
+        return result;
+    }
+
+    public byte[] stringToBytes(String s)
+    {
+        byte res[] = {};
+
+        try {
+            res = s.getBytes("UTF-8");
+        } catch (UnsupportedEncodingException e)
+        {
+            System.out.println("Unsupported encoding.");
+        }
+
+        return res;
+    }
+
     public Vector BinarySearchDefinition(String word)
     {
         Vector results = new Vector();
         int pivot = top_index / 2;
         int step = top_index / 2;
         boolean found = false;
+        byte test[];
 
-        String test;
+        byte targetword[] = {};
+
+        targetword = stringToBytes(word);
+        if (targetword.length == 0)
+            return null;
+
         do
         {
             /*
@@ -376,7 +485,7 @@
                 return results;
             }
             readUntilNewLine();
-            test = getName();
+            test = getNameBytes();
             if (test == null) /* EOF probably */
             {
                 /* if EOF, go back */
@@ -384,8 +493,8 @@
                 pivot = pivot - step;
                 continue;
             }
-            int comparision = word.compareTo(test);
-            System.out.println("Comparing to " + test);
+            int comparision = byteCompare(targetword, test);
+            System.out.println("Comparing to " + bytesToString(test));
             if (comparision <= 0)
             {
                 /* If == 0, then we don't know that it is the
@@ -404,7 +513,7 @@
         /* If we found the exact word in a non-last comparision,
          * it's possible that the final binary search points us
          * to the word PREVIOUS to the good match. */
-        if (!test.startsWith(word))
+        if (!byteStartsWith(targetword, test))
         {
             readUntilNewLine();
         }