src/jdict/DictIndex.java
author viric@llimona
Sun, 29 Jul 2007 23:28:51 +0200
changeset 25 764c48b54327
parent 21 0739404e26dc
child 28 dccb4ae23699
permissions -rw-r--r--
Riparitaj eraretoj.

package jdict;

import java.io.*;
import java.util.Vector;

/**
 * Reader and searcher for a dictionary index stored as a class-path resource.
 *
 * As parsed by this class, every index entry is one line of the form
 * name TAB offset TAB length NEWLINE, where the name is UTF-8 text and the
 * offset and length are numbers written with the base-64 alphabet
 * (A-Z, a-z, 0-9, '+', '/'), most significant digit first.
 *
 * The class keeps a read position inside the underlying BlockFile; the
 * field readers (getName, getOffset, getLength) advance it, so they must be
 * called in entry order.
 */
public class DictIndex
{
    /*
     * Capacity of the scratch buffer used to read one field.  A field may
     * hold at most FIELD_BUFFER_SIZE - 1 bytes before its delimiter.
     */
    private static final int FIELD_BUFFER_SIZE = 100;

    BlockFile in;

    String index_basename;
    String defs_basename;
    int index_id;

    /*
     * Value read from /dicts/TOPINDEX; used as the upper bound of the
     * binary search (presumably the index size in bytes -- confirm against
     * the tool that generates TOPINDEX).
     */
    int top_index;

    /*
     * True when the last read consumed a newline (or hit EOF / an I/O
     * error), i.e. there is nothing left to skip on the current line.
     */
    private boolean atLineStart = true;

    /**
     * Opens the index resource "/dicts/x-" + dictname + ".index" and loads
     * the top index value.
     *
     * @param dictname dictionary name used to build the resource path
     */
    public DictIndex(String dictname)
    {
        index_basename = "/dicts/x-" + dictname + ".index";
        in = new BlockFile(index_basename);
        get_top_index();
    }

    /**
     * Reads the decimal integer stored in the resource /dicts/TOPINDEX into
     * top_index.  On any failure (missing resource, read error, empty or
     * non-numeric contents) top_index is set to 0 and a message is printed.
     */
    public void get_top_index()
    {
        top_index = 0;

        InputStream bfile = getClass().getResourceAsStream("/dicts/TOPINDEX");
        if (bfile == null)
        {
            /* getResourceAsStream returns null when the resource is missing */
            System.out.println("Cannot open TOPINDEX.");
            return;
        }

        /* Room for the 10 digits of the largest int plus a line ending */
        byte array[] = new byte[16];
        int total = 0;
        try {
            /* A single read() may deliver fewer bytes than available */
            while (total < array.length)
            {
                int n = bfile.read(array, total, array.length - total);
                if (n <= 0)
                    break;
                total += n;
            }
        } catch (IOException e)
        {
            System.out.println("Cannot open TOPINDEX.");
            return;
        } finally
        {
            try {
                bfile.close();
            } catch (IOException e)
            {
                /* Nothing useful can be done if closing fails. */
            }
        }

        /* trim() drops the trailing '\n' (or "\r\n") if there is one */
        String str = new String(array, 0, total).trim();
        try {
            top_index = Integer.parseInt(str);
        } catch (NumberFormatException e)
        {
            System.out.println("Invalid TOPINDEX contents.");
            top_index = 0;
            return;
        }
        System.out.println("Top Index: " + top_index);
    }

    /**
     * Reads bytes from the index into buf up to the next delimiter.
     * A tab always ends the field; a newline ends it only when
     * stopAtNewline is true.  The delimiter is consumed but not stored.
     *
     * @param buf           destination buffer
     * @param stopAtNewline whether '\n' also terminates the field
     * @param caller        name printed in the EOF message
     * @return number of bytes stored, or -1 on EOF, I/O error, or when no
     *         delimiter was found within buf.length bytes
     */
    private int readField(byte buf[], boolean stopAtNewline, String caller)
    {
        int count = 0;

        while (count < buf.length)
        {
            int c;
            try {
                c = in.read();
            } catch (IOException e)
            {
                System.out.println("IO Exception: " +  e.getMessage());
                atLineStart = true;
                return -1;
            }

            if (c == -1)
            {
                System.out.println("EOF in " + caller);
                atLineStart = true;
                return -1;
            }
            if (c == '\t')
            {
                atLineStart = false;
                return count;
            }
            if (stopAtNewline && c == '\n')
            {
                atLineStart = true;
                return count;
            }
            buf[count] = (byte) c;
            count += 1;
        }

        /* Buffer filled without meeting a delimiter: field too long */
        atLineStart = false;
        return -1;
    }

    /* Skips what is left of the current line, if anything is left. */
    private void skipRestOfEntry()
    {
        if (!atLineStart)
            readUntilNewLine();
    }

    /**
     * Reads the name field (everything up to the next tab) of the entry at
     * the current position and decodes it as UTF-8.
     *
     * @return the name, or null on EOF, I/O error, a name that does not fit
     *         the field buffer, or if UTF-8 is unsupported
     */
    public String getName()
    {
        byte tmp[] = new byte[FIELD_BUFFER_SIZE];
        int len = readField(tmp, false, "getName");

        if (len < 0)
            return null;

        try {
            return new String(tmp, 0, len, "UTF-8");
        } catch (UnsupportedEncodingException e)
        {
            System.out.println("Unsupported encoding.");
            return null;
        }
    }

    /**
     * Reads the length field of the current entry.
     *
     * @return the decoded length, or -1 on failure (see getOffset)
     */
    public int getLength()
    {
        /* We reuse getOffset, as it breaks on \n too */
        return getOffset();
    }

    /**
     * Reads one numeric field, terminated by a tab or a newline, and
     * decodes it with IA5toNumber.
     *
     * @return the decoded number, or -1 on EOF, I/O error, or a field that
     *         does not fit the field buffer
     */
    public int getOffset()
    {
        byte tmp[] = new byte[FIELD_BUFFER_SIZE];
        /* "getOffest" is the message text this class has always printed */
        int len = readField(tmp, true, "getOffest");

        if (len < 0)
            return -1;

        return IA5toNumber(tmp, len);
    }

    /**
     * Consumes bytes up to and including the next newline.  Returns early,
     * after printing a message, on EOF or I/O error.
     */
    public void readUntilNewLine()
    {
        while (true)
        {
            int c;
            try {
                c = in.read();
            } catch (IOException e)
            {
                System.out.println("IO Exception: " +  e.getMessage());
                atLineStart = true;
                return;
            }

            if (c == -1)
            {
                System.out.println("EOF in readUntilNewLine");
                atLineStart = true;
                return;
            }
            if ((byte) c == '\n')
            {
                atLineStart = true;
                return;
            }
        }
    }

    /**
     * Maps one character of the base-64 alphabet to its digit value:
     * 'A'-'Z' are 0-25, 'a'-'z' are 26-51, '0'-'9' are 52-61, '+' is 62
     * and '/' is 63.
     *
     * @param letter the character, as a byte
     * @return the digit value; 0 for any byte outside the alphabet
     */
    public int IA5value(byte letter)
    {
        if (letter >= 'A' && letter <= 'Z')
            return letter - 'A';
        if (letter >= 'a' && letter <= 'z')
            return letter - 'a' + 26;
        if (letter >= '0' && letter <= '9')
            return letter - '0' + 52;
        if (letter == '+')
            return 62;
        if (letter == '/')
            return 63;
        return 0;
    }

    /**
     * Decodes the first length bytes of array as a base-64 number, most
     * significant digit first.
     *
     * @param array  bytes holding the digits
     * @param length how many bytes of array to decode
     * @return the decoded value (0 when length is 0)
     */
    public int IA5toNumber(byte array[], int length)
    {
        int value = 0;

        for (int i = 0; i < length; i++)
            value = value * 64 + IA5value(array[i]);

        return value;
    }

    /**
     * Reads the entry at the current position and formats it as
     * "name offset length".
     *
     * @return the formatted entry; fields that could not be read appear as
     *         "null" (name) or -1 (numbers)
     */
    public String EntryToString()
    {
        /* The three reads must happen in this order */
        String name = getName();
        int offset = getOffset();
        int length = getLength();

        return name + " " + offset + " " + length;
    }

    /**
     * Tells whether two words are the same.
     *
     * @param w1 first word, must not be null
     * @param w2 second word
     * @return true when w1 equals w2
     */
    public boolean WordMatches(String w1, String w2)
    {
        return w1.equals(w2);
    }

    /*
     * Reads the offset and length that follow an already-read name, logs
     * them, leaves the read position at the start of the next line and
     * returns the resulting Vorto.
     */
    private Vorto readVorto(String name, String word)
    {
        int offset = getOffset();
        int length = getLength();
        System.out.println("Definition for " + word + " at " +
                offset +  " length " + length);

        /* No-op when getLength() already consumed the newline */
        skipRestOfEntry();

        return new Vorto(name, offset, length);
    }

    /**
     * Linear search of the whole index for entries equal to word.
     *
     * @param word the word to look for
     * @return a Vector of Vorto, possibly empty
     */
    public Vector SearchDefinition(String word)
    {
        return SearchDefinition(word, -1);
    }

    /**
     * Linear search from the start of the index for entries equal to word.
     *
     * @param word the word to look for
     * @param max  if greater than 0, limits how many entries are examined
     *             (see SearchNextDefinition)
     * @return a Vector of Vorto, possibly empty
     */
    public Vector SearchDefinition(String word, int max)
    {
        try {
            in.seekFromStart(0);
        } catch (IOException e)
        {
            /* Best effort: the search continues from the current position */
            System.out.println("Cannot go back to start in search def.");
        }

        Vector results = new Vector();

        return SearchNextDefinition(results, word, max);
    }

    /**
     * Scans entries from the current position, appending to results a Vorto
     * for every entry whose name equals word.  Stops at the end of the
     * index, or, when max is greater than 0, after max + 1 entries have
     * been examined.
     *
     * @param results Vector the matches are appended to
     * @param word    the word to look for
     * @param max     examination limit; not applied when 0 or negative
     * @return results
     */
    public Vector SearchNextDefinition(Vector results, String word, int max)
    {
        int count = 0;
        System.out.println("Searching " + word);

        while (max <= 0 || count <= max)
        {
            String test = getName();
            if (test == null)
                break;

            if (WordMatches(word, test))
                results.addElement(readVorto(test, word));
            else
                readUntilNewLine();

            ++count;
        }

        return results;
    }

    /**
     * Scans consecutive entries from the current position, appending to
     * results a Vorto for each entry whose name starts with word.  Stops at
     * the first entry that does not, at the end of the index, or, when max
     * is greater than 0, after max + 1 entries.
     *
     * @param results Vector the matches are appended to
     * @param word    the prefix to look for
     * @param max     entry limit; not applied when 0 or negative
     * @return results
     */
    public Vector SearchNextPrefixes(Vector results, String word, int max)
    {
        int count = 0;
        System.out.println("Searching " + word);

        while (max <= 0 || count <= max)
        {
            String test = getName();
            if (test == null || !test.startsWith(word))
                break;

            /* readVorto leaves the position at the next entry */
            results.addElement(readVorto(test, word));
            ++count;
        }

        return results;
    }

    /**
     * Looks word up by bisecting byte positions between 0 and top_index,
     * then appends the entries that follow and have word as a prefix.
     *
     * NOTE(review): the bisection compares with String.compareTo, so it
     * assumes the index is sorted in that order -- confirm against the
     * index generator.  Because the line containing the pivot is always
     * skipped, the very first entry of the index cannot be compared.
     *
     * @param word the word to look for
     * @return a Vector of Vorto: the exact match first (if found), followed
     *         by up to 50 prefix matches; possibly empty
     */
    public Vector BinarySearchDefinition(String word)
    {
        Vector results = new Vector();
        int pivot = top_index / 2;
        int step = top_index / 2;
        boolean found = false;

        do
        {
            System.out.println("Pivoting to " + pivot);
            try {
                in.seekFromStart(pivot);
            } catch (IOException e)
            {
                System.out.println("Seek from start error");
                return results;
            }

            /* The pivot usually lands mid-line: move to the next entry */
            readUntilNewLine();
            String test = getName();
            if (test == null)
                break;

            int order = word.compareTo(test);
            System.out.println("Comparing to " + test);
            if (order == 0)
            {
                results.addElement(readVorto(test, word));
                found = true;
                break;
            }

            step = step / 2;
            if (order < 0)
                pivot = pivot - step;
            else
                pivot = pivot + step;
        } while (step > 0);

        /*
         * Without an exact match, restart from the entry after the last
         * pivot.  With a match, the position is already at the entry that
         * follows it, so nothing must be skipped.
         */
        if (!found)
        {
            try {
                in.seekFromStart(pivot);
            } catch (IOException e)
            {
                System.out.println("Seek from start error");
                return results;
            }
            readUntilNewLine();
        }

        /* Add the prefixes */
        SearchNextPrefixes(results, word, 49); /* Max 50 prefixes. Match+nexts*/

        return results;
    }
}