load.c
author viric@llimona
Sun, 02 Sep 2007 15:57:34 +0200
changeset 25 8d524bb8dcea
parent 6 bc41369f4587
permissions -rw-r--r--
Changed the name for zprocess to prepare_akcentiga

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dictre.h"

/* Capacity limit for the statically allocated tables below. */
enum
{
    MAX=500000 /* number of elements in both words[] and defs[] */
};

/* Table of loaded words; new_word() appends at index nwords. */
struct Word words[MAX];
int nwords; /* number of entries currently used in words[] */
/* Table of definitions; new_def() appends at index ndefs. */
struct Def defs[MAX];
int ndefs; /* number of entries currently used in defs[] */
/* Definition indices recorded by load_dictionary() for words that start
 * with "00database". */
int dont_touch[20];
int ndont_touch; /* number of entries currently used in dont_touch[] */

/*
 * Reset the loader's bookkeeping so that the words[], defs[] and
 * dont_touch[] tables are all considered empty.  Only the counters are
 * reset; the table contents themselves are left as they are.
 */
void init_load()
{
    ndefs = nwords = ndont_touch = 0;
}

static void new_word(struct Word *from)
{
    memcpy(&words[nwords], from, sizeof(*from));
    nwords++;
}

/*
 * Append the value n to the global dont_touch[] list and increment
 * ndont_touch.
 *
 * If the list is already full, an error is printed on stderr and the
 * program exits instead of writing past the end of the array.
 */
static void new_dont_touch(int n)
{
    const int capacity = (int)(sizeof dont_touch / sizeof dont_touch[0]);

    if (ndont_touch >= capacity)
    {
        fprintf(stderr, "Too many dont_touch entries (limit %i)\n",
                capacity);
        exit(EXIT_FAILURE);
    }

    dont_touch[ndont_touch++] = n;
}

/*
 * Register a new definition at the end of the global defs[] table.
 *
 * def    - definition text; the pointer itself is stored, not a copy
 * offset - value stored in the entry's offset field
 * length - value stored in the entry's length field
 *
 * Returns the index of the new entry in defs[].  If the table is already
 * full, an error is printed on stderr and the program exits instead of
 * writing past the end of the array.
 */
static int new_def(char *def, int offset, int length)
{
    const int capacity = (int)(sizeof defs / sizeof defs[0]);
    int index;

    if (ndefs >= capacity)
    {
        fprintf(stderr, "Too many definitions (limit %i)\n", capacity);
        exit(EXIT_FAILURE);
    }

    index = ndefs++;
    defs[index].d = def;
    defs[index].offset = offset;
    defs[index].length = length;
    return index;
}

/*
 * Linear search of defs[0..ndefs-1] for an entry whose offset and length
 * both match the arguments.
 *
 * Returns the index of the first matching entry, or -1 if none matches.
 */
static int search_def(int offset, int length)
{
    int idx = 0;

    while (idx < ndefs)
    {
        if (defs[idx].offset != offset || defs[idx].length != length)
        {
            /* Not this one; try the next entry. */
            idx++;
            continue;
        }
        return idx;
    }

    return -1;
}

/*
 * Write one word to standard output as a single line: the word text, a
 * tab, and the index of its definition.
 */
static void print_word(struct Word *w)
{
    fprintf(stdout, "%s\t%i\n", w->w, w->def);
}

/*
 * Read every entry from the index stream and register it in the global
 * word and definition tables.
 *
 * index - stream from which get_word() and get_int() read the entries
 * fdefs - stream handed to get_def() to fetch definition texts
 *
 * For each entry a word is read with get_word(), followed by two integers
 * (the definition's offset and length) read with get_int().  Reading stops
 * when get_word() returns a null pointer.
 *
 * A definition is loaded with get_def() when its offset is larger than
 * every offset seen so far in this call, or when search_def() finds no
 * stored definition with the same offset and length.  A newly loaded
 * definition is checked with def_repeated(); if that reports an existing
 * index, the new entry is dropped with remove_def() and the existing index
 * is used, otherwise the new entry is registered with new_hashdef().
 *
 * Words that start with "00database" have their definition index recorded
 * through new_dont_touch().  A progress line is printed to stdout every
 * 1000 words.
 */
void load_dictionary(FILE *index, FILE *fdefs)
{
    struct Word w;
    int last_offset = 0;
    int def_avoided = 0;
    /* static: the progress counter carries over between calls */
    static int dispnwords = 0;

    for (;;)
    {
        int offset, length;

        w.w = get_word(index);
        if (w.w == NULL)
            break;
        offset = get_int(index);
        length = get_int(index);

        if (offset > last_offset)
        {
            /* Beyond every offset seen so far in this call: treat it as
             * new without running the linear search. */
            w.def = -1;
            last_offset = offset;
        }
        else
            w.def = search_def(offset, length);

        if (w.def == -1)
        {
            /* New definition */
            char *defstr = get_def(fdefs, offset, length);
            int newindex = new_def(defstr, offset, length);
            /* Look it up in the hash of already stored definitions */
            int repindex = def_repeated(&defs[newindex]);

            if (repindex != -1)
            {
                /* Same definition already stored: drop the new entry and
                 * reuse the existing one. */
                def_avoided += 1;
                remove_def(newindex);
                newindex = repindex;
            }
            else
                new_hashdef(&defs[newindex], newindex);

            /* Store the final index */
            w.def = newindex;
        }

        /* sizeof - 1 instead of strlen(): prefix length without the NUL */
        if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0)
            new_dont_touch(w.def);
        new_word(&w);

        /* stdout display */
        dispnwords++;
        if (dispnwords >= 1000)
        {
            dispnwords = 0;
            printf("Loaded: %i Repeated definitions avoided: %i\n", nwords,
                    def_avoided);
        }
    }
}

/*
 * Print every loaded word, in table order, by calling print_word() on
 * each of the first nwords entries of words[].
 */
void print_words()
{
    int pos = 0;

    while (pos < nwords)
    {
        print_word(&words[pos]);
        pos++;
    }
}