load.c
author viric <viriketo@gmail.com>
Fri, 30 Mar 2012 18:54:29 +0200
changeset 30 d30178b2a9e6
parent 6 bc41369f4587
permissions -rw-r--r--
Making the makefile not depend on /usr.
viric@0
     1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dictre.h"
viric@0
     4
viric@0
     5
enum
viric@0
     6
{
viric@0
     7
    MAX=500000
viric@0
     8
};
viric@0
     9
viric@0
    10
struct Word words[MAX];
viric@0
    11
int nwords;
viric@0
    12
struct Def defs[MAX];
viric@0
    13
int ndefs;
viric@0
    14
int dont_touch[20];
viric@0
    15
int ndont_touch;
viric@0
    16
viric@5
    17
void init_load()
viric@0
    18
{
viric@0
    19
    ndefs = 0;
viric@0
    20
    nwords = 0;
viric@0
    21
    ndont_touch = 0;
viric@0
    22
}
viric@0
    23
viric@0
    24
static void new_word(struct Word *from)
viric@0
    25
{
viric@0
    26
    memcpy(&words[nwords], from, sizeof(*from));
viric@0
    27
    nwords++;
viric@0
    28
}
viric@0
    29
viric@0
    30
static void new_dont_touch(int n)
viric@0
    31
{
viric@0
    32
    dont_touch[ndont_touch++] = n;
viric@0
    33
}
viric@0
    34
viric@0
    35
static int new_def(char *def, int offset, int length)
viric@0
    36
{
viric@0
    37
    defs[ndefs].d = def;
viric@0
    38
    defs[ndefs].offset = offset;
viric@0
    39
    defs[ndefs].length = length;
viric@0
    40
    return ndefs++;
viric@0
    41
}
viric@0
    42
viric@0
    43
static int search_def(int offset, int length)
viric@0
    44
{
viric@0
    45
    int i;
viric@0
    46
viric@0
    47
    for(i=0; i < ndefs; ++i)
viric@0
    48
    {
viric@0
    49
        if (defs[i].offset == offset &&
viric@0
    50
                defs[i].length == length)
viric@0
    51
            return i;
viric@0
    52
    }
viric@0
    53
    return -1;
viric@0
    54
}
viric@0
    55
viric@0
    56
static void print_word(struct Word *w)
viric@0
    57
{
viric@0
    58
    printf("%s\t%i\n", w->w, w->def);
viric@0
    59
}
viric@0
    60
viric@0
    61
void load_dictionary(FILE *index, FILE *fdefs)
viric@0
    62
{
viric@0
    63
    struct Word w;
viric@0
    64
    int last_offset = 0;
viric@5
    65
    int def_avoided = 0;
viric@6
    66
    int numword = 0;;
viric@6
    67
    static int dispnwords = 0;
viric@0
    68
viric@0
    69
    do {
viric@0
    70
        int offset, length;
viric@0
    71
        char *defstr;
viric@0
    72
        w.w = get_word(index);
viric@6
    73
        /*numword++;
viric@6
    74
        printf("words: %i\n", numword);*/
viric@0
    75
        if (w.w == 0)
viric@0
    76
            break;
viric@5
    77
        /*printf("Word: %s\n", w.w);*/
viric@0
    78
        offset = get_int(index);
viric@0
    79
        length = get_int(index);
viric@0
    80
        if (offset > last_offset)
viric@0
    81
        {
viric@0
    82
            w.def = -1;
viric@0
    83
            last_offset = offset;
viric@0
    84
        }
viric@0
    85
        else
viric@0
    86
            w.def = search_def(offset, length);
viric@5
    87
        if (w.def == -1) 
viric@0
    88
        {
viric@5
    89
            /* New definition */
viric@5
    90
            int newindex, repindex;
viric@0
    91
            defstr = get_def(fdefs, offset, length);
viric@5
    92
            newindex = new_def(defstr, offset, length);
viric@6
    93
            /*
viric@6
    94
            printf("Length %i (%s): %i\n", newindex, w.w, length);
viric@6
    95
            */
viric@5
    96
            
viric@5
    97
            /* Store it in the hash for repeated defs */
viric@5
    98
            repindex = def_repeated(&defs[newindex]);
viric@5
    99
            if (repindex != -1) 
viric@5
   100
            {
viric@5
   101
                def_avoided += 1;
viric@6
   102
                /*
viric@6
   103
                printf("Repeated def avoided %i (for def %i)"
viric@6
   104
                        " (%s)\n%i %s\n%i %s\n",
viric@6
   105
                        def_avoided, repindex, w.w,
viric@6
   106
                        length, defstr,
viric@6
   107
                        defs[repindex].length, defs[repindex].d);
viric@6
   108
                        */
viric@5
   109
                remove_def(newindex);
viric@5
   110
                newindex = repindex;
viric@5
   111
            } else
viric@5
   112
                new_hashdef(&defs[newindex], newindex);
viric@5
   113
viric@5
   114
            /* Store the final index */
viric@5
   115
            w.def = newindex;
viric@0
   116
        }
viric@0
   117
        /* sizeof -1  instead of strlen() */
viric@0
   118
        if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0)
viric@0
   119
                new_dont_touch(w.def);
viric@0
   120
        new_word(&w);
viric@6
   121
viric@6
   122
        /* stdout Display */
viric@6
   123
        dispnwords++;
viric@6
   124
        if (dispnwords >= 1000)
viric@6
   125
        {
viric@6
   126
            dispnwords = 0;
viric@6
   127
            printf("Loaded: %i Repeated definitions avoided: %i\n", nwords,
viric@6
   128
                    def_avoided);
viric@6
   129
        }
viric@6
   130
viric@0
   131
    } while(1);
viric@0
   132
}
viric@0
   133
viric@0
   134
void print_words()
viric@0
   135
{
viric@0
   136
    int i;
viric@0
   137
viric@0
   138
    for(i=0; i < nwords; ++i)
viric@0
   139
        print_word(&words[i]);
viric@0
   140
}