load.c
author viric@mandarina
Mon, 13 Aug 2007 16:27:41 +0200
changeset 5 c87681fff7d3
parent 2 57a1fcb0c75c
child 6 bc41369f4587
permissions -rw-r--r--
Checks for repeated definitions.
viric@0
     1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dictre.h"
viric@0
     4
viric@0
     5
enum
viric@0
     6
{
viric@0
     7
    MAX=500000
viric@0
     8
};
viric@0
     9
viric@0
    10
struct Word words[MAX];
viric@0
    11
int nwords;
viric@0
    12
struct Def defs[MAX];
viric@0
    13
int ndefs;
viric@0
    14
int dont_touch[20];
viric@0
    15
int ndont_touch;
viric@0
    16
viric@5
    17
void init_load()
viric@0
    18
{
viric@0
    19
    ndefs = 0;
viric@0
    20
    nwords = 0;
viric@0
    21
    ndont_touch = 0;
viric@0
    22
}
viric@0
    23
viric@0
    24
static void new_word(struct Word *from)
viric@0
    25
{
viric@2
    26
    static int dispnwords = 0;
viric@0
    27
    memcpy(&words[nwords], from, sizeof(*from));
viric@0
    28
    nwords++;
viric@2
    29
    dispnwords++;
viric@2
    30
    if (dispnwords >= 1000)
viric@2
    31
    {
viric@2
    32
        dispnwords = 0;
viric@2
    33
        printf("Loaded: %i\n", nwords);
viric@2
    34
    }
viric@0
    35
}
viric@0
    36
viric@0
    37
static void new_dont_touch(int n)
viric@0
    38
{
viric@0
    39
    dont_touch[ndont_touch++] = n;
viric@0
    40
}
viric@0
    41
viric@0
    42
static int new_def(char *def, int offset, int length)
viric@0
    43
{
viric@0
    44
    defs[ndefs].d = def;
viric@0
    45
    defs[ndefs].offset = offset;
viric@0
    46
    defs[ndefs].length = length;
viric@0
    47
    return ndefs++;
viric@0
    48
}
viric@0
    49
viric@0
    50
static int search_def(int offset, int length)
viric@0
    51
{
viric@0
    52
    int i;
viric@0
    53
viric@0
    54
    for(i=0; i < ndefs; ++i)
viric@0
    55
    {
viric@0
    56
        if (defs[i].offset == offset &&
viric@0
    57
                defs[i].length == length)
viric@0
    58
            return i;
viric@0
    59
    }
viric@0
    60
    return -1;
viric@0
    61
}
viric@0
    62
viric@0
    63
static void print_word(struct Word *w)
viric@0
    64
{
viric@0
    65
    printf("%s\t%i\n", w->w, w->def);
viric@0
    66
}
viric@0
    67
viric@0
    68
void load_dictionary(FILE *index, FILE *fdefs)
viric@0
    69
{
viric@0
    70
    struct Word w;
viric@0
    71
    int last_offset = 0;
viric@5
    72
    int def_avoided = 0;
viric@0
    73
viric@0
    74
    do {
viric@0
    75
        int offset, length;
viric@0
    76
        char *defstr;
viric@0
    77
        w.w = get_word(index);
viric@0
    78
        if (w.w == 0)
viric@0
    79
            break;
viric@5
    80
        /*printf("Word: %s\n", w.w);*/
viric@0
    81
        offset = get_int(index);
viric@0
    82
        length = get_int(index);
viric@0
    83
        if (offset > last_offset)
viric@0
    84
        {
viric@0
    85
            w.def = -1;
viric@0
    86
            last_offset = offset;
viric@0
    87
        }
viric@0
    88
        else
viric@0
    89
            w.def = search_def(offset, length);
viric@5
    90
        if (w.def == -1) 
viric@0
    91
        {
viric@5
    92
            /* New definition */
viric@5
    93
            int newindex, repindex;
viric@0
    94
            defstr = get_def(fdefs, offset, length);
viric@5
    95
            newindex = new_def(defstr, offset, length);
viric@5
    96
            
viric@5
    97
            /* Store it in the hash for repeated defs */
viric@5
    98
            repindex = def_repeated(&defs[newindex]);
viric@5
    99
            if (repindex != -1) 
viric@5
   100
            {
viric@5
   101
                def_avoided += 1;
viric@5
   102
                printf("Repeated def avoided %i (word %s)\n", def_avoided, w.w);
viric@5
   103
                remove_def(newindex);
viric@5
   104
                newindex = repindex;
viric@5
   105
            } else
viric@5
   106
                new_hashdef(&defs[newindex], newindex);
viric@5
   107
viric@5
   108
            /* Store the final index */
viric@5
   109
            w.def = newindex;
viric@0
   110
        }
viric@0
   111
        /* sizeof -1  instead of strlen() */
viric@0
   112
        if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0)
viric@0
   113
                new_dont_touch(w.def);
viric@0
   114
        new_word(&w);
viric@0
   115
    } while(1);
viric@0
   116
}
viric@0
   117
viric@0
   118
void print_words()
viric@0
   119
{
viric@0
   120
    int i;
viric@0
   121
viric@0
   122
    for(i=0; i < nwords; ++i)
viric@0
   123
        print_word(&words[i]);
viric@0
   124
}