viric@0: #include viric@0: viric@0: #include "dictre.h" viric@0: viric@0: enum viric@0: { viric@0: MAX=500000 viric@0: }; viric@0: viric@0: struct Word words[MAX]; viric@0: int nwords; viric@0: struct Def defs[MAX]; viric@0: int ndefs; viric@0: int dont_touch[20]; viric@0: int ndont_touch; viric@0: viric@5: void init_load() viric@0: { viric@0: ndefs = 0; viric@0: nwords = 0; viric@0: ndont_touch = 0; viric@0: } viric@0: viric@0: static void new_word(struct Word *from) viric@0: { viric@0: memcpy(&words[nwords], from, sizeof(*from)); viric@0: nwords++; viric@0: } viric@0: viric@0: static void new_dont_touch(int n) viric@0: { viric@0: dont_touch[ndont_touch++] = n; viric@0: } viric@0: viric@0: static int new_def(char *def, int offset, int length) viric@0: { viric@0: defs[ndefs].d = def; viric@0: defs[ndefs].offset = offset; viric@0: defs[ndefs].length = length; viric@0: return ndefs++; viric@0: } viric@0: viric@0: static int search_def(int offset, int length) viric@0: { viric@0: int i; viric@0: viric@0: for(i=0; i < ndefs; ++i) viric@0: { viric@0: if (defs[i].offset == offset && viric@0: defs[i].length == length) viric@0: return i; viric@0: } viric@0: return -1; viric@0: } viric@0: viric@0: static void print_word(struct Word *w) viric@0: { viric@0: printf("%s\t%i\n", w->w, w->def); viric@0: } viric@0: viric@0: void load_dictionary(FILE *index, FILE *fdefs) viric@0: { viric@0: struct Word w; viric@0: int last_offset = 0; viric@5: int def_avoided = 0; viric@6: int numword = 0;; viric@6: static int dispnwords = 0; viric@0: viric@0: do { viric@0: int offset, length; viric@0: char *defstr; viric@0: w.w = get_word(index); viric@6: /*numword++; viric@6: printf("words: %i\n", numword);*/ viric@0: if (w.w == 0) viric@0: break; viric@5: /*printf("Word: %s\n", w.w);*/ viric@0: offset = get_int(index); viric@0: length = get_int(index); viric@0: if (offset > last_offset) viric@0: { viric@0: w.def = -1; viric@0: last_offset = offset; viric@0: } viric@0: else viric@0: w.def = search_def(offset, length); viric@5: if (w.def == -1) viric@0: { viric@5: /* New definition */ viric@5: int newindex, repindex; viric@0: defstr = get_def(fdefs, offset, length); viric@5: newindex = new_def(defstr, offset, length); viric@6: /* viric@6: printf("Length %i (%s): %i\n", newindex, w.w, length); viric@6: */ viric@5: viric@5: /* Store it in the hash for repeated defs */ viric@5: repindex = def_repeated(&defs[newindex]); viric@5: if (repindex != -1) viric@5: { viric@5: def_avoided += 1; viric@6: /* viric@6: printf("Repeated def avoided %i (for def %i)" viric@6: " (%s)\n%i %s\n%i %s\n", viric@6: def_avoided, repindex, w.w, viric@6: length, defstr, viric@6: defs[repindex].length, defs[repindex].d); viric@6: */ viric@5: remove_def(newindex); viric@5: newindex = repindex; viric@5: } else viric@5: new_hashdef(&defs[newindex], newindex); viric@5: viric@5: /* Store the final index */ viric@5: w.def = newindex; viric@0: } viric@0: /* sizeof -1 instead of strlen() */ viric@0: if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0) viric@0: new_dont_touch(w.def); viric@0: new_word(&w); viric@6: viric@6: /* stdout Display */ viric@6: dispnwords++; viric@6: if (dispnwords >= 1000) viric@6: { viric@6: dispnwords = 0; viric@6: printf("Loaded: %i Repeated definitions avoided: %i\n", nwords, viric@6: def_avoided); viric@6: } viric@6: viric@0: } while(1); viric@0: } viric@0: viric@0: void print_words() viric@0: { viric@0: int i; viric@0: viric@0: for(i=0; i < nwords; ++i) viric@0: print_word(&words[i]); viric@0: }