viric@11: #include viric@11: #include viric@11: #include "dictre.h" viric@11: viric@11: enum viric@11: { viric@11: MAXHASH=1<<16 viric@11: }; viric@11: viric@11: struct BareWord viric@11: { viric@11: struct BareWord *next; viric@11: char *str; viric@11: }; viric@11: viric@11: struct WordEntry viric@11: { viric@11: struct WordEntry *next; viric@11: char *str; viric@11: struct BareWord *accented; viric@11: struct BareWord *unflexed; viric@11: }; viric@11: viric@11: static struct WordEntry * wordlist[MAXHASH]; viric@11: viric@11: struct WordEntry * new_WordEntry() viric@11: { viric@11: struct WordEntry *tmp; viric@11: tmp = (struct WordEntry *) malloc(sizeof(*tmp)); viric@11: assert(tmp != 0); viric@11: return tmp; viric@11: } viric@11: viric@11: struct BareWord * new_BareWord() viric@11: { viric@11: struct BareWord *tmp; viric@11: tmp = (struct BareWord *) malloc(sizeof(*tmp)); viric@11: assert(tmp != 0); viric@11: return tmp; viric@11: } viric@11: viric@11: void init_wordlist() viric@11: { viric@11: int i; viric@11: for(i=0; i < MAXHASH; ++i) viric@11: { viric@11: struct WordEntry *nodata; viric@11: nodata = new_WordEntry(); viric@11: assert(nodata != 0); viric@11: nodata->str = 0; viric@11: nodata->accented = 0; viric@11: nodata->unflexed = 0; viric@11: nodata->next = 0; viric@11: wordlist[i] = nodata; viric@11: } viric@11: } viric@11: viric@11: static unsigned int hash_func(const unsigned char *str) viric@11: { viric@16: unsigned int v; viric@11: viric@16: /* for hashmax of 2^16 */ viric@11: viric@16: v = (str[1] & 15) << 4*3; viric@11: if (str[2] != 0) viric@16: v += (str[3] & 15) << 4*2; viric@16: if (str[4] != 0) viric@16: v += (str[5] & 15) << 4; viric@16: if (str[6] != 0) viric@16: v += (str[7] & 15); viric@11: viric@16: return v; viric@11: } viric@11: viric@11: /* Word without accent */ viric@11: struct WordEntry * does_word_exist(int hash, const char *word) viric@11: { viric@11: struct WordEntry *tmp; viric@11: viric@11: for(tmp = wordlist[hash]; tmp != 0; tmp = tmp->next) viric@11: { viric@11: if (tmp->str) /* The last item in the linked list will have str=0 */ viric@11: if (strcmp(word, tmp->str) == 0) viric@11: return tmp; viric@11: } viric@11: return 0; viric@11: } viric@11: viric@11: void add_to_unflexed(struct WordEntry *pos, const char *word) viric@11: { viric@11: struct BareWord *tmp; viric@11: viric@11: if (pos->unflexed == 0) viric@11: { viric@11: pos->unflexed = new_BareWord(); viric@11: tmp = pos->unflexed; viric@11: tmp->str = strdup(word); viric@11: tmp->next = 0; viric@11: } else viric@11: { viric@11: /* Look for the same word */ viric@11: for(tmp = pos->unflexed; tmp != 0; tmp = tmp->next) viric@11: { viric@13: if (strcmp(word, tmp->str) == 0) viric@11: break; viric@11: } viric@13: /* If not found... */ viric@11: if (tmp == 0) viric@11: { viric@11: tmp = new_BareWord(); viric@13: tmp->str = strdup(word); viric@13: tmp->next = pos->unflexed; viric@13: pos->unflexed = tmp; viric@11: } viric@11: } viric@11: } viric@11: viric@11: void set_accented(struct WordEntry *pos, const char *word) viric@11: { viric@11: if (pos->accented) viric@11: /* Will free the first parameter */ viric@11: pos->accented->str = mix_accents(pos->accented->str, word); viric@11: else viric@11: { viric@11: pos->accented = new_BareWord(); viric@11: pos->accented->str = strdup(word); viric@11: pos->accented->next = 0; viric@11: } viric@11: } viric@11: viric@11: void insert_word(const char *word, const char *unflexed) viric@11: { viric@11: int hash; viric@11: unsigned char word_no_accent[MAXWORD]; viric@11: struct WordEntry *found; viric@11: unsigned int hash_num; viric@11: viric@11: remove_accent(word_no_accent, word); viric@16: remove_jo(word_no_accent); viric@11: viric@11: hash_num = hash_func(word_no_accent); viric@11: viric@11: /* Where to insert */ viric@11: found = does_word_exist(hash_num, word_no_accent); viric@11: if (found) viric@11: { viric@11: set_accented(found, word); viric@13: add_to_unflexed(found, unflexed); viric@11: } else /* Does not exist */ viric@11: { viric@11: /* new word */ viric@11: struct WordEntry *new; viric@11: viric@11: new = new_WordEntry(); viric@11: new->str = strdup(word_no_accent); viric@11: new->unflexed = 0; viric@11: add_to_unflexed(new, unflexed); viric@11: new->accented = 0; viric@11: set_accented(new, word); viric@11: /* Put it on the head of the hash list */ viric@11: new->next = wordlist[hash_num]; viric@11: wordlist[hash_num] = new; viric@11: } viric@11: } viric@11: viric@11: static void dump_word(struct WordEntry *word) viric@11: { viric@13: struct BareWord *tmp; viric@13: printf(":%s:%s", word->str, word->accented->str); viric@13: viric@13: for(tmp = word->unflexed; tmp != 0; tmp = tmp->next) viric@13: { viric@13: printf(" %s", tmp->str); viric@13: } viric@13: printf("\n"); viric@11: } viric@11: viric@11: void dump_wordlist() viric@11: { viric@11: int i; viric@11: for(i=0; i < MAXHASH; ++i) viric@11: { viric@11: struct WordEntry *word; viric@11: word = wordlist[i]; viric@11: while (word != 0) viric@11: { viric@11: if (word->str) viric@11: dump_word(word); viric@11: word = word->next; viric@11: } viric@11: } viric@11: }