--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/zhash.c Tue Aug 28 01:03:24 2007 +0200
@@ -0,0 +1,183 @@
+#include <stdio.h>
+#include <assert.h>
+#include "dictre.h"
+
+/* Number of buckets in the wordlist hash table. */
+enum {
+    MAXHASH = 0x10000
+};
+
+/* Node of a singly linked list of strings. */
+struct BareWord {
+    struct BareWord *next;  /* following node; 0 ends the list */
+    char *str;              /* string stored in this node (strdup() or mix_accents() result in this file) */
+};
+
+/* One hash-table entry, looked up by str and chained per bucket through next. */
+struct WordEntry {
+    struct WordEntry *next;     /* next entry of the same bucket, or 0 */
+    char *str;                  /* key: copy of the remove_accent() output; 0 in placeholder entries */
+    struct BareWord *accented;  /* accented spelling, maintained by set_accented() */
+    struct BareWord *unflexed;  /* list of strings, maintained by add_to_unflexed() */
+};
+
+static struct WordEntry * wordlist[MAXHASH]; /* bucket heads, indexed by the hash_func() value */
+
+/* Allocate one WordEntry (fields left uninitialised); asserts if malloc fails. */
+struct WordEntry * new_WordEntry()
+{
+    struct WordEntry *entry = (struct WordEntry *) malloc(sizeof *entry);
+    assert(entry != 0);
+    return entry;
+}
+
+/* Allocate one BareWord (fields left uninitialised); asserts if malloc fails. */
+struct BareWord * new_BareWord()
+{
+    struct BareWord *node = (struct BareWord *) malloc(sizeof *node);
+    assert(node != 0);
+    return node;
+}
+
+/* Set every bucket of wordlist to a freshly allocated placeholder entry whose
+ * pointers are all 0; str == 0 marks an entry that carries no word. */
+void init_wordlist()
+{
+    struct WordEntry **bucket;
+    for (bucket = wordlist; bucket != wordlist + MAXHASH; ++bucket) {
+        struct WordEntry *blank = new_WordEntry();
+        assert(blank != 0);
+        blank->next = 0;
+        blank->str = 0;
+        blank->accented = 0;
+        blank->unflexed = 0;
+        *bucket = blank;
+    }
+}
+
+/* Bucket index of a NUL-terminated word: (str[1] << 8) + str[3] -- presumably
+ * the trailing bytes of its first two 2-byte UTF-8 characters.  A byte at or
+ * past the terminator counts as 0 and is never read, so equal words always
+ * get the same index, and the index is always below MAXHASH. */
+static unsigned int hash_func(const unsigned char *str)
+{
+    unsigned int high = 0;  /* unsigned: a byte >= 0x80 must not go negative */
+    unsigned int low = 0;
+    if (str[0] != 0 && str[1] != 0) {
+        high = str[1];
+        if (str[2] != 0)
+            low = str[3];
+    }
+    return (high << 8) + low;
+}
+
+/* Search bucket `hash` of wordlist for `word` (a word without accent).
+ * Returns the entry whose str equals `word`, or 0 if there is none;
+ * entries with str == 0 carry no word and never match. */
+struct WordEntry * does_word_exist(int hash, const char *word)
+{
+    struct WordEntry *entry = wordlist[hash];
+    while (entry != 0) {
+        if (entry->str != 0 && strcmp(word, entry->str) == 0)
+            return entry;
+        entry = entry->next;
+    }
+    return 0;
+}
+
+/*
+ * Record `word` in the unflexed list of `pos`.
+ *
+ * The list is searched first, so a string that is already present is not
+ * stored twice.  Otherwise a strdup() copy of `word` is appended in a new
+ * BareWord node, which keeps the list in insertion order.
+ *
+ * pos  - entry whose `unflexed` list is extended; must not be 0
+ * word - NUL-terminated string to record; must not be 0
+ */
+void add_to_unflexed(struct WordEntry *pos, const char *word)
+{
+    struct BareWord **link;
+
+    /* Walk the links rather than the nodes: when the loop ends, `link` is the
+     * terminating 0 pointer (pos->unflexed itself if the list is empty). */
+    for (link = &pos->unflexed; *link != 0; link = &(*link)->next)
+    {
+        if (strcmp(word, (*link)->str) == 0)
+            return; /* already recorded */
+    }
+
+    /* Not found: hang a new node on the end of the list */
+    *link = new_BareWord();
+    (*link)->str = strdup(word);
+    assert((*link)->str != 0);
+    (*link)->next = 0;
+}
+
+/* Record `word` as accented spelling of `pos`: strdup() copy first, mix_accents() afterwards. */
+void set_accented(struct WordEntry *pos, const char *word)
+{
+    struct BareWord *slot = pos->accented;
+    if (slot != 0) {    /* mix_accents() will free its first parameter */
+        slot->str = mix_accents(slot->str, word);
+        return;
+    }
+    slot = pos->accented = new_BareWord();
+    slot->str = strdup(word);
+    slot->next = 0;
+}
+
+/* Insert `word` into wordlist under the key remove_accent() derives from it.
+ * Existing key: only set_accented() is applied (`unflexed` is not recorded in
+ * that case yet).  New key: an entry carrying `unflexed` and `word` is created
+ * and pushed on the head of its bucket. */
+void insert_word(const char *word, const char *unflexed)
+{
+    unsigned char word_no_accent[MAXWORD];
+    const char *key = (const char *) word_no_accent;  /* same bytes, typed for str*() */
+    struct WordEntry *entry;
+    unsigned int hash_num;
+
+    remove_accent(word_no_accent, word);
+    hash_num = hash_func(word_no_accent);
+
+    entry = does_word_exist(hash_num, key);
+    if (entry)
+    {
+        set_accented(entry, word);
+        /* TODO process word_no_accent */
+        return;
+    }
+
+    /* Does not exist: new word */
+    entry = new_WordEntry();
+    entry->str = strdup(key);
+    entry->unflexed = 0;
+    add_to_unflexed(entry, unflexed);
+    entry->accented = 0;
+    set_accented(entry, word);
+    /* Put it on the head of the hash list */
+    entry->next = wordlist[hash_num];
+    wordlist[hash_num] = entry;
+}
+
+/* Print one entry to stdout as "<str>:<accented str>" followed by a newline. */
+static void dump_word(struct WordEntry *word)
+{
+    const struct BareWord *accented = word->accented;
+
+    printf("%s:%s\n", word->str, accented->str);
+}
+
+/* Walk every bucket of wordlist and print each entry that carries a string;
+ * entries with str == 0 are skipped. */
+void dump_wordlist()
+{
+    int bucket;
+    struct WordEntry *entry;
+
+    for (bucket = 0; bucket < MAXHASH; ++bucket)
+        for (entry = wordlist[bucket]; entry != 0; entry = entry->next)
+            if (entry->str)
+                dump_word(entry);
+}