General interface to dictionary search.
author viric@llimona
Sat, 01 Sep 2007 12:26:22 +0200
changeset 17 d95d9e7a2b81
parent 16 b4e251400e36
child 18 64ed4238657f
General interface to dictionary search.
Makefile
dictre.h
find.c
parse_text.c
zrustest
--- a/Makefile	Sat Sep 01 01:19:18 2007 +0200
+++ b/Makefile	Sat Sep 01 12:26:22 2007 +0200
@@ -1,8 +1,15 @@
 CFLAGS=-g
 CC=gcc
+CXX=g++
+ICULIBS=-pthread -static /usr/lib/libicui18n.a \
+	/usr/lib/libicuuc.a \
+	/usr/lib/libicudata.a
+
+#ICULIBS=-licui18n -licuuc -licudata
+
 
 all: dictre idx2index trim-nou8 ia5 asciiigi-utf8-akcenton zprocess \
-	zparsetext zrustest
+	zparsetext zrustest http_dec_test
 
 idx2index: idx2index.o dict.o
 trim-nou8: trim-nou8.c
@@ -14,13 +21,16 @@
 	$(CC) -o $@ $^
 
 zprocess: zload.o dict.o zdefs.o zhash.o zrus.o
-	$(CC) -o $@ $^ -licui18n -licuuc -licudata
+	$(CXX) -o $@ $^ $(ICULIBS)
 
-zparsetext: parse_text.o zrus.o find.o dict.o
-	$(CC) -o $@ $^ -licui18n -licuuc -licudata
+zparsetext: parse_text.o zrus.o find.o dict.o http_dec.o
+	$(CXX) -o $@ $^ $(ICULIBS)
 
 zrustest: zrustest.o zrus.o
-	$(CC) -o $@ $^ -licui18n -licuuc -licudata
+	$(CXX) -o $@ $^ $(ICULIBS)
+
+http_dec_test: http_dec_test.o http_dec.o
+	$(CXX) -o $@ $^ $(ICULIBS)
 
 dict.c: dictre.h
 write.c: dictre.h
@@ -37,3 +47,5 @@
 zrus.c: dictre.h
 find.c: dictre.h
 zrustest.c: dictre.h
+http_dec.c: dictre.h
+http_dec_test.c: dictre.h
--- a/dictre.h	Sat Sep 01 01:19:18 2007 +0200
+++ b/dictre.h	Sat Sep 01 12:26:22 2007 +0200
@@ -1,6 +1,8 @@
 enum {
     MAXWORD=200,
-    MAXDEF=10000
+    MAXDEF=10000,
+    END_OF_URL=-2,
+    HTTP_DECODE_ERROR=-3
 };
 
 enum Case
@@ -28,6 +30,14 @@
     int length;
 };
 
+struct Dict
+{
+    unsigned char *index;
+    int indexfd;
+    int indexsize;
+    FILE *defs;
+};
+
 /* write.c */
 void write_dictionary(const char *name);
 
@@ -81,4 +91,9 @@
 void remove_jo(char *str);
 
 /* find.c */
-void find_def(const char *word, char * def);
+void init_dictionary(struct Dict *d, const char *base);
+void end_dictionary(struct Dict *d);
+void find_def(struct Dict *d, const char *word, char * def);
+
+/* http_dec.c */
+int http_getc(FILE *f);
--- a/find.c	Sat Sep 01 01:19:18 2007 +0200
+++ b/find.c	Sat Sep 01 12:26:22 2007 +0200
@@ -5,12 +5,8 @@
 #include <fcntl.h>
 #include "dictre.h"
 
-static unsigned char *index;
-static int indexfd;
-static int indexsize;
-static FILE *defs;
-const static char indexname[] = "akcentiga.index";
-const static char dictname[] = "akcentiga.dict";
+const static char indexext[] = ".index";
+const static char dictext[] = ".dict";
 
 int get_filesize(const char *fname)
 {
@@ -27,56 +23,69 @@
     return st.st_size;
 }
 
-void init_dictionary()
+void init_dictionary(struct Dict *d, const char *base)
 {
-    indexsize = get_filesize(indexname);
-    indexfd = open(indexname, O_RDONLY);
-    if (indexfd == -1)
+    char *filename;
+
+    filename = (char *) malloc(strlen(base) + 10);
+
+    /* Prepare .index filename and open it*/
+    strcpy(filename, base);
+    strcat(filename, indexext);
+
+    d->indexsize = get_filesize(filename);
+    d->indexfd = open(filename, O_RDONLY);
+    if (d->indexfd == -1)
     {
-        fprintf(stderr, "Problem opening the file %s\n", indexname);
+        fprintf(stderr, "Problem opening the file %s\n", filename);
         perror("Error:");
         exit(-1);
     }
-    index = (unsigned char *) mmap(0, indexsize, PROT_READ, MAP_SHARED,
-            indexfd, 0);
+    d->index = (unsigned char *) mmap(0, d->indexsize, PROT_READ, MAP_SHARED,
+            d->indexfd, 0);
 
-    defs = fopen(dictname, "r");
-    if (defs == 0)
+    /* Prepare .dict filename and open it*/
+    strcpy(filename, base);
+    strcat(filename, dictext);
+    d->defs = fopen(filename, "r");
+    if (d->defs == 0)
     {
-        fprintf(stderr, "Problem opening the file %s\n", dictname);
+        fprintf(stderr, "Problem opening the file %s\n", filename);
         perror("Error:");
         exit(-1);
     }
-}
 
-void end_dictionary()
-{
-    munmap(index, indexsize);
-    close(indexfd);
-    fclose(defs);
+    free(filename);
 }
 
-static void fill_def(int offset, int length, char * def)
+void end_dictionary(struct Dict *d)
 {
-    fseek(defs, offset, SEEK_SET);
-    fread(def, 1, length, defs);
+    munmap(d->index, d->indexsize);
+    close(d->indexfd);
+    fclose(d->defs);
 }
 
-static int pointer_at_end(unsigned char *ptr)
+static void fill_def(struct Dict *d, int offset, int length, char * def)
 {
-    if (ptr >= (index + indexsize))
+    fseek(d->defs, offset, SEEK_SET);
+    fread(def, 1, length, d->defs);
+}
+
+static int pointer_at_end(struct Dict *d, unsigned char *ptr)
+{
+    if (ptr >= (d->index + d->indexsize))
         return 1;
     return 0;
 }
 
-static char * skip_until_newline(char *from)
+static char * skip_until_newline(struct Dict *d, char *from)
 {
-    if (pointer_at_end(from))
+    if (pointer_at_end(d, from))
         return 0;
     while(*from != '\n' && *from != 0)
     {
         ++from;
-        if(pointer_at_end(from))
+        if(pointer_at_end(d, from))
             return 0;
     }
     return from;
@@ -109,19 +118,19 @@
     return -1;
 }
 
-static char * bin_search(const char *word)
+static char * bin_search(struct Dict *d, const char *word)
 {
     int step, pivot;
 
-    pivot = indexsize / 2;
-    step = indexsize / 2;
+    pivot = d->indexsize / 2;
+    step = d->indexsize / 2;
 
     do
     {
         char *test;
         int comparision;
-        test = index + pivot;
-        test = skip_until_newline(test);
+        test = d->index + pivot;
+        test = skip_until_newline(d, test);
         if (test == 0)
             return 0;
         test += 1; /* skip exactly the new line */
@@ -157,12 +166,12 @@
     return val;
 }
 
-void find_def(const char *word, char * def)
+void find_def(struct Dict *d, const char *word, char * def)
 {
     int offset, len;
     char *pos;
 
-    pos = bin_search(word); /* pos points to the offset already. */
+    pos = bin_search(d, word); /* pos points to the offset already. */
     if (pos == 0)
     {
         def[0] = 0;
@@ -171,5 +180,5 @@
     }
     offset = my_get_int(&pos); /* increments pos */
     len = my_get_int(&pos); /* increments pos */
-    fill_def(offset, len, def);
+    fill_def(d, offset, len, def);
 }
--- a/parse_text.c	Sat Sep 01 01:19:18 2007 +0200
+++ b/parse_text.c	Sat Sep 01 12:26:22 2007 +0200
@@ -1,6 +1,11 @@
 #include <stdio.h>
+#include <stdlib.h>
 #include "dictre.h"
 
+static int is_http = 0;
+static int content_length = -1;
+static struct Dict dakcentiga;
+
 static void give_accent_to_word(const char *word)
 {
     char def[MAXDEF];
@@ -15,7 +20,7 @@
     get_lowcase_str(low, word);
 
     /* Find the lowercase version */
-    find_def(low, def);
+    find_def(&dakcentiga, low, def);
     if (def[0] != 0) /* found */
     {
         /* Print the word UNTIL a space.
@@ -38,6 +43,14 @@
     printf("%s", word);
 }
 
+static int my_fgetc(FILE *f)
+{
+    if (is_http)
+        return http_getc(f);
+    else
+        return fgetc(f);
+}
+
 static void process_text(FILE *in, int pos, int length)
 {
     unsigned char tmp[MAXWORD];
@@ -48,8 +61,8 @@
         /* Check pos only if length >= 0 */
         if (length >= 0 && pos >= length)
             break;
-        c = fgetc(in);
-        if (c == EOF)
+        c = my_fgetc(in);
+        if (c == EOF || c == END_OF_URL)
             break;
         if (is_ASCII(c))
         {
@@ -68,11 +81,51 @@
 
         pos += 1;
     } while(1);
+
+    /* End word */
+    if (wordpos != 0)
+    {
+        tmp[wordpos] = 0;
+        give_accent_to_word(tmp);
+        wordpos = 0;
+    }
+}
+
+static print_http_header()
+{
+    printf("Content-Type:text/html;charset=utf-8\r\n\r\n");
+}
+
+int eat_form_ok()
+{
+    const char mask[] = "teksto=";
+    char tmp[sizeof(mask)];
+    fread(tmp, 1, sizeof(mask)-1, stdin);
+    tmp[sizeof(mask)-1] = 0;
+    if (strcmp(mask, tmp) == 0)
+        return 1;
+    return 0;
 }
 
 int main()
 {
-    init_dictionary();
+    char *c;
+
+    init_dictionary(&dakcentiga, "akcentiga");
+
+    if (c = getenv("CONTENT_LENGTH"))
+    {
+        content_length = atoi(c);
+        is_http = 1;
+    }
+    if (is_http)
+    {
+        print_http_header();
+        if (!eat_form_ok())
+            return -1;
+    }
     process_text(stdin, 0, -1);
-    end_dictionary();
+    end_dictionary(&dakcentiga);
+
+    return 0;
 }
Binary file zrustest has changed