General interface to dictionary search.
--- a/Makefile Sat Sep 01 01:19:18 2007 +0200
+++ b/Makefile Sat Sep 01 12:26:22 2007 +0200
@@ -1,8 +1,15 @@
CFLAGS=-g
CC=gcc
+CXX=g++
+ICULIBS=-pthread -static /usr/lib/libicui18n.a \
+ /usr/lib/libicuuc.a \
+ /usr/lib/libicudata.a
+
+#ICULIBS=-licui18n -licuuc -licudata
+
all: dictre idx2index trim-nou8 ia5 asciiigi-utf8-akcenton zprocess \
- zparsetext zrustest
+ zparsetext zrustest http_dec_test
idx2index: idx2index.o dict.o
trim-nou8: trim-nou8.c
@@ -14,13 +21,16 @@
$(CC) -o $@ $^
zprocess: zload.o dict.o zdefs.o zhash.o zrus.o
- $(CC) -o $@ $^ -licui18n -licuuc -licudata
+ $(CXX) -o $@ $^ $(ICULIBS)
-zparsetext: parse_text.o zrus.o find.o dict.o
- $(CC) -o $@ $^ -licui18n -licuuc -licudata
+zparsetext: parse_text.o zrus.o find.o dict.o http_dec.o
+ $(CXX) -o $@ $^ $(ICULIBS)
zrustest: zrustest.o zrus.o
- $(CC) -o $@ $^ -licui18n -licuuc -licudata
+ $(CXX) -o $@ $^ $(ICULIBS)
+
+http_dec_test: http_dec_test.o http_dec.o
+ $(CXX) -o $@ $^ $(ICULIBS)
dict.c: dictre.h
write.c: dictre.h
@@ -37,3 +47,5 @@
zrus.c: dictre.h
find.c: dictre.h
zrustest.c: dictre.h
+http_dec.c: dictre.h
+http_dec_test.c: dictre.h
--- a/dictre.h Sat Sep 01 01:19:18 2007 +0200
+++ b/dictre.h Sat Sep 01 12:26:22 2007 +0200
@@ -1,6 +1,8 @@
enum {
MAXWORD=200,
- MAXDEF=10000
+ MAXDEF=10000,
+ END_OF_URL=-2,
+ HTTP_DECODE_ERROR=-3
};
enum Case
@@ -28,6 +30,14 @@
int length;
};
+struct Dict
+{
+ unsigned char *index;
+ int indexfd;
+ int indexsize;
+ FILE *defs;
+};
+
/* write.c */
void write_dictionary(const char *name);
@@ -81,4 +91,9 @@
void remove_jo(char *str);
/* find.c */
-void find_def(const char *word, char * def);
+void init_dictionary(struct Dict *d, const char *base);
+void end_dictionary(struct Dict *d);
+void find_def(struct Dict *d, const char *word, char * def);
+
+/* http_dec.c */
+int http_getc(FILE *f);
--- a/find.c Sat Sep 01 01:19:18 2007 +0200
+++ b/find.c Sat Sep 01 12:26:22 2007 +0200
@@ -5,12 +5,8 @@
#include <fcntl.h>
#include "dictre.h"
-static unsigned char *index;
-static int indexfd;
-static int indexsize;
-static FILE *defs;
-const static char indexname[] = "akcentiga.index";
-const static char dictname[] = "akcentiga.dict";
+const static char indexext[] = ".index";
+const static char dictext[] = ".dict";
int get_filesize(const char *fname)
{
@@ -27,56 +23,69 @@
return st.st_size;
}
-void init_dictionary()
+void init_dictionary(struct Dict *d, const char *base)
{
- indexsize = get_filesize(indexname);
- indexfd = open(indexname, O_RDONLY);
- if (indexfd == -1)
+ char *filename;
+
+ filename = (char *) malloc(strlen(base) + 10);
+
+ /* Prepare the .index filename and open it. */
+ strcpy(filename, base);
+ strcat(filename, indexext);
+
+ d->indexsize = get_filesize(filename);
+ d->indexfd = open(filename, O_RDONLY);
+ if (d->indexfd == -1)
{
- fprintf(stderr, "Problem opening the file %s\n", indexname);
+ fprintf(stderr, "Problem opening the file %s\n", filename);
perror("Error:");
exit(-1);
}
- index = (unsigned char *) mmap(0, indexsize, PROT_READ, MAP_SHARED,
- indexfd, 0);
+ d->index = (unsigned char *) mmap(0, d->indexsize, PROT_READ, MAP_SHARED,
+ d->indexfd, 0);
- defs = fopen(dictname, "r");
- if (defs == 0)
+ /* Prepare the .dict filename and open it. */
+ strcpy(filename, base);
+ strcat(filename, dictext);
+ d->defs = fopen(filename, "r");
+ if (d->defs == 0)
{
- fprintf(stderr, "Problem opening the file %s\n", dictname);
+ fprintf(stderr, "Problem opening the file %s\n", filename);
perror("Error:");
exit(-1);
}
-}
-void end_dictionary()
-{
- munmap(index, indexsize);
- close(indexfd);
- fclose(defs);
+ free(filename);
}
-static void fill_def(int offset, int length, char * def)
+void end_dictionary(struct Dict *d)
{
- fseek(defs, offset, SEEK_SET);
- fread(def, 1, length, defs);
+ munmap(d->index, d->indexsize);
+ close(d->indexfd);
+ fclose(d->defs);
}
-static int pointer_at_end(unsigned char *ptr)
+static void fill_def(struct Dict *d, int offset, int length, char * def)
{
- if (ptr >= (index + indexsize))
+ fseek(d->defs, offset, SEEK_SET);
+ fread(def, 1, length, d->defs);
+}
+
+static int pointer_at_end(struct Dict *d, unsigned char *ptr)
+{
+ if (ptr >= (d->index + d->indexsize))
return 1;
return 0;
}
-static char * skip_until_newline(char *from)
+static char * skip_until_newline(struct Dict *d, char *from)
{
- if (pointer_at_end(from))
+ if (pointer_at_end(d, from))
return 0;
while(*from != '\n' && *from != 0)
{
++from;
- if(pointer_at_end(from))
+ if(pointer_at_end(d, from))
return 0;
}
return from;
@@ -109,19 +118,19 @@
return -1;
}
-static char * bin_search(const char *word)
+static char * bin_search(struct Dict *d, const char *word)
{
int step, pivot;
- pivot = indexsize / 2;
- step = indexsize / 2;
+ pivot = d->indexsize / 2;
+ step = d->indexsize / 2;
do
{
char *test;
int comparision;
- test = index + pivot;
- test = skip_until_newline(test);
+ test = d->index + pivot;
+ test = skip_until_newline(d, test);
if (test == 0)
return 0;
test += 1; /* skip exactly the new line */
@@ -157,12 +166,12 @@
return val;
}
-void find_def(const char *word, char * def)
+void find_def(struct Dict *d, const char *word, char * def)
{
int offset, len;
char *pos;
- pos = bin_search(word); /* pos points to the offset already. */
+ pos = bin_search(d, word); /* pos points to the offset already. */
if (pos == 0)
{
def[0] = 0;
@@ -171,5 +180,5 @@
}
offset = my_get_int(&pos); /* increments pos */
len = my_get_int(&pos); /* increments pos */
- fill_def(offset, len, def);
+ fill_def(d, offset, len, def);
}
--- a/parse_text.c Sat Sep 01 01:19:18 2007 +0200
+++ b/parse_text.c Sat Sep 01 12:26:22 2007 +0200
@@ -1,6 +1,11 @@
#include <stdio.h>
+#include <stdlib.h>
#include "dictre.h"
+static int is_http = 0;
+static int content_length = -1;
+static struct Dict dakcentiga;
+
static void give_accent_to_word(const char *word)
{
char def[MAXDEF];
@@ -15,7 +20,7 @@
get_lowcase_str(low, word);
/* Find the lowercase version */
- find_def(low, def);
+ find_def(&dakcentiga, low, def);
if (def[0] != 0) /* found */
{
/* Print the word UNTIL a space.
@@ -38,6 +43,14 @@
printf("%s", word);
}
+static int my_fgetc(FILE *f)
+{
+ if (is_http)
+ return http_getc(f);
+ else
+ return fgetc(f);
+}
+
static void process_text(FILE *in, int pos, int length)
{
unsigned char tmp[MAXWORD];
@@ -48,8 +61,8 @@
/* Check pos only if length >= 0 */
if (length >= 0 && pos >= length)
break;
- c = fgetc(in);
- if (c == EOF)
+ c = my_fgetc(in);
+ if (c == EOF || c == END_OF_URL)
break;
if (is_ASCII(c))
{
@@ -68,11 +81,51 @@
pos += 1;
} while(1);
+
+ /* Input ended: flush the word still being accumulated, if any. */
+ if (wordpos != 0)
+ {
+ tmp[wordpos] = 0;
+ give_accent_to_word(tmp);
+ wordpos = 0;
+ }
+}
+
+static print_http_header()
+{
+ printf("Content-Type:text/html;charset=utf-8\r\n\r\n");
+}
+
+int eat_form_ok()
+{
+ const char mask[] = "teksto=";
+ char tmp[sizeof(mask)];
+ fread(tmp, 1, sizeof(mask)-1, stdin);
+ tmp[sizeof(mask)-1] = 0;
+ if (strcmp(mask, tmp) == 0)
+ return 1;
+ return 0;
}
int main()
{
- init_dictionary();
+ char *c;
+
+ init_dictionary(&dakcentiga, "akcentiga");
+
+ if (c = getenv("CONTENT_LENGTH"))
+ {
+ content_length = atoi(c);
+ is_http = 1;
+ }
+ if (is_http)
+ {
+ print_http_header();
+ if (!eat_form_ok())
+ return -1;
+ }
process_text(stdin, 0, -1);
- end_dictionary();
+ end_dictionary(&dakcentiga);
+
+ return 0;
}
Binary file zrustest has changed