# HG changeset patch # User viric@llimona # Date 1188642382 -7200 # Node ID d95d9e7a2b81a574a9840698958efca1d8a20b52 # Parent b4e251400e36fc0c7c05f1331c55d6ebbda707c6 General interface to dictionary search. diff -r b4e251400e36 -r d95d9e7a2b81 Makefile --- a/Makefile Sat Sep 01 01:19:18 2007 +0200 +++ b/Makefile Sat Sep 01 12:26:22 2007 +0200 @@ -1,8 +1,15 @@ CFLAGS=-g CC=gcc +CXX=g++ +ICULIBS=-pthread -static /usr/lib/libicui18n.a \ + /usr/lib/libicuuc.a \ + /usr/lib/libicudata.a + +#ICULIBS=-licui18n -licuuc -licudata + all: dictre idx2index trim-nou8 ia5 asciiigi-utf8-akcenton zprocess \ - zparsetext zrustest + zparsetext zrustest http_dec_test idx2index: idx2index.o dict.o trim-nou8: trim-nou8.c @@ -14,13 +21,16 @@ $(CC) -o $@ $^ zprocess: zload.o dict.o zdefs.o zhash.o zrus.o - $(CC) -o $@ $^ -licui18n -licuuc -licudata + $(CXX) -o $@ $^ $(ICULIBS) -zparsetext: parse_text.o zrus.o find.o dict.o - $(CC) -o $@ $^ -licui18n -licuuc -licudata +zparsetext: parse_text.o zrus.o find.o dict.o http_dec.o + $(CXX) -o $@ $^ $(ICULIBS) zrustest: zrustest.o zrus.o - $(CC) -o $@ $^ -licui18n -licuuc -licudata + $(CXX) -o $@ $^ $(ICULIBS) + +http_dec_test: http_dec_test.o http_dec.o + $(CXX) -o $@ $^ $(ICULIBS) dict.c: dictre.h write.c: dictre.h @@ -37,3 +47,5 @@ zrus.c: dictre.h find.c: dictre.h zrustest.c: dictre.h +http_dec.c: dictre.h +http_dec_test.c: dictre.h diff -r b4e251400e36 -r d95d9e7a2b81 dictre.h --- a/dictre.h Sat Sep 01 01:19:18 2007 +0200 +++ b/dictre.h Sat Sep 01 12:26:22 2007 +0200 @@ -1,6 +1,8 @@ enum { MAXWORD=200, - MAXDEF=10000 + MAXDEF=10000, + END_OF_URL=-2, + HTTP_DECODE_ERROR=-3 }; enum Case @@ -28,6 +30,14 @@ int length; }; +struct Dict +{ + unsigned char *index; + int indexfd; + int indexsize; + FILE *defs; +}; + /* write.c */ void write_dictionary(const char *name); @@ -81,4 +91,9 @@ void remove_jo(char *str); /* find.c */ -void find_def(const char *word, char * def); +void init_dictionary(struct Dict *d, const char *base); +void end_dictionary(struct Dict *d); +void find_def(struct Dict *d, const char *word, char * def); + +/* http_dec.c */ +int http_getc(FILE *f); diff -r b4e251400e36 -r d95d9e7a2b81 find.c --- a/find.c Sat Sep 01 01:19:18 2007 +0200 +++ b/find.c Sat Sep 01 12:26:22 2007 +0200 @@ -5,12 +5,8 @@ #include #include "dictre.h" -static unsigned char *index; -static int indexfd; -static int indexsize; -static FILE *defs; -const static char indexname[] = "akcentiga.index"; -const static char dictname[] = "akcentiga.dict"; +const static char indexext[] = ".index"; +const static char dictext[] = ".dict"; int get_filesize(const char *fname) { @@ -27,56 +23,69 @@ return st.st_size; } -void init_dictionary() +void init_dictionary(struct Dict *d, const char *base) { - indexsize = get_filesize(indexname); - indexfd = open(indexname, O_RDONLY); - if (indexfd == -1) + char *filename; + + filename = (char *) malloc(strlen(base) + 10); + + /* Prepare .index filename and open it*/ + strcpy(filename, base); + strcat(filename, indexext); + + d->indexsize = get_filesize(filename); + d->indexfd = open(filename, O_RDONLY); + if (d->indexfd == -1) { - fprintf(stderr, "Problem opening the file %s\n", indexname); + fprintf(stderr, "Problem opening the file %s\n", filename); perror("Error:"); exit(-1); } - index = (unsigned char *) mmap(0, indexsize, PROT_READ, MAP_SHARED, - indexfd, 0); + d->index = (unsigned char *) mmap(0, d->indexsize, PROT_READ, MAP_SHARED, + d->indexfd, 0); - defs = fopen(dictname, "r"); - if (defs == 0) + /* Prepare .dict filename and open it*/ + strcpy(filename, base); + strcat(filename, dictext); + d->defs = fopen(filename, "r"); + if (d->defs == 0) { - fprintf(stderr, "Problem opening the file %s\n", dictname); + fprintf(stderr, "Problem opening the file %s\n", filename); perror("Error:"); exit(-1); } -} -void end_dictionary() -{ - munmap(index, indexsize); - close(indexfd); - fclose(defs); + free(filename); } -static void fill_def(int offset, int length, char * def) +void end_dictionary(struct Dict *d) { - fseek(defs, offset, SEEK_SET); - fread(def, 1, length, defs); + munmap(d->index, d->indexsize); + close(d->indexfd); + fclose(d->defs); } -static int pointer_at_end(unsigned char *ptr) +static void fill_def(struct Dict *d, int offset, int length, char * def) { - if (ptr >= (index + indexsize)) + fseek(d->defs, offset, SEEK_SET); + fread(def, 1, length, d->defs); +} + +static int pointer_at_end(struct Dict *d, unsigned char *ptr) +{ + if (ptr >= (d->index + d->indexsize)) return 1; return 0; } -static char * skip_until_newline(char *from) +static char * skip_until_newline(struct Dict *d, char *from) { - if (pointer_at_end(from)) + if (pointer_at_end(d, from)) return 0; while(*from != '\n' && *from != 0) { ++from; - if(pointer_at_end(from)) + if(pointer_at_end(d, from)) return 0; } return from; @@ -109,19 +118,19 @@ return -1; } -static char * bin_search(const char *word) +static char * bin_search(struct Dict *d, const char *word) { int step, pivot; - pivot = indexsize / 2; - step = indexsize / 2; + pivot = d->indexsize / 2; + step = d->indexsize / 2; do { char *test; int comparision; - test = index + pivot; - test = skip_until_newline(test); + test = d->index + pivot; + test = skip_until_newline(d, test); if (test == 0) return 0; test += 1; /* skip exactly the new line */ @@ -157,12 +166,12 @@ return val; } -void find_def(const char *word, char * def) +void find_def(struct Dict *d, const char *word, char * def) { int offset, len; char *pos; - pos = bin_search(word); /* pos points to the offset already. */ + pos = bin_search(d, word); /* pos points to the offset already. */ if (pos == 0) { def[0] = 0; @@ -171,5 +180,5 @@ } offset = my_get_int(&pos); /* increments pos */ len = my_get_int(&pos); /* increments pos */ - fill_def(offset, len, def); + fill_def(d, offset, len, def); } diff -r b4e251400e36 -r d95d9e7a2b81 parse_text.c --- a/parse_text.c Sat Sep 01 01:19:18 2007 +0200 +++ b/parse_text.c Sat Sep 01 12:26:22 2007 +0200 @@ -1,6 +1,11 @@ #include +#include #include "dictre.h" +static int is_http = 0; +static int content_length = -1; +static struct Dict dakcentiga; + static void give_accent_to_word(const char *word) { char def[MAXDEF]; @@ -15,7 +20,7 @@ get_lowcase_str(low, word); /* Find the lowercase version */ - find_def(low, def); + find_def(&dakcentiga, low, def); if (def[0] != 0) /* found */ { /* Print the word UNTIL a space. @@ -38,6 +43,14 @@ printf("%s", word); } +static int my_fgetc(FILE *f) +{ + if (is_http) + return http_getc(f); + else + return fgetc(f); +} + static void process_text(FILE *in, int pos, int length) { unsigned char tmp[MAXWORD]; @@ -48,8 +61,8 @@ /* Check pos only if length >= 0 */ if (length >= 0 && pos >= length) break; - c = fgetc(in); - if (c == EOF) + c = my_fgetc(in); + if (c == EOF || c == END_OF_URL) break; if (is_ASCII(c)) { @@ -68,11 +81,51 @@ pos += 1; } while(1); + + /* End word */ + if (wordpos != 0) + { + tmp[wordpos] = 0; + give_accent_to_word(tmp); + wordpos = 0; + } +} + +static print_http_header() +{ + printf("Content-Type:text/html;charset=utf-8\r\n\r\n"); +} + +int eat_form_ok() +{ + const char mask[] = "teksto="; + char tmp[sizeof(mask)]; + fread(tmp, 1, sizeof(mask)-1, stdin); + tmp[sizeof(mask)-1] = 0; + if (strcmp(mask, tmp) == 0) + return 1; + return 0; } int main() { - init_dictionary(); + char *c; + + init_dictionary(&dakcentiga, "akcentiga"); + + if (c = getenv("CONTENT_LENGTH")) + { + content_length = atoi(c); + is_http = 1; + } + if (is_http) + { + print_http_header(); + if (!eat_form_ok()) + return -1; + } process_text(stdin, 0, -1); - end_dictionary(); + end_dictionary(&dakcentiga); + + return 0; } diff -r b4e251400e36 -r d95d9e7a2b81 zrustest Binary file zrustest has changed