# HG changeset patch
# User viric@llimona
# Date 1186348002 -7200
# Node ID 7f37716d4f1e0d2fdae3d16948b4a6ebb68585a5
Initial. Awful makefile.

diff -r 000000000000 -r 7f37716d4f1e Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,5 @@
+CFLAGS=-g
+CC=gcc
+
+dictre: load.o dict.o write.o sort.o filter.o main.o
+	$(CC) -o $@ $^
diff -r 000000000000 -r 7f37716d4f1e dict.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dict.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,138 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "dictre.h"
+
+enum
+{
+    FIELD_MAX = 500 /* Size of the buffer holding one index field */
+};
+
+/* Read one field of the index file into 'buffer', which has room for
+ * 'size' bytes. A field ends at a tab, at a newline or at the end of the
+ * file; the delimiter is consumed but not stored. Characters that do not
+ * fit are read and dropped, so the buffer cannot overflow, and the result
+ * is always NUL-terminated. */
+static void get_string(FILE *index, char *buffer, size_t size)
+{
+    size_t count = 0;
+    int c;
+
+    while ((c = fgetc(index)) != EOF && c != '\t' && c != '\n')
+    {
+        if (count + 1 < size)
+            buffer[count++] = (char) c;
+    }
+
+    buffer[count] = '\0';
+}
+
+/* Read the next field of 'index' as a word.
+ * Returns a newly allocated copy owned by the caller, or NULL when the
+ * field is empty. Exits if there is no memory left. */
+char * get_word(FILE *index)
+{
+    char buffer[FIELD_MAX];
+    size_t len;
+    char *out;
+
+    get_string(index, buffer, sizeof buffer);
+
+    len = strlen(buffer);
+    if (len == 0)
+        return NULL;
+
+    out = malloc(len + 1);
+    if (out == NULL)
+    {
+        perror("malloc");
+        exit(-1);
+    }
+    memcpy(out, buffer, len + 1);
+
+    return out;
+}
+
+/* Value of one digit of the base 64 alphabet (A-Z, a-z, 0-9, '+', '/').
+ * Any other character counts as 0. */
+static int char2val(char letter)
+{
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+    const char *found;
+
+    /* strchr() would also match the terminating NUL of the alphabet */
+    if (letter == '\0')
+        return 0;
+
+    found = strchr(alphabet, letter);
+    if (found == NULL)
+        return 0;
+
+    return (int) (found - alphabet);
+}
+
+/* Decode a base 64 number, most significant digit first. */
+static int str2int(const char *str)
+{
+    /* Unsigned, so an overlong number wraps instead of overflowing */
+    unsigned int val = 0;
+
+    for (; *str != '\0'; ++str)
+        val = val * 64 + (unsigned int) char2val(*str);
+
+    return (int) val;
+}
+
+/* Read the next field of 'index' as a base 64 number.
+ * Returns -1 when the field is empty. */
+int get_int(FILE *index)
+{
+    char buffer[FIELD_MAX];
+
+    get_string(index, buffer, sizeof buffer);
+
+    if (buffer[0] == '\0')
+        return -1;
+
+    return str2int(buffer);
+}
+
+/* Read 'length' bytes found at 'offset' of the definitions file.
+ * Returns a newly allocated buffer owned by the caller; it is not
+ * NUL-terminated. Exits on any error, because a partially read definition
+ * would otherwise be written back as if it were complete. */
+char * get_def(FILE *fdefs, int offset, int length)
+{
+    char *out;
+
+    if (offset < 0 || length < 0)
+    {
+        fprintf(stderr, "Bad definition: offset=%i length=%i\n",
+                offset, length);
+        exit(-1);
+    }
+
+    if (fseek(fdefs, offset, SEEK_SET) != 0)
+    {
+        perror("fseek");
+        exit(-1);
+    }
+
+    /* malloc(0) may return NULL, so always ask for at least one byte */
+    out = malloc(length > 0 ? (size_t) length : 1);
+    if (out == NULL)
+    {
+        perror("malloc");
+        exit(-1);
+    }
+
+    if (length > 0 && fread(out, (size_t) length, 1, fdefs) != 1)
+    {
+        fprintf(stderr, "Cannot read definition: offset=%i length=%i\n",
+                offset, length);
+        exit(-1);
+    }
+
+    return out;
+}
diff -r 000000000000 -r 7f37716d4f1e dictre.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dictre.h Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,46 @@
+#ifndef DICTRE_H
+#define DICTRE_H
+
+#include <stdio.h> /* FILE */
+
+struct Words
+{
+    struct Word *first;
+    int length;
+};
+
+/* One entry of the index */
+struct Word
+{
+    char *w;
+    int def; /* Position of its definition in the table of definitions */
+};
+
+/* One definition; several words may share it */
+struct Def
+{
+    char *d; /* The text, 'length' bytes, not NUL-terminated */
+    int offset; /* Where it lies in the .dict file */
+    int length;
+};
+
+/* write.c */
+void write_dictionary(const char *name);
+
+/* dict.c */
+char * get_word(FILE *index);
+int get_int(FILE *index);
+char * get_def(FILE *fdefs, int offset, int length);
+
+/* filter.c */
+void filter_all(const char *filter_par);
+
+/* sort.c */
+int sort_words(void);
+
+/* load.c */
+void load_init(void);
+void load_dictionary(FILE *index, FILE *fdefs);
+void print_words(void);
+
+#endif /* DICTRE_H */
diff -r 000000000000 -r 7f37716d4f1e filter.c
--- 
/dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,234 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/wait.h>
+
+#include "dictre.h"
+
+extern struct Def defs[];
+extern int ndefs;
+extern int dont_touch[];
+extern int ndont_touch;
+
+enum
+{
+    CHUNK = 1000 /* The output buffer grows in steps of this many bytes */
+};
+
+/* Resize 'ptr' (NULL to allocate) to 'size' bytes and return the new
+ * address. Exits if there is no memory left. */
+static void * more_memory(void *ptr, int size)
+{
+    void *new;
+
+    new = realloc(ptr, size);
+    if (new == NULL)
+    {
+        perror("realloc");
+        exit(-1);
+    }
+    return new;
+}
+
+/* Feed the 'deflen' bytes of 'def' to the filter through 'writeto' while
+ * collecting whatever it prints through 'readfrom'. Both directions are
+ * multiplexed with select(), so that neither side can stall the other.
+ * Closes both descriptors. Returns the collected output in a newly
+ * allocated buffer owned by the caller and stores its size in '*outlen'. */
+static
+char * manage_filter(const char *def, int deflen, int writeto, int readfrom,
+        int *outlen)
+{
+    int maxfd;
+    int defptr = 0;
+    int outptr = 0;
+    int outsize = CHUNK;
+    int writing = 1; /* 'writeto' is still open */
+    char *out;
+
+    out = more_memory(NULL, outsize);
+
+    maxfd = writeto;
+    if (readfrom > maxfd)
+        maxfd = readfrom;
+
+    /* With nothing to send, the filter must still see the end of its
+     * input, or it would wait for it forever */
+    if (deflen <= 0)
+    {
+        close(writeto);
+        writing = 0;
+    }
+
+    do
+    {
+        fd_set writeset, readset;
+        FD_ZERO(&writeset);
+        FD_ZERO(&readset);
+        if (writing)
+            FD_SET(writeto, &writeset);
+        FD_SET(readfrom, &readset);
+
+        if (select(maxfd+1, &readset, &writeset, 0, 0) == -1)
+        {
+            if (errno == EINTR)
+                continue;
+            perror("select");
+            exit(-1);
+        }
+
+        if (FD_ISSET(readfrom, &readset))
+        {
+            ssize_t res;
+            res = read(readfrom, out + outptr, outsize - outptr);
+            if (res == -1)
+            {
+                perror("read");
+                exit(-1);
+            }
+            if (res == 0)
+            {
+                close(readfrom);
+                break;
+            }
+            outptr += res;
+            if (outptr == outsize)
+            {
+                outsize += CHUNK;
+                out = more_memory(out, outsize);
+            }
+        }
+
+        if (writing && FD_ISSET(writeto, &writeset))
+        {
+            ssize_t res;
+            /* NOTE: kept at one byte per call; a larger blocking write
+             * could stall while the filter waits for us to read */
+            res = write(writeto, def+defptr, 1);
+            if (res == -1)
+            {
+                perror("write");
+                exit(-1);
+            }
+            defptr += res;
+            if (defptr >= deflen)
+            {
+                close(writeto);
+                writing = 0;
+            }
+        }
+    } while(1);
+
+    if (defptr < deflen)
+    {
+        fprintf(stderr, "Error in filter! not all written.\n");
+        exit(-1);
+    }
+
+    *outlen = outptr;
+    return out;
+}
+
+/* Run 'filter_par' through "bash -c", with the 'deflen' bytes of 'def' as
+ * its standard input. Returns its standard output in a newly allocated
+ * buffer owned by the caller and stores its size in '*outlen'. Exits if
+ * the command cannot be run or does not end with status 0. */
+static char * filter(char *def, int deflen, const char *filter_par, int *outlen)
+{
+    int write_pipe[2];
+    int read_pipe[2];
+    int pid;
+    int res;
+    int status;
+    char *out;
+
+    if (pipe(write_pipe) == -1 || pipe(read_pipe) == -1)
+    {
+        perror("pipe");
+        exit(-1);
+    }
+
+    pid = fork();
+    switch(pid)
+    {
+        case 0: /* child */
+            /* stdin from write_pipe, stdout to read_pipe */
+            if (dup2(write_pipe[0], 0) == -1 ||
+                    dup2(read_pipe[1], 1) == -1)
+            {
+                perror("dup2");
+                _exit(-1);
+            }
+            close(write_pipe[0]);
+            close(write_pipe[1]);
+            close(read_pipe[1]);
+            close(read_pipe[0]);
+            /* The list must end with a null *pointer*; a plain 0 is an
+             * int and may be narrower than a pointer */
+            execlp("bash", "bash", "-c", filter_par, (char *) NULL);
+            perror("execlp");
+            /* _exit: do not flush the stdio buffers of the parent again */
+            _exit(-1);
+            break;
+        case -1:
+            perror("fork");
+            exit(-1);
+            break;
+        default: /* parent */
+            close(write_pipe[0]);
+            close(read_pipe[1]);
+            break;
+    }
+
+    /* parent */
+    out = manage_filter(def, deflen, write_pipe[1], read_pipe[0], outlen);
+
+    res = waitpid(pid, &status, 0);
+    if (res != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0)
+    {
+        fprintf(stderr, "Error filtering: pid=%i status=%i\n",
+                pid, WEXITSTATUS(status));
+        exit(-1);
+    }
+
+    return out;
+}
+
+/* Tell whether definition number 'n' must be left unfiltered. */
+static int in_dont_touch(int n)
+{
+    int i;
+    for(i =0; i < ndont_touch; ++i)
+    {
+        if (n == dont_touch[i])
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Replace every definition, except those marked as not to be touched, by
+ * the output of the shell command 'filter_par' fed with that definition. */
+void filter_all(const char *filter_par)
+{
+    int i;
+
+    for(i=0; i < ndefs; ++i)
+    {
+        char *newdef;
+        int newdeflen;
+        if (!in_dont_touch(i))
+        {
+            newdef = filter(defs[i].d, defs[i].length,
+                    filter_par, &newdeflen);
+            /* The old text came from malloc() in get_def() */
+            free(defs[i].d);
+            defs[i].length = newdeflen;
+            defs[i].d = newdef;
+        }
+    }
+}
diff -r 000000000000 -r 7f37716d4f1e idx2index.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/idx2index.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,104 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <arpa/inet.h>
+
+/*
+ * idx2index - Part of the flow to convert a StarDict index to a dictd index.
+ * Author: Lluís Batlle
+ * In order to convert a StarDict idx file to a dictd index file, pass:
+ * ./idx2index < file.idx | LC_ALL=POSIX sort > file.index
+ * */
+
+/* Digit number 'n' (0..63) of the base 64 alphabet, or '!' if out of range */
+static char num_to_ia5char(int n)
+{
+    /* From RFC 1421 */
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+    if (n >= 0 && n <= 63)
+        return alphabet[n];
+
+    return '!'; /* Error */
+}
+
+/* Write 'n', taken as unsigned, in base 64 and most significant digit
+ * first, as a NUL-terminated string in 'dest'.
+ * Returns the position of the last digit in 'dest'. */
+static int num_to_ia5(char *dest, int n)
+{
+    char tmp[20];
+    unsigned int rest = (unsigned int) n;
+    int len = 0;
+    int i;
+
+    /* Least significant digit first */
+    do
+    {
+        tmp[len++] = num_to_ia5char((int) (rest % 64));
+        rest /= 64;
+    } while (rest != 0 && len < (int) sizeof tmp);
+
+    /* reverse the number */
+    for (i=0; i < len; ++i)
+        dest[i] = tmp[len-1-i];
+
+    /* Ending '\0' */
+    dest[len] = '\0';
+    return len - 1;
+}
+
+/* Read one 32 bit big endian number from the standard input.
+ * Exits if the input ends in the middle of it. */
+static int get_int()
+{
+    uint32_t raw;
+
+    if (fread(&raw, sizeof raw, 1, stdin) != 1)
+    {
+        fprintf(stderr, "Unexpected end of the idx file.\n");
+        exit(1);
+    }
+    return (int) ntohl(raw); /* Network to Host order */
+}
+
+/* Read one NUL-terminated word from the standard input into 'word', which
+ * has room for 'size' bytes. The stored word is always NUL-terminated; a
+ * word that does not fit is cut.
+ * Returns the number of bytes read, 0 at the end of the input. */
+static int get_word(char * word, int size)
+{
+    int c;
+    int count = 0;
+    int stored = 0;
+
+    while ((c = getchar()) != EOF)
+    {
+        ++count;
+        if (c == 0)
+            break;
+        if (stored < size - 1)
+            word[stored++] = (char) c;
+    }
+    word[stored] = '\0';
+    return count;
+}
+
+int main()
+{
+    char word[256];
+
+    while (get_word(word, sizeof word) != 0)
+    {
+        char c_offset[20], c_length[20];
+        int offset, length;
+
+        offset = get_int();
+        length = get_int();
+        num_to_ia5(c_offset, offset);
+        num_to_ia5(c_length, length);
+        printf("%s\t%s\t%s\n", word, c_offset, c_length);
+    }
+    return 0;
+}
diff -r 000000000000 -r 7f37716d4f1e load.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/load.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,132 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dictre.h"
+
+enum
+{
+    MAX=500000,
+    MAX_DONT_TOUCH=20
+};
+
+struct Word words[MAX];
+int nwords;
+struct Def defs[MAX];
+int ndefs;
+int dont_touch[MAX_DONT_TOUCH];
+int ndont_touch;
+
+/* Empty the tables of words and definitions. */
+void load_init()
+{
+    ndefs = 0;
+    nwords = 0;
+    ndont_touch = 0;
+}
+
+/* Report that one of the fixed size tables is full, and exit. */
+static void table_full(const char *what)
+{
+    fprintf(stderr, "Too many %s.\n", what);
+    exit(-1);
+}
+
+/* Append a copy of '*from' to the table of words. */
+static void new_word(struct Word *from)
+{
+    if (nwords >= MAX)
+        table_full("words");
+    memcpy(&words[nwords], from, sizeof(*from));
+    nwords++;
+}
+
+/* Mark definition number 'n' as not to be filtered. */
+static void new_dont_touch(int n)
+{
+    if (ndont_touch >= MAX_DONT_TOUCH)
+        table_full("definitions not to be filtered");
+    dont_touch[ndont_touch++] = n;
+}
+
+/* Append a definition to the table, and return its position there. */
+static int new_def(char *def, int offset, int length)
+{
+    if (ndefs >= MAX)
+        table_full("definitions");
+    defs[ndefs].d = def;
+    defs[ndefs].offset = offset;
+    defs[ndefs].length = length;
+    return ndefs++;
+}
+
+/* Position of the definition with this offset and length, or -1. */
+static int search_def(int offset, int length)
+{
+    int i;
+
+    for(i=0; i < ndefs; ++i)
+    {
+        if (defs[i].offset == offset &&
+                defs[i].length == length)
+            return i;
+    }
+    return -1;
+}
+
+static void print_word(struct Word *w)
+{
+    printf("%s\t%i\n", w->w, w->def);
+}
+
+/* Load every entry of 'index', each with its definition read from 'fdefs'.
+ * Entries with the same offset and length share a single definition.
+ * Definitions of words starting with "00database" are marked as not to be
+ * filtered. Exits on a malformed entry. */
+void load_dictionary(FILE *index, FILE *fdefs)
+{
+    struct Word w;
+    int last_offset = 0;
+
+    do {
+        int offset, length;
+        char *defstr;
+        w.w = get_word(index);
+        if (w.w == 0)
+            break;
+        offset = get_int(index);
+        length = get_int(index);
+        /* get_int() returns -1 for a missing field */
+        if (offset < 0 || length < 0)
+        {
+            fprintf(stderr, "Malformed index entry: %s\n", w.w);
+            exit(-1);
+        }
+        if (offset > last_offset)
+        {
+            /* Beyond every offset seen so far: it cannot be loaded yet */
+            w.def = -1;
+            last_offset = offset;
+        }
+        else
+            w.def = search_def(offset, length);
+        if (w.def == -1)
+        {
+            defstr = get_def(fdefs, offset, length);
+            w.def = new_def(defstr, offset, length);
+        }
+        /* sizeof -1 instead of strlen() */
+        if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0)
+            new_dont_touch(w.def);
+        new_word(&w);
+    } while(1);
+}
+
+/* Print every word with the number of its definition. */
+void print_words()
+{
+    int i;
+
+    for(i=0; i < nwords; ++i)
+        print_word(&words[i]);
+}
diff -r 000000000000 -r 7f37716d4f1e main.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/main.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,72 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dictre.h"
+
+extern int nwords;
+extern int ndefs;
+
+/* Open for reading the file named 'base' followed by 'ext'.
+ * Exits with a message if that name is too long or cannot be opened. */
+static FILE * open_input(const char *base, const char *ext)
+{
+    char tmpname[500];
+    FILE *f;
+    int len;
+
+    len = snprintf(tmpname, sizeof tmpname, "%s%s", base, ext);
+    if (len < 0 || (size_t) len >= sizeof tmpname)
+    {
+        fprintf(stderr, "File name too long: %s%s\n", base, ext);
+        exit(-1);
+    }
+
+    f = fopen(tmpname, "r");
+    if(f == NULL)
+    {
+        /* Kept apart, as printing may change errno */
+        int err = errno;
+        fprintf(stderr, "File: %s - cannot open file.: %s\n",
+                tmpname, strerror(err));
+        exit(-1);
+    }
+    return f;
+}
+
+/* dictre <dict_in> <dict_out> <filter>: load <dict_in>.index and
+ * <dict_in>.dict, pass the definitions through the shell command
+ * <filter>, and write <dict_out>.index and <dict_out>.dict. */
+int main(int argn, char **argv)
+{
+    FILE *i, *d;
+
+    if (argn < 4)
+    {
+        fprintf(stderr, "usage: %s <dict_in> <dict_out> <filter>\n",
+                argv[0]);
+        return 1;
+    }
+
+    i = open_input(argv[1], ".index");
+    d = open_input(argv[1], ".dict");
+
+    load_init();
+
+    load_dictionary(i, d);
+
+    fclose(i);
+    fclose(d);
+
+    sort_words();
+
+    if (0)
+        print_words();
+
+    filter_all(argv[3]);
+
+    write_dictionary(argv[2]);
+
+    return 0;
+}
diff -r 000000000000 -r 7f37716d4f1e sort.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sort.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "dictre.h"
+
+extern struct Word words[];
+extern int nwords;
+
+/* qsort() comparison: order two words as strcmp() orders their text. */
+static int cmp_words(const void *v1, const void *v2)
+{
+    const struct Word *w1 = v1;
+    const struct Word *w2 = v2;
+
+    return strcmp(w1->w, w2->w);
+}
+
+/* Sort the table of words. Always returns 0. */
+int sort_words()
+{
+    qsort(words, nwords, sizeof(words[0]), cmp_words);
+    return 0;
+}
diff -r 000000000000 -r 7f37716d4f1e write.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/write.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,127 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "dictre.h"
+
+extern struct Word words[];
+extern int nwords;
+extern struct Def defs[];
+extern int ndefs;
+
+/* Digit number 'n' (0..63) of the base 64 alphabet, or '!' if out of range */
+static char num_to_ia5char(int n)
+{
+    /* From RFC 1421 */
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+    if (n >= 0 && n <= 63)
+        return alphabet[n];
+
+    return '!'; /* Error */
+}
+
+/* Write 'n', taken as unsigned, in base 64 and most significant digit
+ * first, as a NUL-terminated string in 'dest'.
+ * Returns the position of the last digit in 'dest'. */
+static int num_to_ia5(char *dest, int n)
+{
+    char tmp[20];
+    unsigned int rest = (unsigned int) n;
+    int len = 0;
+    int i;
+
+    /* Least significant digit first */
+    do
+    {
+        tmp[len++] = num_to_ia5char((int) (rest % 64));
+        rest /= 64;
+    } while (rest != 0 && len < (int) sizeof tmp);
+
+    /* reverse the number */
+    for (i=0; i < len; ++i)
+        dest[i] = tmp[len-1-i];
+
+    /* Ending '\0' */
+    dest[len] = '\0';
+    return len - 1;
+}
+
+/* Write the definitions to 'fdefs', one after the other, and record in
+ * each of them the offset where it now starts.
+ * Returns 0, or -1 if one of them could not be written. */
+static int write_dictionary_data(FILE *fdefs)
+{
+    int i;
+
+    int offset = 0;
+
+    for (i=0; i < ndefs; ++i)
+    {
+        /* fwrite() of an empty definition would report 0 items too */
+        if (defs[i].length > 0 &&
+                fwrite(defs[i].d, defs[i].length, 1, fdefs) != 1)
+            return -1;
+        defs[i].offset = offset;
+        offset += defs[i].length;
+    }
+    return 0;
+}
+
+/* Write to 'index' one line per word: the word, the offset and the length
+ * of its definition, separated by tabs, both numbers in base 64.
+ * Returns 0, or -1 if a line could not be written. */
+static int write_dictionary_index(FILE *index)
+{
+    int i;
+
+    for (i=0; i < nwords; ++i)
+    {
+        char offset_str[50];
+        char length_str[50];
+
+        num_to_ia5(offset_str, defs[words[i].def].offset);
+        num_to_ia5(length_str, defs[words[i].def].length);
+        if (fprintf(index, "%s\t%s\t%s\n",
+                    words[i].w, offset_str, length_str) < 0)
+            return -1;
+    }
+    return 0;
+}
+
+/* Create the file named 'base' followed by 'ext', fill it with 'writer'
+ * and close it. Exits with a message on any error. */
+static void write_file(const char *base, const char *ext,
+        int (*writer)(FILE *))
+{
+    char tmpname[500];
+    FILE *f;
+    int len;
+
+    len = snprintf(tmpname, sizeof tmpname, "%s%s", base, ext);
+    if (len < 0 || (size_t) len >= sizeof tmpname)
+    {
+        fprintf(stderr, "File name too long: %s%s\n", base, ext);
+        exit(-1);
+    }
+
+    f = fopen(tmpname, "wb");
+    /* fclose() flushes, so it can be the call that reports the error */
+    if (f == NULL || writer(f) != 0 || fclose(f) != 0)
+    {
+        int err = errno;
+        fprintf(stderr, "File: %s - cannot write file.: %s\n",
+                tmpname, strerror(err));
+        exit(-1);
+    }
+}
+
+/* Write the loaded dictionary as the files 'name'.dict and 'name'.index.
+ * Exits with a message on any error. */
+void write_dictionary(const char *name)
+{
+    /* The data goes first: writing it sets the offsets the index needs */
+    write_file(name, ".dict", write_dictionary_data);
+    write_file(name, ".index", write_dictionary_index);
+}