Initial. Awful makefile.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,5 @@
+CFLAGS=-g
+CC=gcc
+# Link the dictre binary from its objects; the recipe line below must start with a TAB.
+dictre: load.o dict.o write.o sort.o filter.o main.o
+	$(CC) -o $@ $^
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dict.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,159 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "dictre.h"
+
+/*
+ * Read one field from index into buffer: characters up to, but not including,
+ * the next tab or newline.  At EOF the result is the empty string, even if some
+ * characters had been read.  Both callers pass 500-byte buffers, so a longer
+ * field is truncated instead of overflowing.  Returns buffer.
+ */
+static char * get_string(FILE *index, char *buffer)
+{
+    enum { FIELD_MAX = 500 };   /* size of the callers' buffers */
+    int c;
+    int count = 0;
+
+    while ((c = fgetc(index)) != EOF && c != '\t' && c != '\n')
+        if (count < FIELD_MAX - 1)
+            buffer[count++] = (char) c;
+
+    buffer[c == EOF ? 0 : count] = '\0';
+    return buffer;
+}
+
+/* Read the next field from index and return it as a malloc'ed string owned by
+ * the caller.  Returns NULL for an empty field (including at EOF).  Exits if
+ * memory cannot be allocated. */
+char * get_word(FILE *index)
+{
+    char buffer[500];
+    char *out;
+
+    get_string(index, buffer);
+    if (buffer[0] == '\0')
+        return NULL;
+    out = malloc(strlen(buffer) + 1);
+    if (out == NULL) {
+        perror("malloc");
+        exit(-1);
+    }
+    return strcpy(out, buffer);
+}
+
+static int char2val(char letter) /* value 0..63 of one base-64 digit (A-Z, a-z, 0-9, '+', '/') */
+{
+ switch(letter)
+ {
+ case 'A': return 0;
+ case 'B': return 1;
+ case 'C': return 2;
+ case 'D': return 3;
+ case 'E': return 4;
+ case 'F': return 5;
+ case 'G': return 6;
+ case 'H': return 7;
+ case 'I': return 8;
+ case 'J': return 9;
+ case 'K': return 10;
+ case 'L': return 11;
+ case 'M': return 12;
+ case 'N': return 13;
+ case 'O': return 14;
+ case 'P': return 15;
+ case 'Q': return 16;
+ case 'R': return 17;
+ case 'S': return 18;
+ case 'T': return 19;
+ case 'U': return 20;
+ case 'V': return 21;
+ case 'W': return 22;
+ case 'X': return 23;
+ case 'Y': return 24;
+ case 'Z': return 25;
+ case 'a': return 26;
+ case 'b': return 27;
+ case 'c': return 28;
+ case 'd': return 29;
+ case 'e': return 30;
+ case 'f': return 31;
+ case 'g': return 32;
+ case 'h': return 33;
+ case 'i': return 34;
+ case 'j': return 35;
+ case 'k': return 36;
+ case 'l': return 37;
+ case 'm': return 38;
+ case 'n': return 39;
+ case 'o': return 40;
+ case 'p': return 41;
+ case 'q': return 42;
+ case 'r': return 43;
+ case 's': return 44;
+ case 't': return 45;
+ case 'u': return 46;
+ case 'v': return 47;
+ case 'w': return 48;
+ case 'x': return 49;
+ case 'y': return 50;
+ case 'z': return 51;
+ case '0': return 52;
+ case '1': return 53;
+ case '2': return 54;
+ case '3': return 55;
+ case '4': return 56;
+ case '5': return 57;
+ case '6': return 58;
+ case '7': return 59;
+ case '8': return 60;
+ case '9': return 61;
+ case '+': return 62;
+ case '/': return 63;
+ default: /* any other character is treated as 0 */
+ return 0;
+ }
+}
+
+/*
+ * Decode a base-64 number written most significant digit first.
+ * Every character is mapped with char2val() and folded in as
+ * val * 64 + digit, from left to right.
+ * The empty string decodes to 0.
+ */
+static int str2int(const char *str)
+{
+    const char *digit;
+    int val = 0;
+
+    for (digit = str; *digit != '\0'; ++digit)
+        val = char2val(*digit) + val * 64;
+
+    return val;
+}
+
+/*
+ * Read the next field from index and decode it with str2int().
+ * Returns -1 when the field is empty, which is also what EOF produces.
+ */
+int get_int(FILE *index)
+{
+    char field[500];
+
+    get_string(index, field);
+
+    if (field[0] == '\0')
+        return -1;
+
+    return str2int(field);
+}
+
+/* Return a malloc'ed copy of length bytes found at offset in fdefs; exits on error. */
+char * get_def(FILE *fdefs, int offset, int length)
+{
+    char *out = (length >= 0) ? malloc((size_t) length + 1) : NULL;
+    if (out == NULL || fseek(fdefs, offset, SEEK_SET) != 0 ||
+            (length > 0 && fread(out, (size_t) length, 1, fdefs) != 1))
+    { fprintf(stderr, "Error reading a definition.\n"); exit(-1); }
+    return out;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dictre.h Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,37 @@
+#ifndef DICTRE_H
+#define DICTRE_H
+#include <stdio.h> /* FILE, for the prototypes below */
+
+/* A pointer to a first struct Word together with a length. */
+struct Words {
+    struct Word *first;
+    int length;
+};
+/* One index entry: the headword and the position in defs[] of its definition. */
+struct Word {
+    char *w;
+    int def;
+};
+/* One definition: its bytes with the offset and length they have in the .dict file. */
+struct Def {
+    char *d;
+    int offset;
+    int length;
+};
+
+/* write.c */
+void write_dictionary(const char *name);
+/* dict.c */
+char * get_word(FILE *index);
+int get_int(FILE *index);
+char * get_def(FILE *fdefs, int offset, int length);
+/* filter.c */
+void filter_all(const char *filter_par);
+/* sort.c */
+int sort_words(void);
+/* load.c */
+void load_init(void);
+void load_dictionary(FILE *index, FILE *fdefs);
+void print_words(void);
+
+#endif /* DICTRE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,177 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/select.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "dictre.h"
+
+extern struct Def defs[]; /* definition table, defined in load.c */
+extern int ndefs; /* number of entries used in defs[] */
+extern int dont_touch[]; /* indices into defs[] that filter_all() must skip */
+extern int ndont_touch; /* number of entries used in dont_touch[] */
+
+/* Resize *ptr to size bytes with realloc(); exits if the memory is not available. */
+static void more_memory(void **ptr, int size)
+{
+    void *grown = realloc(*ptr, (size_t) size);
+    if (grown == NULL) { perror("realloc"); exit(-1); }
+    *ptr = grown;
+}
+
+/*
+ * Send the deflen bytes of def to the filter through writeto while collecting,
+ * from readfrom, everything it prints; select() serves whichever pipe is ready.
+ * Returns a malloc'ed buffer and stores the byte count in *outlen.  Exits on an
+ * I/O error or when the filter ends its output before all of def was written.
+ */
+static
+char * manage_filter(const char *def, int deflen, int writeto, int readfrom,
+        int *outlen)
+{
+    enum { CHUNK = 1000 };  /* growth step of the output buffer */
+    char *out = 0;
+    int outsize = CHUNK;    /* bytes allocated for out */
+    int outptr = 0;         /* bytes already stored in out */
+    int outrest = CHUNK;    /* bytes still free in out */
+    int defptr = 0;         /* bytes of def already written */
+    int maxfd = (readfrom > writeto) ? readfrom : writeto;
+
+    more_memory((void **) &out, outsize);
+    /* Nothing to send: close the pipe now, or the filter never sees EOF. */
+    if (deflen <= 0)
+        close(writeto);
+    do {
+        fd_set writeset, readset;
+        FD_ZERO(&writeset);
+        FD_ZERO(&readset);
+        if (defptr < deflen)
+            FD_SET(writeto, &writeset);
+        FD_SET(readfrom, &readset);
+        if (select(maxfd+1, &readset, &writeset, 0, 0) < 0) {
+            perror("select");
+            exit(-1);
+        }
+
+        if (FD_ISSET(readfrom, &readset)) {
+            int res = read(readfrom, out + outptr, outrest);
+            if (res < 0) {
+                perror("read");
+                exit(-1);
+            }
+            if (res == 0) { /* EOF on the filter's output */
+                close(readfrom);
+                break;
+            }
+            outrest -= res;
+            outptr += res;
+            if (outrest == 0) {
+                outrest = CHUNK;
+                outsize += CHUNK;
+                more_memory((void **) &out, outsize);
+            }
+        }
+
+        if (FD_ISSET(writeto, &writeset)) {
+            /* NOTE(review): one byte per pass, presumably so write() never blocks. */
+            if (write(writeto, def+defptr, 1) != 1) {
+                perror("write");
+                exit(-1);
+            }
+            defptr++;
+            if (defptr >= deflen)
+                close(writeto);
+        }
+    } while(1);
+    if (defptr < deflen) {
+        fprintf(stderr, "Error in filter! not all written.\n");
+        exit(-1);
+    }
+    *outlen = outptr;
+    return out;
+}
+
+/*
+ * Run "bash -c filter_par" with the deflen bytes of def on its standard input
+ * and return what it prints as a malloc'ed buffer, its size stored in *outlen.
+ * Exits when a pipe or the child cannot be created, or when the command does
+ * not terminate normally with exit status 0.
+ */
+static char * filter(char *def, int deflen, const char *filter_par, int *outlen)
+{
+    int write_pipe[2];  /* parent -> child's stdin */
+    int read_pipe[2];   /* child's stdout -> parent */
+    int pid;
+    int res;
+    int status;
+    char *out;
+
+    if (pipe(write_pipe) != 0 || pipe(read_pipe) != 0) {
+        perror("pipe");
+        exit(-1);
+    }
+
+    pid = fork();
+    switch(pid) {
+        case 0: /* child */
+            dup2(write_pipe[0], 0);
+            dup2(read_pipe[1], 1);
+            close(write_pipe[0]);
+            close(write_pipe[1]);
+            close(read_pipe[0]);
+            close(read_pipe[1]);
+            /* The list must end with a null pointer; a plain 0 is an int. */
+            execlp("bash", "bash", "-c", filter_par, (char *) 0);
+            perror("execlp");
+            _exit(-1);
+        case -1:
+            perror("fork");
+            exit(-1);
+        default: /* parent keeps only its own pipe ends */
+            close(write_pipe[0]);
+            close(read_pipe[1]);
+            break;
+    }
+
+    out = manage_filter(def, deflen, write_pipe[1], read_pipe[0], outlen);
+
+    res = waitpid(pid, &status, 0);
+    if (res != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+        fprintf(stderr, "Error filtering: pid=%i status=%i",
+                pid, WEXITSTATUS(status));
+        exit(-1);
+    }
+    return out;
+}
+
+/*
+ * Tell whether n is one of the first ndont_touch entries of dont_touch[].
+ * Returns 1 if it is, 0 otherwise.
+ */
+static int in_dont_touch(int n)
+{
+    int pos = 0;
+
+    while (pos < ndont_touch && dont_touch[pos] != n)
+        ++pos;
+    return pos < ndont_touch;
+}
+
+/* Replace each definition that is not listed in dont_touch[] with the output
+ * of the shell command filter_par run on its text; the old text is freed. */
+void filter_all(const char *filter_par)
+{
+    int i;
+    for(i=0; i < ndefs; ++i)
+    {
+        int newdeflen;
+        char *newdef;
+        if (in_dont_touch(i))
+            continue;
+        newdef = filter(defs[i].d, defs[i].length, filter_par, &newdeflen);
+        free(defs[i].d);
+        defs[i].d = newdef;
+        defs[i].length = newdeflen;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/idx2index.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,97 @@
+#include <stdio.h>
+#include <netinet/in.h>
+
+/*
+ * idx2index - Part of the flow to convert a StarDict index to a dictd index.
+ * Author: Lluís Batlle
+ * In order to convert a StarDict idx file to a dictd index file, pass:
+ * ./idx2index < file.idx | LC_ALL=POSIX sort > file.index
+ * */
+
+/*
+ * Map a value in 0..63 to its digit in the RFC 1421 base-64 alphabet
+ * (A-Z, a-z, 0-9, '+', '/'); any other value gives '!'.  63 is written as
+ * '/', which is the character char2val() in dict.c decodes as 63.
+ */
+static char num_to_ia5char(int n)
+{
+    /* From RFC 1421 */
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+    if (n < 0 || n > 63)
+        return '!'; /* Error */
+
+    return alphabet[n];
+}
+
+/*
+ * Write n in base 64, most significant digit first, as a NUL-terminated
+ * string in dest.  Returns the index of the last digit, i.e. the number of
+ * digits minus one.
+ */
+static int num_to_ia5(char *dest, int n)
+{
+    char digits[20];    /* digits, least significant first */
+    int last = 0;
+    int k;
+
+    digits[0] = num_to_ia5char(n % 64);
+    while (n >= 64)
+    {
+        n /= 64;
+        digits[++last] = num_to_ia5char(n % 64);
+    }
+
+    /* Copy the digits back in reverse order. */
+    for (k = 0; k <= last; ++k)
+        dest[k] = digits[last - k];
+    dest[last + 1] = '\0';
+    return last;
+}
+
+/* Read one int-sized, network-order integer from stdin; gives 0 if the input ends early. */
+static int get_int()
+{
+    unsigned int raw = 0;
+
+    return (fread(&raw, sizeof raw, 1, stdin) == 1) ? (int) ntohl(raw) : 0;
+}
+
+/* Read a NUL-terminated word from stdin into word (main's 256-byte buffer; longer
+ * words are truncated).  Returns 0 only when EOF comes before any character. */
+static int get_word(char * word)
+{
+    enum { WORD_MAX = 256 };    /* size of the caller's buffer */
+    int c;
+    int count = 0;
+
+    while ((c = getchar()) != EOF && c != 0)
+        if (count < WORD_MAX - 1)
+            word[count++] = (char) c;
+    word[count] = '\0';     /* also terminates a word cut short by EOF */
+
+    return (c == EOF) ? count : count + 1;
+}
+
+/*
+ * Convert the StarDict index read from stdin into dictd index lines on stdout:
+ * each entry becomes "word<TAB>offset<TAB>length" with both numbers in base 64.
+ */
+int main()
+{
+    char word[256];
+    char c_offset[20], c_length[20];
+
+    while (get_word(word) != 0)
+    {
+        int offset = get_int();
+        int length;
+
+        num_to_ia5(c_offset, offset);
+        length = get_int();
+        num_to_ia5(c_length, length);
+        printf("%s\t%s\t%s\n", word, c_offset, c_length);
+    }
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/load.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,100 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "dictre.h"
+
+enum
+{
+ MAX=500000 /* capacity of words[] and defs[] */
+};
+
+struct Word words[MAX]; /* every index entry loaded so far */
+int nwords; /* number of entries used in words[] */
+struct Def defs[MAX]; /* the definitions loaded so far */
+int ndefs; /* number of entries used in defs[] */
+int dont_touch[20]; /* indices into defs[] of the "00database" entries */
+int ndont_touch; /* number of entries used in dont_touch[] */
+
+/* Reset the word, definition and dont_touch tables to empty. */
+void load_init()
+{
+    ndont_touch = 0;
+    nwords = ndefs = 0;
+}
+
+/* Append a copy of *from to words[]. */
+static void new_word(struct Word *from)
+{
+    words[nwords++] = *from;
+}
+static void new_dont_touch(int n)   /* record n; dropped once dont_touch[] is full */
+{
+    if (ndont_touch < (int) (sizeof dont_touch / sizeof dont_touch[0]))
+        dont_touch[ndont_touch++] = n;
+}
+
+/* Store a definition in the next free slot of defs[]; returns that slot's index. */
+static int new_def(char *def, int offset, int length)
+{
+    struct Def entry = { def, offset, length };
+    defs[ndefs] = entry;
+    return ndefs++;
+}
+
+/*
+ * Look for an already loaded definition with this offset and length.
+ * Returns its index in defs[], or -1 when there is none.
+ */
+static int search_def(int offset, int length)
+{
+    int i = 0;
+
+    while (i < ndefs && !(defs[i].offset == offset && defs[i].length == length))
+        ++i;
+    return (i < ndefs) ? i : -1;
+}
+
+static void print_word(struct Word *w) /* print "word<TAB>definition index" on stdout */
+{
+ printf("%s\t%i\n", w->w, w->def);
+}
+
+/*
+ * Load each "word<TAB>offset<TAB>length" line of index into words[].  A
+ * definition is read from fdefs unless an entry with the same offset and length
+ * was already loaded (only checked when offset is not a new maximum).  Entries
+ * starting with "00database" are recorded in dont_touch[].  Loading stops with
+ * a message at a malformed line or when words[] or defs[] is full.
+ */
+void load_dictionary(FILE *index, FILE *fdefs)
+{
+    struct Word w;
+    int last_offset = 0;
+
+    while ((w.w = get_word(index)) != 0) {
+        int offset = get_int(index);
+        int length = get_int(index);
+
+        if (offset < 0 || length < 0 || nwords >= MAX || ndefs >= MAX) {
+            fprintf(stderr, "Cannot load index entry \"%s\"; stopping.\n", w.w);
+            break;
+        }
+        w.def = (offset > last_offset) ? -1 : search_def(offset, length);
+        if (offset > last_offset)
+            last_offset = offset;
+        if (w.def == -1)
+            w.def = new_def(get_def(fdefs, offset, length), offset, length);
+        /* sizeof -1 instead of strlen() */
+        if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0)
+            new_dont_touch(w.def);
+        new_word(&w);
+    }
+}
+
+/* Print every loaded word with print_word(), in table order. */
+void print_words()
+{
+    struct Word *w;
+    for (w = words; w < words + nwords; ++w)
+        print_word(w);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/main.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,59 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dictre.h"
+
+extern int nwords; /* defined in load.c */
+extern int ndefs; /* defined in load.c */
+
+/* Open "<basename><suffix>" for reading; prints a message and exits on failure. */
+static FILE * open_input(const char *basename, const char *suffix)
+{
+    char tmpname[500];
+    FILE *f;
+    int n = snprintf(tmpname, sizeof tmpname, "%s%s", basename, suffix);
+
+    if (n < 0 || (size_t) n >= sizeof tmpname) {
+        fprintf(stderr, "File name too long: %s%s\n", basename, suffix);
+        exit(-1);
+    }
+    f = fopen(tmpname, "r");
+    if(f == NULL) {
+        fprintf(stderr, "File: %s ", tmpname);
+        perror("- cannot open file.");
+        exit(-1);
+    }
+    return f;
+}
+
+/*
+ * dictre <dict_basename> <dict_basename_out> <filter>: load
+ * <dict_basename>.index and <dict_basename>.dict, sort the words, pass the
+ * definitions through the shell command <filter> and write the result under
+ * <dict_basename_out>.  Returns 1 on a usage error, 0 otherwise.
+ */
+int main(int argn, char **argv)
+{
+    FILE *i, *d;
+
+    if (argn < 4) {
+        fprintf(stderr, "usage: %s <dict_basename> "
+                "<dict_basename_out> <filter>\n",
+                argv[0]);
+        return 1;
+    }
+    i = open_input(argv[1], ".index");
+    d = open_input(argv[1], ".dict");
+
+    load_init();
+    load_dictionary(i, d);
+    fclose(i);
+    fclose(d);
+    sort_words();
+    if (0)
+        print_words();
+    filter_all(argv[3]);
+    write_dictionary(argv[2]);
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sort.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "dictre.h"
+
+extern struct Word words[]; /* word table, defined in load.c */
+extern int nwords; /* number of entries used in words[] */
+
+/* qsort() comparator: orders two struct Word elements by strcmp() of their w strings. */
+static int cmp_words(const void *v1, const void *v2)
+{
+    const struct Word *left = v1;
+    const struct Word *right = v2;
+    return strcmp(left->w, right->w);
+}
+int sort_words()    /* sort words[] in strcmp() order of the words; always returns 0 */
+{
+    qsort(words, nwords, sizeof(words[0]), cmp_words);
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/write.c Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,98 @@
+#include <stdio.h>
+#include <string.h>
+#include "dictre.h"
+
+extern struct Word words[]; /* word table, defined in load.c */
+extern int nwords; /* number of entries used in words[] */
+extern struct Def defs[]; /* definition table, defined in load.c */
+extern int ndefs; /* number of entries used in defs[] */
+
+/*
+ * Map a value in 0..63 to its digit in the RFC 1421 base-64 alphabet
+ * (A-Z, a-z, 0-9, '+', '/'); any other value gives '!'.  63 is written as
+ * '/', which is the character char2val() in dict.c decodes as 63.
+ */
+static char num_to_ia5char(int n)
+{
+    /* From RFC 1421 */
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+    if (n < 0 || n > 63)
+        return '!'; /* Error */
+
+    return alphabet[n];
+}
+
+/*
+ * Write n in base 64, most significant digit first, as a NUL-terminated
+ * string in dest.  Returns the index of the last digit, i.e. the number of
+ * digits minus one.
+ */
+static int num_to_ia5(char *dest, int n)
+{
+    char digits[20];    /* digits, least significant first */
+    int last = 0;
+    int k;
+
+    digits[0] = num_to_ia5char(n % 64);
+    while (n >= 64)
+    {
+        n /= 64;
+        digits[++last] = num_to_ia5char(n % 64);
+    }
+
+    /* Copy the digits back in reverse order. */
+    for (k = 0; k <= last; ++k)
+        dest[k] = digits[last - k];
+    dest[last + 1] = '\0';
+    return last;
+}
+
+/* Write every definition to fdefs back to back, recording each one's new offset.
+ * Returns 0, or -1 if any write failed (the offsets are still all updated). */
+static int write_dictionary_data(FILE *fdefs)
+{
+    int i, offset = 0, status = 0;
+    for (i=0; i < ndefs; ++i) {
+        if (defs[i].length > 0 && fwrite(defs[i].d, defs[i].length, 1, fdefs) != 1)
+            status = -1;
+        defs[i].offset = offset;
+        offset += defs[i].length;
+    }
+    return status;
+}
+
+/* Write one "word<TAB>offset<TAB>length" line per entry of words[] to index,
+ * with the offset and length of its definition encoded by num_to_ia5(). */
+static void write_dictionary_index(FILE *index)
+{
+    int i;
+    for (i=0; i < nwords; ++i) {
+        const struct Def *def = &defs[words[i].def];
+        char offset_str[50];
+        char length_str[50];
+
+        num_to_ia5(offset_str, def->offset);
+        num_to_ia5(length_str, def->length);
+        fprintf(index, "%s\t%s\t%s\n", words[i].w, offset_str, length_str);
+    }
+}
+
+/* Write <name>.dict, then <name>.index; stops with a message if one cannot be created. */
+void write_dictionary(const char *name)
+{
+    FILE *i, *d;
+    char tmpname[500];
+    snprintf(tmpname, sizeof tmpname, "%s.dict", name);
+    d = fopen(tmpname, "wb");
+    if (d == NULL) { perror(tmpname); return; }
+    write_dictionary_data(d);   /* also assigns the offsets the index needs */
+    fclose(d);
+
+    snprintf(tmpname, sizeof tmpname, "%s.index", name);
+    i = fopen(tmpname, "wb");
+    if (i == NULL) { perror(tmpname); return; }
+    write_dictionary_index(i);
+    fclose(i);
+}