Initial. Awful makefile.
author viric@llimona
Sun, 05 Aug 2007 23:06:42 +0200
changeset 0 7f37716d4f1e
child 1 5af08d964c9e
Initial. Awful makefile.
Makefile
dict.c
dictre.h
filter.c
idx2index.c
load.c
main.c
sort.c
write.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,5 @@
+CFLAGS=-g
+CC=gcc
+dictre: load.o dict.o write.o sort.o filter.o main.o
+	$(CC) -o $@ $^
+load.o dict.o write.o sort.o filter.o main.o: dictre.h # all of them include it
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dict.c	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,157 @@
+#include <stdio.h>
+#include "dictre.h"
+
+/* Read one tab/newline-terminated field into buffer and return buffer; EOF
+ * before the delimiter yields "". Callers pass 500-byte buffers. */
+static char * get_string(FILE *index, char *buffer)
+{
+    enum { FIELD_MAX = 500 }; /* capacity of the callers' buffers */
+    int c, count = 0;
+    while ((c = fgetc(index)) != '\t' && c != '\n')
+    {
+        if (c == EOF)
+        {
+            count = 0; /* a field cut short by EOF is reported as empty */
+            break;
+        }
+        if (count < FIELD_MAX - 1) /* drop the excess, keep room for '\0' */
+            buffer[count++] = (char) c;
+    }
+    buffer[count] = '\0';
+    return buffer;
+}
+
+/* Read the next field of 'index' and return it as a freshly malloc'ed
+ * string owned by the caller, or 0 when the field is empty (which is also
+ * what get_string reports at end of file). */
+char * get_word(FILE *index)
+{
+    char field[500];
+    char *copy = 0;
+    int n;
+
+    get_string(index, field);
+    n = strlen(field);
+    if (n > 0)
+    {
+        copy = (char *) malloc(n + 1); /* +1 for the terminating '\0' */
+        strcpy(copy, field);
+    }
+    return copy;
+}
+
+static int char2val(char letter) /* value of one base-64 digit character */
+{
+    switch(letter)
+    {
+        case 'A': return 0; /* 'A'..'Z' map to 0..25 */
+        case 'B': return 1;
+        case 'C': return 2;
+        case 'D': return 3;
+        case 'E': return 4;
+        case 'F': return 5;
+        case 'G': return 6;
+        case 'H': return 7;
+        case 'I': return 8;
+        case 'J': return 9;
+        case 'K': return 10;
+        case 'L': return 11;
+        case 'M': return 12;
+        case 'N': return 13;
+        case 'O': return 14;
+        case 'P': return 15;
+        case 'Q': return 16;
+        case 'R': return 17;
+        case 'S': return 18;
+        case 'T': return 19;
+        case 'U': return 20;
+        case 'V': return 21;
+        case 'W': return 22;
+        case 'X': return 23;
+        case 'Y': return 24;
+        case 'Z': return 25;
+        case 'a': return 26; /* 'a'..'z' map to 26..51 */
+        case 'b': return 27;
+        case 'c': return 28;
+        case 'd': return 29;
+        case 'e': return 30;
+        case 'f': return 31;
+        case 'g': return 32;
+        case 'h': return 33;
+        case 'i': return 34;
+        case 'j': return 35;
+        case 'k': return 36;
+        case 'l': return 37;
+        case 'm': return 38;
+        case 'n': return 39;
+        case 'o': return 40;
+        case 'p': return 41;
+        case 'q': return 42;
+        case 'r': return 43;
+        case 's': return 44;
+        case 't': return 45;
+        case 'u': return 46;
+        case 'v': return 47;
+        case 'w': return 48;
+        case 'x': return 49;
+        case 'y': return 50;
+        case 'z': return 51;
+        case '0': return 52; /* '0'..'9' map to 52..61 */
+        case '1': return 53;
+        case '2': return 54;
+        case '3': return 55;
+        case '4': return 56;
+        case '5': return 57;
+        case '6': return 58;
+        case '7': return 59;
+        case '8': return 60;
+        case '9': return 61;
+        case '+': return 62;
+        case '/': return 63;
+        default: /* any other character is indistinguishable from 'A' */
+                  return 0;
+    }
+}
+
+/* Decode a base-64 number (most significant digit first) using char2val;
+ * an empty string decodes to 0. */
+static int str2int(const char *str)
+{
+    const char *p;
+    int total = 0;
+
+    /* Horner's scheme: each digit shifts the accumulated value by one
+     * base-64 position before being added. */
+    for (p = str; *p != '\0'; ++p)
+    {
+        total = total * 64 + char2val(*p);
+    }
+
+    return total;
+}
+
+/* Read the next field of 'index' and decode it with str2int.
+ * Returns the decoded value, or -1 when the field is empty (get_string
+ * yields an empty field at end of file). */
+int get_int(FILE *index)
+{
+    char field[500];
+
+    get_string(index, field);
+
+    /* An empty field carries no digits, so it cannot be decoded. */
+    if (strlen(field) == 0)
+        return -1;
+
+    return str2int(field);
+}
+
+/* Return a malloc'ed copy of the 'length' bytes found at byte 'offset' of
+ * 'fdefs'; the block is raw data with no terminating '\0' appended. */
+char * get_def(FILE *fdefs, int offset, int length)
+{
+    char *block = (char *) malloc(length);
+    fseek(fdefs, offset, SEEK_SET);
+    fread(block, length, 1, fdefs);
+    return block;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dictre.h	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,37 @@
+struct Words /* not referenced by any other file of this changeset */
+{
+    struct Word *first; /* NOTE(review): presumably the start of a Word array - confirm */
+    int length; /* NOTE(review): presumably the number of elements - confirm */
+};
+
+struct Word /* one index entry */
+{
+    char *w; /* the headword; malloc'ed by get_word in load_dictionary */
+    int def; /* index into defs[] of the definition this word points to */
+};
+
+struct Def /* one definition body */
+{
+    char *d; /* the definition bytes; get_def appends no '\0' */
+    int offset; /* byte offset of the definition inside the .dict file */
+    int length; /* size of the definition in bytes */
+};
+
+/* write.c */
+void write_dictionary(const char *name); /* writes <name>.dict and <name>.index */
+
+/* dict.c */
+char * get_word(FILE *index); /* next field as a malloc'ed string, 0 if empty */
+int get_int(FILE *index); /* next field decoded as a base-64 number, -1 if empty */
+char * get_def(FILE *fdefs, int offset, int length); /* malloc'ed raw bytes */
+
+/* filter.c */
+void filter_all(const char *filter_par); /* pipes the definitions through a command */
+
+/* sort.c */
+int sort_words(); /* qsorts words[] with strcmp on the word */
+
+/* load.c */
+void load_init(); /* zeroes the counters */
+void load_dictionary(FILE *index, FILE *fdefs); /* fills words[] and defs[] */
+void print_words(); /* prints "word<TAB>def index" for every word */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.c	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,177 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/select.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "dictre.h"
+
+extern struct Def defs[];
+extern int ndefs;
+extern int dont_touch[];
+extern int ndont_touch;
+
+static void more_memory(void **ptr, int size) /* realloc *ptr or exit */
+{
+    void *new = realloc(*ptr, size);
+    if (new == NULL) /* storing NULL in *ptr would lose the old block */
+        { perror("realloc"); exit(-1); }
+    *ptr = new;
+}
+
+/* Send the 'deflen' bytes at 'def' to the filter on 'writeto' while reading
+ * its output from 'readfrom'. Returns the output in a malloc'ed buffer and
+ * its size in *outlen. Exits if output ends before all of def was sent. */
+static
+char * manage_filter(const char *def, int deflen, int writeto, int readfrom,
+        int *outlen)
+{
+    char *out = 0;
+    int outsize = 1000; /* bytes allocated for out */
+    int outrest = 1000; /* bytes still free in out */
+    int outptr = 0;     /* bytes of output collected so far */
+    int defptr = 0;     /* bytes of def already sent */
+    int maxfd = (writeto > readfrom) ? writeto : readfrom;
+
+    more_memory((void **) &out, outsize);
+    /* Nothing to send: close at once so the filter sees end of input
+     * instead of waiting forever for data that never comes. */
+    if (deflen <= 0)
+        close(writeto);
+
+    do
+    {
+        fd_set writeset, readset;
+        FD_ZERO(&writeset);
+        FD_ZERO(&readset);
+        if (defptr < deflen)
+            FD_SET(writeto, &writeset);
+        FD_SET(readfrom, &readset);
+        if (select(maxfd+1, &readset, &writeset, 0, 0) < 0)
+            { perror("select"); exit(-1); }
+
+        if (FD_ISSET(readfrom, &readset))
+        {
+            int res = read(readfrom, out + outptr, outrest);
+            if (res < 0) /* a failed read must not be counted as data */
+                { perror("read"); exit(-1); }
+            if (res == 0) /* end of file: the filter closed its output */
+            {
+                close(readfrom);
+                break;
+            }
+            outrest -= res;
+            outptr += res;
+            if (outrest == 0)
+            {
+                outrest = 1000;
+                outsize += 1000;
+                more_memory((void **) &out, outsize);
+            }
+        }
+
+        /* One byte per round: select only reports that a write can
+         * proceed, not how many bytes the pipe will take. */
+        if (FD_ISSET(writeto, &writeset))
+        {
+            if (write(writeto, def+defptr, 1) != 1)
+                { perror("write"); exit(-1); }
+            defptr++;
+            if (defptr >= deflen)
+                close(writeto);
+        }
+    } while(1);
+
+    if (defptr < deflen)
+    {
+        fprintf(stderr, "Error in filter! not all written.\n");
+        exit(-1);
+    }
+    *outlen = outptr;
+    return out;
+}
+
+/* Run 'filter_par' with "bash -c", feeding it the 'deflen' bytes at 'def' on
+ * standard input. Returns its standard output (malloc'ed, size in *outlen);
+ * exits if the command cannot be started or does not end with status 0. */
+static char * filter(char *def, int deflen, const char *filter_par, int *outlen)
+{
+    int write_pipe[2]; /* parent writes, child reads it as stdin */
+    int read_pipe[2];  /* child writes it as stdout, parent reads */
+    int pid, res, status;
+    char *out;
+
+    if (pipe(write_pipe) == -1 || pipe(read_pipe) == -1)
+    {
+        perror("pipe");
+        exit(-1);
+    }
+
+    pid = fork();
+    switch(pid)
+    {
+        case 0:  /* child */
+            dup2(write_pipe[0], 0);
+            close(write_pipe[0]);
+            close(write_pipe[1]);
+            dup2(read_pipe[1], 1);
+            close(read_pipe[1]);
+            close(read_pipe[0]);
+            /* The argument list must end with a null pointer; a bare 0
+             * is an int and may be narrower than a pointer. */
+            execlp("bash", "bash", "-c", filter_par, (char *) NULL);
+            perror("execlp");
+            exit(-1);
+        case -1:
+            perror("fork");
+            exit(-1);
+        default:  /* parent */
+            close(write_pipe[0]);
+            close(read_pipe[1]);
+            break;
+    }
+
+    /* parent */
+    out = manage_filter(def, deflen, write_pipe[1], read_pipe[0], outlen);
+    res = wait(&status);
+    if (res != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0)
+    {
+        fprintf(stderr, "Error filtering: pid=%i status=%i",
+                pid, WEXITSTATUS(status));
+        exit(-1);
+    }
+
+    return out;
+}
+
+/* Return 1 if definition index 'n' is listed in dont_touch[], 0 otherwise. */
+static int in_dont_touch(int n)
+{
+    int found = 0;
+    int k = 0;
+    /* Linear scan that stops at the first match. */
+    while (!found && k < ndont_touch)
+    {
+        found = (dont_touch[k++] == n);
+    }
+    return found;
+}
+
+/* Replace every loaded definition by the output of the shell command
+ * 'filter_par' run on it, except those whose index is in dont_touch[]. */
+void filter_all(const char *filter_par)
+{
+    int i;
+    for(i=0; i < ndefs; ++i)
+    {
+        char *newdef;
+        int newdeflen;
+        if (in_dont_touch(i))
+            continue;
+        newdef = filter(defs[i].d, defs[i].length, filter_par, &newdeflen);
+        free(defs[i].d); /* the unfiltered text is no longer referenced */
+        defs[i].length = newdeflen;
+        defs[i].d = newdef;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/idx2index.c	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,97 @@
+#include <stdio.h>
+#include <netinet/in.h>
+
+/*
+ * idx2index - Part of the flow to convert a StarDict index to a dictd index.
+ *             Author: Lluís Batlle
+ * In order to convert a StarDict idx file to a dictd index file, pass:
+ * ./idx2index < file.idx | LC_ALL=POSIX sort > file.index
+ * */
+
+/* Map a 6-bit value (0..63) to its character in the printable encoding
+ * alphabet of RFC 1421; any other value yields '!'. */
+static char num_to_ia5char(int n)
+{
+    /* RFC 1421 assigns '/' to 63 (not '-'); char2val() in dict.c decodes
+     * '/' as 63 too. */
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        "abcdefghijklmnopqrstuvwxyz"
+        "0123456789+/";
+
+    if (n < 0 || n > 63)
+        return '!'; /* Error */
+
+    return alphabet[n];
+}
+
+/* Write 'n' into 'dest' as a '\0'-terminated base-64 number, most
+ * significant digit first, using num_to_ia5char for each digit.
+ * Returns the index of the last digit written (digit count minus one). */
+static int num_to_ia5(char *dest, int n)
+{
+    char digits[20]; /* least significant digit first */
+    int last = 0;    /* index of the most significant digit in digits[] */
+    int k;
+
+    while (last <= 10)
+    {
+        digits[last] = num_to_ia5char(n % 64);
+        if (n < 64)
+            break;
+        n /= 64;
+        ++last;
+    }
+
+    /* Copy the digits back to front so the result reads high to low. */
+    for (k = last; k >= 0; --k)
+        dest[last - k] = digits[k];
+    dest[last + 1] = '\0';
+    return last;
+}
+
+static int get_int() /* read one network-order int from stdin (0 if short) */
+{
+    int i = 0;
+    if (fread(&i, sizeof(int) , 1, stdin) != 1)
+        return 0; /* truncated input: do not decode a partial value */
+    return ntohl(i); /* Network to Host order */
+}
+
+/* Read a '\0'-terminated word from stdin into 'word' (256 bytes in main),
+ * dropping what does not fit. Returns the bytes consumed, 0 at end of input. */
+static int get_word(char * word)
+{
+    enum { WORD_MAX = 256 }; /* size of the caller's buffer */
+    int c, stored = 0, count = 0;
+    while ((c = getchar()) != EOF && c != 0)
+    {
+        if (stored < WORD_MAX - 1) /* keep room for the terminator */
+            word[stored++] = (char) c;
+        ++count;
+    }
+    word[stored] = '\0'; /* also terminates a word cut short by EOF */
+    return count + (c == 0); /* the terminator counts as consumed */
+}
+
+/* Convert StarDict idx records read from stdin into dictd index lines on
+ * stdout: "word<TAB>offset<TAB>length", numbers encoded by num_to_ia5. */
+int main()
+{
+    char word[256];
+    char c_offset[20], c_length[20];
+
+    /* Each record is a '\0'-terminated word followed by two integers;
+     * get_word returning 0 means the input is exhausted. */
+    while (get_word(word) != 0)
+    {
+        int offset = get_int();
+        int length;
+
+        num_to_ia5(c_offset, offset);
+        length = get_int();
+        num_to_ia5(c_length, length);
+        printf("%s\t%s\t%s\n", word, c_offset, c_length);
+    }
+    return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/load.c	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,99 @@
+#include <stdio.h>
+
+#include "dictre.h"
+
+enum
+{
+    MAX=500000 /* capacity of both words[] and defs[] */
+};
+
+struct Word words[MAX]; /* index entries, in load order until sort_words runs */
+int nwords; /* number of entries used in words[] */
+struct Def defs[MAX]; /* distinct definitions; Word.def indexes this array */
+int ndefs; /* number of entries used in defs[] */
+int dont_touch[20]; /* defs[] indices that filter_all leaves unfiltered */
+int ndont_touch; /* number of entries used in dont_touch[] */
+
+/* Reset the word, definition and dont_touch counters to empty. */
+void load_init()
+{
+    /* Only the counters are cleared; the arrays are left as they are. */
+    nwords = ndefs = ndont_touch = 0;
+}
+
+static void new_word(struct Word *from) /* append a copy of *from to words[] */
+{
+    words[nwords] = *from; /* struct assignment copies both members */
+    nwords++;
+}
+
+static void new_dont_touch(int n) /* append n to the list filter_all skips */
+{
+    dont_touch[ndont_touch++] = n; /* NOTE(review): no check against the 20-entry capacity */
+}
+
+static int new_def(char *def, int offset, int length) /* returns the new defs[] index */
+{
+    defs[ndefs].d = def; /* the pointer is stored, not copied */
+    defs[ndefs].offset = offset;
+    defs[ndefs].length = length;
+    return ndefs++; /* NOTE(review): no check against MAX before the slot is used */
+}
+
+/* Return the index in defs[] of the entry with this offset and length, or -1. */
+static int search_def(int offset, int length)
+{
+    int k = 0;
+    while (k < ndefs)
+    {
+        if (defs[k].length == length && defs[k].offset == offset)
+            return k;
+        ++k;
+    }
+    return -1;
+}
+
+static void print_word(struct Word *w) /* prints "word<TAB>def index" on stdout */
+{
+    printf("%s\t%i\n", w->w, w->def);
+}
+
+/* Load every entry of 'index' (word, offset, length per line) and the
+ * definitions they point to in 'fdefs' into words[] and defs[].
+ * Entries naming the same (offset, length) share one defs[] slot. */
+void load_dictionary(FILE *index, FILE *fdefs)
+{
+    struct Word w;
+    int last_offset = 0;
+
+    for (;;)
+    {
+        int offset, length;
+        w.w = get_word(index);
+        if (w.w == 0)
+            break; /* empty word field: end of the index */
+        offset = get_int(index);
+        length = get_int(index);
+        /* An offset beyond every one seen so far cannot have been loaded
+         * yet, so the linear search is skipped for it. */
+        w.def = -1;
+        if (offset > last_offset)
+            last_offset = offset;
+        else
+            w.def = search_def(offset, length);
+        if (w.def == -1)
+            w.def = new_def(get_def(fdefs, offset, length), offset, length);
+        /* sizeof -1  instead of strlen() */
+        if (strncmp(w.w, "00database", sizeof("00database") - 1) == 0)
+            new_dont_touch(w.def);
+        new_word(&w);
+    }
+}
+
+/* Print every loaded word with print_word, in words[] order. */
+void print_words()
+{
+    struct Word *w;
+    for (w = words; w < words + nwords; ++w)
+        print_word(w);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main.c	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,57 @@
+#include <stdio.h>
+
+#include "dictre.h"
+
+extern int nwords;
+extern int ndefs;
+
+/* Open "<base><ext>" for reading. Prints the reason and returns NULL when the
+ * name does not fit in the buffer or the file cannot be opened. */
+static FILE * open_input(const char *base, const char *ext)
+{
+    char tmpname[500];
+    FILE *f;
+    int n;
+    n = snprintf(tmpname, sizeof(tmpname), "%s%s", base, ext);
+    if (n < 0 || n >= (int) sizeof(tmpname))
+    {
+        fprintf(stderr, "File: %s%s - name too long.\n", base, ext);
+        return NULL;
+    }
+    f = fopen(tmpname, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "File: %s ", tmpname);
+        perror("- cannot open file.");
+    }
+    return f;
+}
+
+/* dictre <dict_basename> <dict_basename_out> <filter>: load the .index and
+ * .dict files of <dict_basename>, sort the words, filter the definitions and
+ * write the result under <dict_basename_out>. Returns 0, 1 on bad usage. */
+int main(int argn, char **argv)
+{
+    FILE *i, *d;
+
+    if (argn < 4)
+    {
+        fprintf(stderr, "usage: %s <dict_basename> "
+                "<dict_basename_out> <filter>\n", argv[0]);
+        return 1;
+    }
+    i = open_input(argv[1], ".index");
+    if (i == NULL)
+        return -1; /* same process status as exit(-1) */
+    d = open_input(argv[1], ".dict");
+    if (d == NULL)
+        return -1;
+    load_init();
+    load_dictionary(i, d);
+    fclose(i);
+    fclose(d);
+    sort_words();
+    filter_all(argv[3]);
+    write_dictionary(argv[2]);
+    return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sort.c	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "dictre.h"
+
+extern struct Word words[];
+extern int nwords;
+
+static int cmp_words(const void *v1, const void *v2) /* qsort comparator */
+{
+    const struct Word *a = v1; /* elements are read only, so keep const */
+    const struct Word *b = v2;
+    return strcmp(a->w, b->w);
+}
+
+int sort_words() /* sorts words[] into strcmp order; always returns 0 */
+{
+    qsort(words, nwords, sizeof(words[0]), cmp_words);
+    return 0; /* declared int, so it must not fall off the end */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/write.c	Sun Aug 05 23:06:42 2007 +0200
@@ -0,0 +1,97 @@
+#include <stdio.h>
+#include "dictre.h"
+
+extern struct Word words[];
+extern int nwords;
+extern struct Def defs[];
+extern int ndefs;
+
+/* Map a 6-bit value (0..63) to its character in the printable encoding
+ * alphabet of RFC 1421; any other value yields '!'. */
+static char num_to_ia5char(int n)
+{
+    /* RFC 1421 assigns '/' to 63 (not '-'); char2val() in dict.c decodes
+     * '/' as 63 too. */
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        "abcdefghijklmnopqrstuvwxyz"
+        "0123456789+/";
+
+    if (n < 0 || n > 63)
+        return '!'; /* Error */
+
+    return alphabet[n];
+}
+
+/* Write 'n' into 'dest' as a '\0'-terminated base-64 number, most
+ * significant digit first, using num_to_ia5char for each digit.
+ * Returns the index of the last digit written (digit count minus one). */
+static int num_to_ia5(char *dest, int n)
+{
+    char digits[20]; /* least significant digit first */
+    int last = 0;    /* index of the most significant digit in digits[] */
+    int k;
+
+    while (last <= 10)
+    {
+        digits[last] = num_to_ia5char(n % 64);
+        if (n < 64)
+            break;
+        n /= 64;
+        ++last;
+    }
+
+    /* Copy the digits back to front so the result reads high to low. */
+    for (k = last; k >= 0; --k)
+        dest[last - k] = digits[k];
+    dest[last + 1] = '\0';
+    return last;
+}
+
+static int write_dictionary_data(FILE *fdefs)
+{
+    int i;
+
+    int offset = 0;
+
+    for (i=0; i < ndefs; ++i)
+    {
+        fwrite(defs[i].d, defs[i].length, 1, fdefs);
+        defs[i].offset = offset;
+        offset += defs[i].length;
+    }
+}
+
+/* Write one "word<TAB>offset<TAB>length" line per entry of words[] to
+ * 'index', with both numbers encoded by num_to_ia5. */
+static void write_dictionary_index(FILE *index)
+{
+    char offset_str[50];
+    char length_str[50];
+    int i;
+    for (i=0; i < nwords; ++i)
+    {
+        const struct Def *def = &defs[words[i].def];
+        num_to_ia5(offset_str, def->offset);
+        num_to_ia5(length_str, def->length);
+        fprintf(index, "%s\t%s\t%s\n", words[i].w, offset_str, length_str);
+    }
+}
+
+/* Write the loaded dictionary as "<name>.dict" and "<name>.index"; reports
+ * the problem and returns early if either file cannot be created. */
+void write_dictionary(const char *name)
+{
+    FILE *i, *d;
+    char tmpname[500];
+
+    /* snprintf truncates an over-long name instead of overrunning tmpname. */
+    snprintf(tmpname, sizeof(tmpname), "%s.dict", name);
+    if ((d = fopen(tmpname, "wb")) == NULL) { perror(tmpname); return; }
+    write_dictionary_data(d); /* first: it assigns the offsets the index uses */
+    fclose(d);
+    snprintf(tmpname, sizeof(tmpname), "%s.index", name);
+    if ((i = fopen(tmpname, "wb")) == NULL) { perror(tmpname); return; }
+    write_dictionary_index(i);
+    fclose(i);
+}