zload.c
author viric@llimona
Sat, 01 Sep 2007 12:34:52 +0200
changeset 18 64ed4238657f
parent 11 68ea18fe402c
permissions -rw-r--r--
Fixed possible buffer overflow.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
11
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     1
#include <stdio.h>
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     2
#include <sys/stat.h>
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     3
#include "dictre.h"
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     4
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     5
/* Input streams assigned by open_files() and closed by close_files(). */
static FILE *index, *dict;
/*
 * Non-zero once open_files() has gunzipped the dictionary to
 * /tmp/tmp.dict; close_files() then unlinks that file.
 */
static int remove_tmp_file = 0;
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     7
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     8
/*
 * Print one word and its definition to stdout in the form
 * "'<word>': '<definition>'" followed by a newline.
 *
 * w       the word to print.
 * defstr  the definition text to print.
 *
 * Always returns 0.
 */
static int new_word(const char *w, const char *defstr)
{
    printf("'%s': '%s'\n", w, defstr);
    return 0;
}
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    12
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    13
/*
 * Read every entry from the index stream and pass it to zprocess_def().
 *
 * For each word returned by get_word(index), two integers (offset and
 * length) are read from the index with get_int() and handed to get_def()
 * to fetch the definition text from fdefs.  Entries whose word starts with
 * "00database" are not forwarded to zprocess_def().  The loop stops when
 * get_word() returns a null pointer.
 *
 * A progress line is written to stderr every 1000 entries.  The counters
 * are static, so they keep accumulating if the function is called again.
 *
 * NOTE(review): ownership of the strings returned by get_word() and
 * get_def() is not visible from here; if they are heap-allocated they are
 * never released -- confirm against dictre.h.
 */
void zload_words(FILE *index, FILE *fdefs)
{
    /* Reported in the progress line; nothing in this function changes it. */
    int def_avoided = 0;
    static int dispnwords = 0;
    static int nwords = 0;

    for (;;)
    {
        int offset, length;
        char *defstr;
        char *word;

        word = get_word(index);
        if (word == NULL)
            break;

        offset = get_int(index);
        length = get_int(index);
        defstr = get_def(fdefs, offset, length);

        /* sizeof - 1 gives the prefix length without calling strlen(). */
        if (strncmp(word, "00database", sizeof("00database") - 1) != 0)
            zprocess_def(word, defstr);

        /* Progress report on stderr. */
        dispnwords++;
        nwords++;
        if (dispnwords >= 1000)
        {
            dispnwords = 0;
            fprintf(stderr,
                    "Loaded: %i Repeated definitions avoided: %i\n", nwords,
                    def_avoided);
        }
    }
}
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    53
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    54
static void close_files()
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    55
{
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    56
    fclose(index);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    57
    fclose(dict);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    58
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    59
    if (remove_tmp_file)
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    60
        unlink("/tmp/tmp.dict");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    61
}
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    62
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    63
static void open_files(int argn, char **argv)
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    64
{
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    65
    char tmpname[500];
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    66
    if (argn < 2)
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    67
    {
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    68
        fprintf(stderr, "usage: %s <dict_basename>\n", argv[0]);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    69
        exit(1);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    70
    }
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    71
    strcpy(tmpname, argv[1]);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    72
    strcat(tmpname, ".index");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    73
    index = fopen(tmpname, "r");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    74
    if(index == NULL)
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    75
    {
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    76
        fprintf(stderr, "File: %s ", tmpname);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    77
        perror("- cannot open file.");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    78
        exit(-1);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    79
    }
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    80
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    81
    strcpy(tmpname, argv[1]);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    82
    strcat(tmpname, ".dict");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    83
    dict = fopen(tmpname, "r");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    84
    if(dict == NULL)
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    85
    {
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    86
        struct stat st;
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    87
        int res;
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    88
        char tmp[500];
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    89
        strcat(tmpname, ".dz");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    90
        res = stat(tmpname, &st);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    91
        if (res == -1)
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    92
        {
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    93
            fprintf(stderr, "File: %s ", tmpname);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    94
            perror("- cannot open file.");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    95
            exit(-1);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    96
        }
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    97
        sprintf(tmp, "gzip -cd %s > /tmp/tmp.dict",
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    98
                tmpname);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    99
        printf("Gunzipping...\n");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   100
        res = system(tmp);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   101
        dict = fopen("/tmp/tmp.dict", "r");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   102
        if(dict == NULL || res != 0)
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   103
        {
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   104
            fprintf(stderr, "Error gunzipping file: %s ", tmpname);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   105
            perror("- something happened to /tmp/tmp.dict.");
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   106
            exit(-1);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   107
        }
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   108
        remove_tmp_file = 1;
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   109
    }
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   110
}
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   111
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   112
int main(int argn, char **argv)
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   113
{
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   114
    open_files(argn, argv);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   115
    init_wordlist();
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   116
    zload_words(index, dict);
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   117
    dump_wordlist();
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   118
    close_files();
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
   119
}