find.c
author viric@llimona
Sat, 01 Sep 2007 13:04:10 +0200
changeset 20 45798398f4c8
parent 17 d95d9e7a2b81
child 21 01fe372188ac
permissions -rw-r--r--
Test for http_dec.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     1
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

#include "dictre.h"
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     7
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
     8
/* Suffixes appended to the dictionary base name to form its two file names.
 * The storage-class specifier comes first: placing it after a qualifier is
 * an obsolescent form in C. */
static const char indexext[] = ".index";
static const char dictext[] = ".dict";
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    10
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    11
/*
 * Return the size in bytes of the file named 'fname'.
 *
 * The function never reports an error to its caller: if stat() fails, or if
 * the size does not fit the 'int' return type, a diagnostic is written to
 * stderr and the process exits with status -1.
 */
int get_filesize(const char *fname)
{
    struct stat st;

    if (stat(fname, &st) == -1)
    {
        /* fprintf() is allowed to modify errno, so remember the value left
         * by stat() and restore it before perror() reads it. */
        int saved_errno = errno;

        fprintf(stderr, "Problem stating the file %s\n", fname);
        errno = saved_errno;
        perror("Error:");
        exit(-1);
    }

    /* st_size is an off_t and may be wider than int; refuse to return a
     * silently truncated size. */
    if (st.st_size > INT_MAX)
    {
        fprintf(stderr, "The file %s is too big\n", fname);
        exit(-1);
    }

    return (int) st.st_size;
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    25
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    26
void init_dictionary(struct Dict *d, const char *base)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    27
{
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    28
    char *filename;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    29
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    30
    filename = (char *) malloc(strlen(base) + 10);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    31
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    32
    /* Prepare .index filename and open it*/
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    33
    strcpy(filename, base);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    34
    strcat(filename, indexext);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    35
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    36
    d->indexsize = get_filesize(filename);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    37
    d->indexfd = open(filename, O_RDONLY);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    38
    if (d->indexfd == -1)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    39
    {
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    40
        fprintf(stderr, "Problem opening the file %s\n", filename);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    41
        perror("Error:");
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    42
        exit(-1);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    43
    }
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    44
    d->index = (unsigned char *) mmap(0, d->indexsize, PROT_READ, MAP_SHARED,
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    45
            d->indexfd, 0);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    46
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    47
    /* Prepare .dict filename and open it*/
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    48
    strcpy(filename, base);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    49
    strcat(filename, dictext);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    50
    d->defs = fopen(filename, "r");
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    51
    if (d->defs == 0)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    52
    {
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    53
        fprintf(stderr, "Problem opening the file %s\n", filename);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    54
        perror("Error:");
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    55
        exit(-1);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    56
    }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    57
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    58
    free(filename);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    59
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    60
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    61
void end_dictionary(struct Dict *d)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    62
{
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    63
    munmap(d->index, d->indexsize);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    64
    close(d->indexfd);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    65
    fclose(d->defs);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    66
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    67
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    68
static void fill_def(struct Dict *d, int offset, int length, char * def)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    69
{
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    70
    fseek(d->defs, offset, SEEK_SET);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    71
    fread(def, 1, length, d->defs);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    72
}
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    73
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    74
static int pointer_at_end(struct Dict *d, unsigned char *ptr)
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    75
{
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    76
    if (ptr >= (d->index + d->indexsize))
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    77
        return 1;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    78
    return 0;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    79
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    80
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
    81
/*
 * Starting at 'from', look for the first byte that is either a newline or
 * a NUL without leaving the index buffer of 'd'.
 *
 * Returns a pointer to that byte, or 0 when the end of the index (as
 * judged by pointer_at_end()) is reached first -- including the case in
 * which 'from' is already at the end.
 */
static char * skip_until_newline(struct Dict *d, char *from)
{
    char *cursor;

    /* The bound is checked before every dereference. */
    for (cursor = from;
            !pointer_at_end(d, (unsigned char *) cursor);
            ++cursor)
    {
        if (*cursor == '\n' || *cursor == 0)
            return cursor;
    }

    return 0;
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    93
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    94
/*
 * Compare the NUL-terminated 'word' with the key that starts at 'test'.
 * The key is considered finished at its first tab character.
 *
 * Returns:
 *    0  'word' ended exactly where the key's tab is (the two are equal)
 *   -1  'word' ended before the key did, or the first differing byte of
 *       'word' is the smaller one
 *    1  the key ended (tab) before 'word' did, or the first differing byte
 *       of 'word' is the greater one
 *
 * Bytes are compared as unsigned values.  A NUL inside 'test' stops the
 * scan but is not treated as the end of the key.
 */
static int compare(const unsigned char *word, const unsigned char *test)
{
    const unsigned char *w = word;
    const unsigned char *t = test;

    /* Walk over the common prefix. */
    while (*w != 0 && *t != 0 && *w == *t)
    {
        ++w;
        ++t;
    }

    if (*w == 0)
        return (*t == '\t') ? 0 : -1;

    if (*t == '\t')
        return 1;

    return (*w > *t) ? 1 : -1;
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   120
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
   121
static char * bin_search(struct Dict *d, const char *word)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   122
{
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   123
    int step, pivot;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   124
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
   125
    pivot = d->indexsize / 2;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
   126
    step = d->indexsize / 2;
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   127
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   128
    do
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   129
    {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   130
        char *test;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   131
        int comparision;
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
   132
        test = d->index + pivot;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
   133
        test = skip_until_newline(d, test);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   134
        if (test == 0)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   135
            return 0;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   136
        test += 1; /* skip exactly the new line */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   137
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   138
        comparision = compare(word, test);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   139
        if (comparision == 0)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   140
        {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   141
            return test + strlen(word) + 1; /* skip word and \n */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   142
        } else if (comparision < 0)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   143
        {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   144
            step = step / 2;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   145
            pivot = pivot - step;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   146
        } else if (comparision > 0)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   147
        {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   148
            step = step / 2;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   149
            pivot = pivot + step;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   150
        }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   151
    } while(step > 0);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   152
    return 0;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   153
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   154
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   155
/*
 * Parse the field that starts at *pos and advance *pos beyond it.
 *
 * The field runs up to the first tab, newline or NUL.  Its characters are
 * handed to str2int_len() together with their count (presumably a
 * fixed-length decimal conversion -- confirm in dictre.h) and the result is
 * returned.
 *
 * On return *pos points just after the tab/newline that closed the field.
 * If a NUL closed it, *pos is left on the NUL so that a following call
 * stops immediately rather than scanning further.  Stopping at NUL matches
 * skip_until_newline() and keeps the scan from running on when the last
 * record has no closing newline.
 */
static int my_get_int(char **pos)
{
    int i;
    char *start;
    int val;

    start = *pos;
    for(i=0; start[i] != '\t' && start[i] != '\n' && start[i] != 0; ++i)
        ;
    val = str2int_len(start, i);

    /* Step over the separator, but never over a terminating NUL. */
    *pos += i + (start[i] != 0 ? 1 : 0);
    return val;
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   168
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 14
diff changeset
   169
/*
 * Fetch the definition of 'word' from dictionary 'd' into 'def'.
 *
 * The index is searched with bin_search().  When the word is absent, 'def'
 * becomes the empty string (def[0] = 0).  Otherwise the two integer fields
 * that follow the key in the index -- first the offset, then the length --
 * are parsed and passed to fill_def() together with 'def'.
 */
void find_def(struct Dict *d, const char *word, char * def)
{
    char *cursor;
    int def_offset, def_len;

    cursor = bin_search(d, word); /* on a hit, points at the offset field */
    if (cursor != 0)
    {
        /* Each call consumes one field and moves cursor to the next one. */
        def_offset = my_get_int(&cursor);
        def_len = my_get_int(&cursor);
        fill_def(d, def_offset, def_len, def);
    }
    else
    {
        def[0] = 0;
        /*fprintf(stderr, "Cannot find %s\n", word);*/
    }
}