/* LEGUMIN (README) and other updates. */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include "dictre.h"
/* Suffix appended to the dictionary base name to locate the index file. */
static const char indexext[] = ".index";
/* Suffix appended to the dictionary base name to locate the definitions file. */
static const char dictext[] = ".dict";
/*
 * Return the size in bytes of the file named fname.
 *
 * On failure this function does not return: it prints a diagnostic to
 * stderr and terminates the process with exit(-1).  Failure means either
 * that stat() failed or that the file size cannot be represented in the
 * int return type.
 */
int get_filesize(const char *fname)
{
    struct stat st;

    if (stat(fname, &st) == -1)
    {
        /* fprintf() may modify errno, so keep the stat() error for perror() */
        int saved_errno = errno;

        fprintf(stderr, "Problem stating the file %s\n", fname);
        errno = saved_errno;
        perror("Error:");
        exit(-1);
    }

    /* st_size is an off_t; refuse sizes that would be truncated by the
     * conversion to int instead of silently returning a wrong value. */
    if (st.st_size > INT_MAX)
    {
        fprintf(stderr, "Problem stating the file %s\n", fname);
        fprintf(stderr, "Error: file size does not fit in an int\n");
        exit(-1);
    }

    return (int) st.st_size;
}
/*
 * Report a failed operation on a file and terminate the process.
 * The errno value of the failed call is preserved across the fprintf()
 * so that perror() describes the real cause.
 */
static void die_on_file_error(const char *action, const char *fname)
{
    int saved_errno = errno;

    fprintf(stderr, "Problem %s the file %s\n", action, fname);
    errno = saved_errno;
    perror("Error:");
    exit(-1);
}

/*
 * Open the dictionary whose files are "<base>.index" and "<base>.dict".
 *
 * The index file is opened read-only and memory-mapped into d->index
 * (d->indexsize bytes, descriptor kept in d->indexfd); the definitions
 * file is opened as a stdio stream in d->defs.  Both trimming options
 * are initialised to 0 (disabled).
 *
 * Any failure (allocation, open, mmap, fopen) prints a diagnostic to
 * stderr and terminates the process with exit(-1).
 */
void init_dictionary(struct Dict *d, const char *base)
{
    char *filename;
    size_t extsize;
    size_t namesize;
    void *map;

    /* Room for base + the longer of the two extensions (sizeof counts NUL) */
    extsize = sizeof indexext > sizeof dictext ? sizeof indexext : sizeof dictext;
    namesize = strlen(base) + extsize;
    filename = malloc(namesize);
    if (filename == NULL)
    {
        die_on_file_error("allocating the name of", base);
    }

    /* Prepare .index filename and open it */
    snprintf(filename, namesize, "%s%s", base, indexext);
    d->indexsize = get_filesize(filename);
    d->indexfd = open(filename, O_RDONLY);
    if (d->indexfd == -1)
    {
        die_on_file_error("opening", filename);
    }

    /* mmap() signals failure with MAP_FAILED, not NULL (an empty index
     * file, for instance, cannot be mapped). */
    map = mmap(0, d->indexsize, PROT_READ, MAP_SHARED, d->indexfd, 0);
    if (map == MAP_FAILED)
    {
        die_on_file_error("mapping", filename);
    }
    d->index = (unsigned char *) map;

    /* Prepare .dict filename and open it */
    snprintf(filename, namesize, "%s%s", base, dictext);
    d->defs = fopen(filename, "r");
    if (d->defs == 0)
    {
        die_on_file_error("opening", filename);
    }

    d->trim_first_line = 0;
    d->trim_last_newlines = 0;

    free(filename);
}
/*
 * Release the resources held by a dictionary: unmap the index, close the
 * index file descriptor and close the definitions stream.  Return values
 * of the release calls are not inspected.
 */
void end_dictionary(struct Dict *d)
{
    /* index mapping and the descriptor it was created from */
    munmap(d->index, d->indexsize);
    close(d->indexfd);

    /* definitions stream */
    fclose(d->defs);
}
/*
 * Drop the first line of def in place, together with any spaces that
 * directly follow its newline.
 *
 * def: buffer holding a NUL-terminated string.
 * len: number of bytes in def counting the terminating NUL.
 *
 * Returns the new byte count, again counting the terminating NUL.  When
 * no newline is found within the first len bytes the buffer is left
 * untouched and len is returned.
 */
static int trim_first_line(char *def, int len)
{
    int newline_at;
    int body_start;
    int body_len;

    /* Find the first newline inside the buffer */
    newline_at = 0;
    while (newline_at < len && def[newline_at] != '\n')
        ++newline_at;
    if (newline_at >= len)
        return len;

    /* Skip the newline and the spaces after it; the scan stops at the
     * terminating NUL at the latest because NUL is not a space. */
    body_start = newline_at + 1;
    while (def[body_start] == ' ')
        ++body_start;

    /* Shift the remaining characters (without the NUL) to the front and
     * terminate the shortened string. */
    body_len = len - body_start - 1;
    memmove(def, def + body_start, body_len);
    def[body_len] = 0;

    return body_len + 1 /* \0 */;
}
/*
 * Strip trailing '\n' and '\r' characters from def in place.
 *
 * def: buffer holding a NUL-terminated string.
 * len: number of bytes in def counting the terminating NUL.
 *
 * Returns the new byte count, counting the terminating NUL (the same
 * convention as trim_first_line).  A string consisting only of newline
 * characters becomes the empty string and 1 is returned.  When len < 2
 * there is nothing to trim and len is returned unchanged.
 */
static int trim_last_newlines(char *def, int len)
{
    int last;

    if (len < 2)
        return len;

    /* Walk back from the last character before the NUL over every
     * trailing line terminator; last ends at -1 if nothing else is left. */
    last = len - 2;
    while (last >= 0 && (def[last] == '\n' || def[last] == '\r'))
        --last;

    def[last + 1] = '\0';

    /* last + 1 characters remain, plus the terminating NUL */
    return last + 2;
}
/*
 * Read one definition from the definitions stream into def.
 *
 * d:      dictionary whose d->defs stream is read.
 * offset: byte offset of the definition inside the stream.
 * length: number of bytes to read.
 * def:    destination buffer; the caller must provide at least
 *         length + 1 bytes.  It is always left NUL-terminated.
 *
 * If the offset or length is negative, or the seek fails, def is set to
 * the empty string.  If fewer than length bytes can be read, def holds
 * only the bytes actually read.  The optional trimming steps selected in
 * d->trim_first_line and d->trim_last_newlines are then applied.
 */
static void fill_def(struct Dict *d, int offset, int length, char * def)
{
    size_t got;

    def[0] = 0;

    /* A negative length would turn into a huge size_t in fread() */
    if (offset < 0 || length < 0)
        return;

    if (fseek(d->defs, offset, SEEK_SET) != 0)
        return;

    /* Terminate after what was really read, not after what was asked for,
     * so a short read never leaves uninitialised bytes inside the string. */
    got = fread(def, 1, (size_t) length, d->defs);
    def[got] = 0;
    length = (int) got;

    if (d->trim_first_line)
    {
        length = trim_first_line(def, length + 1/*\0*/) - 1 /*\0*/;
    }

    if (d->trim_last_newlines)
    {
        /* resulting length is not needed any more */
        trim_last_newlines(def, length + 1/*\0*/);
    }
}
/*
 * Tell whether ptr lies at or beyond the end of the index data.
 * Returns 1 when ptr >= d->index + d->indexsize, 0 otherwise.
 */
static int pointer_at_end(struct Dict *d, const unsigned char *ptr)
{
    return (ptr >= (d->index + d->indexsize)) ? 1 : 0;
}
/*
 * Advance from `from` to the next '\n' or NUL byte inside the index.
 *
 * Returns a pointer to that byte (which may be `from` itself), or 0 when
 * the end of the index is reached before such a byte is seen.
 */
static const char * skip_until_newline(struct Dict *d, const char *from)
{
    const char *cursor;

    /* The bounds check always precedes the read of *cursor */
    for (cursor = from;
         !pointer_at_end(d, (const unsigned char *) cursor);
         ++cursor)
    {
        if (*cursor == '\n' || *cursor == 0)
            return cursor;
    }

    return 0;
}
/*
 * Compare a NUL-terminated search word against an index entry whose
 * headword is terminated by a tab.
 *
 * Returns  0 when word equals the headword of test (word ends exactly
 *            where test has its tab),
 *         -1 when word is a proper prefix of what test holds, or the
 *            first differing byte of word is smaller,
 *          1 when test's headword ends first, or the first differing
 *            byte of word is larger.
 */
static int compare(const unsigned char *word, const unsigned char *test)
{
    int i = 0;

    /* Skip the common prefix, stopping at the end of either string */
    while (word[i] != 0 && test[i] != 0 && word[i] == test[i])
        ++i;

    /* word exhausted: exact match only if the headword ends here too */
    if (word[i] == 0)
        return (test[i] == '\t') ? 0 : -1;

    /* headword exhausted first: word is longer, hence greater */
    if (test[i] == '\t')
        return 1;

    /* Plain byte comparison at the first difference */
    return (word[i] > test[i]) ? 1 : -1;
}
/*
 * Check whether the index line following the one containing `from`
 * is another entry for `word`.
 *
 * Returns a pointer to the start of that next line when its headword
 * equals word, and 0 otherwise (no further line, or a different
 * headword).
 */
static const char * search_next(struct Dict *d, const char *word, const char *from)
{
    const char *newline;
    const char *next;

    /* Test the result before doing arithmetic on it: adding 1 to a null
     * pointer is undefined behaviour. */
    newline = skip_until_newline(d, from);
    if (newline == 0)
        return 0;

    /* The newline may be the very last byte of the index, in which case
     * there is no next entry to look at. */
    next = newline + 1;
    if (pointer_at_end(d, (const unsigned char *) next))
        return 0;

    if (compare((const unsigned char *) word, (const unsigned char *) next) == 0)
        return next;

    return 0;
}
/*
 * Locate an entry for `word` in the memory-mapped index.
 *
 * The index is probed at byte positions chosen by repeatedly halving a
 * step; each probe is moved forward to the start of the following line,
 * which is then compared with compare().  On a match the search keeps
 * moving towards the beginning, because the match found need not be the
 * first one.
 *
 * NOTE(review): this presumably relies on the index lines being sorted
 * in an order consistent with compare() - confirm against the index
 * producer.  Because a probe always skips to the line after the pivot,
 * the very first line of the index is never examined.
 *
 * Returns a pointer to the start of a line whose headword equals word,
 * or 0 when no such line was found.
 */
static const char * bin_search(struct Dict *d, const char *word)
{
    int step, pivot;
    const char *test;
    int comparision;
    int found_once = 0;

    pivot = d->indexsize / 2;
    step = d->indexsize / 2;

    do
    {
        test = skip_until_newline(d, (const char *) d->index + pivot);
        if (test == 0)
            return 0;
        test += 1; /* skip exactly the new line */

        /* The newline was the last byte of the index: there is no entry
         * to compare and every later probe would end up here as well. */
        if (pointer_at_end(d, (const unsigned char *) test))
            return 0;

        comparision = compare((const unsigned char *) word,
                (const unsigned char *) test);

        step = step / 2;
        if (comparision <= 0)
        {
            /* If == 0, we don't know that it's the FIRST match possible
             * in the dictionary.  We want all possible matches. */
            if (comparision == 0)
                found_once = 1;
            pivot = pivot - step;
        } else
        {
            pivot = pivot + step;
        }
    } while(step > 0);

    if (!found_once)
        return 0;

    /* The last probe left pivot unchanged (its step was 0), so when it
     * matched, `test` already is the entry to return. */
    if (comparision == 0)
        return test;

    /* The last probe did not match although a match was seen further on:
     * walk forward line by line until the match shows up.  Every step is
     * bounds-checked and verified, so no unverified or out-of-range
     * pointer is ever returned. */
    for (;;)
    {
        test = skip_until_newline(d, test);
        if (test == 0)
            return 0;
        test += 1;
        if (pointer_at_end(d, (const unsigned char *) test))
            return 0;

        comparision = compare((const unsigned char *) word,
                (const unsigned char *) test);
        if (comparision == 0)
            return test;
        if (comparision < 0) /* already past where word would be */
            return 0;
    }
}
/*
 * Decode the field starting at *pos and move *pos past it.
 *
 * The field extends up to (not including) the next tab or newline; its
 * text is converted with str2int_len().  On return *pos points to the
 * byte just after that delimiter.  The scan is not bounds-checked: the
 * caller must guarantee that a tab or newline follows.
 */
static int my_get_int(const char **pos)
{
    const char *field = *pos;
    int width = 0;
    int value;

    /* Measure the field */
    while (field[width] != '\t' && field[width] != '\n')
        ++width;

    value = str2int_len(field, width);

    /* Step over the field and its delimiter */
    *pos += width + 1;

    return value;
}
/*
 * Look up `word` and write its definition(s) into def.
 *
 * def is set to the empty string when the word is not found.  When
 * consecutive index lines carry the same headword, their definitions are
 * written one after another separated by ", ".
 *
 * Each index entry is read as "<headword>\t<offset>\t<length>", the two
 * numbers being decoded by my_get_int().  No size is passed for def: the
 * caller must supply a buffer large enough for all definitions.
 */
void find_def(struct Dict *d, const char *word, char * def)
{
    const char *entry;
    const char *cursor;
    int offset, len;
    int headlen;

    def[0] = 0;

    entry = bin_search(d, word);
    if (entry == 0)
        return;

    headlen = strlen(word);

    for (;;)
    {
        /* Jump over the headword and its tab to reach the numbers */
        entry += headlen + 1;
        cursor = entry;
        offset = my_get_int(&cursor); /* advances cursor */
        len = my_get_int(&cursor);    /* advances cursor */

        fill_def(d, offset, len, def);

        /* Another entry for the same word directly below? */
        entry = search_next(d, word, entry);
        if (!entry)
            return;

        /* Append the separator and continue writing after it */
        strcat(def, ", ");
        def += strlen(def);
    }
}