/* Improved hash on zprocess, and added parsing for "jo". */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include "dictre.h"
/* Start of the memory mapping of the index file (set by init_dictionary). */
static unsigned char *index;
/* Descriptor of the index file that backs the mapping. */
static int indexfd;
/* Size of the index file in bytes, as returned by get_filesize(). */
static int indexsize;
/* Stream over the definitions file; fill_def() seeks and reads from it. */
static FILE *defs;
/* Both files are opened by these relative names. */
static const char indexname[] = "akcentiga.index";
static const char dictname[] = "akcentiga.dict";
/*
 * Return the size in bytes of the file named fname.
 *
 * Every failure is fatal: a diagnostic is written to stderr and the process
 * exits with status -1.  This happens when stat() fails and when the size
 * does not fit in the int return type.
 */
int get_filesize(const char *fname)
{
    struct stat st;

    if (stat(fname, &st) == -1)
    {
        /* fprintf() may overwrite errno; keep stat()'s value for perror(). */
        int saved_errno = errno;

        fprintf(stderr, "Problem stating the file %s\n", fname);
        errno = saved_errno;
        perror("Error:");
        exit(-1);
    }
    /* st_size is an off_t; refuse to truncate it silently. */
    if (st.st_size > INT_MAX)
    {
        fprintf(stderr, "The file %s is too large\n", fname);
        exit(-1);
    }
    return (int) st.st_size;
}
/*
 * Report a fatal problem with a dictionary file and terminate.
 *
 * Prints "Problem <action> the file <fname>" followed by the perror() text
 * for the errno value that was current on entry, then exits with status -1.
 */
static void fail_with_errno(const char *action, const char *fname)
{
    /* fprintf() may overwrite errno; keep the caller's value for perror(). */
    int saved_errno = errno;

    fprintf(stderr, "Problem %s the file %s\n", action, fname);
    errno = saved_errno;
    perror("Error:");
    exit(-1);
}

/*
 * Open the dictionary: map the index file read-only into memory and open
 * the definitions file for reading.
 *
 * Any failure (stat, open, mmap, fopen) is fatal: a diagnostic goes to
 * stderr and the process exits with status -1.
 */
void init_dictionary()
{
    indexsize = get_filesize(indexname);

    indexfd = open(indexname, O_RDONLY);
    if (indexfd == -1)
        fail_with_errno("opening", indexname);

    index = (unsigned char *) mmap(0, indexsize, PROT_READ, MAP_SHARED,
                                   indexfd, 0);
    /* mmap() signals failure with MAP_FAILED, not with a null pointer. */
    if (index == (unsigned char *) MAP_FAILED)
        fail_with_errno("mapping", indexname);

    defs = fopen(dictname, "r");
    if (defs == NULL)
        fail_with_errno("opening", dictname);
}
/*
 * Release what init_dictionary() acquired: the index mapping, the index
 * descriptor and the definitions stream.
 *
 * Each resource is released only when it is held, and its handle is cleared
 * afterwards.  Calling this twice, or without a prior init_dictionary(), is
 * therefore a no-op instead of closing descriptor 0 or handing a null
 * stream to fclose().
 */
void end_dictionary()
{
    /*
     * index doubles as the "initialised" marker: file-scope statics start
     * out zeroed, so indexfd alone cannot tell "never opened" from
     * descriptor 0.
     */
    if (index != NULL)
    {
        munmap(index, indexsize);
        close(indexfd);
        index = NULL;
        indexsize = 0;
        indexfd = -1;
    }
    if (defs != NULL)
    {
        fclose(defs);
        defs = NULL;
    }
}
/*
 * Copy length bytes starting at byte offset of the definitions file into
 * def.
 *
 * def must have room for at least length bytes (and at least one byte).
 * When nothing can be read (negative offset, non-positive length, failed
 * seek) def becomes the empty string.  When fewer than length bytes are
 * read, the data is NUL-terminated right after the last byte obtained.
 *
 * NOTE(review): after a complete read no terminator is appended, exactly as
 * before, because the capacity of def is not known here -- confirm that
 * callers do not rely on one.
 */
static void fill_def(int offset, int length, char * def)
{
    size_t wanted;
    size_t got;

    /* A negative length would turn into a huge size_t in fread(). */
    if (offset < 0 || length <= 0 || fseek(defs, offset, SEEK_SET) != 0)
    {
        def[0] = 0;
        return;
    }

    wanted = (size_t) length;
    got = fread(def, 1, wanted, defs);
    if (got < wanted)
        def[got] = 0; /* still inside the length bytes the caller provided */
}
/*
 * Tell whether ptr lies at or beyond the end of the mapped index.
 * Returns 1 when it does, 0 when it still points inside the mapping.
 */
static int pointer_at_end(unsigned char *ptr)
{
    unsigned char *limit = index + indexsize;

    return (ptr >= limit) ? 1 : 0;
}
/*
 * Walk forward from 'from' to the first newline or NUL byte.
 *
 * Returns a pointer to that byte, or 0 when the end of the mapped index is
 * reached first (including when 'from' is already at the end).
 */
static char * skip_until_newline(char *from)
{
    char *cursor;

    for (cursor = from; !pointer_at_end((unsigned char *) cursor); ++cursor)
    {
        if (*cursor == '\n' || *cursor == 0)
            return cursor;
    }
    return 0;
}
/*
 * Compare the NUL-terminated word with the key at test, where the key is
 * terminated by a tab.
 *
 * Returns  0 when word ends exactly where the key's tab is,
 *         -1 when word ends first, or differs with the smaller byte,
 *          1 when the key's tab comes first, or word differs with the
 *            larger byte.
 * Bytes are compared as unsigned values.
 */
static int compare(const unsigned char *word, const unsigned char *test)
{
    const unsigned char *w = word;
    const unsigned char *t = test;

    /* Advance over the common prefix, stopping at either terminator. */
    while (*w != 0 && *t != 0 && *w == *t)
    {
        ++w;
        ++t;
    }

    if (*w == 0)
        return (*t == '\t') ? 0 : -1;
    if (*t == '\t')
        return 1;
    return (*w > *t) ? 1 : -1;
}
/*
 * Binary-search the mapped index for the line whose key equals word.
 *
 * Index lines are taken to look like "key<TAB>...<NEWLINE>" and must be
 * sorted in the order compare() defines; the search relies on that.
 *
 * Returns a pointer to the byte just after the key's tab, or 0 when no
 * line matches.
 *
 * The search keeps a half-open byte range [lo, hi) in which lo is always
 * the first byte of a line.  Each round takes the line containing the
 * midpoint, so every line, the first one included, can be reached, and no
 * byte at or past the end of the mapping is handed to compare().
 */
static char * bin_search(const char *word)
{
    int lo = 0;
    int hi = indexsize;

    while (lo < hi)
    {
        int mid = lo + (hi - lo) / 2;
        int start = mid;
        int comparison;
        char *line;

        /* Back up to the first byte of the line that contains mid. */
        while (start > lo && index[start - 1] != '\n')
            --start;

        line = (char *) index + start;
        comparison = compare((const unsigned char *) word,
                             (const unsigned char *) line);
        if (comparison == 0)
        {
            int len = 0;

            /*
             * Counted by hand so that this function does not need a
             * strlen() declaration, which none of the headers included
             * directly by this file provides.
             */
            while (word[len] != 0)
                ++len;
            return line + len + 1; /* skip the key and its tab */
        }

        if (comparison < 0)
        {
            /* word sorts before this line: keep only the lines before it. */
            hi = start;
        }
        else
        {
            /* word sorts after this line: continue behind its newline. */
            char *eol = skip_until_newline(line);

            if (eol == 0)
                return 0; /* this was the last line and it is unterminated */
            lo = (int) (eol - (char *) index) + 1;
        }
    }
    return 0;
}
/*
 * Read the integer field that starts at *pos and ends at the next tab or
 * newline, then move *pos to the byte after that delimiter.
 *
 * The digits are converted by str2int_len(), which is defined outside the
 * visible part of this file; presumably it parses the first 'width'
 * characters as an int -- TODO confirm.
 *
 * NOTE(review): the scan for the delimiter is not bounded by the end of
 * the mapped index.
 */
static int my_get_int(char **pos)
{
    char *field = *pos;
    int width = 0;
    int value;

    while (field[width] != '\t' && field[width] != '\n')
        ++width;

    value = str2int_len(field, width);
    *pos = field + width + 1; /* step over the field and its delimiter */
    return value;
}
/*
 * Look word up in the index and load its definition into def.
 *
 * When the word is not in the index, def is set to the empty string.
 * Otherwise the two integer fields following the key (byte offset and
 * length within the definitions file, in that order) are parsed and that
 * region is read into def by fill_def().
 */
void find_def(const char *word, char * def)
{
    char *cursor = bin_search(word); /* points at the offset field on a hit */
    int where, size;

    if (cursor != 0)
    {
        where = my_get_int(&cursor); /* advances cursor to the length field */
        size = my_get_int(&cursor);
        fill_def(where, size, def);
        return;
    }

    /*fprintf(stderr, "Cannot find %s\n", word);*/
    def[0] = 0;
}