viric@14: #include viric@14: #include viric@14: #include viric@14: #include viric@14: #include viric@14: #include "dictre.h" viric@14: viric@17: const static char indexext[] = ".index"; viric@17: const static char dictext[] = ".dict"; viric@14: viric@14: int get_filesize(const char *fname) viric@14: { viric@14: struct stat st; viric@14: int res; viric@14: res = stat(fname, &st); viric@14: if (res == -1) viric@14: { viric@14: fprintf(stderr, "Problem stating the file %s\n", fname); viric@14: perror("Error:"); viric@14: exit(-1); viric@14: } viric@14: viric@14: return st.st_size; viric@14: } viric@14: viric@17: void init_dictionary(struct Dict *d, const char *base) viric@14: { viric@17: char *filename; viric@17: viric@17: filename = (char *) malloc(strlen(base) + 10); viric@17: viric@17: /* Prepare .index filename and open it*/ viric@17: strcpy(filename, base); viric@17: strcat(filename, indexext); viric@17: viric@17: d->indexsize = get_filesize(filename); viric@17: d->indexfd = open(filename, O_RDONLY); viric@17: if (d->indexfd == -1) viric@14: { viric@17: fprintf(stderr, "Problem opening the file %s\n", filename); viric@14: perror("Error:"); viric@14: exit(-1); viric@14: } viric@17: d->index = (unsigned char *) mmap(0, d->indexsize, PROT_READ, MAP_SHARED, viric@17: d->indexfd, 0); viric@14: viric@17: /* Prepare .dict filename and open it*/ viric@17: strcpy(filename, base); viric@17: strcat(filename, dictext); viric@17: d->defs = fopen(filename, "r"); viric@17: if (d->defs == 0) viric@14: { viric@17: fprintf(stderr, "Problem opening the file %s\n", filename); viric@14: perror("Error:"); viric@14: exit(-1); viric@14: } viric@14: viric@21: d->trim_first_line = 0; viric@21: d->trim_last_newlines = 0; viric@21: viric@17: free(filename); viric@14: } viric@14: viric@17: void end_dictionary(struct Dict *d) viric@14: { viric@17: munmap(d->index, d->indexsize); viric@17: close(d->indexfd); viric@17: fclose(d->defs); viric@14: } viric@14: viric@21: static int trim_first_line(char *def, int len) viric@21: { viric@21: int new_line_pos; viric@21: int i,j; viric@21: for(i=0; i < len; ++i) viric@21: { viric@24: /* prepare ltrimming when finding the first newline character */ viric@21: if (def[i] == '\n') viric@21: { viric@24: /* Remove spaces after the first newline */ viric@24: for(i+=1; def[i] == ' '; ++i); viric@24: viric@21: /* Break */ viric@24: memmove(def, def + i, viric@21: len - i - 1); viric@21: def[len-i-1] = 0; viric@24: return len-i-1+1/*\0*/; viric@21: } viric@21: } viric@21: return len; viric@21: } viric@21: viric@21: static int trim_last_newlines(char *def, int len) viric@21: { viric@21: int new_line_pos; viric@21: int i,j; viric@22: if (len < 2) viric@22: return len; viric@22: viric@22: for(i=len-2; i >= 0; --i) viric@21: { viric@21: if (def[i] != '\n' && def[i] != '\r') viric@21: { viric@21: def[i+1] = '\0'; viric@21: return i + 1; viric@21: } viric@21: } viric@21: return len; viric@21: } viric@21: viric@17: static void fill_def(struct Dict *d, int offset, int length, char * def) viric@14: { viric@17: fseek(d->defs, offset, SEEK_SET); viric@17: fread(def, 1, length, d->defs); viric@21: def[length] = 0; viric@21: if (d->trim_first_line) viric@21: { viric@21: length = trim_first_line(def, length + 1/*\0*/) - 1 /*\0*/; viric@21: } viric@21: if (d->trim_last_newlines) viric@21: { viric@21: length = trim_last_newlines(def, length+1) - 1; /* math as above*/ viric@21: } viric@17: } viric@17: viric@21: static int pointer_at_end(struct Dict *d, const unsigned char *ptr) viric@17: { viric@17: if (ptr >= (d->index + d->indexsize)) viric@14: return 1; viric@14: return 0; viric@14: } viric@14: viric@21: static const char * skip_until_newline(struct Dict *d, const char *from) viric@14: { viric@17: if (pointer_at_end(d, from)) viric@14: return 0; viric@14: while(*from != '\n' && *from != 0) viric@14: { viric@14: ++from; viric@17: if(pointer_at_end(d, from)) viric@14: return 0; viric@14: } viric@14: return from; viric@14: } viric@14: viric@14: static int compare(const unsigned char *word, const unsigned char *test) viric@14: { viric@14: int i; viric@14: viric@14: /*printf("Comparing %s to %.20s\n", word, test);*/ viric@14: for(i=0; word[i] != 0 && test[i] != 0; ++i) viric@14: { viric@14: if (word[i] != test[i]) viric@14: { viric@14: break; viric@14: } viric@14: } viric@14: if (word[i] == 0 && test[i] == '\t') viric@14: return 0; viric@14: else if (word[i] == 0) viric@14: return -1; viric@14: else if (test[i] == '\t') viric@14: return 1; viric@14: else if (word[i] > test[i]) viric@14: return 1; viric@14: else if (word[i] < test[i]) viric@14: return -1; viric@14: viric@14: /* It should never reach this. */ viric@14: return -1; viric@14: } viric@14: viric@21: static const char * search_next(struct Dict *d, const char *word, const char *from) viric@21: { viric@21: const char *ret; viric@24: ret = skip_until_newline(d, from) + 1; viric@24: if (ret == (char *) 1) /* pointer at end */ viric@24: return 0; viric@24: if (compare(word, ret) == 0) viric@21: return ret; viric@21: return 0; viric@21: } viric@21: viric@21: static const char * bin_search(struct Dict *d, const char *word) viric@14: { viric@14: int step, pivot; viric@21: const char *ret; viric@21: const char *test; viric@21: int comparision; viric@21: int found_once = 0; viric@14: viric@17: pivot = d->indexsize / 2; viric@17: step = d->indexsize / 2; viric@14: viric@14: do viric@14: { viric@17: test = d->index + pivot; viric@17: test = skip_until_newline(d, test); viric@14: if (test == 0) viric@14: return 0; viric@14: test += 1; /* skip exactly the new line */ viric@14: viric@14: comparision = compare(word, test); viric@21: if (comparision <= 0) viric@14: { viric@21: if (comparision == 0) viric@21: found_once = 1; viric@21: /* If == 0, we don't know that it's the FIRST viric@21: * match possible in the dictionary. viric@21: * We want all possible matches. */ viric@14: step = step / 2; viric@14: pivot = pivot - step; viric@14: } else if (comparision > 0) viric@14: { viric@14: step = step / 2; viric@14: pivot = pivot + step; viric@14: } viric@14: } while(step > 0); viric@21: viric@21: if (!found_once) viric@21: return 0; viric@21: viric@21: if (comparision == 0) /* last comparision */ viric@21: { viric@21: ret = skip_until_newline(d, d->index + pivot) + 1; viric@21: } else viric@21: { viric@21: ret = skip_until_newline(d, test) + 1; viric@21: } viric@21: return ret; viric@14: } viric@14: viric@21: static int my_get_int(const char **pos) viric@14: { viric@14: int i; viric@21: const char *start; viric@14: int val; viric@14: viric@14: start = *pos; viric@14: for(i=0; start[i] != '\t' && start[i] != '\n'; ++i) viric@14: ; viric@14: val = str2int_len(start, i); viric@14: *pos += i + 1; viric@14: return val; viric@14: } viric@14: viric@17: void find_def(struct Dict *d, const char *word, char * def) viric@14: { viric@14: int offset, len; viric@21: const char *found, *pos; viric@21: int wordlen; viric@14: viric@21: def[0] = 0; viric@21: /* we will get a pointer to the offset for the ints*/ viric@21: found = bin_search(d, word); viric@21: if (found == 0) viric@21: return; viric@21: wordlen = strlen(word); viric@21: do viric@14: { viric@21: found += wordlen+1; viric@21: pos = found; viric@21: offset = my_get_int(&pos); /* increments pos */ viric@21: len = my_get_int(&pos); /* increments pos */ viric@21: fill_def(d, offset, len, def); viric@21: found = search_next(d, word, found); viric@21: if (!found) viric@21: break; viric@21: strcat(def, ", "); viric@21: def += strlen(def); viric@21: } while(1); viric@14: }