find.c
changeset 14 a961bb8806b9
child 17 d95d9e7a2b81
equal deleted inserted replaced
13:f71e89074c62 14:a961bb8806b9
       
     /*
      * NOTE(review): <string.h> is deliberately not listed.  On glibc it can pull
      * in <strings.h>, whose index() declaration would clash with the file-scope
      * `index` variable.  strlen() is presumably declared through dictre.h --
      * confirm.
      */
     #include <errno.h>
     #include <fcntl.h>
     #include <limits.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/mman.h>
     #include <sys/stat.h>
     #include <sys/types.h>
     #include <unistd.h>

     #include "dictre.h"
       
     7 
       
     /* Start of the memory mapping of the index file; set by init_dictionary(). */
     static unsigned char *index;

     /* Descriptor of the index file that backs the mapping above. */
     static int indexfd;

     /* Size of the index file in bytes, which is also the length of the mapping. */
     static int indexsize;

     /* Stream opened on the definitions file; fill_def() reads from it. */
     static FILE *defs;

     /* Names of the two data files; they carry no directory component. */
     static const char indexname[] = "akcentiga.index";
     static const char dictname[] = "akcentiga.dict";
       
    14 
       
    /*
     * Return the size, in bytes, of the file called fname.
     *
     * The function never reports failure to its caller: if the file cannot be
     * stat()ed, or if its size does not fit in the int return type, a message is
     * written to stderr and the process terminates with exit(-1).
     */
    int get_filesize(const char *fname)
    {
        struct stat st;

        if (stat(fname, &st) == -1)
        {
            /* fprintf() is allowed to modify errno; keep it for perror(). */
            int saved_errno = errno;

            fprintf(stderr, "Problem stating the file %s\n", fname);
            errno = saved_errno;
            perror("Error:");
            exit(-1);
        }

        /*
         * st_size is an off_t and may be wider than int.  Refuse sizes that
         * would be truncated instead of returning a wrong (possibly negative)
         * number that callers would then use as a mapping length.
         */
        if (st.st_size < 0 || st.st_size > INT_MAX)
        {
            fprintf(stderr, "The file %s is too big to be handled\n", fname);
            exit(-1);
        }

        return (int) st.st_size;
    }
       
    29 
       
    30 void init_dictionary()
       
    31 {
       
    32     indexsize = get_filesize(indexname);
       
    33     indexfd = open(indexname, O_RDONLY);
       
    34     if (indexfd == -1)
       
    35     {
       
    36         fprintf(stderr, "Problem opening the file %s\n", indexname);
       
    37         perror("Error:");
       
    38         exit(-1);
       
    39     }
       
    40     index = (unsigned char *) mmap(0, indexsize, PROT_READ, MAP_SHARED,
       
    41             indexfd, 0);
       
    42 
       
    43     defs = fopen(dictname, "r");
       
    44     if (defs == 0)
       
    45     {
       
    46         fprintf(stderr, "Problem opening the file %s\n", dictname);
       
    47         perror("Error:");
       
    48         exit(-1);
       
    49     }
       
    50 }
       
    51 
       
    52 void end_dictionary()
       
    53 {
       
    54     munmap(index, indexsize);
       
    55     close(indexfd);
       
    56     fclose(defs);
       
    57 }
       
    58 
       
    59 static void fill_def(int offset, int length, char * def)
       
    60 {
       
    61     fseek(defs, offset, SEEK_SET);
       
    62     fread(def, 1, length, defs);
       
    63 }
       
    64 
       
    65 static int pointer_at_end(unsigned char *ptr)
       
    66 {
       
    67     if (ptr >= (index + indexsize))
       
    68         return 1;
       
    69     return 0;
       
    70 }
       
    71 
       
    /*
     * Scan forward from `from` for the end of the current index line.
     *
     * Returns a pointer to the first '\n' or 0 byte found at or after `from`,
     * or a null pointer when the end of the index (see pointer_at_end()) is
     * reached before either of them shows up.
     */
    static char * skip_until_newline(char *from)
    {
        char *cursor;

        for (cursor = from; !pointer_at_end((unsigned char *) cursor); ++cursor)
        {
            if (*cursor == '\n' || *cursor == 0)
                return cursor;
        }
        return 0;
    }
       
    84 
       
    /*
     * Compare a NUL-terminated search word with the key of an index entry.
     *
     * word  the word being looked up
     * test  start of an index entry; its key is the text before the first '\t'
     *
     * Returns  0 when word is exactly the key,
     *         -1 when word sorts before the entry (this includes word being a
     *            proper prefix of the key),
     *          1 when word sorts after the entry (this includes the key being
     *            a proper prefix of word).
     * Bytes are compared as unsigned char values.
     */
    static int compare(const unsigned char *word, const unsigned char *test)
    {
        /* Walk both strings together while they agree and neither has ended. */
        while (*word != 0 && *test != 0 && *word == *test)
        {
            ++word;
            ++test;
        }

        /* The word is used up: a full match only if the key ends here too. */
        if (*word == 0)
            return (*test == '\t') ? 0 : -1;

        /* The key is used up but the word goes on, so the word is larger. */
        if (*test == '\t')
            return 1;

        /* Plain mismatch: the first differing byte decides. */
        return (*word > *test) ? 1 : -1;
    }
       
   111 
       
   112 static char * bin_search(const char *word)
       
   113 {
       
   114     int step, pivot;
       
   115 
       
   116     pivot = indexsize / 2;
       
   117     step = indexsize / 2;
       
   118 
       
   119     do
       
   120     {
       
   121         char *test;
       
   122         int comparision;
       
   123         test = index + pivot;
       
   124         test = skip_until_newline(test);
       
   125         if (test == 0)
       
   126             return 0;
       
   127         test += 1; /* skip exactly the new line */
       
   128 
       
   129         comparision = compare(word, test);
       
   130         if (comparision == 0)
       
   131         {
       
   132             return test + strlen(word) + 1; /* skip word and \n */
       
   133         } else if (comparision < 0)
       
   134         {
       
   135             step = step / 2;
       
   136             pivot = pivot - step;
       
   137         } else if (comparision > 0)
       
   138         {
       
   139             step = step / 2;
       
   140             pivot = pivot + step;
       
   141         }
       
   142     } while(step > 0);
       
   143     return 0;
       
   144 }
       
   145 
       
   /*
    * Read the field that starts at *pos and ends at the next '\t' or '\n'.
    *
    * The field text and its length are handed to str2int_len() (declared
    * outside this file; presumably it converts that many characters to an
    * int -- confirm), and its result is returned.  *pos is moved to the
    * character after the delimiter.  No end-of-buffer check is made while
    * looking for the delimiter.
    */
   static int my_get_int(char **pos)
   {
       char *field = *pos;
       int width = 0;
       int val;

       while (field[width] != '\t' && field[width] != '\n')
           ++width;

       val = str2int_len(field, width);
       *pos = field + width + 1; /* step over the delimiter as well */
       return val;
   }
       
   159 
       
   /*
    * Fetch the definition of `word` into the caller's buffer `def`.
    *
    * The word is located with bin_search(); the two integer fields that follow
    * its key in the index (offset, then length) are read with my_get_int() and
    * passed to fill_def(), which copies the definition text into def.
    * When the word is not in the index, def becomes the empty string.
    */
   void find_def(const char *word, char * def)
   {
       char *cursor = bin_search(word); /* on a hit: start of the offset field */
       int offset, len;

       if (cursor != 0)
       {
           /* Two separate statements: the fields must be read in this order. */
           offset = my_get_int(&cursor);
           len = my_get_int(&cursor);
           fill_def(offset, len, def);
       }
       else
       {
           def[0] = 0;
       }
   }
       
   175 }