author | viric@llimona |
Sat, 08 Sep 2007 09:12:50 +0200 | |
changeset 28 | 75b6d5659a19 |
parent 11 | 68ea18fe402c |
permissions | -rw-r--r-- |
11
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
1 |
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>

/* NOTE(review): <string.h> is not included here because on glibc it can
 * declare index(), which would clash with the file-scope `index` variable.
 * Confirm that dictre.h provides the string prototypes this file needs. */
#include "dictre.h"
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
4 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
5 |
/* Input streams shared by open_files(), main() and close_files(). */
static FILE *index, *dict;
/* Non-zero once open_files() has created /tmp/tmp.dict, so that
 * close_files() knows it has to delete it. */
static int remove_tmp_file = 0;
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
7 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
8 |
/*
 * Prints one word/definition pair to stdout as "'<w>': '<defstr>'".
 *
 * w:      the word, a NUL-terminated string
 * defstr: its definition, a NUL-terminated string
 *
 * Returns 0. The function used to rely on implicit int and never returned
 * a value; the int return type is kept so existing callers still compile.
 */
static int new_word(const char *w, const char *defstr)
{
    printf("'%s': '%s'\n", w, defstr);
    return 0;
}
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
12 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
13 |
/*
 * Walks the index stream entry by entry and passes each word together with
 * its definition to zprocess_def().
 *
 * index: stream handed to get_word() and get_int() to read, in this order,
 *        the word, the definition offset and the definition length
 * fdefs: stream handed to get_def() together with that offset and length
 *
 * The loop ends when get_word() returns NULL. Entries whose word starts
 * with "00database" are read but not passed to zprocess_def().
 * A progress line is written to stderr every 1000 entries; the counters are
 * static, so they keep accumulating across calls.
 *
 * NOTE(review): word and defstr are not released here - confirm whether
 * get_word()/get_def() return memory the caller is expected to free.
 */
void zload_words(FILE *index, FILE *fdefs)
{
    static const char skipped_prefix[] = "00database";
    static int dispnwords = 0;
    static int nwords = 0;
    /* Never updated in this function, so the progress line always shows 0. */
    const int def_avoided = 0;
    char *word;

    while ((word = get_word(index)) != NULL) {
        int offset = get_int(index);
        int length = get_int(index);
        char *defstr = get_def(fdefs, offset, length);

        /* sizeof - 1 gives the prefix length without a strlen() call. */
        if (strncmp(word, skipped_prefix, sizeof(skipped_prefix) - 1) != 0) {
            zprocess_def(word, defstr);
        }

        /* Progress report */
        dispnwords++;
        nwords++;
        if (dispnwords >= 1000) {
            dispnwords = 0;
            fprintf(stderr,
                    "Loaded: %i Repeated definitions avoided: %i\n", nwords,
                    def_avoided);
        }
    }
}
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
53 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
54 |
static void close_files() |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
55 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
56 |
fclose(index); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
57 |
fclose(dict); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
58 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
59 |
if (remove_tmp_file) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
60 |
unlink("/tmp/tmp.dict"); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
61 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
62 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
63 |
static void open_files(int argn, char **argv) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
64 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
65 |
char tmpname[500]; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
66 |
if (argn < 2) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
67 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
68 |
fprintf(stderr, "usage: %s <dict_basename>\n", argv[0]); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
69 |
exit(1); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
70 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
71 |
strcpy(tmpname, argv[1]); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
72 |
strcat(tmpname, ".index"); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
73 |
index = fopen(tmpname, "r"); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
74 |
if(index == NULL) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
75 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
76 |
fprintf(stderr, "File: %s ", tmpname); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
77 |
perror("- cannot open file."); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
78 |
exit(-1); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
79 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
80 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
81 |
strcpy(tmpname, argv[1]); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
82 |
strcat(tmpname, ".dict"); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
83 |
dict = fopen(tmpname, "r"); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
84 |
if(dict == NULL) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
85 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
86 |
struct stat st; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
87 |
int res; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
88 |
char tmp[500]; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
89 |
strcat(tmpname, ".dz"); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
90 |
res = stat(tmpname, &st); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
91 |
if (res == -1) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
92 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
93 |
fprintf(stderr, "File: %s ", tmpname); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
94 |
perror("- cannot open file."); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
95 |
exit(-1); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
96 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
97 |
sprintf(tmp, "gzip -cd %s > /tmp/tmp.dict", |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
98 |
tmpname); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
99 |
printf("Gunzipping...\n"); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
100 |
res = system(tmp); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
101 |
dict = fopen("/tmp/tmp.dict", "r"); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
102 |
if(dict == NULL || res != 0) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
103 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
104 |
fprintf(stderr, "Error gunzipping file: %s ", tmpname); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
105 |
perror("- something happened to /tmp/tmp.dict."); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
106 |
exit(-1); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
107 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
108 |
remove_tmp_file = 1; |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
109 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
110 |
} |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
111 |
|
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
112 |
int main(int argn, char **argv) |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
113 |
{ |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
114 |
open_files(argn, argv); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
115 |
init_wordlist(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
116 |
zload_words(index, dict); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
117 |
dump_wordlist(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
118 |
close_files(); |
68ea18fe402c
Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff
changeset
|
119 |
} |