Akcentigita teksto

viric@14: #include viric@17: #include viric@14: #include "dictre.h" viric@14: viric@17: static int is_http = 0; viric@24: static int give_html = 1; viric@17: static int content_length = -1; viric@17: static struct Dict dakcentiga; viric@24: static struct Dict dsignifoj; viric@24: viric@24: static void print_html_header() viric@24: { viric@24: printf( viric@24: "\n" viric@24: "\n" viric@24: " \n" viric@24: " Akcentigita teksto\n" viric@24: "\n" viric@24: "\n" viric@24: "\n" viric@24: "" viric@24: ); viric@24: viric@24: } viric@24: viric@24: static void print_html_footer() viric@24: { viric@24: printf("\n"); viric@24: } viric@24: viric@25: static void dump_signifo_with_p(const unsigned char *word, viric@25: const unsigned char *signifo) viric@24: { viric@24: int i; viric@24: char last; viric@24: viric@24: printf("

## Se moto %s:

\n

", word); viric@24: viric@24: last = 0; viric@24: i=0; viric@24: do viric@24: { viric@24: char nun; viric@24: nun = *signifo; viric@24: if (nun == '\0') viric@24: break; viric@24: if (nun == '\n' && last == '\n') viric@24: printf("

"); viric@24: putchar(nun); viric@24: last = nun; viric@24: ++signifo; viric@24: } while(1); viric@24: printf("

"); viric@24: } viric@24: viric@24: static void print_start_signifoj(int id) viric@24: { viric@28: printf("

\n", id, id); viric@24: } viric@24: viric@24: static void print_end_signifoj() viric@24: { viric@24: printf("

"); viric@24: } viric@24: viric@24: /* This expects the word to start exactly at *wordlist */ viric@24: static void print_signifoj(char *wordlist, int id) viric@24: { viric@24: char * next_space; viric@24: char def[MAXDEF]; viric@24: int start_shown = 0; viric@24: viric@24: do { viric@24: next_space = strchr(wordlist, ' '); viric@24: if (next_space == 0) viric@24: next_space = strchr(wordlist, '\n'); viric@24: if (next_space == 0) viric@24: next_space = strchr(wordlist, '\0'); viric@24: if (next_space != 0 && next_space != wordlist) viric@24: { viric@24: int spacepos; viric@24: spacepos = next_space - wordlist; viric@24: wordlist[spacepos] = 0; viric@24: viric@24: /* wordlist has the word to search */ viric@24: find_def(&dsignifoj, wordlist, def); viric@24: if (def[0]) viric@24: { viric@24: if (!start_shown) viric@24: { viric@24: print_start_signifoj(id); viric@24: start_shown = 1; viric@24: } viric@24: dump_signifo_with_p(wordlist, def); viric@24: } viric@24: } else viric@24: break; viric@24: wordlist = next_space + 1; viric@24: } while(1); viric@24: viric@24: if (start_shown) viric@24: print_end_signifoj(); viric@24: } viric@24: viric@24: static print_accented(const char *word, int id) viric@24: { viric@24: if (give_html) viric@24: { viric@28: printf("" viric@24: "%s",id,word); viric@24: } else viric@24: printf("%s", word); viric@24: } viric@17: viric@15: static void give_accent_to_word(const char *word) viric@14: { viric@14: char def[MAXDEF]; viric@15: char low[MAXWORD]; viric@15: char recased[MAXWORD]; viric@15: enum Case vcase[MAXWORD]; viric@24: static int id = 1; viric@14: viric@15: /* Get case */ viric@15: get_case(vcase, word); viric@15: viric@15: /* Get lowercase version */ viric@15: get_lowcase_str(low, word); viric@15: viric@15: /* Find the lowercase version */ viric@17: find_def(&dakcentiga, low, def); viric@14: if (def[0] != 0) /* found */ viric@14: { viric@14: /* Print the word UNTIL a space. viric@14: * the definition will have the form: viric@14: * ACCENTED_WORD NOMINATIVE1 NOMINATIVE2 ... \n */ viric@14: char *first_space; viric@15: char spacepos; viric@14: first_space = strchr(def, ' '); viric@14: if (first_space != 0) /* Space found */ viric@15: { viric@15: spacepos = first_space - def; viric@15: def[spacepos] = 0; /* Mark an end of string */ viric@15: reapply_case(recased, def, vcase); viric@24: print_accented(recased, id); viric@24: if (give_html) viric@24: print_signifoj(first_space + 1 /*' '*/, id); viric@24: ++id; viric@15: } viric@14: return; viric@14: } viric@24: else if (give_html) viric@24: { viric@24: char def[MAXDEF]; viric@24: /* OPTIMIZE: This, if find, will react in a second search at viric@24: * print_signifoj() */ viric@24: find_def(&dsignifoj, low, def); viric@24: if (def[0]) viric@24: { viric@24: print_accented(word, id); viric@24: print_signifoj(low, id); viric@24: ++id; viric@24: return; viric@24: } viric@24: } viric@14: viric@24: /* else ... */ viric@24: viric@24: /* if first_space == 0 or word not found _AND_ word not found in meanings viric@24: * when give_html...*/ viric@15: printf("%s", word); viric@14: } viric@14: viric@17: static int my_fgetc(FILE *f) viric@17: { viric@17: if (is_http) viric@17: return http_getc(f); viric@17: else viric@17: return fgetc(f); viric@17: } viric@17: viric@14: static void process_text(FILE *in, int pos, int length) viric@14: { viric@14: unsigned char tmp[MAXWORD]; viric@14: int wordpos = 0; viric@18: int skip_non_ascii = 0; viric@24: int last = 0; viric@18: viric@24: if (give_html) viric@24: printf("

"); viric@14: do viric@14: { viric@14: int c; viric@14: /* Check pos only if length >= 0 */ viric@14: if (length >= 0 && pos >= length) viric@14: break; viric@17: c = my_fgetc(in); viric@17: if (c == EOF || c == END_OF_URL) viric@14: break; viric@24: if (c == '\r') viric@24: continue; viric@24: viric@24: /* Process 'last' for eventual line break */ viric@24: if (give_html && c == '\n' && last == '\n') viric@24: printf("

\n

"); viric@24: last = c; viric@24: viric@18: if (skip_non_ascii || is_ASCII(c)) viric@14: { viric@14: if (wordpos != 0) viric@14: { viric@14: tmp[wordpos] = 0; viric@14: give_accent_to_word(tmp); viric@14: wordpos = 0; viric@14: } viric@14: putchar(c); viric@18: /* End of skip_non_ascii when we find an viric@18: * ascii string */ viric@18: if (skip_non_ascii && is_ASCII(c)) viric@18: skip_non_ascii = 0; viric@14: } viric@14: else /* non-ASCII - we consider it russian */ viric@14: { viric@14: tmp[wordpos++] = c; viric@18: if (wordpos >= MAXWORD) viric@18: { viric@18: /* Dump the word and the rest of non-ASCII, because viric@18: * we cannot fit it in 'tmp' */ viric@18: int i; viric@18: for(i=0; i < wordpos; ++i) viric@18: putchar(tmp[i]); viric@18: wordpos=0; viric@18: skip_non_ascii = 1; viric@18: } viric@14: } viric@14: viric@14: pos += 1; viric@14: } while(1); viric@17: viric@17: /* End word */ viric@17: if (wordpos != 0) viric@17: { viric@17: tmp[wordpos] = 0; viric@17: give_accent_to_word(tmp); viric@17: wordpos = 0; viric@17: } viric@24: if (give_html) viric@24: printf("

"); viric@17: } viric@17: viric@17: static print_http_header() viric@17: { viric@17: printf("Content-Type:text/html;charset=utf-8\r\n\r\n"); viric@17: } viric@17: viric@17: int eat_form_ok() viric@17: { viric@17: const char mask[] = "teksto="; viric@17: char tmp[sizeof(mask)]; viric@17: fread(tmp, 1, sizeof(mask)-1, stdin); viric@17: tmp[sizeof(mask)-1] = 0; viric@17: if (strcmp(mask, tmp) == 0) viric@17: return 1; viric@17: return 0; viric@14: } viric@14: viric@14: int main() viric@14: { viric@17: char *c; viric@17: viric@17: init_dictionary(&dakcentiga, "akcentiga"); viric@24: init_dictionary(&dsignifoj, "signifoj"); viric@17: viric@17: if (c = getenv("CONTENT_LENGTH")) viric@17: { viric@17: content_length = atoi(c); viric@17: is_http = 1; viric@17: } viric@17: if (is_http) viric@17: { viric@17: print_http_header(); viric@17: if (!eat_form_ok()) viric@17: return -1; viric@17: } viric@24: viric@24: if (give_html) viric@24: print_html_header(); viric@24: /* We pass -1 so we don't check content length */ viric@14: process_text(stdin, 0, -1); viric@24: viric@24: if (give_html) viric@24: print_html_footer(); viric@17: end_dictionary(&dakcentiga); viric@17: viric@17: return 0; viric@14: }