parse_text.c
author viric@llimona
Sun, 02 Sep 2007 14:24:10 +0200
changeset 24 026a2ba0ce16
parent 18 64ed4238657f
child 25 8d524bb8dcea
permissions -rw-r--r--
Now the HTML result shows word meanings.
viric@14
     1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dictre.h"
viric@14
     4
viric@17
     5
/* Non-zero when the input is an HTTP form body: main() sets it when
 * CONTENT_LENGTH is present in the environment, and my_fgetc() then reads
 * through http_getc() instead of fgetc(). */
static int is_http = 0;
viric@24
     6
/* Non-zero to produce HTML output (page header/footer, clickable words and
 * definition boxes); zero makes the printing helpers emit plain text.
 * Initialised to 1. */
static int give_html = 1;
viric@17
     7
/* Value of the CONTENT_LENGTH environment variable as parsed by main();
 * stays -1 when the variable is not set. */
static int content_length = -1;
viric@17
     8
/* Dictionary opened by main() under the name "akcentiga";
 * give_accent_to_word() looks lowercased words up here to obtain their
 * accented spelling. */
static struct Dict dakcentiga;
viric@24
     9
/* Dictionary opened by main() under the name "signifoj"; searched for the
 * word meanings shown in the HTML definition boxes. */
static struct Dict dsignifoj;
viric@24
    10
viric@24
    11
/*
 * Write the fixed opening part of the HTML page to stdout: the <head>
 * (charset, title and the stylesheet for the pop-up definition boxes),
 * the opening <body> tag and the JavaScript helpers display(),
 * undisplay() and click() used by the per-word markup.
 *
 * The text lives in one string constant and is written with fputs().
 * Conditional-compilation directives must not appear inside the argument
 * list of printf(), because printf may be implemented as a macro, so the
 * previously disabled fragments have been dropped instead.
 *
 * The stylesheet is emitted inside <head>, where HTML requires it.
 *
 * NOTE(review): the inline onclick="click(...)" attributes emitted for
 * each word may resolve "click" to the element's own click() method
 * rather than to the global function defined here - confirm in the
 * target browsers.
 */
static void print_html_header(void)
{
    static const char header[] =
        "<html>\n"
        "<head>\n"
        "    <meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">\n"
        "    <title>Akcentigita teksto</title>\n"
        "<style type=\"text/css\" media=\"screen\">\n"
        "div.difino { display: none ;\n"
        "    position: absolute;\n"
        "    left: 0;\n"
        "    background: moccasin;\n"
        "    border: thin;\n"
        "    border-color: black;\n"
        "    padding: 10px 10px 10px 10px;\n"
        "    text-indent: 0em;\n"
        "    text-align: left;\n"
        "    }\n"
        "div.alineo { text-indent: 2em ; text-align: justify }\n"
        "</style>\n"
        "</head>\n"
        "<body>\n"
        "<script language=\"javascript\">\n"
        "var blocked = \"\";\n"
        "\n"
        "function display(element, ev)\n"
        "{\n"
        "    var x,y;\n"
        "    x = ev.screenX + window.pageXOffset;\n"
        "    y = ev.screenY + window.pageYOffset;\n"
        "\n"
        "    if (blocked != \"\")\n"
        "        return;\n"
        "    y += 5;\n"
        "    document.getElementById(element).style.top = y + 'px';\n"
        "    \n"
        "    document.getElementById(element).style.display = 'block';\n"
        "}\n"
        "\n"
        "function undisplay(element)\n"
        "{\n"
        "    document.getElementById(element).style.display = 'none';\n"
        "}\n"
        "\n"
        "function click(element, ev)\n"
        "{\n"
        "    if (blocked == \"\")\n"
        "    {\n"
        "        display(element, ev);\n"
        "        blocked = element;\n"
        "    }\n"
        "    else\n"
        "    {\n"
        "        undisplay(blocked);\n"
        "        blocked = \"\";\n"
        "    }\n"
        "}\n"
        "\n"
        "</script>";

    fputs(header, stdout);
}
viric@24
    93
viric@24
    94
/* Write the closing </body> and </html> tags, followed by a newline,
 * to stdout. */
static void print_html_footer()
{
    fputs("</body></html>\n", stdout);
}
viric@24
    98
viric@24
    99
/*
 * Print one dictionary entry as HTML paragraphs on stdout.
 *
 * word:    headword, shown in a "## Se moto <word>:" heading paragraph.
 * signifo: NUL-terminated definition text. It is copied byte for byte,
 *          and "</p><p>" is inserted in front of every newline that
 *          directly follows another newline.
 *
 * Neither string is HTML-escaped.
 */
static void dump_signifo_with_p(const char *word, const char *signifo)
{
    const char *p;
    char prev = '\0';

    printf("<p>## Se moto %s:</p>\n<p>", word);

    for (p = signifo; *p != '\0'; ++p)
    {
        /* Two newlines in a row mark a paragraph boundary. */
        if (*p == '\n' && prev == '\n')
            fputs("</p><p>", stdout);
        putchar(*p);
        prev = *p;
    }

    fputs("</p>", stdout);
}
viric@24
   122
viric@24
   123
/*
 * Open the hidden definition box for word number 'id': a
 * <div class="difino"> whose element id is "vorto<id>" and whose onclick
 * handler calls click() with that same id, followed by an opening <p>.
 */
static void print_start_signifoj(int id)
{
    fprintf(stdout,
            "<div id=\"vorto%i\" "
            "onclick=\"click('vorto%i', event);\" class=\"difino\">"
            "<p>\n",
            id, id);
}
viric@24
   128
viric@24
   129
/* Close the paragraph and the <div> opened by print_start_signifoj(). */
static void print_end_signifoj()
{
    fputs("</p></div>", stdout);
}
viric@24
   133
viric@24
   134
/* This expects the word to start exactly at *wordlist */
viric@24
   135
static void print_signifoj(char *wordlist, int id)
viric@24
   136
{
viric@24
   137
    char * next_space;
viric@24
   138
    char def[MAXDEF];
viric@24
   139
    int start_shown = 0;
viric@24
   140
viric@24
   141
    do {
viric@24
   142
        next_space = strchr(wordlist, ' ');
viric@24
   143
        if (next_space == 0)
viric@24
   144
            next_space = strchr(wordlist, '\n');
viric@24
   145
        if (next_space == 0)
viric@24
   146
            next_space = strchr(wordlist, '\0');
viric@24
   147
        if (next_space != 0 && next_space != wordlist)
viric@24
   148
        {
viric@24
   149
            int spacepos;
viric@24
   150
            spacepos = next_space - wordlist;
viric@24
   151
            wordlist[spacepos] = 0;
viric@24
   152
viric@24
   153
            /* wordlist has the word to search */
viric@24
   154
            find_def(&dsignifoj, wordlist, def);
viric@24
   155
            if (def[0])
viric@24
   156
            {
viric@24
   157
                if (!start_shown)
viric@24
   158
                {
viric@24
   159
                    print_start_signifoj(id);
viric@24
   160
                    start_shown = 1;
viric@24
   161
                }
viric@24
   162
                dump_signifo_with_p(wordlist, def);
viric@24
   163
            }
viric@24
   164
        } else
viric@24
   165
            break;
viric@24
   166
        wordlist = next_space + 1;
viric@24
   167
    } while(1);
viric@24
   168
viric@24
   169
    if (start_shown)
viric@24
   170
        print_end_signifoj();
viric@24
   171
}
viric@24
   172
viric@24
   173
static print_accented(const char *word, int id)
viric@24
   174
{
viric@24
   175
    if (give_html)
viric@24
   176
    {
viric@24
   177
        printf("<span class=\"vorto\" onclick=\"click('vorto%i', event);\">"
viric@24
   178
                "%s</span>",id,word);
viric@24
   179
    } else
viric@24
   180
        printf("%s", word);
viric@24
   181
}
viric@17
   182
viric@15
   183
static void give_accent_to_word(const char *word)
viric@14
   184
{
viric@14
   185
    char def[MAXDEF];
viric@15
   186
    char low[MAXWORD];
viric@15
   187
    char recased[MAXWORD];
viric@15
   188
    enum Case vcase[MAXWORD];
viric@24
   189
    static int id = 1;
viric@14
   190
viric@15
   191
    /* Get case */
viric@15
   192
    get_case(vcase, word);
viric@15
   193
viric@15
   194
    /* Get lowercase version */
viric@15
   195
    get_lowcase_str(low, word);
viric@15
   196
viric@15
   197
    /* Find the lowercase version */
viric@17
   198
    find_def(&dakcentiga, low, def);
viric@14
   199
    if (def[0] != 0) /* found */
viric@14
   200
    {
viric@14
   201
        /* Print the word UNTIL a space.
viric@14
   202
         * the definition will have the form:
viric@14
   203
         *    ACCENTED_WORD NOMINATIVE1 NOMINATIVE2 ... \n */
viric@14
   204
        char *first_space;
viric@15
   205
        char spacepos;
viric@14
   206
        first_space = strchr(def, ' ');
viric@14
   207
        if (first_space != 0) /* Space found */
viric@15
   208
        {
viric@15
   209
            spacepos = first_space - def;
viric@15
   210
            def[spacepos] = 0; /* Mark an end of string */
viric@15
   211
            reapply_case(recased, def, vcase);
viric@24
   212
            print_accented(recased, id);
viric@24
   213
            if (give_html)
viric@24
   214
                print_signifoj(first_space + 1 /*' '*/, id);
viric@24
   215
            ++id;
viric@15
   216
        }
viric@14
   217
        return;
viric@14
   218
    }
viric@24
   219
    else if (give_html)
viric@24
   220
    {
viric@24
   221
        char def[MAXDEF];
viric@24
   222
        /* OPTIMIZE: This, if find, will react in a second search at
viric@24
   223
         * print_signifoj() */
viric@24
   224
        find_def(&dsignifoj, low, def);
viric@24
   225
        if (def[0])
viric@24
   226
        {
viric@24
   227
            print_accented(word, id);
viric@24
   228
            print_signifoj(low, id);
viric@24
   229
            ++id;
viric@24
   230
            return;
viric@24
   231
        }
viric@24
   232
    }
viric@14
   233
viric@24
   234
    /* else ... */
viric@24
   235
viric@24
   236
    /* if first_space == 0 or word not found _AND_ word not found in meanings
viric@24
   237
     * when give_html...*/
viric@15
   238
    printf("%s", word);
viric@14
   239
}
viric@14
   240
viric@17
   241
static int my_fgetc(FILE *f)
viric@17
   242
{
viric@17
   243
    if (is_http)
viric@17
   244
        return http_getc(f);
viric@17
   245
    else
viric@17
   246
        return fgetc(f);
viric@17
   247
}
viric@17
   248
viric@14
   249
static void process_text(FILE *in, int pos, int length)
viric@14
   250
{
viric@14
   251
    unsigned char tmp[MAXWORD];
viric@14
   252
    int wordpos = 0;
viric@18
   253
    int skip_non_ascii = 0;
viric@24
   254
    int last = 0;
viric@18
   255
viric@24
   256
    if (give_html)
viric@24
   257
        printf("<div class=\"alineo\">");
viric@14
   258
    do
viric@14
   259
    {
viric@14
   260
        int c;
viric@14
   261
        /* Check pos only if length >= 0 */
viric@14
   262
        if (length >= 0 && pos >= length)
viric@14
   263
            break;
viric@17
   264
        c = my_fgetc(in);
viric@17
   265
        if (c == EOF || c == END_OF_URL)
viric@14
   266
            break;
viric@24
   267
        if (c == '\r')
viric@24
   268
            continue;
viric@24
   269
viric@24
   270
        /* Process 'last' for eventual line break */
viric@24
   271
        if (give_html && c == '\n' && last == '\n')
viric@24
   272
            printf("</div>\n<div class=\"alineo\">");
viric@24
   273
        last = c;
viric@24
   274
viric@18
   275
        if (skip_non_ascii || is_ASCII(c))
viric@14
   276
        {
viric@14
   277
            if (wordpos != 0)
viric@14
   278
            {
viric@14
   279
                tmp[wordpos] = 0;
viric@14
   280
                give_accent_to_word(tmp);
viric@14
   281
                wordpos = 0;
viric@14
   282
            }
viric@14
   283
            putchar(c);
viric@18
   284
            /* End of skip_non_ascii when we find an
viric@18
   285
             * ascii string */
viric@18
   286
            if (skip_non_ascii && is_ASCII(c))
viric@18
   287
                skip_non_ascii = 0;
viric@14
   288
        }
viric@14
   289
        else /* non-ASCII - we consider it russian */
viric@14
   290
        {
viric@14
   291
            tmp[wordpos++] = c;
viric@18
   292
            if (wordpos >= MAXWORD)
viric@18
   293
            {
viric@18
   294
                /* Dump the word and the rest of non-ASCII, because
viric@18
   295
                 * we cannot fit it in 'tmp' */
viric@18
   296
                int i;
viric@18
   297
                for(i=0; i < wordpos; ++i)
viric@18
   298
                    putchar(tmp[i]);
viric@18
   299
                wordpos=0;
viric@18
   300
                skip_non_ascii = 1;
viric@18
   301
            }
viric@14
   302
        }
viric@14
   303
viric@14
   304
        pos += 1;
viric@14
   305
    } while(1);
viric@17
   306
viric@17
   307
    /* End word */
viric@17
   308
    if (wordpos != 0)
viric@17
   309
    {
viric@17
   310
        tmp[wordpos] = 0;
viric@17
   311
        give_accent_to_word(tmp);
viric@17
   312
        wordpos = 0;
viric@17
   313
    }
viric@24
   314
    if (give_html)
viric@24
   315
        printf("</div>");
viric@17
   316
}
viric@17
   317
viric@17
   318
/*
 * Write the HTTP response header to stdout: the Content-Type line
 * (text/html, UTF-8) followed by the empty line that ends the header
 * section.
 */
static void print_http_header(void)
{
    fputs("Content-Type:text/html;charset=utf-8\r\n\r\n", stdout);
}
viric@17
   322
viric@17
   323
/*
 * Consume the form-field prefix "teksto=" from stdin.
 *
 * Returns 1 when the next bytes of stdin are exactly "teksto=", and 0
 * when they differ or when stdin ends before that many bytes could be
 * read. The bytes that were read are consumed in either case.
 */
int eat_form_ok(void)
{
    static const char mask[] = "teksto=";
    const size_t mask_len = sizeof(mask) - 1;
    char tmp[sizeof(mask)];

    /* A short read leaves part of tmp unset; treat it as a mismatch
     * instead of comparing indeterminate bytes. */
    if (fread(tmp, 1, mask_len, stdin) != mask_len)
        return 0;

    return memcmp(tmp, mask, mask_len) == 0;
}
viric@14
   333
viric@14
   334
int main()
viric@14
   335
{
viric@17
   336
    char *c;
viric@17
   337
viric@17
   338
    init_dictionary(&dakcentiga, "akcentiga");
viric@24
   339
    init_dictionary(&dsignifoj, "signifoj");
viric@17
   340
viric@17
   341
    if (c = getenv("CONTENT_LENGTH"))
viric@17
   342
    {
viric@17
   343
        content_length = atoi(c);
viric@17
   344
        is_http = 1;
viric@17
   345
    }
viric@17
   346
    if (is_http)
viric@17
   347
    {
viric@17
   348
        print_http_header();
viric@17
   349
        if (!eat_form_ok())
viric@17
   350
            return -1;
viric@17
   351
    }
viric@24
   352
viric@24
   353
    if (give_html)
viric@24
   354
        print_html_header();
viric@24
   355
    /* We pass -1 so we don't check content length */
viric@14
   356
    process_text(stdin, 0, -1);
viric@24
   357
viric@24
   358
    if (give_html)
viric@24
   359
        print_html_footer();
viric@17
   360
    end_dictionary(&dakcentiga);
viric@17
   361
viric@17
   362
    return 0;
viric@14
   363
}