parse_text.c
author viric@llimona
Sun, 02 Sep 2007 16:01:27 +0200
changeset 27 153c479aa0bc
parent 25 8d524bb8dcea
child 28 75b6d5659a19
permissions -rw-r--r--
LEGUMIN kaj aliaj gxisdatigoj.
viric@14
     1
#include <stdio.h>
viric@17
     2
#include <stdlib.h>
#include <string.h>
viric@14
     3
#include "dictre.h"
viric@14
     4
viric@17
     5
static int is_http = 0;
viric@24
     6
static int give_html = 1;
viric@17
     7
static int content_length = -1;
viric@17
     8
static struct Dict dakcentiga;
viric@24
     9
static struct Dict dsignifoj;
viric@24
    10
viric@24
    11
/* Write the fixed opening part of the HTML page to stdout: the <head>,
 * the style sheet for the "difino" and "alineo" <div> classes, the opening
 * <body> tag and the script defining display(), undisplay() and click().
 * The text contains no conversion specifications, so it is written
 * verbatim with fputs(). */
static void print_html_header()
{
    static const char page_head[] =
        "<html>\n"
        "<head>\n"
        "    <meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">\n"
        "    <title>Akcentigita teksto</title>\n"
        "</head>\n";

    static const char style_sheet[] =
        "<style type=\"text/css\" media=\"screen\">\n"
        "div.difino { display: none ;\n"
        "    position: absolute;\n"
        "    left: 0;\n"
        "    background: moccasin;\n"
        "    border: thin;\n"
        "    border-color: black;\n"
        "    padding: 10px 10px 10px 10px;\n"
        "    text-indent: 0em;\n"
        "    text-align: left;\n"
        "    }\n"
        "div.alineo { text-indent: 2em ; text-align: justify }\n"
        "</style>\n";

    static const char body_and_script[] =
        "<body>\n"
        "<script language=\"javascript\">\n"
        "var blocked = \"\";\n"
        "\n"
        "function display(element, ev)\n"
        "{\n"
        "    var x,y;\n"
        "    x = ev.screenX + window.pageXOffset;\n"
        "    y = ev.screenY + window.pageYOffset;\n"
        "\n"
        "    if (blocked != \"\")\n"
        "        return;\n"
        "    y += 5;\n"
        "    document.getElementById(element).style.top = y + 'px';\n"
        "    \n"
        "    document.getElementById(element).style.display = 'block';\n"
        "}\n"
        "\n"
        "function undisplay(element)\n"
        "{\n"
        "    document.getElementById(element).style.display = 'none';\n"
        "}\n"
        "\n"
        "function click(element, ev)\n"
        "{\n"
        "    if (blocked == \"\")\n"
        "    {\n"
        "        display(element, ev);\n"
        "        blocked = element;\n"
        "    }\n"
        "    else\n"
        "    {\n"
        "        undisplay(blocked);\n"
        "        blocked = \"\";\n"
        "    }\n"
        "}\n"
        "\n"
        "</script>";

    fputs(page_head, stdout);
    fputs(style_sheet, stdout);
    fputs(body_and_script, stdout);
}
viric@24
    93
viric@24
    94
/* Write the closing </body> and </html> tags, followed by a newline. */
static void print_html_footer()
{
    puts("</body></html>");
}
viric@24
    98
viric@25
    99
/* Print one dictionary entry to stdout as HTML paragraphs.
 *
 * word    - headword, shown in the leading "## Se moto ..." paragraph.
 * signifo - NUL-terminated definition text.  It is copied byte by byte;
 *           just before the second of two consecutive '\n' a "</p><p>"
 *           is written, so an empty line starts a new paragraph.
 */
static void dump_signifo_with_p(const unsigned char *word,
        const unsigned char *signifo)
{
    const unsigned char *cursor;
    char previous = 0;

    printf("<p>## Se moto %s:</p>\n<p>", word);

    for (cursor = signifo; *cursor != '\0'; ++cursor)
    {
        char current = *cursor;

        if (current == '\n' && previous == '\n')
            fputs("</p><p>", stdout);
        putchar(current);
        previous = current;
    }

    fputs("</p>", stdout);
}
viric@24
   123
viric@24
   124
/* Open the definition box for word number id: a <div class="difino">
 * whose element id is "vorto<id>" and whose onclick handler calls the
 * page's click() function, followed by an opening <p>. */
static void print_start_signifoj(int id)
{
    static const char opening[] =
        "<div id=\"vorto%i\" onclick=\"click('vorto%i', event);\" "
        "class=\"difino\"><p>\n";

    printf(opening, id, id);
}
viric@24
   129
viric@24
   130
/* Close the <p> and <div> opened by print_start_signifoj(). */
static void print_end_signifoj()
{
    fputs("</p></div>", stdout);
}
viric@24
   134
viric@24
   135
/* This expects the word to start exactly at *wordlist */
viric@24
   136
static void print_signifoj(char *wordlist, int id)
viric@24
   137
{
viric@24
   138
    char * next_space;
viric@24
   139
    char def[MAXDEF];
viric@24
   140
    int start_shown = 0;
viric@24
   141
viric@24
   142
    do {
viric@24
   143
        next_space = strchr(wordlist, ' ');
viric@24
   144
        if (next_space == 0)
viric@24
   145
            next_space = strchr(wordlist, '\n');
viric@24
   146
        if (next_space == 0)
viric@24
   147
            next_space = strchr(wordlist, '\0');
viric@24
   148
        if (next_space != 0 && next_space != wordlist)
viric@24
   149
        {
viric@24
   150
            int spacepos;
viric@24
   151
            spacepos = next_space - wordlist;
viric@24
   152
            wordlist[spacepos] = 0;
viric@24
   153
viric@24
   154
            /* wordlist has the word to search */
viric@24
   155
            find_def(&dsignifoj, wordlist, def);
viric@24
   156
            if (def[0])
viric@24
   157
            {
viric@24
   158
                if (!start_shown)
viric@24
   159
                {
viric@24
   160
                    print_start_signifoj(id);
viric@24
   161
                    start_shown = 1;
viric@24
   162
                }
viric@24
   163
                dump_signifo_with_p(wordlist, def);
viric@24
   164
            }
viric@24
   165
        } else
viric@24
   166
            break;
viric@24
   167
        wordlist = next_space + 1;
viric@24
   168
    } while(1);
viric@24
   169
viric@24
   170
    if (start_shown)
viric@24
   171
        print_end_signifoj();
viric@24
   172
}
viric@24
   173
viric@24
   174
static print_accented(const char *word, int id)
viric@24
   175
{
viric@24
   176
    if (give_html)
viric@24
   177
    {
viric@24
   178
        printf("<span class=\"vorto\" onclick=\"click('vorto%i', event);\">"
viric@24
   179
                "%s</span>",id,word);
viric@24
   180
    } else
viric@24
   181
        printf("%s", word);
viric@24
   182
}
viric@17
   183
viric@15
   184
static void give_accent_to_word(const char *word)
viric@14
   185
{
viric@14
   186
    char def[MAXDEF];
viric@15
   187
    char low[MAXWORD];
viric@15
   188
    char recased[MAXWORD];
viric@15
   189
    enum Case vcase[MAXWORD];
viric@24
   190
    static int id = 1;
viric@14
   191
viric@15
   192
    /* Get case */
viric@15
   193
    get_case(vcase, word);
viric@15
   194
viric@15
   195
    /* Get lowercase version */
viric@15
   196
    get_lowcase_str(low, word);
viric@15
   197
viric@15
   198
    /* Find the lowercase version */
viric@17
   199
    find_def(&dakcentiga, low, def);
viric@14
   200
    if (def[0] != 0) /* found */
viric@14
   201
    {
viric@14
   202
        /* Print the word UNTIL a space.
viric@14
   203
         * the definition will have the form:
viric@14
   204
         *    ACCENTED_WORD NOMINATIVE1 NOMINATIVE2 ... \n */
viric@14
   205
        char *first_space;
viric@15
   206
        char spacepos;
viric@14
   207
        first_space = strchr(def, ' ');
viric@14
   208
        if (first_space != 0) /* Space found */
viric@15
   209
        {
viric@15
   210
            spacepos = first_space - def;
viric@15
   211
            def[spacepos] = 0; /* Mark an end of string */
viric@15
   212
            reapply_case(recased, def, vcase);
viric@24
   213
            print_accented(recased, id);
viric@24
   214
            if (give_html)
viric@24
   215
                print_signifoj(first_space + 1 /*' '*/, id);
viric@24
   216
            ++id;
viric@15
   217
        }
viric@14
   218
        return;
viric@14
   219
    }
viric@24
   220
    else if (give_html)
viric@24
   221
    {
viric@24
   222
        char def[MAXDEF];
viric@24
   223
        /* OPTIMIZE: This, if find, will react in a second search at
viric@24
   224
         * print_signifoj() */
viric@24
   225
        find_def(&dsignifoj, low, def);
viric@24
   226
        if (def[0])
viric@24
   227
        {
viric@24
   228
            print_accented(word, id);
viric@24
   229
            print_signifoj(low, id);
viric@24
   230
            ++id;
viric@24
   231
            return;
viric@24
   232
        }
viric@24
   233
    }
viric@14
   234
viric@24
   235
    /* else ... */
viric@24
   236
viric@24
   237
    /* if first_space == 0 or word not found _AND_ word not found in meanings
viric@24
   238
     * when give_html...*/
viric@15
   239
    printf("%s", word);
viric@14
   240
}
viric@14
   241
viric@17
   242
static int my_fgetc(FILE *f)
viric@17
   243
{
viric@17
   244
    if (is_http)
viric@17
   245
        return http_getc(f);
viric@17
   246
    else
viric@17
   247
        return fgetc(f);
viric@17
   248
}
viric@17
   249
viric@14
   250
static void process_text(FILE *in, int pos, int length)
viric@14
   251
{
viric@14
   252
    unsigned char tmp[MAXWORD];
viric@14
   253
    int wordpos = 0;
viric@18
   254
    int skip_non_ascii = 0;
viric@24
   255
    int last = 0;
viric@18
   256
viric@24
   257
    if (give_html)
viric@24
   258
        printf("<div class=\"alineo\">");
viric@14
   259
    do
viric@14
   260
    {
viric@14
   261
        int c;
viric@14
   262
        /* Check pos only if length >= 0 */
viric@14
   263
        if (length >= 0 && pos >= length)
viric@14
   264
            break;
viric@17
   265
        c = my_fgetc(in);
viric@17
   266
        if (c == EOF || c == END_OF_URL)
viric@14
   267
            break;
viric@24
   268
        if (c == '\r')
viric@24
   269
            continue;
viric@24
   270
viric@24
   271
        /* Process 'last' for eventual line break */
viric@24
   272
        if (give_html && c == '\n' && last == '\n')
viric@24
   273
            printf("</div>\n<div class=\"alineo\">");
viric@24
   274
        last = c;
viric@24
   275
viric@18
   276
        if (skip_non_ascii || is_ASCII(c))
viric@14
   277
        {
viric@14
   278
            if (wordpos != 0)
viric@14
   279
            {
viric@14
   280
                tmp[wordpos] = 0;
viric@14
   281
                give_accent_to_word(tmp);
viric@14
   282
                wordpos = 0;
viric@14
   283
            }
viric@14
   284
            putchar(c);
viric@18
   285
            /* End of skip_non_ascii when we find an
viric@18
   286
             * ascii string */
viric@18
   287
            if (skip_non_ascii && is_ASCII(c))
viric@18
   288
                skip_non_ascii = 0;
viric@14
   289
        }
viric@14
   290
        else /* non-ASCII - we consider it russian */
viric@14
   291
        {
viric@14
   292
            tmp[wordpos++] = c;
viric@18
   293
            if (wordpos >= MAXWORD)
viric@18
   294
            {
viric@18
   295
                /* Dump the word and the rest of non-ASCII, because
viric@18
   296
                 * we cannot fit it in 'tmp' */
viric@18
   297
                int i;
viric@18
   298
                for(i=0; i < wordpos; ++i)
viric@18
   299
                    putchar(tmp[i]);
viric@18
   300
                wordpos=0;
viric@18
   301
                skip_non_ascii = 1;
viric@18
   302
            }
viric@14
   303
        }
viric@14
   304
viric@14
   305
        pos += 1;
viric@14
   306
    } while(1);
viric@17
   307
viric@17
   308
    /* End word */
viric@17
   309
    if (wordpos != 0)
viric@17
   310
    {
viric@17
   311
        tmp[wordpos] = 0;
viric@17
   312
        give_accent_to_word(tmp);
viric@17
   313
        wordpos = 0;
viric@17
   314
    }
viric@24
   315
    if (give_html)
viric@24
   316
        printf("</div>");
viric@17
   317
}
viric@17
   318
viric@17
   319
/* Write the CGI response header to stdout: a Content-Type line declaring
 * UTF-8 HTML, followed by the empty line that ends the header section.
 * Returns nothing (the original relied on the obsolete implicit-int
 * return type). */
static void print_http_header(void)
{
    printf("Content-Type:text/html;charset=utf-8\r\n\r\n");
}
viric@17
   323
viric@17
   324
/* Consume the form field prefix "teksto=" from stdin.
 *
 * Reads exactly as many bytes as the prefix is long.
 *
 * Returns 1 if those bytes were all read and equal "teksto=", 0 otherwise
 * (including a short read, where the original compared indeterminate
 * bytes).
 */
int eat_form_ok(void)
{
    static const char mask[] = "teksto=";
    const size_t want = sizeof(mask) - 1;   /* prefix length without NUL */
    char tmp[sizeof(mask) - 1];

    if (fread(tmp, 1, want, stdin) != want)
        return 0;

    return memcmp(tmp, mask, want) == 0;
}
viric@14
   334
viric@14
   335
int main()
viric@14
   336
{
viric@17
   337
    char *c;
viric@17
   338
viric@17
   339
    init_dictionary(&dakcentiga, "akcentiga");
viric@24
   340
    init_dictionary(&dsignifoj, "signifoj");
viric@17
   341
viric@17
   342
    if (c = getenv("CONTENT_LENGTH"))
viric@17
   343
    {
viric@17
   344
        content_length = atoi(c);
viric@17
   345
        is_http = 1;
viric@17
   346
    }
viric@17
   347
    if (is_http)
viric@17
   348
    {
viric@17
   349
        print_http_header();
viric@17
   350
        if (!eat_form_ok())
viric@17
   351
            return -1;
viric@17
   352
    }
viric@24
   353
viric@24
   354
    if (give_html)
viric@24
   355
        print_html_header();
viric@24
   356
    /* We pass -1 so we don't check content length */
viric@14
   357
    process_text(stdin, 0, -1);
viric@24
   358
viric@24
   359
    if (give_html)
viric@24
   360
        print_html_footer();
viric@17
   361
    end_dictionary(&dakcentiga);
viric@17
   362
viric@17
   363
    return 0;
viric@14
   364
}