parse_text.c
author viric@llimona
Sat, 08 Sep 2007 09:12:50 +0200
changeset 28 75b6d5659a19
parent 25 8d524bb8dcea
permissions -rw-r--r--
Better browser support.
viric@14
     1
#include <stdio.h>
viric@17
     2
#include <stdlib.h>
viric@14
     3
#include "dictre.h"
viric@14
     4
viric@17
     5
/* Non-zero when input arrives as an HTTP form body: main() sets it when the
 * CONTENT_LENGTH environment variable exists, and my_fgetc() then reads
 * through http_getc() instead of fgetc(). */
static int is_http = 0;
viric@24
     6
/* Non-zero to wrap the output in an HTML page with clickable words and
 * definition pop-ups; zero prints plain text.  Nothing in this file changes
 * it from its initial value. */
static int give_html = 1;
viric@17
     7
/* Value of CONTENT_LENGTH as parsed by atoi() in main(); stays -1 when the
 * variable is absent.  Not read anywhere else in the code visible here. */
static int content_length = -1;
viric@17
     8
/* Dictionary opened as "akcentiga"; looked up by lowercase word to obtain
 * the accented form followed by a list of related words. */
static struct Dict dakcentiga;
viric@24
     9
/* Dictionary opened as "signifoj"; looked up to obtain the definition text
 * shown in the pop-ups. */
static struct Dict dsignifoj;
viric@24
    10
viric@24
    11
/*
 * Write the fixed opening part of the HTML page to stdout.
 *
 * The text consists of:
 *   - the <head> with a UTF-8 content type and the page title;
 *   - a style sheet: 'div.difino' (definition boxes, hidden by default and
 *     absolutely positioned) and 'div.alineo' (indented, justified
 *     paragraphs);
 *   - the opening <body> tag;
 *   - a script with three functions:
 *       display(element, ev)  shows the element 5px below the event's pageY,
 *                             unless another box is currently "blocked";
 *       undisplay(element)    hides the element;
 *       klako(element, ev)    click handler: shows the element and records
 *                             it as blocked, or, when something is already
 *                             blocked, hides that element instead.
 *
 * The page is kept as one string literal written with fputs().  No
 * preprocessor directives may appear inside it: a directive inside the
 * argument list of a call is undefined behaviour whenever the callee is
 * implemented as a function-like macro, which libraries are allowed to do
 * for stdio functions.  The bytes emitted are unchanged.
 */
static void print_html_header(void)
{
    static const char header[] =
"<html>\n"
"<head>\n"
"    <meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">\n"
"    <title>Akcentigita teksto</title>\n"
"</head>\n"
"<style type=\"text/css\" media=\"screen\">\n"
"div.difino { display: none ;\n"
"    position: absolute;\n"
"    left: 0;\n"
"    background: moccasin;\n"
"    border: thin;\n"
"    border-color: black;\n"
"    padding: 10px 10px 10px 10px;\n"
"    text-indent: 0em;\n"
"    text-align: left;\n"
"    }\n"
"div.alineo { text-indent: 2em ; text-align: justify }\n"
"</style>\n"
"<body>\n"
"<script language=\"javascript\">\n"
"var blocked = \"\";\n"
"\n"
"function display(element, ev)\n"
"{\n"
"    var x,y;\n"
"    y = ev.pageY;\n"
"\n"
"    if (blocked != \"\")\n"
"        return;\n"
"    y += 5;\n"
"    document.getElementById(element).style.top = y + 'px';\n"
"    \n"
"    document.getElementById(element).style.display = 'block';\n"
"}\n"
"\n"
"function undisplay(element)\n"
"{\n"
"    document.getElementById(element).style.display = 'none';\n"
"}\n"
"\n"
"function klako(element, ev)\n"
"{\n"
"    if (blocked == \"\")\n"
"    {\n"
"        display(element, ev);\n"
"        blocked = element;\n"
"    }\n"
"    else\n"
"    {\n"
"        undisplay(blocked);\n"
"        blocked = \"\";\n"
"    }\n"
"}\n"
"\n"
"</script>";

    fputs(header, stdout);
}
viric@24
    95
viric@24
    96
/* Close the <body> and <html> elements opened by the page header. */
static void print_html_footer()
{
    fputs("</body></html>\n", stdout);
}
viric@24
   100
viric@25
   101
/*
 * Print one definition as HTML paragraphs on stdout.
 *
 * word     NUL-terminated headword, shown in a "## Se moto ..." title line.
 * signifo  NUL-terminated definition text.  It is copied byte by byte; each
 *          newline that directly follows another newline is preceded by
 *          "</p><p>", so blank lines start a new paragraph.
 *
 * Neither string is HTML-escaped.
 */
static void dump_signifo_with_p(const unsigned char *word,
        const unsigned char *signifo)
{
    const unsigned char *cursor;
    char previous = 0;

    printf("<p>## Se moto %s:</p>\n<p>", word);

    for (cursor = signifo; *cursor != '\0'; ++cursor)
    {
        char current = *cursor;

        /* Second newline in a row: paragraph break */
        if (current == '\n' && previous == '\n')
            printf("</p><p>");
        putchar(current);
        previous = current;
    }

    printf("</p>");
}
viric@24
   125
viric@24
   126
/*
 * Open the definition box for word number 'id': a <div> of class "difino"
 * whose element id is "vorto<id>" and whose click handler calls klako() on
 * itself, followed by an opening <p>.
 */
static void print_start_signifoj(int id)
{
    printf("<div id=\"vorto%i\" "
           "onclick=\"klako('vorto%i', event);\" "
           "class=\"difino\"><p>\n",
           id, id);
}
viric@24
   131
viric@24
   132
/* Close the <p> and <div> opened by print_start_signifoj(). */
static void print_end_signifoj()
{
    fputs("</p></div>", stdout);
}
viric@24
   136
viric@24
   137
/* This expects the word to start exactly at *wordlist */
viric@24
   138
static void print_signifoj(char *wordlist, int id)
viric@24
   139
{
viric@24
   140
    char * next_space;
viric@24
   141
    char def[MAXDEF];
viric@24
   142
    int start_shown = 0;
viric@24
   143
viric@24
   144
    do {
viric@24
   145
        next_space = strchr(wordlist, ' ');
viric@24
   146
        if (next_space == 0)
viric@24
   147
            next_space = strchr(wordlist, '\n');
viric@24
   148
        if (next_space == 0)
viric@24
   149
            next_space = strchr(wordlist, '\0');
viric@24
   150
        if (next_space != 0 && next_space != wordlist)
viric@24
   151
        {
viric@24
   152
            int spacepos;
viric@24
   153
            spacepos = next_space - wordlist;
viric@24
   154
            wordlist[spacepos] = 0;
viric@24
   155
viric@24
   156
            /* wordlist has the word to search */
viric@24
   157
            find_def(&dsignifoj, wordlist, def);
viric@24
   158
            if (def[0])
viric@24
   159
            {
viric@24
   160
                if (!start_shown)
viric@24
   161
                {
viric@24
   162
                    print_start_signifoj(id);
viric@24
   163
                    start_shown = 1;
viric@24
   164
                }
viric@24
   165
                dump_signifo_with_p(wordlist, def);
viric@24
   166
            }
viric@24
   167
        } else
viric@24
   168
            break;
viric@24
   169
        wordlist = next_space + 1;
viric@24
   170
    } while(1);
viric@24
   171
viric@24
   172
    if (start_shown)
viric@24
   173
        print_end_signifoj();
viric@24
   174
}
viric@24
   175
viric@24
   176
static print_accented(const char *word, int id)
viric@24
   177
{
viric@24
   178
    if (give_html)
viric@24
   179
    {
viric@28
   180
        printf("<span class=\"vorto\" onclick=\"klako('vorto%i', event);\">"
viric@24
   181
                "%s</span>",id,word);
viric@24
   182
    } else
viric@24
   183
        printf("%s", word);
viric@24
   184
}
viric@17
   185
viric@15
   186
static void give_accent_to_word(const char *word)
viric@14
   187
{
viric@14
   188
    char def[MAXDEF];
viric@15
   189
    char low[MAXWORD];
viric@15
   190
    char recased[MAXWORD];
viric@15
   191
    enum Case vcase[MAXWORD];
viric@24
   192
    static int id = 1;
viric@14
   193
viric@15
   194
    /* Get case */
viric@15
   195
    get_case(vcase, word);
viric@15
   196
viric@15
   197
    /* Get lowercase version */
viric@15
   198
    get_lowcase_str(low, word);
viric@15
   199
viric@15
   200
    /* Find the lowercase version */
viric@17
   201
    find_def(&dakcentiga, low, def);
viric@14
   202
    if (def[0] != 0) /* found */
viric@14
   203
    {
viric@14
   204
        /* Print the word UNTIL a space.
viric@14
   205
         * the definition will have the form:
viric@14
   206
         *    ACCENTED_WORD NOMINATIVE1 NOMINATIVE2 ... \n */
viric@14
   207
        char *first_space;
viric@15
   208
        char spacepos;
viric@14
   209
        first_space = strchr(def, ' ');
viric@14
   210
        if (first_space != 0) /* Space found */
viric@15
   211
        {
viric@15
   212
            spacepos = first_space - def;
viric@15
   213
            def[spacepos] = 0; /* Mark an end of string */
viric@15
   214
            reapply_case(recased, def, vcase);
viric@24
   215
            print_accented(recased, id);
viric@24
   216
            if (give_html)
viric@24
   217
                print_signifoj(first_space + 1 /*' '*/, id);
viric@24
   218
            ++id;
viric@15
   219
        }
viric@14
   220
        return;
viric@14
   221
    }
viric@24
   222
    else if (give_html)
viric@24
   223
    {
viric@24
   224
        char def[MAXDEF];
viric@24
   225
        /* OPTIMIZE: This, if find, will react in a second search at
viric@24
   226
         * print_signifoj() */
viric@24
   227
        find_def(&dsignifoj, low, def);
viric@24
   228
        if (def[0])
viric@24
   229
        {
viric@24
   230
            print_accented(word, id);
viric@24
   231
            print_signifoj(low, id);
viric@24
   232
            ++id;
viric@24
   233
            return;
viric@24
   234
        }
viric@24
   235
    }
viric@14
   236
viric@24
   237
    /* else ... */
viric@24
   238
viric@24
   239
    /* if first_space == 0 or word not found _AND_ word not found in meanings
viric@24
   240
     * when give_html...*/
viric@15
   241
    printf("%s", word);
viric@14
   242
}
viric@14
   243
viric@17
   244
static int my_fgetc(FILE *f)
viric@17
   245
{
viric@17
   246
    if (is_http)
viric@17
   247
        return http_getc(f);
viric@17
   248
    else
viric@17
   249
        return fgetc(f);
viric@17
   250
}
viric@17
   251
viric@14
   252
static void process_text(FILE *in, int pos, int length)
viric@14
   253
{
viric@14
   254
    unsigned char tmp[MAXWORD];
viric@14
   255
    int wordpos = 0;
viric@18
   256
    int skip_non_ascii = 0;
viric@24
   257
    int last = 0;
viric@18
   258
viric@24
   259
    if (give_html)
viric@24
   260
        printf("<div class=\"alineo\">");
viric@14
   261
    do
viric@14
   262
    {
viric@14
   263
        int c;
viric@14
   264
        /* Check pos only if length >= 0 */
viric@14
   265
        if (length >= 0 && pos >= length)
viric@14
   266
            break;
viric@17
   267
        c = my_fgetc(in);
viric@17
   268
        if (c == EOF || c == END_OF_URL)
viric@14
   269
            break;
viric@24
   270
        if (c == '\r')
viric@24
   271
            continue;
viric@24
   272
viric@24
   273
        /* Process 'last' for eventual line break */
viric@24
   274
        if (give_html && c == '\n' && last == '\n')
viric@24
   275
            printf("</div>\n<div class=\"alineo\">");
viric@24
   276
        last = c;
viric@24
   277
viric@18
   278
        if (skip_non_ascii || is_ASCII(c))
viric@14
   279
        {
viric@14
   280
            if (wordpos != 0)
viric@14
   281
            {
viric@14
   282
                tmp[wordpos] = 0;
viric@14
   283
                give_accent_to_word(tmp);
viric@14
   284
                wordpos = 0;
viric@14
   285
            }
viric@14
   286
            putchar(c);
viric@18
   287
            /* End of skip_non_ascii when we find an
viric@18
   288
             * ascii string */
viric@18
   289
            if (skip_non_ascii && is_ASCII(c))
viric@18
   290
                skip_non_ascii = 0;
viric@14
   291
        }
viric@14
   292
        else /* non-ASCII - we consider it russian */
viric@14
   293
        {
viric@14
   294
            tmp[wordpos++] = c;
viric@18
   295
            if (wordpos >= MAXWORD)
viric@18
   296
            {
viric@18
   297
                /* Dump the word and the rest of non-ASCII, because
viric@18
   298
                 * we cannot fit it in 'tmp' */
viric@18
   299
                int i;
viric@18
   300
                for(i=0; i < wordpos; ++i)
viric@18
   301
                    putchar(tmp[i]);
viric@18
   302
                wordpos=0;
viric@18
   303
                skip_non_ascii = 1;
viric@18
   304
            }
viric@14
   305
        }
viric@14
   306
viric@14
   307
        pos += 1;
viric@14
   308
    } while(1);
viric@17
   309
viric@17
   310
    /* End word */
viric@17
   311
    if (wordpos != 0)
viric@17
   312
    {
viric@17
   313
        tmp[wordpos] = 0;
viric@17
   314
        give_accent_to_word(tmp);
viric@17
   315
        wordpos = 0;
viric@17
   316
    }
viric@24
   317
    if (give_html)
viric@24
   318
        printf("</div>");
viric@17
   319
}
viric@17
   320
viric@17
   321
/*
 * Write the HTTP response header to stdout: a Content-Type of text/html in
 * UTF-8, followed by the empty line that ends the header section.
 * Returns nothing (the return type used to be an implicit 'int' whose value
 * was never set).
 */
static void print_http_header(void)
{
    fputs("Content-Type:text/html;charset=utf-8\r\n\r\n", stdout);
}
viric@17
   325
viric@17
   326
/*
 * Consume the form field prefix "teksto=" from stdin.
 *
 * Returns 1 when the next bytes of stdin are exactly "teksto=", leaving the
 * stream positioned right after them.  Returns 0 when they differ or when
 * stdin ends (or fails) before that many bytes could be read; in that case
 * whatever could be read has been consumed.
 */
int eat_form_ok(void)
{
    const char mask[] = "teksto=";
    char tmp[sizeof(mask)];
    const size_t wanted = sizeof(mask) - 1;

    /* A short read would leave part of 'tmp' uninitialised, so treat it as
     * a mismatch instead of comparing garbage. */
    if (fread(tmp, 1, wanted, stdin) != wanted)
        return 0;
    tmp[wanted] = 0;

    return strcmp(mask, tmp) == 0;
}
viric@14
   336
viric@14
   337
int main()
viric@14
   338
{
viric@17
   339
    char *c;
viric@17
   340
viric@17
   341
    init_dictionary(&dakcentiga, "akcentiga");
viric@24
   342
    init_dictionary(&dsignifoj, "signifoj");
viric@17
   343
viric@17
   344
    if (c = getenv("CONTENT_LENGTH"))
viric@17
   345
    {
viric@17
   346
        content_length = atoi(c);
viric@17
   347
        is_http = 1;
viric@17
   348
    }
viric@17
   349
    if (is_http)
viric@17
   350
    {
viric@17
   351
        print_http_header();
viric@17
   352
        if (!eat_form_ok())
viric@17
   353
            return -1;
viric@17
   354
    }
viric@24
   355
viric@24
   356
    if (give_html)
viric@24
   357
        print_html_header();
viric@24
   358
    /* We pass -1 so we don't check content length */
viric@14
   359
    process_text(stdin, 0, -1);
viric@24
   360
viric@24
   361
    if (give_html)
viric@24
   362
        print_html_footer();
viric@17
   363
    end_dictionary(&dakcentiga);
viric@17
   364
viric@17
   365
    return 0;
viric@14
   366
}