parse_text.c
author viric@llimona
Sun, 02 Sep 2007 00:02:48 +0200
changeset 23 97feccfc5215
parent 18 64ed4238657f
child 24 026a2ba0ce16
permissions -rw-r--r--
Aldonis rus_eng_full.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     1
#include <stdio.h>
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
     2
#include <stdlib.h>
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     3
#include "dictre.h"
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     4
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
     5
/* Nonzero when main() found CONTENT_LENGTH in the environment; my_fgetc()
 * then reads input through http_getc() instead of fgetc(). */
static int is_http = 0;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
     6
/* Numeric value of the CONTENT_LENGTH environment variable as parsed by
 * main(); stays -1 when the variable is not set. */
static int content_length = -1;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
     7
/* Dictionary handle opened by main() under the name "akcentiga" and queried
 * by give_accent_to_word() to obtain the accented form of a word. */
static struct Dict dakcentiga;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
     8
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
     9
static void give_accent_to_word(const char *word)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    10
{
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    11
    char def[MAXDEF];
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    12
    char low[MAXWORD];
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    13
    char recased[MAXWORD];
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    14
    enum Case vcase[MAXWORD];
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    15
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    16
    /* Get case */
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    17
    get_case(vcase, word);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    18
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    19
    /* Get lowercase version */
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    20
    get_lowcase_str(low, word);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    21
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    22
    /* Find the lowercase version */
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    23
    find_def(&dakcentiga, low, def);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    24
    if (def[0] != 0) /* found */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    25
    {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    26
        /* Print the word UNTIL a space.
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    27
         * the definition will have the form:
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    28
         *    ACCENTED_WORD NOMINATIVE1 NOMINATIVE2 ... \n */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    29
        char *first_space;
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    30
        char spacepos;
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    31
        first_space = strchr(def, ' ');
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    32
        if (first_space != 0) /* Space found */
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    33
        {
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    34
            spacepos = first_space - def;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    35
            def[spacepos] = 0; /* Mark an end of string */
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    36
            reapply_case(recased, def, vcase);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    37
            printf("%s", recased);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    38
        }
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    39
        return;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    40
    }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    41
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    42
    /* if first_space == 0 or word not found */
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    43
    printf("%s", word);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    44
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    45
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    46
static int my_fgetc(FILE *f)
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    47
{
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    48
    if (is_http)
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    49
        return http_getc(f);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    50
    else
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    51
        return fgetc(f);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    52
}
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    53
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    54
static void process_text(FILE *in, int pos, int length)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    55
{
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    56
    unsigned char tmp[MAXWORD];
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    57
    int wordpos = 0;
18
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    58
    int skip_non_ascii = 0;
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    59
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    60
    do
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    61
    {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    62
        int c;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    63
        /* Check pos only if length >= 0 */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    64
        if (length >= 0 && pos >= length)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    65
            break;
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    66
        c = my_fgetc(in);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
    67
        if (c == EOF || c == END_OF_URL)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    68
            break;
18
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    69
        if (skip_non_ascii || is_ASCII(c))
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    70
        {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    71
            if (wordpos != 0)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    72
            {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    73
                tmp[wordpos] = 0;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    74
                give_accent_to_word(tmp);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    75
                wordpos = 0;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    76
            }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    77
            putchar(c);
18
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    78
            /* End of skip_non_ascii when we find an
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    79
             * ascii string */
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    80
            if (skip_non_ascii && is_ASCII(c))
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    81
                skip_non_ascii = 0;
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    82
        }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    83
        else /* non-ASCII - we consider it russian */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    84
        {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    85
            tmp[wordpos++] = c;
18
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    86
            if (wordpos >= MAXWORD)
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    87
            {
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    88
                /* Dump the word and the rest of non-ASCII, because
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    89
                 * we cannot fit it in 'tmp' */
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    90
                int i;
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    91
                for(i=0; i < wordpos; ++i)
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    92
                    putchar(tmp[i]);
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    93
                wordpos=0;
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    94
                skip_non_ascii = 1;
64ed4238657f Fixed possible buffer overflow.
viric@llimona
parents: 17
diff changeset
    95
            }
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    96
        }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    97
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    98
        pos += 1;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    99
    } while(1);
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   100
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   101
    /* End word */
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   102
    if (wordpos != 0)
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   103
    {
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   104
        tmp[wordpos] = 0;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   105
        give_accent_to_word(tmp);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   106
        wordpos = 0;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   107
    }
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   108
}
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   109
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   110
/*
 * Write the HTTP response header to stdout: a Content-Type line declaring
 * UTF-8 HTML, followed by the empty line that ends the header section.
 */
static void print_http_header(void)
{
    fputs("Content-Type:text/html;charset=utf-8\r\n\r\n", stdout);
}
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   114
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   115
/*
 * Consume the form field prefix "teksto=" from stdin.
 *
 * Reads as many bytes as the prefix is long and compares them with it.
 * Returns 1 when the prefix is present, 0 when the input is too short or
 * starts with something else. The bytes read are consumed in either case.
 */
int eat_form_ok(void)
{
    const char mask[] = "teksto=";
    const size_t mask_len = sizeof(mask) - 1;
    char tmp[sizeof(mask)] = {0}; /* zero-filled, so always terminated */

    /* A short read cannot match; never compare bytes that were not read */
    if (fread(tmp, 1, mask_len, stdin) != mask_len)
        return 0;

    return strcmp(mask, tmp) == 0;
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   125
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   126
int main()
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   127
{
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   128
    char *c;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   129
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   130
    init_dictionary(&dakcentiga, "akcentiga");
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   131
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   132
    if (c = getenv("CONTENT_LENGTH"))
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   133
    {
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   134
        content_length = atoi(c);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   135
        is_http = 1;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   136
    }
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   137
    if (is_http)
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   138
    {
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   139
        print_http_header();
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   140
        if (!eat_form_ok())
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   141
            return -1;
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   142
    }
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   143
    process_text(stdin, 0, -1);
17
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   144
    end_dictionary(&dakcentiga);
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   145
d95d9e7a2b81 General interface to dictionary search.
viric@llimona
parents: 15
diff changeset
   146
    return 0;
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
   147
}