parse_text.c
author viric@llimona
Sat, 01 Sep 2007 00:50:11 +0200
changeset 15 17a66ceb774a
parent 14 a961bb8806b9
child 17 d95d9e7a2b81
permissions -rw-r--r--
Pritraktado de majuskloj per ICU.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     1
#include <stdio.h>
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     2
#include "dictre.h"
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     3
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
     4
static void give_accent_to_word(const char *word)
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     5
{
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
     6
    char def[MAXDEF];
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
     7
    char low[MAXWORD];
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
     8
    char recased[MAXWORD];
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
     9
    enum Case vcase[MAXWORD];
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    10
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    11
    /* Get case */
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    12
    get_case(vcase, word);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    13
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    14
    /* Get lowercase version */
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    15
    get_lowcase_str(low, word);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    16
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    17
    /* Find the lowercase version */
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    18
    find_def(low, def);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    19
    if (def[0] != 0) /* found */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    20
    {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    21
        /* Print the word UNTIL a space.
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    22
         * the definition will have the form:
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    23
         *    ACCENTED_WORD NOMINATIVE1 NOMINATIVE2 ... \n */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    24
        char *first_space;
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    25
        char spacepos;
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    26
        first_space = strchr(def, ' ');
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    27
        if (first_space != 0) /* Space found */
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    28
        {
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    29
            spacepos = first_space - def;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    30
            def[spacepos] = 0; /* Mark an end of string */
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    31
            reapply_case(recased, def, vcase);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    32
            printf("%s", recased);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    33
        }
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    34
        return;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    35
    }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    36
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    37
    /* if first_space == 0 or word not found */
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
    38
    printf("%s", word);
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    39
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    40
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    41
static void process_text(FILE *in, int pos, int length)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    42
{
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    43
    unsigned char tmp[MAXWORD];
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    44
    int wordpos = 0;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    45
    do
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    46
    {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    47
        int c;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    48
        /* Check pos only if length >= 0 */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    49
        if (length >= 0 && pos >= length)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    50
            break;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    51
        c = fgetc(in);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    52
        if (c == EOF)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    53
            break;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    54
        if (is_ASCII(c))
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    55
        {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    56
            if (wordpos != 0)
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    57
            {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    58
                tmp[wordpos] = 0;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    59
                give_accent_to_word(tmp);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    60
                wordpos = 0;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    61
            }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    62
            putchar(c);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    63
        }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    64
        else /* non-ASCII - we consider it russian */
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    65
        {
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    66
            tmp[wordpos++] = c;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    67
        }
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    68
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    69
        pos += 1;
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    70
    } while(1);
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    71
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    72
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents:
diff changeset
    73
/* Program entry point: initialise the dictionary, filter all of stdin to
 * stdout through process_text() (length -1 means no length limit), then
 * release the dictionary.
 */
int main(void)
{
    init_dictionary();
    process_text(stdin, 0, -1);
    end_dictionary();
    /* Explicit status: falling off the end of main is only defined to
     * return 0 from C99 onwards. */
    return 0;
}