zrus.c
author viric@llimona
Wed, 29 Aug 2007 00:19:14 +0200
changeset 14 a961bb8806b9
parent 12 c755c945a96a
child 15 17a66ceb774a
permissions -rw-r--r--
first 'zparsetext'.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
11
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dictre.h"
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     3
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     4
/*
 * Tell whether the two bytes at 'tmp' are 0xCC 0x81, i.e. the UTF-8
 * encoding of U+0301 (combining acute accent).
 *
 * tmp[1] is only inspected when tmp[0] is 0xCC, so 'tmp' may point at the
 * NUL terminator of a string.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int closed_accent(const unsigned char *tmp)
{
    return tmp[0] == 0xcc && tmp[1] == 0x81;
}
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    10
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    11
/*
 * Tell whether the byte at 'tmp' is 0x60 (the ASCII grave accent '`').
 *
 * Only tmp[0] is read.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int open_accent(const unsigned char *tmp)
{
    return tmp[0] == 0x60;
}
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    17
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    18
/*
 * Merge the accent marks found in two spellings of a word.
 *
 * Both strings are walked in parallel.  An accent mark at the current
 * position of either string is copied to the result (a mark present in both
 * strings at that position is copied once); every other byte is taken from
 * 'a' while the position in 'b' advances alongside it.  The accent marks
 * recognised are the ones closed_accent() and open_accent() detect.
 *
 * Once 'a' is exhausted, accent marks still pending in 'b' are appended, and
 * the result ends at the first non-accent byte of 'b'.
 *
 * a: heap-allocated string.  Ownership passes to this function, which
 *    always free()s it; the caller must not use it afterwards.
 * b: string that is only read.
 *
 * Returns a newly allocated string the caller must free(), or NULL when the
 * allocation fails ('a' is freed in that case too).
 */
char * mix_accents(char *a, const char *b)
{
    /* The accent helpers compare raw byte values, so look at the input as
     * unsigned bytes regardless of the signedness of plain char. */
    const unsigned char *ua = (const unsigned char *) a;
    const unsigned char *ub = (const unsigned char *) b;
    size_t ia = 0;
    size_t ib = 0;
    size_t o = 0;
    char *out;

    /* Each step below emits no more bytes than it consumes from the string
     * it copies from, so the result never exceeds the combined length. */
    out = malloc(strlen(a) + strlen(b) + 1);
    if (out == NULL)
    {
        free(a);
        return NULL;
    }

    while (a[ia] != 0 || b[ib] != 0)
    {
        if (closed_accent(&ua[ia]))
        {
            out[o++] = a[ia++];
            out[o++] = a[ia++];
            /* Same mark in 'b': consume it so it is not emitted twice */
            if (closed_accent(&ub[ib]))
                ib += 2;
        }
        else if (closed_accent(&ub[ib]))
        {
            out[o++] = b[ib++];
            out[o++] = b[ib++];
        }
        else if (open_accent(&ua[ia]))
        {
            out[o++] = a[ia++];
            /* Same mark in 'b': consume it so it is not emitted twice */
            if (open_accent(&ub[ib]))
                ib += 1;
        }
        else if (open_accent(&ub[ib]))
        {
            out[o++] = b[ib++];
        }
        else
        {
            /* Letter */
            if (a[ia] == 0)
                break;  /* 'a' is finished: the result ends here */
            out[o++] = a[ia++];
            if (b[ib] != 0)
                ++ib;
        }
    }
    out[o] = 0;

    free(a);
    return out;
}
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    77
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    78
/*
 * Copy 'from' into 'dest', leaving out every accent mark.
 *
 * The marks dropped are the two-byte sequence 0xCC 0x81 (UTF-8 for U+0301,
 * combining acute accent) and the single byte 0x60 ('`').  All other bytes
 * are copied unchanged, and 'dest' is always NUL-terminated.
 *
 * dest: output buffer.  The output is never longer than the input, so room
 *       for the length of 'from' plus the terminator is sufficient.
 * from: NUL-terminated input string; only read.
 */
void remove_accent(unsigned char *dest, const unsigned char *from)
{
    const unsigned char *src = from;
    unsigned char *dst = dest;

    while (*src != 0)
    {
        if (src[0] == 0xcc && src[1] == 0x81)
        {
            src += 2;           /* combining acute accent */
            continue;
        }
        if (*src == 0x60)
        {
            src += 1;           /* grave accent */
            continue;
        }
        *dst++ = *src++;
    }
    *dst = 0;
}
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
    99
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   100
/*
 * Advance *index through 'str' until it rests on a '\n' or on the
 * terminating NUL.  The newline itself is not stepped over.
 *
 * str:   NUL-terminated string to scan.
 * index: in/out position in 'str'; updated to where the scan stopped.
 *
 * Returns the position of the newline, or -1 when the end of the string
 * was reached first.
 */
int skip_newline(const char *str, int *index)
{
    int pos = *index;

    while (str[pos] != 0 && str[pos] != '\n')
        ++pos;

    *index = pos;
    return (str[pos] == '\n') ? pos : -1;
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   112
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   113
/*
 * Advance *index through 'str' until it rests on a word delimiter
 * (space, '\n', '\r' or ',') or on the terminating NUL.
 *
 * str:   NUL-terminated string to scan.
 * index: in/out position in 'str'; updated to where the scan stopped.
 *
 * Returns the position of the delimiter, or -1 when the end of the string
 * was reached first.
 */
int until_noword(const char *str, int *index)
{
    int pos = *index;

    for (;;)
    {
        const char c = str[pos];

        if (c == 0 || c == ' ' || c == '\n' || c == '\r' || c == ',')
            break;
        ++pos;
    }

    *index = pos;
    return (str[pos] != 0) ? pos : -1;
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   129
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   130
/*
 * Tell whether byte 'c' lies in the 7-bit ASCII range (0..127).
 *
 * Returns 1 if it does, 0 otherwise.
 */
int is_ASCII(unsigned char c)
{
    return c < 128;
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   136
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   137
/*
 * Advance *index through 'str' over ASCII bytes until it rests on the first
 * byte for which is_ASCII() is false, or on the terminating NUL.
 *
 * str:   NUL-terminated string to scan.
 * index: in/out position in 'str'; updated to where the scan stopped.
 *
 * Returns the position of the first non-ASCII byte, or -1 when the end of
 * the string was reached first.
 */
int until_newword(const unsigned char *str, int *index)
{
    while(str[*index] != 0 && is_ASCII(str[*index]))
    {
        ++*index;
    }

    /* No ';' after the condition: a stray one here would make the return
     * below unconditional and the end of the string would never be
     * reported as -1. */
    if (str[*index] != 0)
        return *index;

    return -1;
}