zrus.c
author viric@llimona
Sat, 01 Sep 2007 12:26:22 +0200
changeset 17 d95d9e7a2b81
parent 16 b4e251400e36
permissions -rw-r--r--
General interface to dictionary search.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
11
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unicode/uchar.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>

#include "dictre.h"
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     6
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
     7
/* Returns 1 when tmp starts with the two bytes 0xCC 0x81 (the UTF-8
 * encoding of U+0301, the combining acute accent), 0 otherwise.
 * The second byte is only read when the first one matches, so it is safe
 * to call this on the terminating NUL of a string. */
static int closed_accent(const unsigned char *tmp)
{
    return tmp[0] == 0xcc && tmp[1] == 0x81;
}

/* Returns 1 when tmp starts with the byte 0x60 (ASCII '`'), 0 otherwise. */
static int open_accent(const unsigned char *tmp)
{
    return tmp[0] == 0x60;
}

/*
 * Merges the accent marks of two spellings of a word into one string.
 *
 * Both strings are walked in parallel. Accent marks (the 0xCC 0x81 byte pair
 * or a '`' byte) found in either string are copied to the result; when both
 * strings carry the same kind of mark at the current position it is copied
 * only once. Every other byte is taken from a, while the position in b is
 * advanced by one byte as well.
 *
 * If a runs out while b still has non-accent bytes left, the result ends at
 * that point.
 *
 * Ownership: a is always freed by this function. The returned string is
 * newly allocated and must be freed by the caller. Returns NULL when the
 * allocation fails (a is freed in that case too).
 */
char * mix_accents(char *a, const char *b)
{
    const unsigned char *ua = (const unsigned char *) a;
    const unsigned char *ub = (const unsigned char *) b;
    size_t ia = 0;
    size_t ib = 0;
    size_t o = 0;
    char *out;

    /* Every byte written below consumes at least one byte of a or b, so the
     * sum of both lengths is an upper bound for the result. Sizing the buffer
     * from the inputs removes the fixed-size stack buffer that could be
     * overrun by long words. */
    out = malloc(strlen(a) + strlen(b) + 1);
    if (out == NULL)
    {
        free(a);
        return NULL;
    }

    while (ua[ia] != 0 || ub[ib] != 0)
    {
        if (closed_accent(&ua[ia]))
        {
            out[o++] = a[ia];
            out[o++] = a[ia + 1];
            ia += 2;
            /* Same mark in b at this position: consume it, do not copy it twice */
            if (closed_accent(&ub[ib]))
                ib += 2;
        }
        else if (closed_accent(&ub[ib]))
        {
            out[o++] = b[ib];
            out[o++] = b[ib + 1];
            ib += 2;
        }
        else if (open_accent(&ua[ia]))
        {
            out[o++] = a[ia];
            ia += 1;
            if (open_accent(&ub[ib]))
                ib += 1;
        }
        else if (open_accent(&ub[ib]))
        {
            out[o++] = b[ib];
            ib += 1;
        }
        else
        {
            /* Letter. When a is exhausted the result stops here; the
             * remaining bytes of b are not appended. */
            if (ua[ia] == 0)
                break;
            out[o++] = a[ia++];
            if (ub[ib] != 0)
                ++ib;
        }
    }
    out[o] = 0;

    free(a);
    return out;
}
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    80
68ea18fe402c Adding code for the zprocess, for processing the Zaliznjak dictionary.
viric@llimona
parents:
diff changeset
    81
/*
 * Copies the NUL-terminated string from into dest, dropping every
 * 0xCC 0x81 byte pair and every 0x60 ('`') byte. dest is always
 * NUL-terminated. The output is never longer than the input; no size is
 * passed in, so the caller provides a large enough dest.
 */
void remove_accent(unsigned char *dest, const unsigned char *from)
{
    const unsigned char *src = from;
    unsigned char *dst = dest;

    while (*src != 0)
    {
        /* src[1] is only read when src[0] matched, so the terminator is
         * never stepped over */
        if (src[0] == 0xcc && src[1] == 0x81)
        {
            src += 2;
            continue;
        }
        if (*src == 0x60)
        {
            src += 1;
            continue;
        }
        *dst++ = *src++;
    }
    *dst = 0;
}
14
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   102
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   103
/*
 * Advances *index through str until it rests on a '\n' or on the
 * terminating NUL.
 * Returns the position of the '\n' (also left in *index), or -1 when the
 * end of the string is reached first (*index then points at the NUL).
 */
int skip_newline(const char *str, int *index)
{
    for (;;)
    {
        const char ch = str[*index];

        if (ch == '\n')
            return *index;
        if (ch == 0)
            return -1;
        ++*index;
    }
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   115
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   116
/*
 * Advances *index through str until it rests on a word delimiter
 * (space, '\n', '\r' or ',') or on the terminating NUL.
 * Returns the position of the delimiter (also left in *index), or -1 when
 * the end of the string is reached first.
 */
int until_noword(const char *str, int *index)
{
    for (;; ++*index)
    {
        switch (str[*index])
        {
            case '\0':
                return -1;
            case ' ':
            case '\n':
            case '\r':
            case ',':
                return *index;
            default:
                break;
        }
    }
}
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   132
a961bb8806b9 first 'zparsetext'.
viric@llimona
parents: 12
diff changeset
   133
/* Returns 1 when c is a 7-bit value (below 128), 0 otherwise. */
int is_ASCII(unsigned char c)
{
    return c < 128;
}

/*
 * Advances *index through str while the current byte is a non-NUL ASCII
 * byte, i.e. until it rests on the first byte >= 128 or on the terminating
 * NUL.
 * Returns the position of that non-ASCII byte (also left in *index), or -1
 * when the end of the string is reached first.
 */
int until_newword(const unsigned char *str, int *index)
{
    while (str[*index] != 0 && is_ASCII(str[*index]))
    {
        ++*index;
    }

    /* A stray ';' after this condition used to make the return below
     * unconditional, so the end of the string was never reported as -1. */
    if (str[*index] != 0)
        return *index;

    return -1;
}
15
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   152
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   153
int get_case(enum Case *vcase, const char *str)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   154
{
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   155
    UChar32 c;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   156
    int i;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   157
    int o;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   158
    int len;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   159
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   160
    len = strlen(str);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   161
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   162
    i=0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   163
    o=0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   164
    do
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   165
    {
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   166
        U8_NEXT(str, i, len, c);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   167
        /*printf("[%i] ", c);*/
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   168
        if (c == 0)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   169
            break;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   170
        if (u_islower(c))
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   171
            vcase[o] = LCASE;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   172
        else
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   173
            vcase[o] = UCASE;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   174
        ++o;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   175
    } while(1);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   176
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   177
    return o;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   178
}
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   179
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   180
void get_lowcase_str(char *out, const char *str)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   181
{
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   182
    UChar32 c;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   183
    int i;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   184
    int o;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   185
    int len;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   186
    char iserror = 0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   187
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   188
    len = strlen(str);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   189
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   190
    i=0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   191
    o=0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   192
    do
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   193
    {
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   194
        U8_NEXT(str, i, len, c);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   195
        /*printf("[%i] ", c);*/
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   196
        c = u_tolower(c);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   197
        U8_APPEND(out, o, MAXWORD, c, iserror);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   198
        if (iserror)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   199
            break;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   200
        if (c == 0)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   201
            break;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   202
    } while(1);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   203
}
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   204
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   205
void reapply_case(char *out, const char *in, const enum Case *vcase)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   206
{
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   207
    UChar32 c;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   208
    int i;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   209
    int o;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   210
    int vcasepos;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   211
    int len;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   212
    char iserror = 0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   213
    const UChar32 inverted = '`';
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   214
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   215
    len = strlen(in);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   216
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   217
    i=0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   218
    o=0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   219
    vcasepos = 0;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   220
    do
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   221
    {
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   222
        U8_NEXT(in, i, len, c);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   223
        /*printf("[%i] ", c);*/
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   224
        if (c == inverted || u_hasBinaryProperty(c, UCHAR_DIACRITIC))
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   225
        {
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   226
            U8_APPEND(out, o, MAXWORD, c, iserror);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   227
            /* Here we don't increment vcasepos,
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   228
             * so the ` or diacritics gets copied without being taken
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   229
             * care in the recase process. It will
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   230
             * be the only sign that may be _added_ */
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   231
            continue;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   232
        }
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   233
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   234
        if (vcase[vcasepos] == LCASE)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   235
            c = u_tolower(c);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   236
        else
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   237
            c = u_toupper(c);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   238
        vcasepos += 1;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   239
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   240
        U8_APPEND(out, o, MAXWORD, c, iserror);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   241
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   242
        if (iserror)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   243
            break;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   244
        if (c == 0)
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   245
            break;
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   246
    } while(1);
17a66ceb774a Pritraktado de majuskloj per ICU.
viric@llimona
parents: 14
diff changeset
   247
}
16
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   248
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   249
void remove_jo(char *str)
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   250
{
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   251
    int i, o;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   252
    UChar32 c;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   253
    char iserror = 0;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   254
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   255
    i=0;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   256
    o=0;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   257
    do
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   258
    {
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   259
        U8_NEXT(str, i, MAXWORD, c);
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   260
        if (c == 0x0451)
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   261
        {
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   262
            c = 0x0435;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   263
            U8_APPEND(str, o, MAXWORD, c, iserror);
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   264
            if (iserror)
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   265
                break;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   266
        }
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   267
        o = i;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   268
        if (c == 0)
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   269
            break;
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   270
    } while(1);
b4e251400e36 Improved hash on zprocess, and added parsing for "jo".
viric@llimona
parents: 15
diff changeset
   271
}