zrus.c
changeset 15 17a66ceb774a
parent 14 a961bb8806b9
child 16 b4e251400e36
equal deleted inserted replaced
14:a961bb8806b9 15:17a66ceb774a
     1 #include <stdio.h>
     1 #include <stdio.h>
       
     2 #include <unicode/uchar.h>
       
     3 #include <unicode/ustring.h>
       
     4 #include <unicode/utypes.h>
     2 #include "dictre.h"
     5 #include "dictre.h"
     3 
     6 
     4 static int closed_accent(const unsigned char *tmp)
     7 static int closed_accent(const unsigned char *tmp)
     5 {
     8 {
     6     if (tmp[0] == 0xcc && tmp[1] == 0x81)
     9     if (tmp[0] == 0xcc && tmp[1] == 0x81)
   144     if (str[*index] != 0);
   147     if (str[*index] != 0);
   145         return *index;
   148         return *index;
   146 
   149 
   147     return -1;
   150     return -1;
   148 }
   151 }
       
   152 
       
   153 int get_case(enum Case *vcase, const char *str)
       
   154 {
       
   155     UChar32 c;
       
   156     int i;
       
   157     int o;
       
   158     int len;
       
   159 
       
   160     len = strlen(str);
       
   161 
       
   162     i=0;
       
   163     o=0;
       
   164     do
       
   165     {
       
   166         U8_NEXT(str, i, len, c);
       
   167         /*printf("[%i] ", c);*/
       
   168         if (c == 0)
       
   169             break;
       
   170         if (u_islower(c))
       
   171             vcase[o] = LCASE;
       
   172         else
       
   173             vcase[o] = UCASE;
       
   174         ++o;
       
   175     } while(1);
       
   176 
       
   177     return o;
       
   178 }
       
   179 
       
   180 void get_lowcase_str(char *out, const char *str)
       
   181 {
       
   182     UChar32 c;
       
   183     int i;
       
   184     int o;
       
   185     int len;
       
   186     char iserror = 0;
       
   187 
       
   188     len = strlen(str);
       
   189 
       
   190     i=0;
       
   191     o=0;
       
   192     do
       
   193     {
       
   194         U8_NEXT(str, i, len, c);
       
   195         /*printf("[%i] ", c);*/
       
   196         c = u_tolower(c);
       
   197         U8_APPEND(out, o, MAXWORD, c, iserror);
       
   198         if (iserror)
       
   199             break;
       
   200         if (c == 0)
       
   201             break;
       
   202     } while(1);
       
   203 }
       
   204 
       
   205 void reapply_case(char *out, const char *in, const enum Case *vcase)
       
   206 {
       
   207     UChar32 c;
       
   208     int i;
       
   209     int o;
       
   210     int vcasepos;
       
   211     int len;
       
   212     char iserror = 0;
       
   213     const UChar32 inverted = '`';
       
   214 
       
   215     len = strlen(in);
       
   216 
       
   217     i=0;
       
   218     o=0;
       
   219     vcasepos = 0;
       
   220     do
       
   221     {
       
   222         U8_NEXT(in, i, len, c);
       
   223         /*printf("[%i] ", c);*/
       
   224         if (c == inverted || u_hasBinaryProperty(c, UCHAR_DIACRITIC))
       
   225         {
       
   226             U8_APPEND(out, o, MAXWORD, c, iserror);
       
   227             /* Here we don't increment vcasepos,
       
   228              * so the ` or diacritics gets copied without being taken
       
   229              * care in the recase process. It will
       
   230              * be the only sign that may be _added_ */
       
   231             continue;
       
   232         }
       
   233 
       
   234         if (vcase[vcasepos] == LCASE)
       
   235             c = u_tolower(c);
       
   236         else
       
   237             c = u_toupper(c);
       
   238         vcasepos += 1;
       
   239 
       
   240         U8_APPEND(out, o, MAXWORD, c, iserror);
       
   241 
       
   242         if (iserror)
       
   243             break;
       
   244         if (c == 0)
       
   245             break;
       
   246     } while(1);
       
   247 }