viric@11: #include viric@15: #include viric@15: #include viric@15: #include viric@11: #include "dictre.h" viric@11: viric@11: static int closed_accent(const unsigned char *tmp) viric@11: { viric@11: if (tmp[0] == 0xcc && tmp[1] == 0x81) viric@11: return 1; viric@11: return 0; viric@11: } viric@11: viric@11: static int open_accent(const unsigned char *tmp) viric@11: { viric@11: if (tmp[0] == 0x60) viric@11: return 1; viric@11: return 0; viric@11: } viric@11: viric@11: /* Must free what is needed */ viric@11: char * mix_accents(char *a, const char *b) viric@11: { viric@11: int ia,ib,o; viric@11: char *out; viric@11: char tmp[MAXWORD]; viric@11: viric@11: ia = 0; viric@11: ib = 0; viric@11: o = 0; viric@11: while(a[ia] != 0 || b[ib] != 0) viric@11: { viric@11: if (closed_accent(&a[ia])) viric@11: { viric@11: tmp[o] = a[ia]; viric@11: tmp[o+1] = a[ia+1]; viric@11: o+=2; viric@11: ia+=2; viric@11: if(closed_accent(&b[ib])) viric@11: ib+=2; viric@11: continue; viric@11: } else if (closed_accent(&b[ib])) viric@11: { viric@11: tmp[o] = b[ib]; viric@11: tmp[o+1] = b[ib+1]; viric@11: o+=2; viric@11: ib+=2; viric@11: continue; viric@11: } else if (open_accent(&a[ia])) viric@11: { viric@12: tmp[o] = a[ia]; viric@11: o+=1; viric@11: ia+=1; viric@11: if (open_accent(&b[ib])) viric@11: ib+=1; viric@11: continue; viric@11: } else if (open_accent(&b[ib])) viric@11: { viric@11: tmp[o] = b[ib]; viric@11: o+=1; viric@11: ib+=1; viric@11: continue; viric@11: } viric@11: else viric@11: { viric@11: /* Letter */ viric@11: tmp[o] = a[ia]; viric@11: if (a[ia] != 0) viric@11: ++ia; viric@11: if (b[ib] != 0) viric@11: ++ib; viric@11: ++o; viric@11: } viric@11: } viric@11: tmp[o] = 0; viric@11: out = strdup(tmp); viric@11: free(a); viric@11: return out; viric@11: } viric@11: viric@11: void remove_accent(unsigned char *dest, const unsigned char *from) viric@11: { viric@11: int i,o; viric@11: viric@11: i = 0; viric@11: o = 0; viric@11: while (from[i] != 0) viric@11: { viric@11: if (from[i] == 0xcc && from[i+1] == 0x81) viric@11: i+=2; viric@11: else if (from[i] == 0x60) viric@11: ++i; viric@11: else viric@11: { viric@11: dest[o] = from[i]; viric@11: ++o; viric@11: ++i; viric@11: } viric@11: } viric@11: dest[o] = 0; viric@11: } viric@14: viric@14: int skip_newline(const char *str, int *index) viric@14: { viric@14: while(str[*index] != 0 && str[*index] != '\n') viric@14: { viric@14: ++*index; viric@14: } viric@14: viric@14: if (str[*index] == '\n') viric@14: return *index; viric@14: viric@14: return -1; viric@14: } viric@14: viric@14: int until_noword(const char *str, int *index) viric@14: { viric@14: while(str[*index] != 0 && viric@14: str[*index] != ' ' && viric@14: str[*index] != '\n' && viric@14: str[*index] != '\r' && viric@14: str[*index] != ',') viric@14: { viric@14: ++*index; viric@14: } viric@14: viric@14: if (str[*index] != 0) viric@14: return *index; viric@14: viric@14: return -1; viric@14: } viric@14: viric@14: int is_ASCII(unsigned char c) viric@14: { viric@14: if (c < 128) viric@14: return 1; viric@14: return 0; viric@14: } viric@14: viric@14: int until_newword(const unsigned char *str, int *index) viric@14: { viric@14: while(str[*index] != 0 && is_ASCII(str[*index])) viric@14: { viric@14: ++*index; viric@14: } viric@14: viric@14: if (str[*index] != 0); viric@14: return *index; viric@14: viric@14: return -1; viric@14: } viric@15: viric@15: int get_case(enum Case *vcase, const char *str) viric@15: { viric@15: UChar32 c; viric@15: int i; viric@15: int o; viric@15: int len; viric@15: viric@15: len = strlen(str); viric@15: viric@15: i=0; viric@15: o=0; viric@15: do viric@15: { viric@15: U8_NEXT(str, i, len, c); viric@15: /*printf("[%i] ", c);*/ viric@15: if (c == 0) viric@15: break; viric@15: if (u_islower(c)) viric@15: vcase[o] = LCASE; viric@15: else viric@15: vcase[o] = UCASE; viric@15: ++o; viric@15: } while(1); viric@15: viric@15: return o; viric@15: } viric@15: viric@15: void get_lowcase_str(char *out, const char *str) viric@15: { viric@15: UChar32 c; viric@15: int i; viric@15: int o; viric@15: int len; viric@15: char iserror = 0; viric@15: viric@15: len = strlen(str); viric@15: viric@15: i=0; viric@15: o=0; viric@15: do viric@15: { viric@15: U8_NEXT(str, i, len, c); viric@15: /*printf("[%i] ", c);*/ viric@15: c = u_tolower(c); viric@15: U8_APPEND(out, o, MAXWORD, c, iserror); viric@15: if (iserror) viric@15: break; viric@15: if (c == 0) viric@15: break; viric@15: } while(1); viric@15: } viric@15: viric@15: void reapply_case(char *out, const char *in, const enum Case *vcase) viric@15: { viric@15: UChar32 c; viric@15: int i; viric@15: int o; viric@15: int vcasepos; viric@15: int len; viric@15: char iserror = 0; viric@15: const UChar32 inverted = '`'; viric@15: viric@15: len = strlen(in); viric@15: viric@15: i=0; viric@15: o=0; viric@15: vcasepos = 0; viric@15: do viric@15: { viric@15: U8_NEXT(in, i, len, c); viric@15: /*printf("[%i] ", c);*/ viric@15: if (c == inverted || u_hasBinaryProperty(c, UCHAR_DIACRITIC)) viric@15: { viric@15: U8_APPEND(out, o, MAXWORD, c, iserror); viric@15: /* Here we don't increment vcasepos, viric@15: * so the ` or diacritics gets copied without being taken viric@15: * care in the recase process. It will viric@15: * be the only sign that may be _added_ */ viric@15: continue; viric@15: } viric@15: viric@15: if (vcase[vcasepos] == LCASE) viric@15: c = u_tolower(c); viric@15: else viric@15: c = u_toupper(c); viric@15: vcasepos += 1; viric@15: viric@15: U8_APPEND(out, o, MAXWORD, c, iserror); viric@15: viric@15: if (iserror) viric@15: break; viric@15: if (c == 0) viric@15: break; viric@15: } while(1); viric@15: } viric@16: viric@16: void remove_jo(char *str) viric@16: { viric@16: int i, o; viric@16: UChar32 c; viric@16: char iserror = 0; viric@16: viric@16: i=0; viric@16: o=0; viric@16: do viric@16: { viric@16: U8_NEXT(str, i, MAXWORD, c); viric@16: if (c == 0x0451) viric@16: { viric@16: c = 0x0435; viric@16: U8_APPEND(str, o, MAXWORD, c, iserror); viric@16: if (iserror) viric@16: break; viric@16: } viric@16: o = i; viric@16: if (c == 0) viric@16: break; viric@16: } while(1); viric@16: }