parse_text.c
changeset 18 64ed4238657f
parent 17 d95d9e7a2b81
child 24 026a2ba0ce16
equal deleted inserted replaced
17:d95d9e7a2b81 18:64ed4238657f
    53 
    53 
    54 static void process_text(FILE *in, int pos, int length)
    54 static void process_text(FILE *in, int pos, int length)
    55 {
    55 {
    56     unsigned char tmp[MAXWORD];
    56     unsigned char tmp[MAXWORD];
    57     int wordpos = 0;
    57     int wordpos = 0;
       
    58     int skip_non_ascii = 0;
       
    59 
    58     do
    60     do
    59     {
    61     {
    60         int c;
    62         int c;
    61         /* Check pos only if length >= 0 */
    63         /* Check pos only if length >= 0 */
    62         if (length >= 0 && pos >= length)
    64         if (length >= 0 && pos >= length)
    63             break;
    65             break;
    64         c = my_fgetc(in);
    66         c = my_fgetc(in);
    65         if (c == EOF || c == END_OF_URL)
    67         if (c == EOF || c == END_OF_URL)
    66             break;
    68             break;
    67         if (is_ASCII(c))
    69         if (skip_non_ascii || is_ASCII(c))
    68         {
    70         {
    69             if (wordpos != 0)
    71             if (wordpos != 0)
    70             {
    72             {
    71                 tmp[wordpos] = 0;
    73                 tmp[wordpos] = 0;
    72                 give_accent_to_word(tmp);
    74                 give_accent_to_word(tmp);
    73                 wordpos = 0;
    75                 wordpos = 0;
    74             }
    76             }
    75             putchar(c);
    77             putchar(c);
       
    78             /* End of skip_non_ascii when we find an
       
    79              * ASCII character */
       
    80             if (skip_non_ascii && is_ASCII(c))
       
    81                 skip_non_ascii = 0;
    76         }
    82         }
    77         else /* non-ASCII - we consider it russian */
    83         else /* non-ASCII - we consider it russian */
    78         {
    84         {
    79             tmp[wordpos++] = c;
    85             tmp[wordpos++] = c;
       
    86             if (wordpos >= MAXWORD)
       
    87             {
       
    88                 /* Dump the word and the rest of non-ASCII, because
       
    89                  * we cannot fit it in 'tmp' */
       
    90                 int i;
       
    91                 for(i=0; i < wordpos; ++i)
       
    92                     putchar(tmp[i]);
       
    93                 wordpos=0;
       
    94                 skip_non_ascii = 1;
       
    95             }
    80         }
    96         }
    81 
    97 
    82         pos += 1;
    98         pos += 1;
    83     } while(1);
    99     } while(1);
    84 
   100