http_dec.c
author viric <viriketo@gmail.com>
Fri, 30 Mar 2012 18:55:30 +0200
branchsql
changeset 32 6a1a709330bf
parent 19 4da6dbf01423
permissions -rw-r--r--
Adding code to dump the words as sql.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
19
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
     1
#include <stdio.h>
#include <stdlib.h>
#include <unicode/utf8.h>
#include "dictre.h"
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
     4
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
     5
static int url_get(FILE *f)
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
     6
{
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
     7
    int val;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
     8
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
     9
    val = fgetc(f);
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    10
    if (val == '%')
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    11
    {
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    12
        unsigned char num[3];
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    13
        num[2] = '\0';
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    14
        num[0] = fgetc(f);
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    15
        num[1] = fgetc(f);
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    16
        val = strtol(num, 0, 16);
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    17
    } else if (val == '&')
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    18
        return END_OF_URL;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    19
    else if (val == '+')
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    20
        return ' ';
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    21
    /*printf("[%i]", val);*/
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    22
    return val;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    23
}
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    24
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    25
/*
 * Convert an ASCII digit or letter to its numeric value.
 *
 * '0'..'9' map to 0..9; 'A'..'Z' and 'a'..'z' both map to 10..35.
 * Any other character yields 0, so callers that need to tell a real
 * zero from an invalid character must validate the input themselves.
 */
static int char2num(unsigned char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'Z')
        return c - 'A' + 10;
    if (c >= 'a' && c <= 'z')
        return c - 'a' + 10;
    return 0;
}
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    35
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    36
int http_getc(FILE *f)
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    37
{
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    38
    int c;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    39
    static unsigned char tmp[6]; /* for a UTF-8 string */
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    40
    static int itmp = -1;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    41
    static int tmplen;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    42
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    43
    if (itmp == -1)
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    44
    {
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    45
        c = url_get(f);
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    46
        if (c == '&')
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    47
        {
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    48
            c = url_get(f);
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    49
            if (c == '#')
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    50
            {
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    51
                char iserror = 0;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    52
                int entval;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    53
                /*Get number*/
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    54
                entval = 0;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    55
                while((c = url_get(f)) != ';')
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    56
                {
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    57
                    /* Digits in base 10 */
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    58
                    entval = char2num(c) + entval * 10;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    59
                }
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    60
                /*printf("{%i}", entval);*/
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    61
                /*Get utf-8 version*/
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    62
                tmplen = 0;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    63
                U8_APPEND(tmp, tmplen, 6, entval, iserror);
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    64
                if (iserror)
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    65
                    return -3;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    66
                /* We need not to program itmp for the next run
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    67
                 * if we have only one character to send */
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    68
                if (tmplen != 1)
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    69
                    itmp = 1;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    70
                return tmp[0];
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    71
            }
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    72
            else
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    73
                return HTTP_DECODE_ERROR; /* ERROR! */
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    74
        }
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    75
    } else /* We already have a character to keep on sending */
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    76
    {
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    77
        int tosend;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    78
        tosend = itmp;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    79
        ++itmp;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    80
        if (itmp == tmplen)
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    81
            itmp = -1;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    82
        return tmp[tosend];
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    83
    }
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    84
    return c;
4da6dbf01423 Forgot http decoding routines.
viric@llimona
parents:
diff changeset
    85
}