/*
 * parse_text.c
 * author viric@llimona
 * Sun, 02 Sep 2007 14:24:10 +0200
 * changeset 24 026a2ba0ce16
 * parent 18 64ed4238657f
 * child 25 8d524bb8dcea
 * permissions -rw-r--r--
 * Now the HTML result shows word meanings.
 */
#include <stdio.h>
#include <stdlib.h>
#include "dictre.h"

/* Nonzero when input comes from an HTTP form submission. main() sets it when
 * the CONTENT_LENGTH environment variable is present; my_fgetc() then reads
 * through http_getc() instead of fgetc(). */
static int is_http = 0;
/* Nonzero to wrap the output in HTML markup (header, paragraphs, popups).
 * Nothing in the code visible here ever changes it from 1. */
static int give_html = 1;
/* Value of CONTENT_LENGTH as parsed by main(), or -1 when it is not set.
 * It is stored but not consulted by the code visible here. */
static int content_length = -1;
/* Dictionary opened by main() under the name "akcentiga"; searched in
 * give_accent_to_word() with the lowercased input word. */
static struct Dict dakcentiga;
/* Dictionary opened by main() under the name "signifoj"; searched for the
 * word meanings that are shown in the HTML popups. */
static struct Dict dsignifoj;

/*
 * Write the fixed opening part of the HTML page to stdout.
 *
 * The output is a single string literal assembled from adjacent fragments:
 *   - the <head> with a UTF-8 Content-Type meta tag and the page title;
 *   - a stylesheet: "div.difino" (hidden by default, absolutely positioned
 *     popup) and "div.alineo" (indented, justified paragraph);
 *   - the opening <body> tag;
 *   - a script defining display(), undisplay() and click().
 *
 * In the emitted script, display() shows the element 5 pixels below
 * ev.screenY + window.pageYOffset unless a popup is currently pinned
 * (the JS variable 'blocked' is non-empty); click() pins a popup when none
 * is pinned, otherwise hides the pinned one and clears 'blocked'.
 * The variable x is computed by display() but is only used inside the
 * fragments that are compiled out below.
 *
 * The document is left open; print_html_footer() writes the closing tags.
 */
static void print_html_header()
{
    printf(
"<html>\n"
"<head>\n"
"    <meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">\n"
"    <title>Akcentigita teksto</title>\n"
"</head>\n"
"<style type=\"text/css\" media=\"screen\">\n"
"div.difino { display: none ;\n"
"    position: absolute;\n"
"    left: 0;\n"
"    background: moccasin;\n"
"    border: thin;\n"
"    border-color: black;\n"
"    padding: 10px 10px 10px 10px;\n"
"    text-indent: 0em;\n"
"    text-align: left;\n"
"    }\n"
"div.alineo { text-indent: 2em ; text-align: justify }\n"
"</style>\n"
"<body>\n"
"<script language=\"javascript\">\n"
"var blocked = \"\";\n"
"\n"
"function display(element, ev)\n"
"{\n"
"    var x,y;\n"
"    x = ev.screenX + window.pageXOffset;\n"
"    y = ev.screenY + window.pageYOffset;\n"
"\n"
"    if (blocked != \"\")\n"
"        return;\n"
/* The fragments inside this #if 0 are removed by the preprocessor, so the
 * horizontal clamping and the top/bottom placement logic are NOT part of
 * the emitted page. */
#if 0
"\n"
"    if (x + 320 > window.innerWidth)\n"
"        x = window.innerWidth - 320;\n"
"\n"
"    /*\n"
"    if (y < window.innerHeight / 2)\n"
"    {\n"
"        y += 5;\n"
"        document.getElementById(element).style.top = y;\n"
"    } else {\n"
"        y -= 5;\n"
"        document.getElementById(element).style.top = 0;\n"
"        document.getElementById(element).style.bottom = y;\n"
"    }\n"
"    */\n"
#endif
"    y += 5;\n"
"    document.getElementById(element).style.top = y + 'px';\n"
"    \n"
/* The fragment in the following C comment is likewise not emitted. */
/*
"    document.getElementById(element).style.left = 0;\n"
*/
"    document.getElementById(element).style.display = 'block';\n"
"}\n"
"\n"
"function undisplay(element)\n"
"{\n"
"    document.getElementById(element).style.display = 'none';\n"
"}\n"
"\n"
"function click(element, ev)\n"
"{\n"
"    if (blocked == \"\")\n"
"    {\n"
"        display(element, ev);\n"
"        blocked = element;\n"
"    }\n"
"    else\n"
"    {\n"
"        undisplay(blocked);\n"
"        blocked = \"\";\n"
"    }\n"
"}\n"
"\n"
"</script>"
);

}

/* Close the HTML document opened by the header: writes the closing body and
 * html tags followed by a newline to stdout. */
static void print_html_footer()
{
    fputs("</body></html>\n", stdout);
}

/*
 * Print one dictionary meaning as HTML paragraphs on stdout.
 *
 * word    - the looked-up word, shown in the "## Se moto ..." heading line
 * signifo - the NUL-terminated meaning text
 *
 * The text is copied verbatim; whenever a newline directly follows another
 * newline, a paragraph break ("</p><p>") is written just before that second
 * newline. The last paragraph is closed at the end.
 */
static void dump_signifo_with_p(const char *word, const char *signifo)
{
    const char *cursor;
    char previous = 0;

    printf("<p>## Se moto %s:</p>\n<p>", word);

    for (cursor = signifo; *cursor != '\0'; ++cursor)
    {
        /* An empty line in the source text starts a new paragraph */
        if (previous == '\n' && *cursor == '\n')
            fputs("</p><p>", stdout);
        putchar(*cursor);
        previous = *cursor;
    }

    fputs("</p>", stdout);
}

/*
 * Open the hidden popup <div> that holds the meanings of one word.
 *
 * id - number used to build the element id "vorto<id>"; the same id is
 *      passed to the JavaScript click() handler installed on the div.
 *
 * Leaves a <p> element open; print_end_signifoj() closes it.
 */
static void print_start_signifoj(int id)
{
    printf("<div id=\"vorto%i\" ", id);
    printf("onclick=\"click('vorto%i', event);\" ", id);
    fputs("class=\"difino\"><p>\n", stdout);
}

/* Close the paragraph and the popup <div> opened by print_start_signifoj(). */
static void print_end_signifoj()
{
    fputs("</p></div>", stdout);
}

/*
 * Print the meanings popup for every word in a list.
 *
 * wordlist - NUL-terminated, writable list of words; the first word must
 *            start exactly at *wordlist. The buffer is modified in place:
 *            each delimiter that ends a word is overwritten with NUL.
 * id       - popup number, forwarded to print_start_signifoj().
 *
 * A word ends at the first space in the remaining text; if there is no
 * space, at the first newline; otherwise at the end of the string. Each word
 * is looked up in the meanings dictionary and every one that has a
 * definition is printed with dump_signifo_with_p(). The popup <div> is only
 * opened (and later closed) if at least one definition was found. An empty
 * word ends the scan.
 *
 * NOTE(review): relies on find_def() leaving def[0] == 0 when the word is not
 * found, as the rest of this file does - confirm against its implementation.
 */
static void print_signifoj(char *wordlist, int id)
{
    char *next_space;
    char def[MAXDEF];
    int start_shown = 0;

    do {
        int at_end;

        next_space = strchr(wordlist, ' ');
        if (next_space == NULL)
            next_space = strchr(wordlist, '\n');
        if (next_space == NULL)
            next_space = strchr(wordlist, '\0'); /* never NULL */

        /* Empty word: nothing left to look up */
        if (next_space == wordlist)
            break;

        /* Remember whether this word is terminated by the end of the string
         * BEFORE the delimiter is overwritten: in that case there is nothing
         * valid after it and we must not step past the terminator. */
        at_end = (*next_space == '\0');
        *next_space = '\0';

        /* wordlist has the word to search */
        find_def(&dsignifoj, wordlist, def);
        if (def[0])
        {
            if (!start_shown)
            {
                print_start_signifoj(id);
                start_shown = 1;
            }
            dump_signifo_with_p(wordlist, def);
        }

        if (at_end)
            break;
        wordlist = next_space + 1;
    } while(1);

    if (start_shown)
        print_end_signifoj();
}

/*
 * Print one (accented) word to stdout.
 *
 * word - NUL-terminated text to print; written as-is, without escaping.
 * id   - popup number; in HTML mode the word is wrapped in a
 *        <span class="vorto"> whose onclick calls click('vorto<id>', event),
 *        i.e. the element id produced by print_start_signifoj(id).
 *
 * When give_html is zero only the bare word is printed.
 *
 * The function returns nothing; the explicit 'void' replaces the implicit
 * 'int' return type, which is not valid since C99.
 */
static void print_accented(const char *word, int id)
{
    if (give_html)
    {
        printf("<span class=\"vorto\" onclick=\"click('vorto%i', event);\">"
                "%s</span>",id,word);
    } else
        printf("%s", word);
}

/*
 * Print one input word, accented and annotated when the dictionaries know it.
 *
 * word - NUL-terminated word as it appeared in the input.
 *
 * Steps:
 *  1. The lowercased word is looked up in the accent dictionary. An entry has
 *     the form "ACCENTED_WORD NOMINATIVE1 NOMINATIVE2 ... \n". The accented
 *     form gets the original letter case reapplied and is printed; in HTML
 *     mode the words after the first space are passed to print_signifoj()
 *     so their meanings are attached as a popup.
 *  2. Otherwise, in HTML mode only, the lowercased word itself is looked up
 *     in the meanings dictionary; if found, the original word is printed
 *     with its popup.
 *  3. In every other case - including an accent entry that contains no
 *     space - the word is printed unchanged.
 *
 * A function-static counter provides the popup id; it advances once per word
 * printed through print_accented().
 *
 * NOTE(review): get_case(), get_lowcase_str(), reapply_case() and find_def()
 * are defined elsewhere; the buffer sizes MAXWORD/MAXDEF are assumed to be
 * large enough for what they write - confirm.
 */
static void give_accent_to_word(const char *word)
{
    char def[MAXDEF];
    char low[MAXWORD];
    char recased[MAXWORD];
    enum Case vcase[MAXWORD];
    static int id = 1;

    /* Get case */
    get_case(vcase, word);

    /* Get lowercase version */
    get_lowcase_str(low, word);

    /* Find the lowercase version */
    find_def(&dakcentiga, low, def);
    if (def[0] != 0) /* found */
    {
        char *first_space = strchr(def, ' ');
        if (first_space != NULL) /* Space found */
        {
            /* Terminate the accented word through the pointer itself; going
             * through a 'char' offset would truncate for long entries. */
            *first_space = 0;
            reapply_case(recased, def, vcase);
            print_accented(recased, id);
            if (give_html)
                print_signifoj(first_space + 1 /*' '*/, id);
            ++id;
            return;
        }
        /* Entry without a space: fall through and print the word as-is
         * instead of silently dropping it from the output. */
    }
    else if (give_html)
    {
        /* OPTIMIZE: This, if find, will react in a second search at
         * print_signifoj() */
        find_def(&dsignifoj, low, def);
        if (def[0])
        {
            print_accented(word, id);
            print_signifoj(low, id);
            ++id;
            return;
        }
    }

    /* Unusable accent entry, or word unknown to both dictionaries */
    printf("%s", word);
}

/*
 * Read the next input character from f.
 *
 * Uses http_getc() when is_http is set and plain fgetc() otherwise, and
 * returns whatever the selected reader returns.
 */
static int my_fgetc(FILE *f)
{
    return is_http ? http_getc(f) : fgetc(f);
}

/*
 * Write one character of running text to stdout.
 * In HTML mode the characters '<', '>' and '&' are written as entities so
 * that submitted text cannot inject markup into the page.
 */
static void put_text_char(int c)
{
    if (give_html)
    {
        switch (c)
        {
            case '<':
                fputs("&lt;", stdout);
                return;
            case '>':
                fputs("&gt;", stdout);
                return;
            case '&':
                fputs("&amp;", stdout);
                return;
            default:
                break;
        }
    }
    putchar(c);
}

/*
 * Copy the text from 'in' to stdout, accenting the non-ASCII words.
 *
 * in     - input stream, read through my_fgetc().
 * pos    - number of characters already consumed before the call.
 * length - stop once pos reaches this value; a negative value disables the
 *          limit.
 *
 * Reading also stops at EOF or END_OF_URL. Carriage returns are dropped (but
 * still counted in pos). Runs of non-ASCII characters are collected as a
 * word and handed to give_accent_to_word() when the next ASCII character (or
 * the end of input) arrives; ASCII characters are written out directly. A
 * run that does not fit in MAXWORD bytes is written unmodified, together
 * with the rest of that run.
 *
 * In HTML mode the text is wrapped in <div class="alineo"> blocks, with a new
 * block started at every empty line.
 */
static void process_text(FILE *in, int pos, int length)
{
    unsigned char tmp[MAXWORD];
    int wordpos = 0;
    int skip_non_ascii = 0;
    int last = 0;

    if (give_html)
        printf("<div class=\"alineo\">");
    do
    {
        int c;
        /* Check pos only if length >= 0 */
        if (length >= 0 && pos >= length)
            break;
        c = my_fgetc(in);
        if (c == EOF || c == END_OF_URL)
            break;

        /* Every character consumed counts against 'length', including the
         * carriage returns dropped just below. */
        pos += 1;

        if (c == '\r')
            continue;

        /* Process 'last' for eventual line break */
        if (give_html && c == '\n' && last == '\n')
            printf("</div>\n<div class=\"alineo\">");
        last = c;

        if (skip_non_ascii || is_ASCII(c))
        {
            if (wordpos != 0)
            {
                tmp[wordpos] = 0;
                give_accent_to_word((const char *) tmp);
                wordpos = 0;
            }
            put_text_char(c);
            /* End of skip_non_ascii when we find an
             * ascii string */
            if (skip_non_ascii && is_ASCII(c))
                skip_non_ascii = 0;
        }
        else /* non-ASCII - we consider it russian */
        {
            tmp[wordpos++] = c;
            if (wordpos >= MAXWORD)
            {
                /* Dump the word and the rest of non-ASCII, because
                 * we cannot fit it in 'tmp' */
                int i;
                for(i=0; i < wordpos; ++i)
                    putchar(tmp[i]);
                wordpos=0;
                skip_non_ascii = 1;
            }
        }
    } while(1);

    /* End word */
    if (wordpos != 0)
    {
        tmp[wordpos] = 0;
        give_accent_to_word((const char *) tmp);
        wordpos = 0;
    }
    if (give_html)
        printf("</div>");
}

/*
 * Write the HTTP response header to stdout: a Content-Type line announcing
 * UTF-8 HTML, followed by the empty line that ends the header section.
 *
 * The function returns nothing; the explicit 'void' replaces the implicit
 * 'int' return type, which is not valid since C99.
 */
static void print_http_header(void)
{
    printf("Content-Type:text/html;charset=utf-8\r\n\r\n");
}

/*
 * Consume the form field prefix "teksto=" from stdin.
 *
 * Reads exactly as many bytes as the prefix is long and compares them with
 * it.
 *
 * Returns 1 if the input starts with "teksto=", 0 otherwise - including when
 * stdin ends (or fails) before the whole prefix could be read. In every case
 * the bytes that were read are consumed.
 */
int eat_form_ok()
{
    const char mask[] = "teksto=";
    char tmp[sizeof(mask)];
    const size_t wanted = sizeof(mask) - 1;

    /* A short read leaves part of tmp unset, so never compare it */
    if (fread(tmp, 1, wanted, stdin) != wanted)
        return 0;
    tmp[wanted] = 0;

    return strcmp(mask, tmp) == 0;
}

/*
 * Program entry point: accent the text read from stdin and write the result
 * to stdout.
 *
 * When the CONTENT_LENGTH environment variable is set the program acts as an
 * HTTP form handler: it prints the HTTP header, requires the input to start
 * with the expected form field prefix (see eat_form_ok()) and reads the rest
 * through the HTTP-aware reader.
 *
 * Returns 0 on success, -1 when the form prefix is missing. Both
 * dictionaries are released on every path.
 *
 * NOTE(review): end_dictionary() is assumed to be the counterpart of
 * init_dictionary() for any dictionary opened with it - confirm.
 */
int main()
{
    char *c;
    int status = 0;

    init_dictionary(&dakcentiga, "akcentiga");
    init_dictionary(&dsignifoj, "signifoj");

    c = getenv("CONTENT_LENGTH");
    if (c != NULL)
    {
        content_length = atoi(c);
        is_http = 1;
    }
    if (is_http)
    {
        print_http_header();
        if (!eat_form_ok())
            status = -1;
    }

    if (status == 0)
    {
        if (give_html)
            print_html_header();
        /* We pass -1 so we don't check content length */
        process_text(stdin, 0, -1);

        if (give_html)
            print_html_footer();
    }

    end_dictionary(&dakcentiga);
    end_dictionary(&dsignifoj);

    return status;
}