src/ogdl/OgdlParser.java
author viric@llimona
Wed, 17 Jan 2007 19:14:26 +0100
changeset 12 3932322b7d83
parent 0 8bc2bfcd1bd8
permissions -rw-r--r--
Half-commit. Not finished.

/* OGDL, Ordered Graph Data Language 
 * (c) R.Veen, 2002-2006.
 * License: see http://ogdl.org/ (similar to zlib)
 */

package ogdl;


import java.io.IOException;
import java.io.Reader;

import ogdl.parser.IParserHandler;


/** Parser for the OGDL text format

    Non recursive parser. Doesn't need a special Reader.

        <ul>
        <li>Boolean functions return true if the item exists, false if not. If false, they don't modify anything.
        <li>Relevant functions send an event through the IParserHandler object.
        <li>Keeps its own one-character pushback on top of a plain java.io.Reader, instead of using a java.io.PushbackReader.
        <li>Stores locally one leaf/string to process escape sequences. The minimum content element returned
             is one leaf (block or not).
        <li>Leaf type is not preserved: simple, quoted or blocks are all returned as plain strings.
        </ul>

    date: Nov 2002.

    $Id: OgdlParser.java,v 1.8 2005/06/06 16:10:53 tbryan Exp $
*/

public final class OgdlParser
{
    /** Event type for primary, content related events. */
    public final static int CONTENT = 1;

    /** Event type for secondary, format related events. */
    public final static int FORMAT = 2;

    /** Initial size of the indentation and group stacks; both grow on demand. */
    private static final int INITIAL_DEPTH = 64;

    /** Character source. */
    protected Reader r;

    /** Current line number (1-based); advanced by {@link #newline()}. */
    int line = 1;

    /** Level at which the next node is reported; -1 until the first line is parsed. */
    int level = -1;

    /** Stack of levels saved when a '(' is seen; groupIndex is its size. */
    int[] groups;

    /** Indentation (number of leading spaces) recorded for each line level. */
    int[] lineInd;

    /** Index into lineInd for the line being parsed. */
    int lineLevel;

    /** Not used by this class. */
    int groupLevel;

    /** Number of currently open groups. */
    int groupIndex = 0;

    /** Indentation already consumed by block(), handed back by the next space() call. */
    int savedSpaces = 0;

    /** When true, the next newline() call succeeds without reading. Never set to true in this class. */
    boolean savedNewline = false;

    /** Last value returned by the handler for a CONTENT event; false stops line(). */
    boolean continueParsing = true;

    private IParserHandler event;

    /* leaf buffer, for first level string processing: escape sequences */
    StringBuffer sb;

    /**
     * Creates a parser that reads from r and reports to event.
     *
     * @param r     character source
     * @param event handler receiving CONTENT / FORMAT events and errors
     */
    public OgdlParser (Reader r, IParserHandler event) throws Exception
    {
        this.r = r;
        this.event = event;
        sb = new StringBuffer();
        groups = new int[INITIAL_DEPTH];
        lineInd = new int[INITIAL_DEPTH];
    }

    /* XXX: here an efficient string reader ... */

    /** space = char_space+

        Returns the number of spaces. Mixing of tabs and spaces is not
        allowed as indentation, but not checked yet.

        If block() left a saved indentation count, that count is returned
        (and cleared) without reading from the stream.
    */

    public int space() throws IOException
    {
        if ( savedSpaces != 0 ) {
            int saved = savedSpaces;
            savedSpaces = 0;
            return saved;
        }

        int n = 0;
        while ( Characters.is( read(), Characters.SPACE ) )
            n++;

        unread();       /* give back the first non-space character */
        return n;
    }

    /** newline ::= ( CR LF) | CR | LF

        Returns true and advances the line counter if a line break was
        consumed; otherwise nothing is consumed and the counter is unchanged.
    */

    public boolean newline() throws IOException
    {
        line++;

        if ( savedNewline ) {
            savedNewline = false;
            return true;
        }

        int c = read();

        if ( c == '\r' ) {
            /* swallow the LF of a CR LF pair, keep anything else */
            c = read();
            if ( c != '\n' )
                unread();
            return true;
        }
        else if ( c == '\n' )
            return true;

        line--;         /* no line break here: undo the optimistic increment */
        unread();
        return false;
    }


    /** eos ::= end of stream (no more chars)

        A form feed character is also treated as end of stream.
        Nothing is consumed.
    */

    public boolean eos() throws IOException
    {
        int c = read();
        unread();

        return c == -1 || c == '\f';
    }

    /** word

        Collects consecutive WORD characters into the leaf buffer.
        Returns true if at least one character was collected.
     */

    public boolean word() throws IOException
    {
        int n = 0, c;

        sb.setLength(0);

        while ( Characters.is( c = read(), Characters.WORD ) ) {
            sb.append((char) c);
            n++;
        }

        unread();
        return n > 0;
    }

    /** comment

        Collects a '#' and everything up to (not including) the next BREAK
        character, or the end of the stream, into the leaf buffer.
        Returns true if a comment was found.
     */

    public boolean comment() throws IOException
    {
        int n = 0, c;

        sb.setLength(0);

        if ( (c = read()) == '#' ) {
            sb.append((char) c);
            n++;
            /* Stop at end of stream as well: without the -1 test an
               unterminated comment on the last line could loop forever. */
            while ( (c = read()) != -1 && !Characters.is( c, Characters.BREAK ) ) {
                sb.append((char) c);
                n++;
            }
        }

        unread();
        return n > 0;
    }

    /** separator

        Consumes a single separator character into the leaf buffer.
        Returns true if one was found.
     */

    public boolean separator() throws IOException
    {
        int c;

        sb.setLength(0);

        if ( Characters.isSeparator( (char) (c = read()) ) ) {
            sb.append((char) c);
            return true;
        }

        unread();
        return false;
    }

    /** quoted

        Reads a string delimited by single or double quotes into the leaf
        buffer, without the delimiters. A quote preceded by a backslash does
        not end the string; the backslash itself stays in the buffer. After
        each newline inside the string, up to (indentation of the current
        line + 1) leading spaces are dropped. An unterminated string ends at
        the end of the stream.

        Returns false, consuming nothing, if the next character is not a quote.
     */

    public boolean quoted() throws IOException
    {
        int q = read();

        if ( q != '"' && q != '\'' ) {
            unread();
            return false;
        }

        sb.setLength(0);

        int c;
        int prev = 0;
        int skip = 0;

        while ( true ) {
            c = read();
            if ( c == -1 || ( c == q && prev != '\\' ) )
                break;

            if ( skip > 0 ) {
                if ( Characters.isSpace((char) c) ) {
                    skip--;
                    prev = c;
                    continue;
                }
                skip = 0;
            }

            sb.append((char) c);
            prev = c;

            /* Use the indentation of the current line, as block() does.
               Indexing by 'level' picked a stale or out-of-range slot,
               because level advances with every node on the line. */
            if ( c == '\n' )
                skip = lineInd[lineLevel] + 1;
        }
        return true;
    }

    /** block

        Reads a text block: a backslash followed by a newline, then every
        following line that is indented more than the current line. The
        indentation of the first block line is removed from each line. Lines
        indented no more than the current line are kept only if they are
        empty; the first non-empty one ends the block, and its already
        consumed indentation is saved for the next space() call.

        Known limitation: a backslash that is not followed by a newline is
        consumed and lost, because only one character can be pushed back.
     */

    public boolean block() throws IOException
    {
        int c = read();

        if ( c != '\\' ) {
            unread();
            return false;
        }

        if ( !newline() )
            return false;           // losing one char !

        sb.setLength(0);

        int ind = -1;               /* indentation of the first block line */

        while (true) {
            int m = space();
            if (m <= lineInd[lineLevel]) {
                if (newline())
                    sb.append('\n');
                else {
                    savedSpaces = m;
                    break;
                }
            }
            else {
                if (ind < 0)
                    ind = m;
                /* keep the indentation beyond that of the first block line */
                for (int i = ind; i < m; i++)
                    sb.append(' ');
                while (true) {
                    c = read();
                    if ( c == -1 || c == '\n' ) {
                        sb.append('\n');
                        break;
                    }
                    sb.append((char) c);
                }
                if (c == -1)
                    break;
            }
        }
        return true;
    }

    /** node

        Parses one node (block, quoted string, comment, word or separator)
        and sends the corresponding event.

        returns:
          -1 : a block was read (it ends the line)
           0 : nothing recognized
           1 : a node, comment or separator was read
     */

    public int node() throws IOException
    {
        if (block()) {
            continueParsing = event.event(CONTENT, level, sb.toString());
            return -1;
        }

        boolean isQuoted = quoted();
        if (!isQuoted && !comment() && !word() && !separator())
            return 0;

        int len = sb.length();

        /* Quoted text is always content: a quoted "(", "," or "#..." must
           not be mistaken for a group marker, separator or comment. */
        char c = (!isQuoted && len > 0) ? sb.charAt(0) : 0;

        if ( c == '(' ) {
            if (groupIndex == groups.length)
                groups = grow(groups);
            groups[groupIndex++] = level;
            event.event(FORMAT, level, "(");
            return 1;
        }
        else if ( c == ')' ) {
            /* An unmatched ')' is tolerated instead of indexing below zero. */
            /* NOTE(review): the popped slot is overwritten with the current
               level; the level is not restored from it. Kept as found -
               confirm against the OGDL specification. */
            if (groupIndex > 0)
                groups[--groupIndex] = level;
            event.event(FORMAT, level, ")");
            return 1;
        }
        else if (c == ',') {
            // reset level to the first node of this line
            if (groupIndex == 0)
                level = lineLevel;
            else
                level = groups[groupIndex-1];
            return 1;
        }
        else if (c == '#')
            return 1;

        if (len != 0)
            continueParsing = event.event(CONTENT, level, sb.toString());

        level++;

        return 1;
    }


    /** line() : space? ( node ( space node )* )? space? newline

        Returns true if parsing should go on with another line, false at end
        of stream or when the handler asked to stop.
    */

    public boolean line() throws Exception
    {
        int i = space();
        if ( newline() ) return true;
        if ( eos () )    return false;

        if ( level < 0 ) {
            /* first non-empty line: its indentation defines level 0 */
            lineInd[0] = i;
            lineLevel = 0;
        }
        else if ( i > lineInd[lineLevel] ) {
            if (lineLevel + 1 >= lineInd.length)
                lineInd = grow(lineInd);
            lineInd[++lineLevel] = i;
        }
        else if ( i < lineInd[lineLevel] ) {
            /* go back to the deepest level whose indentation is not larger */
            while (lineLevel != 0) {
                if ( i >= lineInd[lineLevel] )
                    break;
                lineLevel--;
            }
        }

        level = lineLevel;

        while ( (i = node()) > 0 )
            space();

        if (!continueParsing)
            return false;

        if (i == 0) {        /* after a block don't eat spaces */
            space();
            newline();
        }

        return !eos();
    }

    /** parse ::= line*

        Parses until line() returns false. A SyntaxException is passed to the
        handler's error method together with its line number.
    */

    public void parse() throws Exception
    {
        try {
            while ( line() ) ;
        }
        catch (SyntaxException e)
        {
            event.error(e, e.line);
        }
    }

    /** Returns a copy of a with at least twice its capacity. */
    private static int[] grow(int[] a)
    {
        int[] bigger = new int[Math.max(INITIAL_DEPTH, a.length * 2)];
        System.arraycopy(a, 0, bigger, 0, a.length);
        return bigger;
    }



    /* -----------------------------------------------------
       Small isolation layer: one character of pushback.
    */

    /** True when unChar must be returned by the next read(). */
    boolean unGetFlag = false;

    /** Last character read, or the one explicitly pushed back. */
    int unChar;

    /** Pushes back c, so that it is returned by the next read. */
    public void unread(int c)
    {
        unGetFlag = true;
        unChar = c;
    }

    /** Returns the pushed back character if there is one, else the next one from the Reader. */
    private int read() throws IOException
    {
        if (unGetFlag) {
            unGetFlag = false;
            return unChar;
        }
        return unChar = r.read();
    }

    /** Pushes back the character most recently returned by read(). */
    private void unread()
    {
        unGetFlag = true;
    }

}