// Half-commit. Not finished.
/* OGDL, Ordered Graph Data Language
* (c) R.Veen, 2002-2006.
* License: see http://ogdl.org/ (similar to zlib)
*/
package ogdl;
import java.io.IOException;
import java.io.Reader;
import ogdl.parser.IParserHandler;
/**
 * Parser for the OGDL text format.
 *
 * <p>Non-recursive, event-driven parser. It reads from any {@link Reader}; the one
 * character of push-back it needs is implemented locally (see {@link #unread(int)}),
 * so no {@code java.io.PushbackReader} is required.
 *
 * <ul>
 * <li>Token functions return true if the item exists at the current position and
 *     false if not. When they return false the stream position is unchanged
 *     (the one exception is documented on {@link #block()}).
 * <li>Relevant functions send an event through the {@link IParserHandler} given
 *     to the constructor.
 * <li>One leaf/string is stored locally in a buffer. The minimum content element
 *     delivered is one leaf (block or not).
 * <li>Leaf type is not preserved: simple, quoted or block leaves are all
 *     delivered to the handler as plain strings.
 * </ul>
 *
 * date: Nov 2002.
 * $Id: OgdlParser.java,v 1.8 2005/06/06 16:10:53 tbryan Exp $
 */
public final class OgdlParser
{
    /** Event type: primary events, content related. */
    public final static int CONTENT = 1;

    /** Event type: secondary events, format related. */
    public final static int FORMAT = 2;

    /** Initial capacity of the indentation and group stacks; both grow on demand. */
    private static final int INITIAL_DEPTH = 64;

    /** Character source. */
    protected Reader r;

    /** Line counter, starting at 1 and advanced for every line break consumed. */
    int line = 1;

    /** Level reported with the next content event; -1 until the first line is parsed. */
    int level = -1;

    /** Stack of levels saved when a '(' is seen. */
    int[] groups;

    /** Indentation (number of spaces) recorded for each line level. */
    int[] lineInd;

    /** Index into {@link #lineInd} for the line currently being parsed. */
    int lineLevel;

    /** Not used by this class; kept for compatibility with package-level users. */
    int groupLevel;

    /** Number of currently open groups, i.e. next free slot in {@link #groups}. */
    int groupIndex = 0;

    /** Indentation read ahead by {@link #block()}, handed out by the next {@link #space()}. */
    int savedSpaces = 0;

    /** When set, the next {@link #newline()} succeeds without reading. Never set by this class. */
    boolean savedNewline = false;

    /** Last answer of the handler to a content event; false stops {@link #line()}. */
    boolean continueParsing = true;

    private IParserHandler event;

    /* leaf buffer, for first level string processing: escape sequences */
    StringBuffer sb;

    /** True when the next {@link #read()} must return {@link #unChar} again. */
    boolean unGetFlag = false;

    /** Last character read from the stream, or the character pushed back explicitly. */
    int unChar;

    /**
     * Creates a parser.
     *
     * @param r     source of OGDL text
     * @param event handler that receives content/format events and errors
     * @throws Exception declared for compatibility; nothing is thrown here
     */
    public OgdlParser (Reader r, IParserHandler event) throws Exception
    {
        this.r = r;
        this.event = event;
        sb = new StringBuffer();
        groups = new int[INITIAL_DEPTH];
        lineInd = new int[INITIAL_DEPTH];
    }

    /* XXX: here an efficient string reader ... */

    /**
     * space = char_space+
     *
     * <p>Mixing of tabs and spaces is not allowed as indentation, but not
     * checked yet.
     *
     * @return the number of space characters consumed, or the count saved by a
     *         preceding {@link #block()} if there is one
     * @throws IOException if the underlying reader fails
     */
    public int space() throws IOException
    {
        if ( savedSpaces != 0 ) {
            int saved = savedSpaces;
            savedSpaces = 0;
            return saved;
        }

        int count = 0;
        int c;
        /* explicit EOF test: never rely on the character table to reject -1 */
        while ( (c = read()) != -1 && Characters.is( c, Characters.SPACE ) )
            count++;
        unread();
        return count;
    }

    /**
     * newline ::= ( CR LF) | CR | LF
     *
     * @return true if a line break was consumed (and {@link #line} advanced)
     * @throws IOException if the underlying reader fails
     */
    public boolean newline() throws IOException
    {
        if ( savedNewline ) {
            savedNewline = false;
            line++;
            return true;
        }

        int c = read();
        if ( c == '\r' ) {
            /* swallow the LF of a CR LF pair, keep anything else */
            if ( read() != '\n' )
                unread();
            line++;
            return true;
        }
        if ( c == '\n' ) {
            line++;
            return true;
        }

        unread();
        return false;
    }

    /**
     * eos ::= end of stream (no more chars)
     *
     * <p>A form feed is treated as end of stream too.
     *
     * @return true if the next character is EOF or form feed; nothing is consumed
     * @throws IOException if the underlying reader fails
     */
    public boolean eos() throws IOException
    {
        int c = read();
        unread();
        return c == -1 || c == '\f';
    }

    /**
     * word: a run of characters classified as WORD by {@code Characters}.
     * The text is left in the leaf buffer.
     *
     * @return true if at least one character was consumed
     * @throws IOException if the underlying reader fails
     */
    public boolean word() throws IOException
    {
        sb.setLength(0);

        int c;
        while ( (c = read()) != -1 && Characters.is( c, Characters.WORD ) )
            sb.append((char) c);
        unread();

        return sb.length() > 0;
    }

    /**
     * comment: '#' followed by everything up to (not including) the next
     * character classified as BREAK by {@code Characters}, or end of stream.
     * The text, including the '#', is left in the leaf buffer.
     *
     * @return true if a comment was consumed
     * @throws IOException if the underlying reader fails
     */
    public boolean comment() throws IOException
    {
        sb.setLength(0);

        int c = read();
        if ( c != '#' ) {
            unread();
            return false;
        }
        sb.append((char) c);

        /* stop at EOF explicitly so an unterminated comment cannot loop forever */
        while ( (c = read()) != -1 && !Characters.is( c, Characters.BREAK ) )
            sb.append((char) c);
        unread();

        return true;
    }

    /**
     * separator: a single character accepted by {@code Characters.isSeparator}.
     * The character is left in the leaf buffer.
     *
     * @return true if a separator was consumed
     * @throws IOException if the underlying reader fails
     */
    public boolean separator() throws IOException
    {
        sb.setLength(0);

        int c = read();
        if ( !Characters.isSeparator( (char) c ) ) {
            unread();
            return false;
        }
        sb.append((char) c);
        return true;
    }

    /**
     * quoted: text delimited by a pair of single or double quotes. A quote
     * directly preceded by a backslash does not end the string; the backslash
     * itself stays in the buffer. After each line feed inside the string, up to
     * (indentation of the current line + 1) leading space characters of the
     * following line are dropped. An unterminated string ends at end of stream.
     *
     * @return true if a quoted string was consumed into the leaf buffer
     * @throws IOException if the underlying reader fails
     */
    public boolean quoted() throws IOException
    {
        int q = read();
        if ( q != '"' && q != '\'' ) {
            unread();
            return false;
        }

        sb.setLength(0);

        int previous = 0;
        int skip = 0;

        while ( true ) {
            int c = read();
            if ( c == -1 || ( c == q && previous != '\\' ) )
                break;

            if ( skip > 0 ) {
                if ( Characters.isSpace((char) c) ) {
                    skip--;
                    previous = c;
                    continue;
                }
                skip = 0;
            }

            sb.append((char) c);
            previous = c;

            if ( c == '\n' ) {
                line++;
                /* lineInd is indexed by line level (as in block()), not by node level */
                skip = lineInd[lineLevel] + 1;
            }
        }

        return true;
    }

    /**
     * block: a backslash followed by a line break, then every following line
     * that is indented deeper than the current line. The indentation of the
     * first block line is removed from all block lines. Blank or less indented
     * lines that end immediately add an empty line; the first less indented
     * non-empty line ends the block and its indentation is kept for the next
     * {@link #space()} call.
     *
     * <p>Known limitation: if the backslash is not followed by a line break,
     * false is returned but the backslash has already been consumed (only one
     * character of push-back is available).
     *
     * @return true if a block was consumed into the leaf buffer
     * @throws IOException if the underlying reader fails
     */
    public boolean block() throws IOException
    {
        int c = read();
        if ( c != '\\' ) {
            unread();
            return false;
        }

        if ( !newline() )
            return false; // losing one char: the backslash cannot be pushed back

        sb.setLength(0);
        int firstIndent = -1;

        while ( true ) {
            int indent = space();

            if ( indent <= lineInd[lineLevel] ) {
                if ( newline() )
                    sb.append('\n');
                else {
                    /* start of the next regular line: remember its indentation */
                    savedSpaces = indent;
                    break;
                }
            }
            else {
                if ( firstIndent < 0 )
                    firstIndent = indent;

                /* keep indentation beyond that of the first block line */
                for ( int i = firstIndent; i < indent; i++ )
                    sb.append(' ');

                while ( true ) {
                    c = read();
                    if ( c == -1 || c == '\n' ) {
                        sb.append('\n');
                        break;
                    }
                    sb.append((char) c);
                }

                if ( c == -1 )
                    break;
                line++;
            }
        }

        return true;
    }

    /**
     * node: one block, quoted string, comment, word or separator.
     *
     * <p>Unquoted '(' and ')' produce FORMAT events and open/close a group,
     * ',' resets the level to the start of the group (or of the line), and
     * comments are skipped. Anything else is delivered as a CONTENT event at
     * the current level, after which the level is incremented. An empty quoted
     * string produces no event but still increments the level.
     *
     * @return -1 after a block, 1 after any other node, 0 if there is no node
     * @throws IOException if the underlying reader fails
     */
    public int node() throws IOException
    {
        if ( block() ) {
            continueParsing = event.event(CONTENT, level, sb.toString());
            return -1;
        }

        boolean isQuoted = quoted();
        if ( !isQuoted && !comment() && !word() && !separator() )
            return 0;

        int len = sb.length();

        /* quoted text is always content, even if it starts with ( ) , or # */
        char c = ( !isQuoted && len > 0 ) ? sb.charAt(0) : 0;

        if ( c == '(' ) {
            if ( groupIndex == groups.length )
                groups = grow(groups);
            groups[groupIndex++] = level;
            event.event(FORMAT, level, "(");
            return 1;
        }

        if ( c == ')' ) {
            /* an unmatched ')' must not underflow the group stack */
            /* NOTE(review): this stores the level into the popped slot instead
               of restoring level from it - confirm the intended semantics */
            if ( groupIndex > 0 )
                groups[--groupIndex] = level;
            event.event(FORMAT, level, ")");
            return 1;
        }

        if ( c == ',' ) {
            /* reset level to the first node of this group, or of this line */
            level = ( groupIndex == 0 ) ? lineLevel : groups[groupIndex - 1];
            return 1;
        }

        if ( c == '#' )
            return 1;

        if ( len != 0 )
            continueParsing = event.event(CONTENT, level, sb.toString());
        level++;
        return 1;
    }

    /**
     * line() : space? ( node ( space node )* )? space? newline
     *
     * <p>Determines the line level from the indentation, then parses all nodes
     * on the line. Parsing stops as soon as the handler answers false to a
     * content event.
     *
     * <p>NOTE(review): if the stream holds a character that no token function
     * accepts, this method makes no progress and keeps returning true.
     *
     * @return false at end of stream or when the handler asked to stop,
     *         true if there may be more lines
     * @throws Exception if the underlying reader or the handler fails
     */
    public boolean line() throws Exception
    {
        int i = space();

        if ( newline() ) return true;
        if ( eos () ) return false;

        if ( level < 0 ) {
            /* first line: its indentation defines level 0 */
            lineInd[0] = i;
            lineLevel = 0;
        }
        else if ( i > lineInd[lineLevel] ) {
            if ( lineLevel + 1 == lineInd.length )
                lineInd = grow(lineInd);
            lineInd[++lineLevel] = i;
        }
        else if ( i < lineInd[lineLevel] ) {
            while ( lineLevel != 0 && i < lineInd[lineLevel] )
                lineLevel--;
        }

        level = lineLevel;

        int result;
        do {
            result = node();
            /* honour a stop request at once so later events cannot override it */
            if ( !continueParsing )
                return false;
            if ( result > 0 )
                space();
        } while ( result > 0 );

        if ( result == 0 ) { /* after a block don't eat spaces */
            space();
            newline();
        }

        return !eos();
    }

    /**
     * parse ::= line*
     *
     * <p>A {@code SyntaxException} is reported to the handler's {@code error}
     * method instead of being propagated.
     *
     * @throws Exception any other failure of the reader or the handler
     */
    public void parse() throws Exception
    {
        try {
            while ( line() ) ;
        }
        catch (SyntaxException e)
        {
            event.error(e,e.line);
        }
    }

    /** Returns a larger copy of the given stack array. */
    private static int[] grow(int[] a)
    {
        int[] bigger = new int[Math.max(INITIAL_DEPTH, a.length * 2)];
        System.arraycopy(a, 0, bigger, 0, a.length);
        return bigger;
    }

    /* -----------------------------------------------------
       Small isolation layer.
    */

    /**
     * Pushes back the given character: the next read returns {@code c}.
     *
     * @param c character to deliver on the next read
     */
    public void unread(int c)
    {
        unGetFlag = true;
        unChar = c;
    }

    /** Returns the pushed-back character if there is one, else the next one from the reader. */
    private int read() throws IOException
    {
        if (unGetFlag) {
            unGetFlag = false;
            return unChar;
        }
        /* remember the character so that unread() can deliver it again */
        return unChar = r.read();
    }

    /** Pushes back the character most recently returned by {@link #read()}. */
    private void unread()
    {
        unGetFlag = true;
    }
}