// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 
// The Molly framework is freely distributable under the terms of an
// MIT-style license. For details, see the molly pages web site at:
// http://www.mollypages.org/. Use, modify, have fun !

package fc.web.page;

import java.io.*;
import java.util.*;
import java.util.regex.*;
import fc.io.*;
import fc.util.*;

/*
NOTES

Code blocks of the form 
 [...] 
cause problems with java arrays

String[] or foo[4] etc.., craps out. So we need to use 
 [[...]] 
for the molly code blocks

1. If you are hacking this file, start with parseText()

2. Turn the dbg flag to true to see how the parser works

3. Keep in mind that the order of switch'es in a case statement in various
methods is not always arbitrary (the order matters in this sort
of recursive descent parsing)

4. Read www.mollypages.org/page/grammar/index.mp for a intro to parsing

5. This parser as shipped has a set of regression tests in the fc/web/page/test directory.
These consist of a bunch of *.mp files and corresponding *.java files, each of which is
known to be generated properly. If you change stuff around, run these regression tests
again by invoking "java fc.web.page.PageParserTest" Note, if you change things such that
the .java output of the parser is different, then the tests will fail (since the new .java
files of your parser will be different to the test ones shipped in fc/web/page/test. In
this case, once you know that your parser works as you like it, then you should create a
new baseline for your parser by invoking "java fc.web.page.PageParserTest
-generateExpected" and then you can use *that* as the new baseline for further changes in
your parser (you may have to modify the *.mp files in /fc/web/page/test to use your new
page syntax).

6. 
When including files, previous versions of the parser constructed a new IncludeFile element
which would be invoked when the page was written out. When invoked (via render), that element
would create a new PageParser and set includeMode = true on that new parser. This new parser
would then parse/write out the subtree of the included file in-line.

This inline processing had issues since the included file could not contain import statements,
declarations, etc. (since those had already been written out by the parent/top level parser).
Another hack was to pass the child parser the parent/top level object and give access to the
top level parse root to the child parser (the child parser would have to be invoked immediately
anyway). Also, since inner classes for parse elements are non-static, the separate parser would
create a parse tree, and although it would add those classes to the top most parse tree, the
classes themselves (when trying to write) would refer to the separate output stream of the child
class (the output stream would also have to be set to the parent class). It was doable but gets
unnecessarily complex.

The only benefit of a separate parser is being able to print start/end sections:
>> start include file
 [..invoke child parser]
>> end include file

In the current/cleaner approach, I simply insert the included file into the character stream.
But there isn't any easy way to track when that stream finishes and the original content starts 
again. So we get:

>> start include file
[..include into original stream and continue parsing]
-- no end include file line --

*/

/**
Parses a page and writes out the corresponding java file to the specified output. The
parser and scanner are combined into one class here for simplicity (a separate scanner is
overkill for a simple LL(1) grammar such as molly pages).

@author hursh jain
*/
public final class PageParser
{
//Debug switch: turn to true to see how the parser works (see NOTES above)
private static final boolean dbg    = false;
//Sentinel compared against the result of in.read() to detect end of input
private static final int     EOF    = -1;
//NOTE(review): presumably the current indent depth for dbg output - confirm
private              int     dbgtab = 0;

//classname to give to the generated java class
String          classname;
//defaults to the package name declared by Page
String          packagename = Page.PACKAGE_NAME;
//reader over the input page file (created in the constructors)
PageReader      in;
//destination writer; only the private (include) constructor assigns it directly
PrintWriter     out;
Log             log;
//the input page file
File            inputFile;
//the output file to be written to (public constructors only)
File            outputFile;
//the webapp context root directory
File            contextRoot;
//when true, parse() only flushes 'out' instead of writing the page and closing it
boolean         includeMode = false;
//NOTE(review): presumably the value of the src-encoding directive - confirm
String          src_encoding;
int				textNodeCounter = 0;

//Read data
//we use these since stringbuffer/builders do not have a clear/reset function
//buf accumulates the characters of the node currently being parsed
CharArrayWriter buf = new CharArrayWriter(4096);
CharArrayWriter wsbuf = new CharArrayWriter(32);  // ^(whitespace)* 
//the character most recently read from 'in' (or EOF)
int c = EOF;

//PageData
List	decl			 = new ArrayList();     //declarations
List	inc_decl     	 = new ArrayList();     //external included declarations
List	imps         	 = new ArrayList();     //imports
List	tree         	 = new ArrayList();     //code, exp, text etc.
Map		directives   	 = new HashMap();       //page options
Set		circularityTrack = new HashSet();		//track pages already included to stop circular refs

/** 
The name ("mimetype") of the [page mimetype=....] directive. The value of <tt>none</tt> or an
empty string will turn off writing any mimetype entirely (the user can then write a
mimetype via the {@link jakarta.servlet.ServletResponse#setContentType} method manually).
<p>
Note, from {@link
jakarta.servlet.ServletResponse#setContentType ServletResponse}
<pre>
Note that the character encoding cannot be communicated via HTTP headers if the servlet
does not specify a content type; however, it is still used to encode text written via the
servlet response's writer.
</pre>
*/
public static String d_mimetype = "mimetype";

/*
this value (or an empty string) for mimetype means no mimetype
will be specified (not even the default mimetype)
*/
public static String mimetype_none = "none";

/** 
The name ("encoding") of the [page encoding=....] directive. 
*/
public static String d_encoding = "encoding";

/** 
The name ("src-encoding") of the [page src-encoding=....] directive. 
*/
public static String d_src_encoding = "src-encoding";

/** The name ("buffersize") of the [page buffersize=....] directive */
public static String d_buffersize = "buffersize";

/** The name ("out") of the [page out=....] directive */
public static String d_out = "out";
/** A value ("outputstream") of the [page out=outputstream] directive */
public static String d_out_stream1 = "outputstream";
/** A value ("stream") of the [page out=stream] directive */
public static String d_out_stream2 = "stream";
/** A value ("writer") of the [page out=writer] directive */
public static String d_out_writer = "writer";
/** The name ("remove-initial-whitespace") of the [page remove-initial-whitespace=....] directive */
public static String d_remove_initial_whitespace = "remove-initial-whitespace";

/* 
Any ".." literal is saved by javac as a constant in the constant pool of the class file.
Although java strings stored on the heap and referenced by a variable can be very long
(up to roughly Integer.MAX_VALUE characters), constant pool strings are far smaller:
the class file format limits each one to 65,535 bytes of (modified UTF-8) encoded data.

We break any long text into multiple out("...") statements, where each out statement
contains at most 32772 chars. This is used by text nodes for example. Relevant when a massive
chunk of html text exists without any newlines. Each character can double when quoted
(" -> \") so, to be on the safe side, we use approximately half of the 65535 number.
*/
private static final int MAX_TEXT_LITERAL_LENGTH = 32772;

/* 
This constructor for internal use.

The parser can be invoked recursively to parse included files as
well..that's what the includeMode() does (and this construtor is invoked
when including). When including, we already have a output writer
created, we use that writer (instead of creating a new one based on
src_encoding as we do for in normal page parsing mode).
*/
private PageParser(
 File contextRoot, File input, PrintWriter outputWriter, String classname, Log log) 
throws IOException
	{
	this.contextRoot = contextRoot;
	this.inputFile = input;	
	this.in  = new PageReader(input);
	this.out = outputWriter;
	this.classname = classname;
	this.log = log;

	circularityTrack.add(input.getAbsolutePath());
	}

/**
Creates a new page parser that will use the default log obtained by
{@link Log#getDefault}

@param	contextRoot	absolute path to the webapp context root directory
@param	input		absolute path to the input page file
@param	output		absolute path to the output file (to be written to).
@param	classname	classname to give to the generated java class.
@throws	IOException	propagated from the constructor this one delegates to
*/
public PageParser(File contextRoot, File input, File output, String classname) 
throws IOException
	{
	this(contextRoot, input, output, classname, Log.getDefault());
	}

/**
Creates a new page parser.

@param	contextRoot	absolute path to the webapp context root directory
@param	input		absolute path to the input page file
@param	output		absolute path to the output file (to be written to).
@param	classname	classname to give to the generated java class.
@param	log			destination for internal logging output.
@throws	IOException	propagated from creating the {@link PageReader} for the input file
*/
public PageParser(
	File contextRoot, File input, File output, String classname, Log log) 
throws IOException
	{
	this.contextRoot = contextRoot;
	this.inputFile = input;	
	this.in  = new PageReader(input);
	this.outputFile = output;
	this.classname = classname;
	this.log = log;

	//remember the top level page so that includes can detect circular references
	circularityTrack.add(input.getAbsolutePath());
	}

/*
Appends a character, given as the int returned by a read, to the shared
buffer 'buf'. A negative value (such as EOF) is rejected via Argcheck since
it is not a real character.
*/
void append(final int c)
	{
	Argcheck.istrue(c >= 0, "Internal error: recieved c=" + c);
	buf.append((char)c);
	}

/* Appends a single character to the shared buffer 'buf'. */
void append(final char c)
	{
	buf.append(c);
	}

/* Appends an entire string to the shared buffer 'buf'. */
void append(final String str)
	{
	buf.append(str);
	}

/* 
not used anymore 

Switches this parser into include mode (see parse(): in include mode the
page is not written out and the writer is only flushed, not closed).
Returns this parser so that the call can be chained.
*/
PageParser includeMode()
	{
	includeMode = true;
	return this;
	}

/**
Parses the page. If the parse is successful, the java source will be
generated.
<p>
In normal mode the page is written out and the output writer is closed; in
include mode the output writer is only flushed. The input reader is always
closed when this method returns, whether the parse succeeded or failed.

@throws	IOException		a parse failure occurred. The java source file
						may or may not be properly generated or written
						in this case.
*/
public void parse() throws IOException
	{
	try {
		parseText();	

		if (! includeMode)
			{
			writePage();
			out.close();
			}
		else{
			out.flush();
			}
		}
	finally {
		//previously the reader was only closed on success, so a parse
		//error left the input file open.
		in.close();
		}
	}

//util method for use in the case '[' branch of parseText below.
//Creates a Text node from whatever has been accumulated in 'buf' so far,
//adds it to the parse tree, clears 'buf' for the tag that follows and 
//returns the new node (needed by callers that attach an expression to it).
private Text newTextNode()
	{
	Text text = new Text(buf);
	tree.add(text);
	buf.reset();
	return text;
	}

	/* 
	Things always start off with a text node - all tags will be then processed as/when seen.
	
	If there is any white space at the beginning (before any other directives/tags are seen), that
	will become part of the first text node. Any white space *after* a directive/tag will become part 
	of the subsequent text node.
	
	This is relevant when removing initial whitespace (if a directive to do so exists). If such a directive
	exists, the strategy is to keep going down our list of parsed nodes, remove any text nodes that are
	only whitespace, and then ignore any leading whitespace from the first non-empty text node we find.
	
	The textnode itself escapes any whitespace with string escapes (a newline -> "\n") since that is
	fed to the page out(..) as a string. So, to remove whitespaces once they are already escaped into
	strings in the textnode data, we have to remove these escaped versions of those whitespaces. 
	
	To remove a newline, we have to search for "\n". However if the user had typed "\n" to begin with, 
	that would be converted to "\\n", so it gets very tricky to distinguish between what was typed
	in the page and what we escaped.

    In example (1), the page itself has 2 characters typed: [\, n] and so on. Anything not <NL>
    is actually typed as a character in the page.
    
	source            text node            output                    in browser
    \n                 \\n				 out.print("\\n")              \n          (1)
	\<NL>              \\\n              out.print("\\\n")             \           (2)
    \n<NL>             \\n\n             out.print("\\n\n")            \n<NL>      (3)
    
	Carrying on:	
	\\n<NL>             \\\\n\n           out.print("\\\\n\n")          \\n<NL>    (4)
	\\\n<NL>            \\\\\\n\n         out.print("\\\\\\n\n")        \\\n<NL>   (5)

	To eliminate whitespaces, NL in this example, we have to search for: \n *BUT* ignore any \\n

	\n                 \\n                out.print("\\n")              \n        (1)
                       \\n                     "\\n"                    \n        1a: no newline in src!

	\<NL>              \\\n              out.print("\\n\n")             \<NL>     (2)
                       \\ [\n]                 "\\"                      \        2a: NL in src gone!        

	\n<NL>             \\n\n              out.print("\\n\n")            \n<NL>    (3)
                       \\n [\n]                 "\\n"                    \n       3a: NL in src gone!        
	
	\\n<NL>             \\\\n\n           out.print("\\\\n\n")          \\n<NL>   (4)
                        \\ \\n [\n]             "\\\\n"                 \\n       4a: NL in src gone!
                          
	\\\n<NL>            \\\\\\n\n         out.print("\\\\\n\n")         \\\n<NL>  (5)
	\\\n<NL>           \\ \\ \\n [\n]     out.print("\\\\\\n")           \\\n     5a: NL in src gone!
	
	It is quite hokey. If we use \s+ regular expression, that will search for actual NL characters.
	What we want is to search for '\','n' as long as it is not '\','\','n'
	
	We could do this before we escape anything in the Text node itself (as the lexical level) but I've 
	done is post lexical - at writeRenderMethod(). Doing in after the parse step does not touch any 
	existing parsing/lexing code so its safer, a one and done deal right now basically. And when parsing, 
	Text nodes break multiple lines (separated by NL) into separate internal items in a list which makes
	it easier for us - We just have to examine each internal item as a separate line.
	
	Doing it while lexing - using in.skipWhitespace() - is also tricky because parsing is recursive
	and we have to know when to invoke the skip and when not too.
	*/
/*
Top level parse loop: reads the input one character at a time until EOF,
accumulating plain text into 'buf'. When a recognized tag opener is seen
after a '[', the text accumulated so far is flushed into a new Text node
and the corresponding parseXXX method is invoked for the tag body. At EOF,
whatever remains in 'buf' is added to the tree as a final Text node.
*/
void parseText() throws IOException
	{
	if (dbg) dbgenter(); 

	while (true)
		{ 
		c = in.read();
		
		if (c == EOF) {
			//flush any trailing text as the last node
			tree.add(new Text(buf));
			buf.reset();
			break;
			}
		
		switch (c)
			{ 
			//Escape start tags
			case '\\':
				/*	we don't need to do this: previously, expressions
				were [...] but now they are [=...], previously we needed
				to escape \[[ entirely (since if we escaped \[ the second
				[ would start an expression
				*/
				/*				
				if (in.match("[["))  
					append("[[");
				*/
				//escape only \[... otherwise leave \ alone
				if (in.match("["))
					append("[");
				else
					append(c);
				break;

			case '[':
				/* suppose we have
				\[[
				escape handling above will capture \[
				then the second '[' drops down here. Good so far.
				But we must not create a new text object here by
				default...only if we see another [[ or [= or [include or
				whatever. 
				*/
				/*
				But creating a text object at the top is easier
				then repeating this code at every if..else branch below
				but this creates superfluous line breaks.
				
				hello[haha]world
				-->prints as-->
				hello  (text node 1)
				[haha] (text node 2)
				world  (text node 3)
				--> we want
				hello[haha]world (text node 1)
				*/
					
				//[[ ...code... ]]
				if (in.match('[')) { 
					newTextNode();
					parseCode(); 
					}
				//[= expression ] : the expression is attached to the text node
				else if (in.match('=')) {
					Text text = newTextNode();
					parseExpression(text);
					}
				//[! declaration !]
				else if (in.match('!')) {
					newTextNode();
					parseDeclaration();
					}
				//[/* comment */]
				else if (in.match("/*")) {
					newTextNode();
					parseComment();	
					}
				else if (in.matchIgnoreCase("page")) {
					newTextNode();
					parseDirective();
					}
				//longest match: "include-file" etc., last: "include"
				else if (in.matchIgnoreCase("include-file")) {
					newTextNode();
					parseIncludeFile();
					}
				else if (in.matchIgnoreCase("include-decl")) {
					newTextNode();
					parseIncludeDecl();
					}
				else if (in.matchIgnoreCase("include")) {
					newTextNode();
					parseInclude();
					}
				else if (in.matchIgnoreCase("forward")) {
					newTextNode();
					parseForward();
					}
				else if (in.matchIgnoreCase("import")) {
					newTextNode();
					parseImport();
					}
				else  {
					//not a tag: the '[' is ordinary text
					//System.out.println("c1=" + (char)c);
					append(c);
					}
				break;	
	
			default:
				//System.out.println("c2=" + (char)c);
				append(c);
				
			}	//switch		
		} //while

	if (dbg) dbgexit(); 
	}
	
/*
Parses the body of a [[ ...code... ]] block (the opening [[ has already
been consumed). Code text is accumulated in 'buf' and added to the tree as
a Code node when the closing ]] is seen or when a hash section starts.
Comments, string literals and char literals are copied through their own
helper methods so that a ]] or # inside them is not treated as markup.

Reports an unclosed code block if EOF is reached first.
*/
void parseCode() throws IOException
	{
	if (dbg) dbgenter(); 

	int	startline = in.getLine();
	int	startcol = in.getCol();
	
	while (true)
		{
		c = in.read();	
	
		switch (c) /* the order of case tags is important. */
			{
			case EOF:
				unclosed("code", startline, startcol);
				if (dbg) dbgexit(); 
				return;

			case '/':   //Top level:  // and /* comments
				append(c);
				/*
				Only consume the following character if it really starts a
				comment. Previously the next character was read and appended
				unconditionally, so an EOF right after '/' was passed to
				append() (an internal error instead of "unclosed code") and a
				']', quote, newline or '#' directly after a '/' skipped its
				own case below.
				*/
				if (in.match('/')) {
					append('/');
					appendCodeSlashComment();
					}
				else if (in.match('*')) {
					append('*');
					appendCodeStarComment();
					}
  				break;				
		
			case '"': 		//strings outside of any comment
				append(c);
				appendCodeString();  
				break;
				
			case '\'':
				append(c);
				appendCodeCharLiteral();
				break;
				
			case ']':
				if (in.match(']')) {
					tree.add(new Code(buf));
					buf.reset();
					if (dbg) dbgexit(); 
					return;
					}
				else {
					append(c);
					}
				break;
			
			/* 
			a hash by itself on a line starts a hash section.
			whitespace before the # on that line is used as an
			printing 'out' statements for that hash.
			
			for (int n = 0; n < ; n++) {
			....# foo #
			|	}
			|=> 4 spaces 
			so nice if generated code looked like:
			
			for (int n = 0; n < ; n++) {
			    out.print(" foo ");
			    }
			*/
			case '\n':
			case '\r':
				append(c); 			 //the \n or \r just read
				readToFirstNonWS();  //won't read past more newlines 
				//is '#' is first non-ws on this line ?
				c = in.read();
				if (c == '#') {						
					tree.add(new Code(buf));
					buf.reset();
					//whitespace provides indentation offset
					parseHash(wsbuf.toString()); 
					}
				else{
					append(wsbuf.toString());  //wsbuf contains codetext
					//let other cases also handle first non-ws or EOF
					in.unread();    
					}
				break;
			
			/* in this case, hash does not start on a new line, like:
			   for (...) { #
			*/
			case '#':
				tree.add(new Code(buf));
				buf.reset();
				parseHash(null);
 				break;	
			
			default:
				append(c);
			}	//switch		
		}	//while
	}

/*
Parses a hash section (# ... #) found inside a code block; the opening '#'
has already been consumed. The text of the section is accumulated in 'buf'
and added to the tree as Hash node(s). An embedded [= ...] expression ends
the current Hash node and is parsed with that node as its parent.

offset: the whitespace that preceded a '#' that started its own line (used
as an indentation offset by the Hash node), or null when the '#' did not
start on a new line.

Reports an unclosed hash if EOF is reached first.
*/
void parseHash(String offset) throws IOException
	{
	if (dbg) dbgenter(); 

	int	startline = in.getLine();
	int	startcol = in.getCol();

	while (true)
		{
		c = in.read();	
	
		switch (c)
			{
			case EOF: 
				unclosed("hash", startline, startcol);
				if (dbg) dbgexit(); 
				return;

			//special case: very common and would be a drag to escape
			//this every time:
			//  # <table bgcolor="#ffffff">....   #
			//Now, all of:
			//  bgcolor="#xxx" 	
			//  bgcolor='#xxx'
			//  bgcolor="\#xxx" 
			//will work the same and give: bgcolor="#xxx"
			//1)
			//However to get a:
			//	bgcolor=#xxx	  (no quoted around #xxx)
			//we still have to say:
			//	bgcolor=\#xxx 	
			//2)
			//Of course, since we special case this, then:
			//  #"bar"#
			// that ending # is lost and we end up with
			//  #"bar"  with no closing hash
			//So we need to make sure that we write:
			//  #"bar" #
			// instead

			case '\'':
			case '"':
				append(c);
				//a '#' directly after a quote is literal text, not the end of the hash
				if (in.match('#')) 
					append('#');
				break;
				
			case '\\':
				//\[ and \# are escapes for a literal [ and #; any other \ is kept as-is
				if (in.match('[')) 
					append('[');      
				else if (in.match('#'))
					append('#');
				else
					append(c);
				break;
				
			case '[':
				if (in.match('=')) {
					//flush the text so far, then attach the expression to that hash node
					Hash hash = new Hash(offset, buf);
					tree.add(hash);
					buf.reset();
					parseExpression(hash);
					}
				else{
					append(c);
					}
				break;

			/*
			this case is not needed but is a bit of a optimization
			for (int n = 0; n < 1; n++) {
				#
				foo
			....#...NL
				}
			avoids printing the dots (spaces) and NL in this case
			(the newline after foo is still printed)
			*/
			case '\n':
			case '\r':
				append(c);
				readToFirstNonWS(); 
				c = in.read();
				//'#' is first non-ws on the line
				if (c == '#') {
					tree.add(new Hash(offset, buf));
					buf.reset();
					//skipIfWhitespaceToEnd();
					if (dbg) dbgexit(); 
					return;
					}
				else {
					append(wsbuf.toString());
					in.unread(); //let other cases also handle first non-ws   
					}
				break;

			//closing '#': the hash section is complete
			case '#':
				tree.add(new Hash(offset, buf));  
				//skipIfWhitespaceToEnd();
				buf.reset();
				if (dbg) dbgexit(); 
				return;
				
			default:
				append(c);
			}  //switch 
		} //while
	}

/**
[page <<<FOO]
...as-is..no parse, no interpolation..
FOO
<p>
Reads raw input up to (not including) the next occurrence of the heredoc
delimiter and adds it to the tree as a Heredoc node. The delimiter is
whatever follows <tt>&lt;&lt;&lt;</tt> in the directive text, trimmed.
An empty delimiter or a missing terminator is reported as a parse problem.

@param	directives_buf	the text of the page directive (without the
						closing ']'), containing <tt>&lt;&lt;&lt;</tt>
*/
void parseHeredoc(StringBuilder directives_buf) throws IOException
	{
	if (dbg) dbgenter(); 

	int	startline = in.getLine();
	int	startcol = in.getCol();
			
	int i = directives_buf.indexOf("<<<"); /* "<<<".length = 3 */
	/* directives_buf does not have a ending ']' */
	final String 		  heredoc 	  = directives_buf.substring(i+3).trim();
	final int 	 		  heredoc_len = heredoc.length();
	final CharArrayWriter heredoc_buf = new CharArrayWriter(2048);

	/*
	"[page <<<]" has no delimiter to search for. Previously this fell
	through to heredoc.charAt(0) below, which threw a
	StringIndexOutOfBoundsException instead of a parse error.
	*/
	if (heredoc_len == 0) {
		error("Empty heredoc delimiter not allowed", startline, startcol);
		if (dbg) dbgexit(); 
		return;
		}

	final char	 heredoc_first = heredoc.charAt(0);
	//the remainder of the delimiter never changes, compute it only once
	final String heredoc_rest  = heredoc.substring(1);

	/* 
	the ending heredoc after newline speeds things up a bit
	which is why is traditionally used i guess, otherwise
	we have to try a full match every first match. this 
	implementation doesn't care where the ending heredoc
	appears (can be anywhere)...simplifies the implementation.
	*/
	
	while (true)
		{ 
		c = in.read();
		
		if (c == EOF) {
			unclosed("heredoc: <<<"+heredoc, startline, startcol);
			break;
			}
			
		if (c == heredoc_first)
			{
			boolean matched = true;
			if (heredoc_len > 1) {
				matched = in.match(heredoc_rest);
				}
			if (matched) {	
				tree.add(new Heredoc(heredoc_buf));
				break;
				}
			}
		
		//default action
		heredoc_buf.append((char)c);	
		} //while
		
	if (dbg) dbgexit(); 
	}

/*
Parses an expression tag body (the opening "[=" has already been consumed)
up to the closing ']'. The resulting Exp is not added to the tree directly;
it is handed to the given parent element via addExp. Inside an expression,
"\]" stands for a literal ']'. An expression consisting only of whitespace
is reported as an error.
*/
void parseExpression(Element parent) throws IOException
	{
	if (dbg) dbgenter(); 

	final int beginLine = in.getLine();
	final int beginCol  = in.getCol();

	for (;;)
		{
		c = in.read();

		if (c == EOF) {
			unclosed("expression", beginLine, beginCol);
			if (dbg) dbgexit(); 
			return;
			}

		if (c == ']') {
			//end of the expression
			final boolean blank = buf.toString().trim().length() == 0;
			if (blank) {
				error("Empty expression not allowed", beginLine, beginCol);
				}
			parent.addExp(new Exp(buf));
			buf.reset();
			if (dbg) dbgexit(); 
			return;
			}

		if (c == '\\' && in.match(']')) {
			//escaped close bracket
			append(']');
			continue;
			}

		//everything else (including a lone backslash) is expression text
		append(c);
		}
	}

/*
Parses a page comment. The opener has already been consumed by the caller;
everything up to the terminating star-slash-bracket sequence is collected
and added to the tree as a Comment node. Reports an unclosed comment if
EOF is reached first.
*/
void parseComment() throws IOException
	{
	if (dbg) dbgenter(); 

	final int beginLine = in.getLine();
	final int beginCol  = in.getCol();

	for (;;)
		{
		c = in.read();

		if (c == EOF) {
			unclosed("comment", beginLine, beginCol);
			if (dbg) dbgexit(); 
			return;
			}

		//a '*' only terminates the comment when followed by "/]"
		if (c == '*' && in.match("/]")) {
			tree.add(new Comment(buf));
			buf.reset();
			if (dbg) dbgexit(); 
			return;
			}

		append(c);
		}
	}

/*
Parses a declaration section (the opening "[!" has already been consumed)
up to the closing "!]" and adds it to the list of declarations. Comments,
string literals and char literals are copied via their own helper methods
so that a close tag appearing inside them is ignored.

Reports an unclosed declaration if EOF is reached first.
*/
void parseDeclaration() throws IOException
	{
	if (dbg) dbgenter(); 
	int	startline = in.getLine();
	int	startcol = in.getCol();

	while (true)
		{
		c = in.read();			
	
		switch (c)
			{
			case EOF:
				unclosed("declaration", startline, startcol);
				if (dbg) dbgexit(); 
				return;
			
			case '!':
				if (in.match(']')) {
					decl.add(new Decl(buf));
					buf.reset();	
					if (dbg) dbgexit(); 
					return;
					}
				else{
					append(c);
					}
				break;

			//top level // and /* comments, ']' (close decl tag)
			//is ignored within them
			case '/':   
				append(c);
				/*
				Only consume the following character if it really starts a
				comment. Previously the next character was read and appended
				unconditionally, so an EOF right after '/' was passed to
				append() (an internal error instead of "unclosed
				declaration") and a '!', quote or another '/' directly after
				a '/' skipped its own case in this switch.
				*/
				if (in.match('/')) {
					append('/');
					appendCodeSlashComment();
					}
				else if (in.match('*')) {
					append('*');
					appendCodeStarComment();
					}
  				break;				
		
			//close tags are ignored within them
			case '"': 		//strings outside of any comment
				append(c);
				appendCodeString();  
				break;
				
			case '\'':
				append(c);
				appendCodeCharLiteral();
				break;
						
			default:
				append(c);
			}
		}

	}

/*
Parses a [page ...] directive (the "[page" has already been consumed).
All characters up to the closing ']' are collected. If the collected text
contains "<<<" it is treated as the start of a heredoc, otherwise it is
handed to addDirectives. Reports an unclosed directive on EOF.
*/
void parseDirective() throws IOException
	{
	if (dbg) dbgenter(); 

	final int beginLine = in.getLine();
	final int beginCol  = in.getCol();

	final StringBuilder directiveText = new StringBuilder(1024);

	//gather everything before the closing bracket (or EOF)
	for (c = in.read(); c != EOF && c != ']'; c = in.read())
		{
		directiveText.append((char)c);
		}

	if (c == EOF) {
		unclosed("directive", beginLine, beginCol);
		if (dbg) dbgexit(); 
		return;
		}

	//here c == ']'
	final boolean isHeredoc = directiveText.indexOf("<<<") >= 0;
	if (isHeredoc) {
		parseHeredoc(directiveText); 
		}
	else{ 
		/* other directives used at page-generation time */
		addDirectives(directiveText);
		}

	if (dbg) dbgexit(); 
	}

//Matches one name=value pair of a page directive.
//[a-zA-Z_\-0-9] == ( \w | - )
//There are two alternatives, tried in this order:
//  1) double quoted value: name in group 1, value (without the quotes) in group 2
//  2) unquoted value:      name in group 3, value (no whitespace) in group 4
//so for any match either groups 1,2 or groups 3,4 are non-null.
static final Pattern directive_pat = Pattern.compile(
	//foo = "bar baz" (embd. spaces)
	"\\s*([a-zA-Z_\\-0-9]+)\\s*=\\s*\"((?:.|\r|\n)+?)\""  
	+ "|"
	//foo = "bar$@#$" (no spaces) OR foo = bar (quotes optional)
	+ "\\s*([a-zA-Z_\\-0-9]+)\\s*=\\s*(\\S+)" 
	);
	
	  
/*
Extracts all name=value pairs from the text of a page directive and stores
them in the 'directives' map. Names and values are lower-cased and any
single or double quote characters are removed from the value. The value of
the buffersize directive is converted via IOUtil.stringToFileSize; all
other known directives are stored as strings.

Throws a ParseException (mentioning the input file) if a directive name is
not understood or a value cannot be converted.
*/
void addDirectives(StringBuilder directives_buf) throws ParseException
	{
	if (dbg) {
		dbgenter(); 
		System.out.println("-------directives section--------");
		System.out.println(directives_buf.toString());
		System.out.println("-------end directives-------");
		}
	
	try {
		Matcher m = directive_pat.matcher(directives_buf);
		while (m.find()) 
			{
			if (dbg) System.out.println(">>>>[0]->" + m.group() 
				+ "; [1]->" + m.group(1)  
				+ " [2]->" + m.group(2)  
				+ " [3]->" + m.group(3)  
				+ " [4]->" + m.group(4));
				
			//groups 1,2 belong to the quoted alternative, 3,4 to the unquoted one
			final String rawName  = m.group(1) != null ? m.group(1) : m.group(3);
			final String rawValue = m.group(2) != null ? m.group(2) : m.group(4);

			//Locale.ROOT: lower-casing must not depend on the default locale
			//(for example "MIMETYPE" would not become "mimetype" in a Turkish locale)
			final String name = rawName.toLowerCase(java.util.Locale.ROOT);

			/*
			Strip quote characters. This used to be value.replace("\"|'","")
			which replaces the literal 3 character text "|' (String.replace
			does not take a regex) and therefore never removed anything, so
			a value such as 'utf-8' kept its quotes.
			*/
			final String value = rawValue.toLowerCase(java.util.Locale.ROOT)
										 .replace("\"", "")
										 .replace("'", "");

			if (name.equals(d_buffersize)) 
				{
				//can throw parse exception
				directives.put(name, IOUtil.stringToFileSize(value)); 
				}
			else if (name.equals(d_encoding) 
				  || name.equals(d_src_encoding)
				  || name.equals(d_mimetype)
				  || name.equals(d_out)
				  || name.equals(d_remove_initial_whitespace)) 
				{
				directives.put(name, value);				
				}
			//else if .... other directives here as needed....
			else 
				throw new Exception("Do not understand directive: " + m.group());
			}
		if (dbg) System.out.println("Added directives: " + directives);
		}
	catch (Exception e) {
		//any failure above is reported as a parse failure of the current file
		throw new ParseException("File: " + inputFile.getAbsolutePath() 
		 							+ ";\n" + e.toString());
		}

	if (dbg) dbgexit(); 
	}

/*
Parses an [include-file ...] tag (the tag name has already been consumed).
The text up to the closing ']' is collected in 'buf' and, together with an
optional "option = value" parameter, passed to includeFile. Nothing is
added to the parse tree here. Expressions ("[=") are not allowed inside
this tag and are reported as an error.

NOTE(review): the option keyword is recognized anywhere in the tag, so a
file name that itself contains the text "option" looks like it would be
taken for the keyword - confirm.
*/
void parseIncludeFile() throws IOException
	{
	if (dbg) dbgenter(); 

	final int beginLine = in.getLine();
	final int beginCol  = in.getCol();
	String option = null;
	
	for (;;)
		{
		c = in.read();			

		if (c == EOF) {
			unclosed("include-file", beginLine, beginCol);
			if (dbg) dbgexit(); 
			return;
			}

		if (c == ']') {
			/* not added in the tree, just included in the stream */
			includeFile(buf, option); 
			buf.reset();	
			if (dbg) dbgexit(); 
			return;
			}

		if (c == '[') {
			//we error out instead of warning: failing early is better.
			//this does preclude having '[=' in the file name, but it's a
			//good tradeoff
			if (in.match('=')) {
				error("Expressions cannot exist in file includes. The offending static-include section starts at:", beginLine, beginCol);
				}
			append(c);
			continue;
			}

		if (c == 'o' && in.match("ption")) {
			//option = value
			skipWS();
			if (! in.match("=")) {
				error("bad option parameter in file include: ", beginLine, beginCol);
				}
			skipWS();

			//the value runs up to whitespace, the closing bracket or EOF
			//(none of which are consumed here)
			final StringBuilder optionText = new StringBuilder();
			for (;;) {
				final int next = in.read();
				if (next == ']' || next == EOF || Character.isWhitespace(next)) {		
					in.unread();
					break;
					}
				optionText.append((char)next);
				}

			option = optionText.toString();
			continue;
			}

		//ordinary character (including an 'o' that does not start "option")
		append(c);
		}
	}

/*
Parses an [include-decl ...] tag (the tag name has already been consumed).
The text up to the closing ']' becomes an IncludeDecl which is added to the
list of included declarations; an optional "option = value" parameter is
set on it when present. Expressions ("[=") are not allowed inside this tag
and are reported as an error.

NOTE(review): the option keyword is recognized anywhere in the tag, so a
file name that itself contains the text "option" looks like it would be
taken for the keyword - confirm.
*/
void parseIncludeDecl() throws IOException
	{
	if (dbg) dbgenter(); 

	final int beginLine = in.getLine();
	final int beginCol  = in.getCol();
	String option = null;
	
	for (;;)
		{
		c = in.read();			

		if (c == EOF) {
			unclosed("include-decl", beginLine, beginCol);
			if (dbg) dbgexit(); 
			return;
			}

		if (c == ']') {
			final IncludeDecl included = new IncludeDecl(buf);
			if (option != null) {
				included.setOption(option);
				}
			inc_decl.add(included);
			buf.reset();	
			if (dbg) dbgexit(); 
			return;
			}

		if (c == '[') {
			//we will throw an exception. failing early is better. this
			//does preclude having '[=' in the file name, but it's a good tradeoff
			if (in.match('=')) {
				error("Expressions cannot exist in include-decl. The offending static-include section starts at:", beginLine, beginCol);
				}
			append(c);
			continue;
			}

		if (c == 'o' && in.match("ption")) {
			//option = value
			skipWS();
			if (! in.match("=")) {
				error("bad option parameter in include-code: ", beginLine, beginCol);
				}
			skipWS();

			//the value runs up to whitespace, the closing bracket or EOF
			//(none of which are consumed here)
			final StringBuilder optionText = new StringBuilder();
			for (;;) {
				final int next = in.read();
				if (next == ']' || next == EOF || Character.isWhitespace(next)) {		
					in.unread();
					break;
					}
				optionText.append((char)next);
				}

			option = optionText.toString();
			continue;
			}

		//ordinary character (including an 'o' that does not start "option")
		append(c);
		}
	}

//Parses an [include ...] tag (the tag name has already been consumed) into
//an Include element made up of literal text pieces and embedded [= ...]
//expressions, and adds that element to the tree at the closing ']'.
//
//The filename/url may optionally be double quoted. Leading/trailing double
//quotes (if any) are not stripped here; they are ignored when the include
//is rendered, so no additional parsing is needed in this method. Handling
//the optional quote here would be the formal, proper way but this is
//simpler. The same goes for the other parseIncludeXX/ForwardXX functions.
void parseInclude() throws IOException
	{
	if (dbg) dbgenter(); 

	final int beginLine = in.getLine();
	final int beginCol  = in.getCol();
	final Include include = new Include();

	for (;;)
		{
		c = in.read();			

		if (c == EOF) {
			unclosed("include", beginLine, beginCol);
			if (dbg) dbgexit(); 
			return;
			}

		if (c == ']') {
			//closing bracket: store the trailing text and finish
			include.add(buf);
			tree.add(include);
			buf.reset();	
			if (dbg) dbgexit(); 
			return;
			}

		if (c == '[' && in.match('=')) {
			//store the text collected so far, then parse the expression
			//with the include element as its parent
			include.add(buf);
			buf.reset();
			parseExpression(include);
			continue;
			}

		//plain character (a '[' not followed by '=' is plain too)
		append(c);
		}
	}

/*
Parses a [forward ...] tag (the tag name has already been consumed) into a
Forward element made up of literal text pieces and embedded [= ...]
expressions, and adds that element to the tree at the closing ']'.
Reports an unclosed forward if EOF is reached first.
*/
void parseForward() throws IOException
	{
	if (dbg) dbgenter(); 

	final int beginLine = in.getLine();
	final int beginCol  = in.getCol();
	final Forward forward = new Forward();

	for (;;)
		{
		c = in.read();			

		if (c == EOF) {
			unclosed("forward", beginLine, beginCol);
			if (dbg) dbgexit(); 
			return;
			}

		if (c == ']') {
			//closing bracket: store the trailing text and finish
			forward.add(buf);
			tree.add(forward);
			buf.reset();	
			if (dbg) dbgexit(); 
			return;
			}

		if (c == '[' && in.match('=')) {
			//store the text collected so far, then parse the expression
			//with the forward element as its parent
			forward.add(buf);
			buf.reset();
			parseExpression(forward);
			continue;
			}

		//plain character (a '[' not followed by '=' is plain too)
		append(c);
		}
	}

//we need to parse imports seperately because they go outside
//a class declaration (and [!...!] goes inside a class)
//import XXX.*;
//class YYY {
//[!....stuff from here ....!]
//...
void parseImport() throws IOException
	{
	if (dbg) dbgenter(); 

	//position where the tag started, reported if the tag is never closed
	final int openLine = in.getLine();
	final int openCol = in.getCol();

	for (;;)
		{
		c = in.read();

		if (c == EOF) {
			unclosed("import", openLine, openCol);
			if (dbg) dbgexit(); 
			return;
			}

		final boolean endOfTag = (c == ']');
		if (endOfTag || c == '\n') {
			//each line inside the tag (and the remainder before the
			//closing bracket) becomes its own Import element
			imps.add(new Import(buf));
			buf.reset();
			if (endOfTag) {
				if (dbg) dbgexit(); 
				return;
				}
			continue;
			}

		append(c);
		}
	}

/*
Called when // was read at the top level inside a code block. Appends
the contents of a // comment to the buffer (not including the trailing
newline)
*/
void appendCodeSlashComment() throws IOException
	{
	if (dbg) dbgenter();

	//copy characters until EOF or a line terminator. The terminator is
	//pushed back (not appended) so the caller still sees the newline
	//and can decide whether the next line is a hash line.
	for (c = in.read(); c != EOF; c = in.read())
		{
		if (c == '\r' || c == '\n') {
			in.unread();
			break;
			}
		append(c);
		}

	if (dbg) dbgread("CodeSLASHComment Finished: Buffer=" + StringUtil.viewableAscii(buf.toString()));
	if (dbg) dbgexit();
	}

/*
Called when /* was read at the top level inside a code block. Appends
the contents of a /*comment to the buffer. (not including any trailing
newline or spaces)
*/
void appendCodeStarComment() throws IOException
	{
	if (dbg) dbgenter(); 

	//copy everything up to and including the closing star-slash pair;
	//hitting EOF first simply ends the copy
	while ((c = in.read()) != EOF)
		{
		append(c);

		if (c == '*' && in.match('/')) {
			append('/');
			break;
			}
		}

	if (dbg) dbgread("CodeSTARComment Finished: Buffer=" + StringUtil.viewableAscii(buf.toString()));
	if (dbg) dbgexit(); 
	}

/*
Called (outside of any comments in the code block) when: 
--> parseCode()
	   ... "
	   	   ^ (we are here)
*/
void appendCodeString() throws IOException
	{
	if (dbg) dbgenter(); 

	//where the literal started, for the "not closed" error message
	final int openLine = in.getLine();
	final int openCol = in.getCol();

	for (;;)
		{
		c = in.read();

		//a string literal may not run past the end of the line or file
		if (c == EOF || c == '\r' || c == '\n')
			unclosed("string literal", openLine, openCol);

		append(c);

		if (c == '"')
			break;   //unescaped closing quote

		if (c == '\\') {
			//copy the escaped character verbatim so that \" is not
			//mistaken for the end of the literal
			c = in.read();
			if (c == EOF)
				unclosed("string literal", openLine, openCol);
			else
				append(c);
			}
		}

	if (dbg) dbgread("appendCodeString Finished: Buffer=" + StringUtil.viewableAscii(buf.toString()));
	if (dbg) dbgexit(); 
	}


/*
Called (outside of any comments in the code block) when: 
--> parseCode()
	   ... '
	   	   ^ (we are here)
*/
void appendCodeCharLiteral() throws IOException
	{
	if (dbg) dbgenter(); 

	//where the literal started, for the "not closed" error message
	final int openLine = in.getLine();
	final int openCol = in.getCol();

	for (;;)
		{
		c = in.read();

		//a char literal may not run past the end of the line or file
		if (c == EOF || c == '\r' || c == '\n')
			unclosed("char literal", openLine, openCol);

		append(c);

		if (c == '\'')
			break;   //unescaped closing quote

		if (c == '\\') {
			//copy the escaped character verbatim so that \' is not
			//mistaken for the end of the literal
			c = in.read();
			if (c == EOF)
				unclosed("char literal", openLine, openCol);
			else
				append(c);
			}
		}

	if (dbg) dbgread("appendCodeCharLiteral Finished: Buffer=" + StringUtil.viewableAscii(buf.toString()));
	if (dbg) dbgexit(); 
	}


/*
Reads from the current position till the first nonwhitespace char, EOF or
newline is encountered. Reads are into the whitespace buffer. does not
consume the character past the non-whitespace character and does
NOT read multiple lines of whitespace.
*/
void readToFirstNonWS() throws IOException 
	{
	wsbuf.reset();

	//collect same-line whitespace into wsbuf; stop on EOF, a line
	//terminator or the first non-whitespace character
	for (c = in.read(); c != EOF; c = in.read())
		{
		final boolean lineBreak = (c == '\r' || c == '\n');
		if (lineBreak || ! Character.isWhitespace(c))
			break;

		wsbuf.append((char)c);
		}

	//push back the character that ended the scan
	in.unread();
	}

//skip till end of whitespace or EOF. does not consume any chars past 
//the whitespace.
void skipWS() throws IOException
	{
	int ch;
	do {
		ch = in.read();
		}
	while (ch != EOF && Character.isWhitespace(ch));

	//push back the EOF / non-whitespace character that ended the scan
	in.unread();
	}
	
//skips to the end of line if the rest of the line is (from the current
//position), all whitespace till the end. otherwise, does not change 
//current position. consumes trailing newlines (if present) when reading 
//whitespace.
void skipIfWhitespaceToEnd() throws IOException
	{
	int consumed = 0;   //number of characters read so far, for the rollback

	for (;;)
		{
		c = in.read();
		consumed++;

		//rest of the line was blank: leave everything consumed
		if (c == '\n' || c == EOF)
			return;

		if (c == '\r') {
			in.match('\n');   //swallow the \n of a \r\n pair, if present
			return;
			}

		if (Character.isWhitespace(c))
			continue;

		//found real content: undo every read made by this method
		in.unread(consumed);
		return;
		}
	}

//not used anymore but left here for potential future use. does not
//consume the newline (if present)
void skipToLineEnd() throws IOException 
	{
	//uses a local so the parser's current-character field is untouched
	int ch;
	do {
		ch = in.read();
		}
	while (ch != EOF && ch != '\n' && ch != '\r');

	//push back the EOF / line terminator that ended the scan
	in.unread();
	}

/*
Returns the text to place inside a generated java string literal for the
given character: carriage return, newline, double quote and backslash are 
returned as their two character escape sequences, every other character 
is returned unchanged.
*/
String quote(final char c) 
	{
	if (c == '\r')
		return "\\r";

	if (c == '\n')
		return "\\n";

	if (c == '"')
		return "\\\"";    //--> \"

	if (c == '\\')
		return "\\\\";

	return String.valueOf(c);
	}

//======= util and debug methods ==========================
/*
Returns the name of a method on the current call stack, followed by "()".
Frame 0 is this method itself, frame 1 is its caller, frame 2 the
caller's caller and so on. If framenum does not refer to an existing
stack frame, "<unknown>()" is returned (this is a debugging aid and 
should never itself blow up the parser).
*/
String methodName(int framenum)
	{
	final StackTraceElement[] ste = new Exception().getStackTrace();

	if (framenum < 0 || framenum >= ste.length) {
		return "<unknown>()";
		}

	return ste[framenum].getMethodName() + "()";   
	}

//prints "-->method()" for the calling method at the current debug
//indent, then increases the indent
void dbgenter() {
	final String indent = StringUtil.repeat('\t', dbgtab++);
	System.out.printf("%s-->%s\n", indent, methodName(2));
	}
	
//decreases the debug indent, then prints "<--method()" for the calling
//method at that indent
void dbgexit() {
	final String indent = StringUtil.repeat('\t', --dbgtab);
	System.out.printf("%s<--%s\n", indent, methodName(2));
	}

//prints a debug message (made viewable via StringUtil.viewableAscii) 
//at the current debug indent
void dbgread(String str) {
	final String indent = StringUtil.repeat('\t', dbgtab);
	System.out.printf("%s %s\n", indent, StringUtil.viewableAscii(str));
	}

//prints a debug label at the current debug indent followed, on the same
//line, by every entry of the list (each entry must be a String)
void dbgread(String str, List list) {
	final String indent = StringUtil.repeat('\t', dbgtab);
	System.out.printf("%s %s: ", indent, StringUtil.viewableAscii(str));
	for (Object item : list) {
		System.out.print( StringUtil.viewableAscii( (String)item ) );
		}
	System.out.println("");
	}

//prints a single character (made viewable via StringUtil.viewableAscii) 
//at the current debug indent
void dbgread(char c) {
	final String indent = StringUtil.repeat('\t', dbgtab);
	System.out.printf("%s %s\n", indent, StringUtil.viewableAscii(c));
	}

//prints the current contents of the given buffer (made viewable via 
//StringUtil.viewableAscii) at the current debug indent
void dbgread(CharArrayWriter buf) {
	final String indent = StringUtil.repeat('\t', dbgtab);
	final String contents = buf.toString();
	System.out.printf("%s %s\n", indent, StringUtil.viewableAscii(contents));
	}

//always throws: reports a tag that was opened at startline/startcol but
//not closed by the time the current read position was reached
void unclosed(String blockname, int startline, int startcol) throws IOException
	{
	final String opened = 
		"File:" + inputFile + ", line:" + startline + " column:" + startcol;
	final String current = 
		"Current line:" + in.getLine() + " column:" + in.getCol();

	throw new IOException(
		blockname + " tag not closed.\nThis tag was possibly opened in: \n"
		+ opened + ".\n" + current);	
	}

//always throws: reports msg together with the input file name and the
//given line/column
void error(String msg, int line, int col) throws IOException
	{
	final String location = "Error in File:" + inputFile + " Line:" + line + " Col:" + col;
	throw new IOException(location + " " + msg);	
	}

//always throws: reports msg together with the input file name
void error(String msg) throws IOException
	{
	final String location = "Error in File:" + inputFile;
	throw new IOException(location + " " + msg);	
	}

//============== Non Parsing methods ================================
//writes str to the generated source, no line terminator
void o(Object str) {
	this.out.print(str);
	}

//writes str to the generated source and ends the line
void ol(Object str) {
	this.out.println(str);	
	}

//ends the current line of the generated source
void ol() {
	this.out.println();
	}
	
/**
Returns the src_encoding directive (if any) defined in this page or <tt>null</tt> if no source encoding
has been specified.
*/
String getSourceEncoding() {
	//assigned in writePage() from the src_encoding directive, when present
	return this.src_encoding;
	}

/* 
include an external file whose contents will be rendered as part of the page.
*/ 
void includeFile(CharArrayWriter buf, String option) throws IOException
	{
	if (dbg) dbgread("<new INCLUDE-FILE> "); 

	//the buffer holds the (optionally quoted) name of the file to include
	final String target = removeLeadingTrailingQuote(buf.toString().trim());

	//names starting with "/" are resolved against the context root, all
	//others against the directory of the page being parsed (or "." when
	//the page has no parent directory)
	final File pageDir = inputFile.getParentFile();
	final File includeFile = target.startsWith("/")
		? new File(contextRoot, target)
		: new File(pageDir != null ? pageDir : new File("."), target);

	if (! includeFile.exists()) {
		throw new IOException("Include file does not exist: " + includeFile.getCanonicalPath());
		}

	final String trackedPath = includeFile.getAbsolutePath();
	if (circularityTrack.contains(trackedPath)) {
		 throw new IOException("Circularity detected when including: " + includeFile.getCanonicalPath() + "\nAlready included the following files: " + circularityTrack);
		}

	tree.add(new MollyComment(
		"//>>>START INCLUDE from: " + includeFile.getAbsolutePath()));

	//the file contents are spliced into the input being read, so they
	//are parsed by this same parser as if they were written in the page.
	in.insertIntoStream(includeFile);

	//no matching "END INCLUDE" comment is added: it would be added to 
	//the tree right now, before the inserted contents have been parsed.

	circularityTrack.add(includeFile.getAbsolutePath());
	}

	
/*
Writes the generated java source. In normal (non include) mode this opens 
the output file using the page's src_encoding directive (or the default
source encoding) and writes the package, imports, class header, methods and 
closing brace. In include mode, only the fields and methods are written to 
the already existing writer.
*/
void writePage() throws IOException
	{	
	if (! includeMode)
		{
		if (directives.containsKey(d_src_encoding)) {
			this.src_encoding = (String) directives.get(d_src_encoding);
			this.src_encoding = removeLeadingTrailingQuote(this.src_encoding);
			}
	
		//create a appropriate PrintWriter based on either the default
		//java encoding or the page specified java encoding
		//the java source file will be written out in this encoding
	
		final FileOutputStream fout = new FileOutputStream(outputFile);
		try {
			final OutputStreamWriter fw = (src_encoding != null) ?
					new OutputStreamWriter(fout, src_encoding) :
					new OutputStreamWriter(fout, Page.DEFAULT_SRC_ENCODING);
					
			out	= new PrintWriter(new BufferedWriter(fw));
			}
		catch (IOException e) {
			//e.g. an unsupported encoding name: don't leak the file
			//handle that was just opened
			try { 
				fout.close(); 
				}
			catch (IOException ignored) { 
				//the original failure is the one worth reporting
				}
			throw e;
			}

		writePackage();
		writeImports();
		
		o ("public class ");
		o (classname);
		ol(" extends fc.web.page.PageImpl");
		ol("{");
		}

	writeFields();

	if (! includeMode) {
		writeConstructor();
		}
		
	writeMethods();
	
	if (! includeMode) {
		ol("}");
		}
	}

//writes the package statement of the generated class followed by a 
//blank line
void writePackage()
	{
	ol("package " + packagename + ";");
	ol();
	}
	
//writes the imports every generated page gets, followed by the imports
//declared in the page itself
void writeImports() throws IOException
	{
	final String[] standard = {
		"import jakarta.servlet.*;",
		"import jakarta.servlet.http.*;",
		"import java.io.*;",
		"import java.util.*;",
		//needed in the (very rare) case that a page overrides the 
		//Page.init()/destroy methods [we need pageservlet for init(..)]
		"import fc.web.page.PageServlet;"
		};

	for (int i = 0; i < standard.length; i++) {
		ol(standard[i]);
		}

	int n = 0;
	while (n < imps.size()) {
		final Element imp = (Element) imps.get(n++);
		imp.render();
		ol();
		}

	ol();
	}

//hook called by writePage(); currently writes nothing
void writeFields()
	{
	//intentionally empty
	}

//hook called by writePage() in non-include mode; currently writes nothing
void writeConstructor()
	{
	//intentionally empty
	}

//writes, in this order: the page's declaration sections, the included 
//declarations and finally the render method
void writeMethods() throws IOException
	{
	this.writeDeclaredMethods();
	this.writeIncludedMethods();
	this.writeRenderMethod();
	}
	
//renders every element of the decl list, followed by a blank line if
//there was at least one
void writeDeclaredMethods() throws IOException
	{
	int n = 0;
	while (n < decl.size()) {
		final Element section = (Element) decl.get(n++);
		section.render();
		}

	if (decl.size() > 0) {
		ol();
		}
	}

//renders every element of the inc_decl list, followed by a blank line
//if there was at least one
void writeIncludedMethods() throws IOException
	{
	int n = 0;
	while (n < inc_decl.size()) {
		final Element section = (Element) inc_decl.get(n++);
		section.render();
		}

	if (inc_decl.size() > 0) {
		ol();
		}
	}

//renders the parse tree. In non-include mode the rendered tree is
//wrapped in the render(...) method header and footer.
void writeRenderMethod() throws IOException
	{
	if (! includeMode) {
		writeRenderTop();
		}

	//when the remove-initial-whitespace directive is present, text nodes
	//are trimmed until the first text node with real content is seen
	final boolean stripLeading = directives.containsKey(d_remove_initial_whitespace);
	boolean contentSeen = false;

	int n = 0;
	while (n < tree.size()) 
		{
		final Element node = (Element) tree.get(n++);

		if (stripLeading && ! contentSeen && node instanceof Text) {
			final Text text = (Text) node;
			if (text.isOnlyWhiteSpaceNode()) {
				//nothing but whitespace: drop the node's contents
				text.clear();
				}
			else{
				//first node with content: trim its leading blank lines
				//and leave every later text node alone
				text.removeInitialEmptyLines();
				contentSeen = true;
				}
			}

		node.render();
		}

	if (! includeMode) {
		writeRenderBottom();
		}
	}
	
/*
Writes the header of the generated render(...) method: the method
signature, the request/response aliases, the content type (built from the
mimetype and encoding directives or their defaults), the optional buffer 
size and the declaration of the 'out' variable (a ServletOutputStream or 
a PrintWriter depending on the 'out' directive).

Throws an IOException (via error) if the 'out' directive has a value that
is not understood.
*/
void writeRenderTop() throws IOException
	{
	ol("public void render(HttpServletRequest req, HttpServletResponse res) throws Exception");
	ol("\t{");
	ol("	/* for people used to typing 'request/response' */");
	ol("	final HttpServletRequest  request = req;");
	ol("	final HttpServletResponse response = res;");
	ol();
	//mime+charset
	String content_type = "";
	if (directives.containsKey(d_mimetype)) 
		{
		String mtype = (String) directives.get(d_mimetype);
		if (!  (mtype.equals("") || mtype.equals(mimetype_none)) ) 
			{
			mtype = removeLeadingTrailingQuote(mtype);
		 	content_type += mtype;
		 	}
		} 
	else{
	 	content_type += Page.DEFAULT_MIME_TYPE;
	 	}

		
	if (directives.containsKey(d_encoding)) {
		String encoding = (String) directives.get(d_encoding);
		encoding = removeLeadingTrailingQuote(encoding);
		/*an empty encoding means that the encoding is specified in the
		html header*/
		if (! encoding.trim().equals("")) { 
			content_type += "; charset=";
			content_type += encoding; 
			}
		}
	else{
		content_type += "; charset=";
	 	content_type += Page.DEFAULT_ENCODING;
	 	}

	o ("	res.setContentType(\""); o (content_type); ol("\");");

	//buffer
	if (directives.containsKey(d_buffersize)) {
		o ("	res.setBufferSize(");
		o (directives.get(d_buffersize));
		ol(");");
		}
		
	//stream or writer
	boolean stream = false;
	if (directives.containsKey(d_out)) 
		{
		//lower-case independently of the default locale (with a turkish
		//default locale, "WRITER" would otherwise not become "writer")
		//and compare by value rather than by reference.
		final String stream_type = 
			((String) directives.get(d_out)).toLowerCase(Locale.ROOT);

		if (stream_type.equals(d_out_stream1) || stream_type.equals(d_out_stream2)) {
			stream = true;
			}
		else if (stream_type.equals(d_out_writer)) {
			stream = false;
			}
		else{
			error("Did not understand directive [directive name=out, value=" + stream_type + "]. Choose between (" +  d_out_stream1 + ") and (" + d_out_writer + ")");
			}
		}
		
	if (stream)
		ol("	ServletOutputStream out = res.getOutputStream();");
	else
		ol("	PrintWriter out = res.getWriter();");

	}

//writes a blank line and the closing brace of the generated render method
void writeRenderBottom() throws IOException
	{
	this.ol();
	this.ol("\t} //~render end");
	}


/*
int tabcount = 1;
String tab = "\t";
void tabInc() {
	tab = StringUtil.repeat('\t', ++tabcount);
	}
void tabDec() {
	tab = StringUtil.repeat('\t', --tabcount);
	}
*/

//base class of everything the parser produces; each element knows how 
//to write itself to the generated source via render()
abstract class Element 
	{
	abstract void render() throws IOException;

	//overridden by the elements that can contain inline expressions
	//(text, include etc.,); all others reject the call
	void addExp(Exp e) 
		{  
		final String msg = "Internal error: not implemented by this object";
		throw new RuntimeException(msg); 
		}
	}
		
//this should NOT be added to the tree directly but added to Text or Hash
//via the addExp() method. This is because exps must be printed inline
class Exp extends Element
	{
	//the java expression, exactly as it appeared in the page
	String str;
	
	Exp(CharArrayWriter buf) 
		{
		str = buf.toString();
		if (dbg) dbgread("<new EXP> "+ str); 
		}

	//writes a statement that prints the value of the expression
	void render() 
		{
		ol("out.print  (" + str + ");");
		}
		
	public String toString() 
		{
		return "Exp: [" + str + "]";
		}
	}

//this is of course mofo crazy but that's java regex embedded into java strings for you
//These are applied to the lines stored in a Text node, i.e., to text that
//has already been through quote(): a newline / carriage return shows up
//as the 2 characters backslash-n / backslash-r, while spaces and tabs 
//are stored as-is. So "white space" here means any run of: the escape 
//pairs \n \r \t (as 2 literal characters each), a space or a raw tab. 
//(A backslash from the page itself is stored doubled and hence never 
//starts one of these escape pairs.)
final String whiteSpaceOnlyPat = "^(\\\\n|\\\\r|\\\\t|[ \\t])*$";
final String whiteSpaceBeginPat = "^(\\\\n|\\\\r|\\\\t|[ \\t])*";

/*
A section of literal page text (plus any expressions embedded in it). The
text is stored in quoted form (see quote()), split into separate lines, and 
rendered as a sequence of out.print(...) statements.
*/
class Text extends Element
	{
	String  offset_space;
	//entries are either a String (one quoted line of text) or an Exp 
	final 	List list = new ArrayList();
	int 	nodeNumber = textNodeCounter++;
	
	//each text section is parsed by a text node. Within EACH text
	//node, we split it's contained text into separate lines and
	//generate code to print each line with a "out.println(...)"
	//statement. This maintains the same source order as the molly
	//page. If we munge together everything and print all of it's
	//contents with just one out.println(...)" statement, we would
	//get one large line with embedded \n and that would make
	//things more difficult to co-relate with the source file.

	/*
	offset: extra indentation written before each generated statement 
	(may be null), b: the raw text of this section
	*/
	Text(final String offset, final CharArrayWriter b) 
		{
		if (offset == null) {
			offset_space = "\t";
			}
		else{
			offset_space = "\t" + offset;
			}
	
		final char[] buf = b.toCharArray();

		boolean prevWasCR = false;
		//jdk default is 32. we say 256. not too large, maybe
		//less cache pressure. not too important, gets resized
		//as needed anyway.
		final CharArrayWriter tmp = new CharArrayWriter(256);
		
		//intead of tmp.size() > MAX_TEXT_LITERAL_LENGTH  in the first if statement below, make
		//things faster by avoiding a method call in a loop and using a local variable tcount
		int tcount = 0;
	
		for (int i=0, j=1; i < buf.length; i++, j++) 
			{
			char c = buf[i];
			tcount++;
			if (tcount > MAX_TEXT_LITERAL_LENGTH || j == buf.length) {
				//literal got too long or this is the last character
				tmp.append(quote(c));
				list.add(tmp.toString());
				tmp.reset();
				tcount = 0;
				}
			else if (c == '\n') {
				tmp.append(quote(c));
				//the line was already ended by a preceding \r
				if (! prevWasCR) {
					list.add(tmp.toString());
					tmp.reset();
					tcount = 0;
					}
				}
			else if (c == '\r') {
				tmp.append(quote(c));
				list.add(tmp.toString());
				tmp.reset();
				tcount = 0;
				prevWasCR = true;
				}
			else{
				tmp.append(quote(c));
				prevWasCR = false;
				}
			}

		if (dbg) {
			String classname = getClass().getName();
			dbgread("<new " + classname.substring(classname.indexOf("$")+1,classname.length()) + ">",list); 
			}
		}

	Text(CharArrayWriter b) 
		{
		this(null, b);
		}
		
	//expressions are kept in the same list as the text lines so that
	//they are printed inline, in page order
	void addExp(Exp e)
		{
		list.add(e);
		}

	void render() 
		{
		for (int i=0; i<list.size(); i++) 
			{
			Object obj = list.get(i); //can be String or Exp
			if (obj instanceof Exp) {
				o(offset_space);
				((Exp)obj).render();
				}
			else{
				o(offset_space);
				o("out.print  (\"");
				o(obj);
				ol("\");");	
				}
			}
		} //render

	boolean isOnlyWhiteSpaceLine(String s)
		{
		return s.matches(whiteSpaceOnlyPat);
		}

	// one or all lines (each line being a separate list item) in this text node are white space
	// example 3 lines: ['\n', '\n', ' \t\n'] -> true
	// example 3 lines: ['\n', '\n', Exp Node, ' \t\n'] -> false
	boolean isOnlyWhiteSpaceNode()
		{	
		for (int n = 0; n < list.size(); n++) {
			Object obj = list.get(n);
			if (! (obj instanceof String)) {
				return false;
				}
			if (! isOnlyWhiteSpaceLine((String)obj)) {
				return false;
				}
			}	
		return true;
		}

	// some lines (each line being a separate list item) in this text node are white space
	// before: ['\n', '\n', ' \n\thello\n', Exp Node, '\n']
	// after:  ['hello\n', Exp Node, '\n']
	void removeInitialEmptyLines()
		{
		//a list iterator, since entries are removed/replaced while traversing
		ListIterator it = list.listIterator(); 	
		while (it.hasNext()) 
			{
			Object obj = it.next();
			if (! (obj instanceof String)) {
				break;
				}
			String s = (String)obj;
			if (isOnlyWhiteSpaceLine(s)) {
				it.remove();
				}
			else{
				//strings are immutable: the trimmed line has to be
				//stored back into the list to have any effect
				it.set(s.replaceFirst(whiteSpaceBeginPat, ""));
				break;
				}
			}
		}

	//clear all contents of this node - used only for white space removal		
	void clear()
		{
		list.clear();
		}
	
	public String toString() {
		StringBuilder buf = new StringBuilder();
		buf.append("Text (#" + nodeNumber + "):");
		if (list.size() == 0) {
			//must go to the local builder, not to the parser's buffer
			buf.append("<EMPTY>");
			}
		else{
			for (int n = 0; n < list.size(); n++) {
				buf.append("[");
				buf.append(StringUtil.viewableAscii(String.valueOf(list.get(n))));
				buf.append("]");
				if (n + 1 < list.size()) {
					buf.append(",");
					}
				}
			}
		return buf.toString();
		}
	
	}

//a Text variant whose last text line is written without a trailing line 
//terminator in the generated source
class Hash extends Text
	{
	Hash(final String offset, final CharArrayWriter b) 
		{
		super(offset, b);
		}

	//same as Text.render() except that the statement generated for a 
	//text entry in the last position is written with o(), not ol()
	void render() 
		{
		final int total = list.size();
		for (int i = 0; i < total; i++) 
			{
			final Object item = list.get(i); //can be String or Exp
			o(offset_space);

			if (item instanceof Exp) {
				((Exp)item).render();
				continue;
				}

			o("out.print  (\"");
			o(item);

			if (i + 1 == total) 
				o ("\");");
			else
				ol("\");");	
			}
		} //render

	public String toString() 
		{
		return "Hash: " + list;
		}
	}

//a Text variant that holds only text, never expressions
class Heredoc extends Text
	{
	Heredoc(final CharArrayWriter buf) 
		{
		super(null, buf);
		}

	//override, exp cannot be added to heredoc sections
	void addExp(Exp e)
		{
		final String msg = "Internal implementation error: this method should not be called for a Heredoc object";
		throw new IllegalStateException(msg);
		}
		
	//every entry is written as a print statement for a string literal
	void render() 
		{
		for (Object line : list) 
			{
			o(offset_space);
			o("out.print  (\"");
			o(line);
			ol("\");");	
			}
		} //render

	public String toString() 
		{
		return "Heredoc: " + list;
		}

	}

//a section of java code that is copied into the generated source
class Code extends Element
	{
	//the code, one entry (a String) per source line
	List list = new ArrayList();
	
	Code(CharArrayWriter b) 
		{
		//the code section is kept as separate lines (each line keeps
		//its trailing \n) rather than as one large string, so that the
		//generated source has the same line structure as the molly 
		//page and is easy to co-relate with it.
		final char[] chars = b.toCharArray();
		final int lastIdx = chars.length - 1;
		final CharArrayWriter line = new CharArrayWriter();

		for (int i = 0; i <= lastIdx; i++) {
			final char ch = chars[i];
			line.append(ch);
			//a line ends at a newline or at the end of the buffer
			if (ch == '\n' || i == lastIdx) {
				list.add(line.toString());
				line.reset();
				}
			}

		if (dbg) {
			String classname = getClass().getName();
			dbgread("<new " + classname.substring(classname.indexOf("$")+1,classname.length()) + ">",list); 
			}
		}

	//each line is written with a leading tab
	void render() 
		{
		for (Object line : list) {
			o('\t');
			o(line);
			}
		}
		
	public String toString() 
		{
		return "Code: " + list;
		}
	}

//a commented out section of the page; kept in the tree but never written
class Comment extends Element
	{
	String str;
	
	Comment(CharArrayWriter buf) 
		{
		str = buf.toString();
		if (dbg) dbgread("<new COMMENT> "+ str); 
		}

	void render() 
		{
		//deliberately empty: commented sections produce no output
		}

	public String toString() 
		{
		return "Comment: [" + str + "]";
		}
	}

//a Code section whose lines are written as-is, without the leading tab
//that Code.render() adds
class Decl extends Code
	{
	Decl(CharArrayWriter buf) 
		{
		super(buf);
		}

	void render() 
		{
		for (Object line : list) {
			o (line);
			}
		}
	}

/* base class for Forward and Include */
/* 
base class for Forward and Include. Collects the parts (plain strings and
expressions) that make up the target of the forward/include and writes the
code that obtains a RequestDispatcher for that target. Subclasses write the 
actual rd.forward/rd.include call and close the code block opened here.
*/
class ForwardIncludeElement extends Element
	{
	//entries are either a String (trimmed text) or an Exp
	List   	parts = new ArrayList();
	//true if the target has to be assembled at runtime from several parts
	boolean useBuf = false;
	
	// the following is for includes with expressions 
	// [include foo[=i].html]  
	// i could be 1,2,3.. the parser adds the xpression [=i] to this
	// object if it's present via the addExp method below
	void add(CharArrayWriter buf)
		{
		parts.add(buf.toString().trim());
		if (parts.size() > 1) {
			useBuf = true;
			}
		}

	void addExp(Exp e)
		{
		parts.add(e);
		useBuf = true;
		}

	void render() throws IOException
		{
		if (parts.size() == 0) {
			//log.warn("possible internal error, parts.size()==0 in Forward");
			return;
			}

		ol("\t{ //this code block gives 'rd' its own namespace");
	
		if (! useBuf) {
			o ("\tfinal RequestDispatcher rd = req.getRequestDispatcher(\"");
			//only 1 string
			o (removeLeadingTrailingQuote(parts.get(0).toString())); 
			ol("\");");
			}
		else{
			ol("\tfinal StringBuilder buf = new StringBuilder();");
			for (int n = 0; n < parts.size(); n++) {
				final Object obj = parts.get(n);
				if (obj instanceof String) {
					String text = (String) obj;
					//the optional quotes around the whole target can
					//only be at the very beginning and the very end. 
					if ( n == 0 || (n + 1) == parts.size() ) {
						text = removeLeadingTrailingQuote(text);
						}
					o ("\tbuf.append(\"");
					o (text);
					ol("\");");
					}
				else{
					//an expression is always written as code, wherever
					//it is (never converted to a string literal)
					o ("\tbuf.append(");
					o ( ((Exp)obj).str );
					ol(");");
					}
				} //for
			ol("\tfinal RequestDispatcher rd = req.getRequestDispatcher(buf.toString());");
			} //else
		}


	public String toString() {
		return "Forward: " + parts;
		}
	}

/* a request dispatcher based include. */
class Include extends ForwardIncludeElement
	{
	Include() 
		{
		if (dbg) dbgread("<new INCLUDE> "); 
		}
		
	//the parent writes the code that obtains the dispatcher 'rd'; the
	//include call and the end of the code block are added here
	void render() throws IOException
		{
		super.render();
		ol("\trd.include(req, res);");
		ol("\t}		//end rd block");
		}

	/* toString is inherited from the parent */
	}

/* a request dispatcher based forward */
class Forward extends ForwardIncludeElement
	{
	Forward() 
		{
		if (dbg) dbgread("<new FORWARD>"); 
		}

	//the parent writes the code that obtains the dispatcher 'rd'; the
	//forward call (surrounded by explanatory comments in the generated 
	//source) and the end of the code block are added here
	void render() throws IOException
		{
		super.render();

		ol("\t//WARNING: any uncommitted page content before this forward will be discarded.");
		ol("\t//If the response has already been committed an exception will be thrown. ");
		ol("\trd.forward(req, res);");
		ol("\t//NOTE: You should 'return' right after this line. There should be no content in your ");
		ol("\t//page after the forward statement");
		ol("\t}		//end rd block");
		}

	/* toString is inherited from the parent */
	}


/* a molly mechanism to include an external file containing code and method
   declarations. These are typically common utility methods and global
   vars. The included file is not parsed by the molly parser... the contents
   are treated as if they were written directly inside a [!....!] block.
*/ 
class IncludeDecl extends Element
	{
	//name of the file to include (quotes removed)
	String str;
	//option given with the include (if any); only used by toString here
	String opt;
	
	IncludeDecl(CharArrayWriter buf) {
		if (dbg) dbgread("<new INCLUDE-DECL> "); 
		str = removeLeadingTrailingQuote(buf.toString().trim());
		}
	
	void setOption(String opt) {
		this.opt = opt;
		}
	
	/*
	Copies the contents of the included file, as-is, to the generated 
	source, bracketed by START/END comments. 

	Throws an IOException if the file name is just a quote character, if 
	the file does not exist or if it cannot be read.
	*/
	void render() throws IOException
		{
		File f = null;
		File parentDir = inputFile.getParentFile();
		if (parentDir == null) {
			parentDir = new File(".");
			}

		//remove a (still) remaining leading/trailing quote. The length 
		//is looked at afresh for each step, since removing the leading
		//quote changes it.
		if (str.startsWith("\"") || str.startsWith("'")) 
			{
			if (str.length() == 1) //just " or ' 
				throw new IOException("Bad include file name: " + str);
				
			str = str.substring(1);
			}

		if (str.endsWith("\"") || str.endsWith("'")) 
			{
			if (str.length() == 1) //just " or ' 
				throw new IOException("Bad include file name: " + str);
				
			str = str.substring(0, str.length() - 1);
			}

		//names starting with "/" are relative to the context root, all 
		//others are relative to the directory of the page
		if (str.startsWith("/"))
			f = new File(contextRoot, str);
		else
			f = new File(parentDir, str);
		
		if (! f.exists()) {
			throw new IOException("Include file does not exist: " + f.getCanonicalPath());
			}

		o("//>>>START INCLUDE DECLARTIONS from: ");
		o(f.getAbsolutePath());
		ol();
				
		final FileInputStream fin = new FileInputStream(f);
		try {
			o(IOUtil.inputStreamToString(fin));
			}
		finally {
			//make sure the file handle is released, whether or not the
			//helper above closes the stream itself
			fin.close();
			}
	
		o("//>>>END INCLUDE DECLARATIONS from: ");
		o(f.getAbsolutePath());
		ol();
		
		//circularities are tricky, later
		//includeMap.put(pageloc, f.getCanonicalPath());
		}

	public String toString() {
		return "IncludeDecl: [" + str + "; options: " + opt + "]";
		}
	}

//one line of a page's import section; its lines are written as-is,
//without the leading tab that Code.render() adds
class Import extends Code
	{
	Import(CharArrayWriter buf) 
		{
		super(buf);
		}

	void render() 
		{
		for (Object line : list) {
			o (line);
			}
		}
	}

//a line of text added to the tree by the parser itself; it is written
//as-is, on its own line, to the generated source
class MollyComment extends Element
	{
	String str;
	
	MollyComment(String str) 
		{
		this.str = str;
		if (dbg) dbgread("<new MollyComment> "+ str); 
		}

	void render() 
		{
		ol(this.str);
		}
		
	public String toString() 
		{
		return "MollyComment: [" + str + "]";
		}
	}
	
/**
removes starting and trailing single/double quotes. used by the
include/forward render methods only, NOT used while parsing.
*/
private static String removeLeadingTrailingQuote(String str)
	{
	if (str == null)
		return str;

	//work out the [begin, end) range that remains after dropping at most
	//one quote (single or double) from each end
	int begin = 0;
	int end = str.length();

	if (end > 0) {
		final char first = str.charAt(0);
		if (first == '"' || first == '\'') {
			begin = 1;
			}
		}

	//end > begin: a lone quote that was already counted as the leading 
	//quote is not looked at again
	if (end > begin) {
		final char last = str.charAt(end - 1);
		if (last == '"' || last == '\'') {
			end--;
			}
		}

	return str.substring(begin, end);
	}

//===============================================

/*
Command line entry point: parses the page given by -in and generates the 
class named by -classname. See the usage message below for all the 
parameters.
*/
public static void main (String args[]) throws IOException
	{
	Args myargs = new Args(args);
	myargs.setUsage("java " + myargs.getMainClassName() 
		+ "\n"
	    + "Required params:\n"
		+ "     -classname output_class_name\n" 
		+ "     -in        input_page_file\n"
		+ "\nOptional params:\n" 
		+ "     -encoding    <page_encoding>\n"
		+ "     -contextRoot <webapp root-directory or any other directory>\n"
		+ "        this directory is used as the starting directory for absolute (starting\n"
		+ "        with a \"/\") include/forward directives in a page>. If not specified\n"
		+ "        defaults to the same directory as the page file\n"
		+ "     -out <output_file_name>\n"
		+ "        the output file is optional and defaults to the standard out if not specified."
		);
	//String encoding = myargs.get("encoding", Page.DEFAULT_ENCODING);

	File input 		 = new File(myargs.getRequired("in"));
	File contextRoot = null;
	
	if (myargs.flagExists("contextRoot")) {
		contextRoot = new File(myargs.get("contextRoot"));
		}
	else{
		//as promised by the usage message: the directory containing the
		//page file (and not the page file itself). Going via the 
		//absolute file makes this work for a bare file name too.
		contextRoot = input.getAbsoluteFile().getParentFile();
		if (contextRoot == null) {
			contextRoot = input;
			}
		}

	final String outName = myargs.get("out");
	final PrintWriter output;
	
	if (outName != null)
		output = new PrintWriter(new FileWriter(outName));
	else
		output = new PrintWriter(new OutputStreamWriter(System.out));
		
	try {
		PageParser parser = new PageParser(contextRoot, input, output, myargs.getRequired("classname"), Log.getDefault());
		parser.parse();
		}
	finally {
		//don't lose buffered output. The file (if any) is ours to close, 
		//standard out is only flushed.
		output.flush();
		if (outName != null) {
			output.close();
			}
		}
	}

}
