001// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 
002// The Molly framework is freely distributable under the terms of an
003// MIT-style license. For details, see the molly pages web site at:
004// http://www.mollypages.org/. Use, modify, have fun !
005
006package fc.web.page;
007
008import java.io.*;
009import java.util.*;
010import java.util.regex.*;
011import fc.io.*;
012import fc.util.*;
013
014/*
015NOTES
016
017Code blocks of the form 
018 [...] 
019cause problems with java arrays
020
021String[] or foo[4] etc.., craps out. So we need to use 
022 [[...]] 
023for the molly code blocks
024
0251. If you are hacking this file, start with parseText()
026
0272. Turn the dbg flag to true to see how the parser works
028
0293. Keep in mind that the order of switch'es in a case statement in various
030methods is not always arbitrary (the order matters in this sort
031of recursive descent parsing)
032
4. Read www.mollypages.org/page/grammar/index.mp for an intro to parsing
034
0355. This parser as shipped has a set of regression tests in the fc/web/page/test directory.
036These consist of a bunch of *.mp files and corresponding *.java files, each of which is
037known to be generated properly. If you change stuff around, run these regression tests
038again by invoking "java fc.web.page.PageParserTest" Note, if you change things such that
039the .java output of the parser is different, then the tests will fail (since the new .java
040files of your parser will be different to the test ones shipped in fc/web/page/test. In
041this case, once you know that your parser works as you like it, then you should create a
042new baseline for your parser by invoking "java fc.web.page.PageParserTest
043-generateExpected" and then you can use *that* as the new baseline for further changes in
044your parser (you may have to modify the *.mp files in /fc/web/page/test to use your new
045page syntax).
046
0476. 
When including files, previous versions of the parser constructed a new IncludeFile element
which would be invoked when the page was written out. When invoked (via render), that element
would create a new PageParser and set includeMode = true on that new parser. This new parser 
would then parse/write out the subtree of the included file in-line.
052
This inline processing had issues since the included file could not contain import statements,
declarations, etc (since those had already been written out by the parent/top level parser).
Another hack was to pass the child parser the parent/top level object and give access to the 
top level parse root to the child parser (the child parser would have to be invoked immediately
anyway). Also, since inner classes for parse elements are non-static, the separate parser would
create a parse tree, and although it would add those classes to the top most parse tree, the 
classes themselves (when trying to write) would refer to the separate output stream of the child
class (the output stream would also have to be set to the parent class). It was doable but gets
unnecessarily complex.
062
The only benefit of a separate parser is being able to print start/end sections:
064>> start include file
065 [..invoke child parser]
066>> end include file
067
068In the current/cleaner approach, I simply insert the included file into the character stream.
069But there isn't any easy way to track when that stream finishes and the original content starts 
070again. So we get:
071
072>> start include file
073[..include into original stream and continue parsing]
074-- no end include file line --
075
076*/
077
078/**
Parses a page and writes out the corresponding java file to the specified output. The
parser and scanner are combined into one class here for simplicity (a separate scanner is
overkill for a simple LL(1) grammar such as molly pages).
082
083@author hursh jain
084*/
085public final class PageParser
086{
//when true, the parse methods trace themselves via dbgenter()/dbgexit()
private static final boolean dbg    = false;
//value of a read character that marks the end of the input
private static final int     EOF    = -1;
//presumably the indentation depth of the debug trace -- TODO confirm
private              int     dbgtab = 0;

//name given to the generated java class
String          classname;
//defaults to Page.PACKAGE_NAME
String          packagename = Page.PACKAGE_NAME;
//reader over the input page file
PageReader      in;
//closed by parse() in normal mode, only flushed in include mode
PrintWriter     out;
Log             log;
//the page file being parsed
File            inputFile;
//the file to be written to (not set by the private, include-mode constructor)
File            outputFile;
//webapp context root directory
File            contextRoot;
//when true, parse() does not write the page out and does not close 'out'
boolean         includeMode = false;
//presumably the value of the src-encoding directive -- TODO confirm
String          src_encoding;
int       textNodeCounter = 0;

//Read data
//we use these since stringbuffer/builders do not have a clear/reset function
//'buf' accumulates the characters of the node that is currently being parsed
CharArrayWriter buf = new CharArrayWriter(4096);
CharArrayWriter wsbuf = new CharArrayWriter(32);  // ^(whitespace)* 
//the character that was read last (EOF until something has been read)
int c = EOF;

//PageData
List  decl       = new ArrayList();     //declarations
List  inc_decl       = new ArrayList();     //external included declarations
List  imps           = new ArrayList();     //imports
List  tree           = new ArrayList();     //code, exp, text etc.
Map   directives     = new HashMap();       //page options
Set   circularityTrack = new HashSet();   //track pages already included to stop circular refs
116
/** 
The name ("mimetype") of the [page mimetype=....] directive. The value of <tt>none</tt> or an
empty string will turn off writing any mimetype entirely (the user can then write a
mimetype via the {@link javax.servlet.ServletResponse#setContentType} method manually).
<p>
Note, from {@link
javax.servlet.ServletResponse#setContentType ServletResponse}
<pre>
Note that the character encoding cannot be communicated via HTTP headers if the servlet
does not specify a content type; however, it is still used to encode text written via the
servlet response's writer.
</pre>
*/
public static String d_mimetype = "mimetype";

/*
this value (or an empty string) for mimetype means no mimetype
will be specified (not even the default mimetype)
*/
public static String mimetype_none = "none";

/** 
The name ("encoding") of the [page encoding=....] directive. 
*/
public static String d_encoding = "encoding";

/** 
The name ("src-encoding") of the [page src-encoding=....] directive. 
*/
public static String d_src_encoding = "src-encoding";

/** The name ("buffersize") of the [page buffersize=....] directive */
public static String d_buffersize = "buffersize";

/** The name ("out") of the [page out=....] directive */
public static String d_out = "out";
/** A value ("outputstream") of the [page out=outputstream] directive */
public static String d_out_stream1 = "outputstream";
/** A value ("stream") of the [page out=stream] directive */
public static String d_out_stream2 = "stream";
/** A value ("writer") of the [page out=writer] directive */
public static String d_out_writer = "writer";
/** The name ("remove-initial-whitespace") of the [page remove-initial-whitespace=....] directive */
public static String d_remove_initial_whitespace = "remove-initial-whitespace";
161
/* 
Any ".." will be saved as a constant in the constant pool in the class file by javac.
Although java strings, when stored on the heap and referenced by a variable, can be 
up to Integer.MAX_VALUE - 1 (2,147,483,647 bytes, ~ 1 GB unicode) in length,
the constant pool strings are way smaller - 65,535 bytes, but this is not 100% clear and is 
subject to future change?

We break any long lines beyond 65535 / 2 chars into multiple out("...") statements, where each out 
statement contains 32772 chars max. This is used by text nodes for example. Relevant when a massive 
chunk of html text exists without any newlines. Each character can double when quoted (" -> \") so 
to be absolutely safe, we approx. halve the 65535 number.

NOTE(review): 32772 is slightly more than 65535 / 2 -- confirm that this is intended.
*/
private static final int MAX_TEXT_LITERAL_LENGTH = 32772;
175
/* 
Internal constructor, meant for parsing included files.

In contrast to the public constructors, the caller hands over an output
writer that already exists; that writer is used as-is (no new writer is
created from the src_encoding, as is done when parsing a page normally).
See includeMode().
*/
private PageParser(
 File contextRoot, File input, PrintWriter outputWriter, String classname, Log log) 
throws IOException
  {
  this.log = log;
  this.classname = classname;
  this.contextRoot = contextRoot;

  inputFile = input; 
  in        = new PageReader(input);
  out       = outputWriter;

  //the page itself is the first entry of the circular include tracking
  circularityTrack.add(input.getAbsolutePath());
  }
198
/**
Creates a new page parser that will use the default log obtained by
{@link Log#getDefault}

@param  contextRoot absolute path to the webapp context root directory
@param  input   absolute path to the input page file
@param  output    absolute path to the output file (to be written to).
@param  classname classname to give to the generated java class.
@throws IOException if the input page file cannot be opened for reading
*/
public PageParser(File contextRoot, File input, File output, String classname) 
throws IOException
  {
  this(contextRoot, input, output, classname, Log.getDefault());
  }
213
/**
Creates a new page parser.

@param  contextRoot absolute path to the webapp context root directory
@param  input   absolute path to the input page file
@param  output    absolute path to the output file (to be written to).
@param  classname classname to give to the generated java class.
@param  log     destination for internal logging output.
@throws IOException if the input page file cannot be opened for reading
*/
public PageParser(
  File contextRoot, File input, File output, String classname, Log log) 
throws IOException
  {
  this.contextRoot = contextRoot;
  this.inputFile = input; 
  this.in  = new PageReader(input);
  this.outputFile = output;
  this.classname = classname;
  this.log = log;

  //the page itself is the first entry of the circular include tracking
  circularityTrack.add(input.getAbsolutePath());
  }
236
237void append(final int c)
238  {
239  Argcheck.istrue(c >= 0, "Internal error: recieved c=" + c);
240  buf.append((char)c);
241  }
242
243void append(final char c)
244  {
245  buf.append(c);
246  }
247
248void append(final String str)
249  {
250  buf.append(str);
251  }
252
253/* not used anymore */
254PageParser includeMode()
255  {
256  includeMode = true;
257  return this;
258  }
259
260/**
261Parses the page. If the parse is successful, the java source will be
262generated.
263
264@throws IOException   a parse failure occurred. The java source file
265            may or may not be properly generated or written
266            in this case.
267*/
268public void parse() throws IOException
269  {
270  parseText();  
271
272  if (! includeMode)
273    {
274    writePage();
275    out.close();
276    }
277  else{
278    out.flush();
279    }
280
281  in.close();
282  }
283
284//util method for use in the case '[' branch of parseText below.
285private Text newTextNode()
286  {
287  Text text = new Text(buf);
288  tree.add(text);
289  buf.reset();
290  return text;
291  }
292
293  /* 
294  Things always start off with a text node - all tags will be then processed as/when seen.
295  
296  If there is any white space at the beginning (before any other directives/tags are seen), that
297  will become part of the first text node. Any white space *after* a directive/tag will become part 
298  of the subsequent text node.
299  
300  This is relevant when removing initial whitespace (if a directive to do so exists). If such a directive
301  exists, the strategy is to keep going down our list of parsed nodes, remove any text nodes that are
302  only whitespace, and then ignore any leading whitespace from the first non-empty text node we find.
303  
304  The textnode itself escapes any whitespace with string escapes (a newline -> "\n") since that is
305  fed to the page out(..) as a string. So, to remove whitespaces once they are already escaped into
306  strings in the textnode data, we have to remove these escaped versions of those whitespaces. 
307  
308  To remove a newline, we have to search for "\n". However if the user had typed "\n" to begin with, 
309  that would be converted to "\\n", so it gets very tricky to distinguish between what was typed
310  in the page and what we escaped.
311
312    In example (1), the page itself has 2 characters typed: [\, n] and so on. Anything not <NL>
313    is actually typed as a character in the page.
314    
315  source            text node            output                    in browser
316    \n                 \\n         out.print("\\n")              \n          (1)
317  \<NL>              \\\n              out.print("\\\n")             \           (2)
318    \n<NL>             \\n\n             out.print("\\n\n")            \n<NL>      (3)
319    
320  Carrying on:  
321  \\n<NL>             \\\\n\n           out.print("\\\\n\n")          \\n<NL>    (4)
322  \\\n<NL>            \\\\\\n\n         out.print("\\\\\\n\n")        \\\n<NL>   (5)
323
324  To eliminate whitespaces, NL in this example, we have to search for: \n *BUT* ignore any \\n
325
326  \n                 \\n                out.print("\\n")              \n        (1)
327                       \\n                     "\\n"                    \n        1a: no newline in src!
328
329  \<NL>              \\\n              out.print("\\n\n")             \<NL>     (2)
330                       \\ [\n]                 "\\"                      \        2a: NL in src gone!        
331
332  \n<NL>             \\n\n              out.print("\\n\n")            \n<NL>    (3)
333                       \\n [\n]                 "\\n"                    \n       3a: NL in src gone!        
334  
335  \\n<NL>             \\\\n\n           out.print("\\\\n\n")          \\n<NL>   (4)
336                        \\ \\n [\n]             "\\\\n"                 \\n       4a: NL in src gone!
337                          
338  \\\n<NL>            \\\\\\n\n         out.print("\\\\\n\n")         \\\n<NL>  (5)
339  \\\n<NL>           \\ \\ \\n [\n]     out.print("\\\\\\n")           \\\n     5a: NL in src gone!
340  
341  It is quite hokey. If we use \s+ regular expression, that will search for actual NL characters.
342  What we want is to search for '\','n' as long as it is not '\','\','n'
343  
344  We could do this before we escape anything in the Text node itself (as the lexical level) but I've 
345  done is post lexical - at writeRenderMethod(). Doing in after the parse step does not touch any 
346  existing parsing/lexing code so its safer, a one and done deal right now basically. And when parsing, 
347  Text nodes break multiple lines (separated by NL) into separate internal items in a list which makes
348  it easier for us - We just have to examine each internal item as a separate line.
349  
350  Doing it while lexing - using in.skipWhitespace() - is also tricky because parsing is recursive
351  and we have to know when to invoke the skip and when not too.
352  */
353void parseText() throws IOException
354  {
355  if (dbg) dbgenter(); 
356
357  while (true)
358    { 
359    c = in.read();
360    
361    if (c == EOF) {
362      tree.add(new Text(buf));
363      buf.reset();
364      break;
365      }
366    
367    switch (c)
368      { 
369      //Escape start tags
370      case '\\':
371        /*  we don't need to do this: previously, expressions
372        were [...] but now they are [=...], previously we needed
373        to escape \[[ entirely (since if we escaped \[ the second
374        [ would start an expression
375        */
376        /*        
377        if (in.match("[["))  
378          append("[[");
379        */
380        //escape only \[... otherwise leave \ alone
381        if (in.match("["))
382          append("[");
383        else
384          append(c);
385        break;
386
387      case '[':
388        /* suppose we have
389        \[[
390        escape handling above will capture \[
391        then the second '[' drops down here. Good so far.
392        But we must not create a new text object here by
393        default...only if we see another [[ or [= or [include or
394        whatever. 
395        */
396        /*
397        But creating a text object at the top is easier
398        then repeating this code at every if..else branch below
399        but this creates superfluous line breaks.
400        
401        hello[haha]world
402        -->prints as-->
403        hello  (text node 1)
404        [haha] (text node 2)
405        world  (text node 3)
406        --> we want
407        hello[haha]world (text node 1)
408        */
409          
410        if (in.match('[')) { 
411          newTextNode();
412          parseCode(); 
413          }
414        else if (in.match('=')) {
415          Text text = newTextNode();
416          parseExpression(text);
417          }
418        else if (in.match('!')) {
419          newTextNode();
420          parseDeclaration();
421          }
422        else if (in.match("/*")) {
423          newTextNode();
424          parseComment(); 
425          }
426        else if (in.matchIgnoreCase("page")) {
427          newTextNode();
428          parseDirective();
429          }
430        //longest match: "include-file" etc., last: "include"
431        else if (in.matchIgnoreCase("include-file")) {
432          newTextNode();
433          parseIncludeFile();
434          }
435        else if (in.matchIgnoreCase("include-decl")) {
436          newTextNode();
437          parseIncludeDecl();
438          }
439        else if (in.matchIgnoreCase("include")) {
440          newTextNode();
441          parseInclude();
442          }
443        else if (in.matchIgnoreCase("forward")) {
444          newTextNode();
445          parseForward();
446          }
447        else if (in.matchIgnoreCase("import")) {
448          newTextNode();
449          parseImport();
450          }
451        else  {
452          //System.out.println("c1=" + (char)c);
453          append(c);
454          }
455        break;  
456  
457      default:
458        //System.out.println("c2=" + (char)c);
459        append(c);
460        
461      } //switch    
462    } //while
463
464  if (dbg) dbgexit(); 
465  }
466  
467void parseCode() throws IOException
468  {
469  if (dbg) dbgenter(); 
470
471  int startline = in.getLine();
472  int startcol = in.getCol();
473  
474  while (true)
475    {
476    c = in.read();  
477  
478    switch (c) /* the order of case tags is important. */
479      {
480      case EOF:
481        unclosed("code", startline, startcol);
482        if (dbg) dbgexit(); 
483        return;
484
485      case '/':   //Top level:  // and /* comments
486        append(c);
487        c = in.read();
488        append(c);
489        if (c == '/') 
490          appendCodeSlashComment();
491        else if (c == '*') 
492          appendCodeStarComment();
493          break;        
494    
495      case '"':     //strings outside of any comment
496        append(c);
497        appendCodeString();  
498        break;
499        
500      case '\'':
501        append(c);
502        appendCodeCharLiteral();
503        break;
504        
505      case ']':
506        if (in.match(']')) {
507          tree.add(new Code(buf));
508          buf.reset();
509          if (dbg) dbgexit(); 
510          return;
511          }
512        else {
513          append(c);
514          }
515        break;
516      
517      /* 
518      a hash by itself on a line starts a hash section.
519      whitespace before the # on that line is used as an
520      printing 'out' statements for that hash.
521      
522      for (int n = 0; n < ; n++) {
523      ....# foo #
524      | }
525      |=> 4 spaces 
526      so nice if generated code looked like:
527      
528      for (int n = 0; n < ; n++) {
529          out.print(" foo ");
530          }
531      */
532      case '\n':
533      case '\r':
534        append(c);       //the \n or \r just read
535        readToFirstNonWS();  //won't read past more newlines 
536        //is '#' is first non-ws on this line ?
537        c = in.read();
538        if (c == '#') {           
539          tree.add(new Code(buf));
540          buf.reset();
541          //whitespace provides indentation offset
542          parseHash(wsbuf.toString()); 
543          }
544        else{
545          append(wsbuf.toString());  //wsbuf contains codetext
546          //let other cases also handle first non-ws or EOF
547          in.unread();    
548          }
549        break;
550      
551      /* in this case, hash does not start on a new line, like:
552         for (...) { #
553      */
554      case '#':
555        tree.add(new Code(buf));
556        buf.reset();
557        parseHash(null);
558        break;  
559      
560      default:
561        append(c);
562      } //switch    
563    } //while
564  }
565
566void parseHash(String offset) throws IOException
567  {
568  if (dbg) dbgenter(); 
569
570  int startline = in.getLine();
571  int startcol = in.getCol();
572
573  while (true)
574    {
575    c = in.read();  
576  
577    switch (c)
578      {
579      case EOF: 
580        unclosed("hash", startline, startcol);
581        if (dbg) dbgexit(); 
582        return;
583
584      //special case: very common and would be a drag to escape
585      //this every time:
586      //  # <table bgcolor="#ffffff">....   #
587      //Now, all of:
588      //  bgcolor="#xxx"  
589      //  bgcolor='#xxx'
590      //  bgcolor="\#xxx" 
591      //will work the same and give: bgcolor="#xxx"
592      //1)
593      //However to get a:
594      //  bgcolor=#xxx    (no quoted around #xxx)
595      //we still have to say:
596      //  bgcolor=\#xxx   
597      //2)
598      //Of course, since we special case this, then:
599      //  #"bar"#
600      // that ending # is lost and we end up with
601      //  #"bar"  with no closing hash
602      //So we need to make sure that we write:
603      //  #"bar" #
604      // instead
605
606      case '\'':
607      case '"':
608        append(c);
609        if (in.match('#')) 
610          append('#');
611        break;
612        
613      case '\\':
614        if (in.match('[')) 
615          append('[');      
616        else if (in.match('#'))
617          append('#');
618        else
619          append(c);
620        break;
621        
622      case '[':
623        if (in.match('=')) {
624          Hash hash = new Hash(offset, buf);
625          tree.add(hash);
626          buf.reset();
627          parseExpression(hash);
628          }
629        else{
630          append(c);
631          }
632        break;
633
634      /*
635      this case is not needed but is a bit of a optimization
636      for (int n = 0; n < 1; n++) {
637        #
638        foo
639      ....#...NL
640        }
641      avoids printing the dots (spaces) and NL in this case
642      (the newline after foo is still printed)
643      */
644      case '\n':
645      case '\r':
646        append(c);
647        readToFirstNonWS(); 
648        c = in.read();
649        //'#' is first non-ws on the line
650        if (c == '#') {
651          tree.add(new Hash(offset, buf));
652          buf.reset();
653          //skipIfWhitespaceToEnd();
654          if (dbg) dbgexit(); 
655          return;
656          }
657        else {
658          append(wsbuf.toString());
659          in.unread(); //let other cases also handle first non-ws   
660          }
661        break;
662
663      case '#':
664        tree.add(new Hash(offset, buf));  
665        //skipIfWhitespaceToEnd();
666        buf.reset();
667        if (dbg) dbgexit(); 
668        return;
669        
670      default:
671        append(c);
672      }  //switch 
673    } //while
674  }
675
676/**
677[page <<<FOO]
678...as-is..no parse, no interpolation..
679FOO
680*/
681void parseHeredoc(StringBuilder directives_buf) throws IOException
682  {
683  if (dbg) dbgenter(); 
684
685  int startline = in.getLine();
686  int startcol = in.getCol();
687      
688  int i = directives_buf.indexOf("<<<"); /* "<<<".length = 3 */
689  CharSequence subseq = directives_buf.substring(
690            i+3, 
691            /*directives_buf does not have a ending ']' */
692            directives_buf.length() 
693            );
694    
695  final String      heredoc     = subseq.toString().trim();
696  final int         heredoc_len = heredoc.length();
697  final CharArrayWriter heredoc_buf = new CharArrayWriter(2048);
698
699  /* 
700  the ending heredoc after newline speeds things up a bit
701  which is why is traditionally used i guess, otherwise
702  we have to try a full match every first match. this 
703  implementation doesn't care where the ending heredoc
704  appears (can be anywhere)...simplifies the implementation.
705  */
706  
707  while (true)
708    { 
709    c = in.read();
710    
711    if (c == EOF) {
712      unclosed("heredoc: <<<"+heredoc, startline, startcol);
713      break;
714      }
715      
716    if (c == heredoc.charAt(0))
717      {
718      boolean matched = true;
719      if (heredoc_len > 1) {
720        matched = in.match(heredoc.substring(1));
721        }
722      if (matched) {  
723        tree.add(new Heredoc(heredoc_buf));
724        break;
725        }
726      }
727    
728    //default action
729    heredoc_buf.append((char)c);  
730    } //while
731    
732  if (dbg) dbgexit(); 
733  }
734
735/*
736Text is the parent node for the expression. A new expression is parsed,
737created and added to the text object by this method
738*/
739void parseExpression(Element parent) throws IOException
740  {
741  if (dbg) dbgenter(); 
742
743  int startline = in.getLine();
744  int startcol = in.getCol();
745
746  while (true)
747    {
748    c = in.read();      
749  
750    switch (c)
751      {
752      case EOF:
753        unclosed("expression", startline, startcol);
754        if (dbg) dbgexit(); 
755        return;
756
757      case '\\':
758        if (in.match(']')) 
759          append(']');    
760        else
761          append(c);
762        break;
763
764      case ']':
765        if (buf.toString().trim().length() == 0)
766          error("Empty expression not allowed", startline, startcol);
767        parent.addExp(new Exp(buf));
768        buf.reset();  
769        if (dbg) dbgexit(); 
770        return;
771        
772      default:
773        append(c);
774      }
775    }
776  }
777
778void parseComment() throws IOException
779  {
780  if (dbg) dbgenter(); 
781
782  int startline = in.getLine();
783  int startcol = in.getCol();
784
785  while (true)
786    {
787    c = in.read();      
788  
789    switch (c)
790      {
791      case EOF:
792        unclosed("comment", startline, startcol);
793        if (dbg) dbgexit(); 
794        return;
795        
796      case '*':
797        if (in.match("/]"))
798          {
799          tree.add(new Comment(buf));
800          buf.reset();  
801          if (dbg) dbgexit(); 
802          return;
803          }
804        else
805          append(c);  
806        break;
807      
808      default:
809        append(c);
810      }
811    }
812  }
813
814void parseDeclaration() throws IOException
815  {
816  if (dbg) dbgenter(); 
817  int startline = in.getLine();
818  int startcol = in.getCol();
819
820  while (true)
821    {
822    c = in.read();      
823  
824    switch (c)
825      {
826      case EOF:
827        unclosed("declaration", startline, startcol);
828        if (dbg) dbgexit(); 
829        return;
830      
831      case '!':
832        if (in.match(']')) {
833          decl.add(new Decl(buf));
834          buf.reset();  
835          if (dbg) dbgexit(); 
836          return;
837          }
838        else{
839          append(c);
840          }
841        break;
842
843      //top level // and /* comments, ']' (close decl tag)
844      //is ignored within them
845      case '/':   
846        append(c);
847        c = in.read();
848        append(c);
849        if (c == '/') 
850          appendCodeSlashComment();
851        else if (c == '*') 
852          appendCodeStarComment();
853          break;        
854    
855      //close tags are ignored within them
856      case '"':     //strings outside of any comment
857        append(c);
858        appendCodeString();  
859        break;
860        
861      case '\'':
862        append(c);
863        appendCodeCharLiteral();
864        break;
865            
866      default:
867        append(c);
868      }
869    }
870
871  }
872
873void parseDirective() throws IOException
874  {
875  if (dbg) dbgenter(); 
876
877  int startline = in.getLine();
878  int startcol = in.getCol();
879
880  StringBuilder directives_buf = new StringBuilder(1024);
881
882  while (true)
883    {
884    c = in.read();      
885  
886    switch (c)
887      {
888      case EOF:
889        unclosed("directive", startline, startcol);
890        if (dbg) dbgexit(); 
891        return;
892        
893      case ']':
894        if (directives_buf.indexOf("<<<") >= 0)  {
895          parseHeredoc(directives_buf); 
896          }
897        else{/* other directives used at page-generation time */
898          addDirectives(directives_buf);
899          }
900          
901        if (dbg) dbgexit(); 
902        return;
903      
904      default:
905        directives_buf.append((char)c);
906      }
907    }
908
909  }
910
/*
Matches one name=value pair of a page directive. Names consist of:
  [a-zA-Z_\-0-9] == ( \w | - )
There are 2 alternatives, only one of which takes part in any given match:
  1) groups 1 (name) and 2 (value): the value is within double quotes, may
     contain whitespace (newlines too) and is captured without the quotes
  2) groups 3 (name) and 4 (value): the value is a run of non-whitespace 
     characters and is captured with any quote characters it contains
*/
static final Pattern directive_pat = Pattern.compile(
  //foo = "bar baz" (embd. spaces)
  "\\s*([a-zA-Z_\\-0-9]+)\\s*=\\s*\"((?:.|\r|\n)+?)\""  
  + "|"
  //foo = "bar$@#$" (no spaces) OR foo = bar (quotes optional)
  + "\\s*([a-zA-Z_\\-0-9]+)\\s*=\\s*(\\S+)" 
  );
919  
920    
921void addDirectives(StringBuilder directives_buf) throws ParseException
922  {
923  if (dbg) {
924    dbgenter(); 
925    System.out.println("-------directives section--------");
926    System.out.println(directives_buf.toString());
927    System.out.println("-------end directives-------");
928    }
929  
930  String name, value;
931  try {
932    Matcher m = directive_pat.matcher(directives_buf);
933    while (m.find()) 
934      {
935      if (dbg) System.out.println(">>>>[0]->" + m.group() 
936        + "; [1]->" + m.group(1)  
937        + " [2]->" + m.group(2)  
938        + " [3]->" + m.group(3)  
939        + " [4]->" + m.group(4));
940        
941      name = m.group(1) != null ? m.group(1).toLowerCase() :
942                    m.group(3).toLowerCase();
943      value = m.group(2) != null ? m.group(2).toLowerCase() :
944                     m.group(4).toLowerCase();
945
946      if (name.equals(d_buffersize)) 
947        {
948        //can throw parse exception
949        directives.put(name, 
950          IOUtil.stringToFileSize(value.replace("\"|'",""))); 
951        }
952      else if (name.equals(d_encoding)) {
953        directives.put(name, value.replace("\"|'",""));       
954        }
955      else if (name.equals(d_src_encoding)) {
956        directives.put(name, value.replace("\"|'",""));       
957        } 
958      else if (name.equals(d_mimetype)) {
959        directives.put(name, value.replace("\"|'",""));       
960        }
961      else if (name.equals(d_out)) {
962        directives.put(name, value.replace("\"|'",""));       
963        } 
964      else if (name.equals(d_remove_initial_whitespace)) {
965        directives.put(name, value.replace("\"|'",""));       
966        } 
967      //else if .... other directives here as needed....
968      else 
969        throw new Exception("Do not understand directive: " + m.group());
970      }
971    if (dbg) System.out.println("Added directives: " + directives);
972    }
973  catch (Exception e) {
974    throw new ParseException("File: " + inputFile.getAbsolutePath() 
975                  + ";\n" + e.toString());
976    }
977
978  if (dbg) dbgexit(); 
979  }
980
981void parseIncludeFile() throws IOException
982  {
983  if (dbg) dbgenter(); 
984
985  int startline = in.getLine();
986  int startcol = in.getCol();
987  String option = null;
988  
989  while (true)
990    {
991    c = in.read();      
992  
993    switch (c)
994      {
995      case EOF:
996        unclosed("include-file", startline, startcol);
997        if (dbg) dbgexit(); 
998        return;
999        
1000      case '[':
1001        if (in.match('=')) {
1002  //log.warn("Expressions cannot exist in file includes. Ignoring \"[=\"
1003  //in [include-file... section starting at:", startline, startcol);
1004  //instead of warn, we will error out. failing early is better.
1005  //this does preclude having '[=' in the file name, but it's a good
1006  //tradeoff
1007          error("Expressions cannot exist in file includes. The offending static-include section starts at:", startline, startcol);
1008          }
1009        append(c);
1010        break;
1011      
1012      case ']':
1013        includeFile(buf, option); /* not added in the tree, just included in the stream */
1014        buf.reset();  
1015        if (dbg) dbgexit(); 
1016        return;
1017      
1018      case 'o':
1019        if (! in.match("ption"))
1020          append(c);
1021        else{
1022          skipWS();
1023          if (! in.match("=")) {
1024            error("bad option parameter in file include: ", startline, startcol);
1025            }
1026          skipWS();
1027          
1028          int c2;
1029          StringBuilder optionbuf = new StringBuilder();
1030          while (true) {
1031            c2 = in.read();
1032            if (c2 == ']' || c2 == EOF || Character.isWhitespace(c2)) {   
1033              in.unread();
1034              break;
1035              }
1036            optionbuf.append((char)c2);
1037            }
1038          
1039          option = optionbuf.toString();
1040          //System.out.println(option);
1041          } //else
1042        break;
1043  
1044      default:
1045        append(c);
1046      }
1047    }
1048  }
1049
1050void parseIncludeDecl() throws IOException
1051  {
1052  if (dbg) dbgenter(); 
1053
1054  int startline = in.getLine();
1055  int startcol = in.getCol();
1056  String option = null;
1057  
1058  while (true)
1059    {
1060    c = in.read();      
1061  
1062    switch (c)
1063      {
1064      case EOF:
1065        unclosed("include-decl", startline, startcol);
1066        if (dbg) dbgexit(); 
1067        return;
1068        
1069      case '[':
1070        if (in.match('=')) {
1071    //log.warn("Expressions cannot exist in file includes. Ignoring \"[=\" in [include-static... section starting at:", startline, startcol);
1072    //we will throw an exception. failing early is better. this
1073    //does preclude having '[=' in the file name, but it's a good tradeoff
1074          error("Expressions cannot exist in include-decl. The offending static-include section starts at:", startline, startcol);
1075          }
1076        append(c);
1077        break;
1078      
1079      case ']':
1080        IncludeDecl i = new IncludeDecl(buf);
1081        if (option != null)
1082          i.setOption(option);
1083        inc_decl.add(i);
1084        buf.reset();  
1085        if (dbg) dbgexit(); 
1086        return;
1087      
1088      case 'o':
1089        if (! in.match("ption"))
1090          append(c);
1091        else{
1092          skipWS();
1093          if (! in.match("=")) {
1094            error("bad option parameter in include-code: ", startline, startcol);
1095            }
1096          skipWS();
1097          
1098          int c2;
1099          StringBuilder optionbuf = new StringBuilder();
1100          while (true) {
1101            c2 = in.read();
1102            if (c2 == ']' || c2 == EOF || Character.isWhitespace(c2)) {   
1103              in.unread();
1104              break;
1105              }
1106            optionbuf.append((char)c2);
1107            }
1108          
1109          option = optionbuf.toString();
1110          //System.out.println(option);
1111          } //else
1112        break;
1113  
1114      default:
1115        append(c);
1116      }
1117    }
1118  }
1119
1120//the filename/url can be optionally double quoted. leading/trailing
1121//double quotes (if any) are ignored when an include is rendered...
1122//this way there isn't any additional parsing needed here...I could
1123//ignore the optional quote here (and that's the formal proper way) 
1124//and then not move the ignore quote logic into the render() method but
1125//this way is good too...and simpler..
1126//same goes for the other parseIncludeXX/ForwardXX functions.
1127void parseInclude() throws IOException
1128  {
1129  if (dbg) dbgenter(); 
1130
1131  int startline = in.getLine();
1132  int startcol = in.getCol();
1133  Include include = new Include();
1134  while (true)
1135    {
1136    c = in.read();      
1137  
1138    switch (c)
1139      {
1140      case EOF:
1141        unclosed("include", startline, startcol);
1142        if (dbg) dbgexit(); 
1143        return;
1144        
1145      case '[':
1146        if (in.match('=')) {
1147          include.add(buf);
1148          buf.reset();
1149          parseExpression(include);
1150          }
1151        else{
1152          append(c);
1153          }
1154        break;
1155      
1156      case ']':
1157        include.add(buf);
1158        tree.add(include);
1159        buf.reset();  
1160        if (dbg) dbgexit(); 
1161        return;
1162      
1163      default:
1164        append(c);
1165      }
1166    }
1167  }
1168
1169void parseForward() throws IOException
1170  {
1171  if (dbg) dbgenter(); 
1172
1173  int startline = in.getLine();
1174  int startcol = in.getCol();
1175
1176  Forward forward = new Forward();
1177  while (true)
1178    {
1179    c = in.read();      
1180  
1181    switch (c)
1182      {
1183      case EOF:
1184        unclosed("forward", startline, startcol);
1185        if (dbg) dbgexit(); 
1186        return;
1187        
1188      case '[':
1189        if (in.match('=')) {
1190          forward.add(buf);
1191          buf.reset();
1192          parseExpression(forward);
1193          }
1194        else{
1195          append(c);
1196          }
1197        break;
1198      
1199      case ']':
1200        forward.add(buf);
1201        tree.add(forward);
1202        buf.reset();  
1203        if (dbg) dbgexit(); 
1204        return;
1205      
1206      default:
1207        append(c);
1208      }
1209    }
1210  }
1211
//we need to parse imports separately because they go outside
//a class declaration (and [!...!] goes inside a class)
1214//import XXX.*;
1215//class YYY {
1216//[!....stuff from here ....!]
1217//...
1218void parseImport() throws IOException
1219  {
1220  if (dbg) dbgenter(); 
1221
1222  int startline = in.getLine();
1223  int startcol = in.getCol();
1224
1225  while (true)
1226    {
1227    c = in.read();      
1228  
1229    switch (c)
1230      {
1231      case EOF:
1232        unclosed("import", startline, startcol);
1233        if (dbg) dbgexit(); 
1234        return;
1235      
1236      case '\n':
1237        imps.add(new Import(buf));
1238        buf.reset();
1239        break;
1240        
1241      case ']':
1242        imps.add(new Import(buf));
1243        buf.reset();  
1244        if (dbg) dbgexit(); 
1245        return;
1246      
1247      default:
1248        append(c);
1249      }
1250    }
1251  }
1252
1253/*
1254Called when // was read at the top level inside a code block. Appends
1255the contents of a // comment to the buffer (not including the trailing
1256newline)
1257*/
1258void appendCodeSlashComment() throws IOException
1259  {
1260  if (dbg) dbgenter();
1261  
1262  while (true) 
1263    {
1264    c = in.read();
1265    
1266    if (c == EOF)
1267      break;
1268  
1269    //do not append \r, \r\n, or \n, that finishes the // comment
1270    //we need that newline to figure out if the next line is a hash
1271    //line
1272    if (c == '\r') {
1273      in.unread();
1274      break;
1275      }
1276    
1277    if (c == '\n') {
1278      in.unread();
1279      break;  
1280      }
1281
1282    append(c);
1283    }
1284  
1285  if (dbg) dbgread("CodeSLASHComment Finished: Buffer=" + StringUtil.viewableAscii(buf.toString()));
1286  if (dbg) dbgexit();
1287  }
1288
1289/*
1290Called when /* was read at the top level inside a code block. Appends
1291the contents of a /*comment to the buffer. (not including any trailing
1292newline or spaces)
1293*/
1294void appendCodeStarComment() throws IOException
1295  {
1296  if (dbg) dbgenter(); 
1297  
1298  while (true) 
1299    {
1300    c = in.read();  
1301
1302    if (c == EOF)
1303      break;
1304  
1305    append(c);
1306    
1307    if (c == '*') 
1308      {
1309      if (in.match('/')) {
1310        append('/');
1311        break;
1312        }
1313      }
1314    }
1315
1316  if (dbg) dbgread("CodeSTARComment Finished: Buffer=" + StringUtil.viewableAscii(buf.toString()));
1317  if (dbg) dbgexit(); 
1318  }
1319
1320/*
1321Called (outside of any comments in the code block) when: 
1322--> parseCode()
1323     ... "
1324         ^ (we are here)
1325*/
1326void appendCodeString() throws IOException
1327  {
1328  if (dbg) dbgenter(); 
1329
1330  int startline = in.getLine();
1331  int startcol = in.getCol();
1332
1333  while (true) 
1334    {
1335    c = in.read();
1336  
1337    if (c == EOF || c == '\r' || c == '\n')
1338      unclosed("string literal", startline, startcol);
1339  
1340    append(c);
1341  
1342    if (c == '\\') {
1343      c = in.read();
1344      if (c == EOF)
1345        unclosed("string literal", startline, startcol);
1346      else {
1347        append(c);
1348        continue;   //so \" does not hit the if below and break
1349        }
1350      }
1351    
1352    if (c == '"')
1353      break;
1354    }
1355
1356  if (dbg) dbgread("appendCodeString Finished: Buffer=" + StringUtil.viewableAscii(buf.toString()));
1357  if (dbg) dbgexit(); 
1358  }
1359
1360
1361/*
1362Called (outside of any comments in the code block) when: 
1363--> parseCode()
1364     ... '
1365         ^ (we are here)
1366*/
1367void appendCodeCharLiteral() throws IOException
1368  {
1369  if (dbg) dbgenter(); 
1370
1371  int startline = in.getLine();
1372  int startcol = in.getCol();
1373
1374  while (true) 
1375    {
1376    c = in.read();
1377  
1378    if (c == EOF || c == '\r' || c == '\n')
1379      unclosed("char literal", startline, startcol);
1380  
1381    append(c);
1382  
1383    if (c == '\\') {
1384      c = in.read();
1385      if (c == EOF)
1386        unclosed("char literal", startline, startcol);
1387      else {
1388        append(c);
1389        continue;   //so \' does not hit the if below and break
1390        }
1391      }
1392    
1393    if (c == '\'')
1394      break;
1395    }
1396
1397  if (dbg) dbgread("appendCodeCharLiteral Finished: Buffer=" + StringUtil.viewableAscii(buf.toString()));
1398  if (dbg) dbgexit(); 
1399  }
1400
1401
1402/*
1403Reads from the current position till the first nonwhitespace char, EOF or
1404newline is encountered. Reads are into the whitespace buffer. does not
1405consume the character past the non-whitespace character and does
1406NOT read multiple lines of whitespace.
1407*/
1408void readToFirstNonWS() throws IOException 
1409  {
1410  wsbuf.reset();
1411
1412  while (true)
1413    {
1414    c = in.read();
1415  
1416    if (c == '\r' || c == '\n')
1417      break;
1418      
1419    if (c == EOF || ! Character.isWhitespace(c))
1420      break;
1421  
1422    wsbuf.append((char)c);
1423    }
1424    
1425  in.unread();
1426  }
1427
1428//skip till end of whitespace or EOF. does not consume any chars past 
1429//the whitespace.
1430void skipWS() throws IOException
1431  {
1432  int c2 = EOF;
1433  while (true) {
1434    c2 = in.read();
1435    if (c2 == EOF || ! Character.isWhitespace(c2)) {
1436      in.unread();
1437      break;
1438      }
1439    } 
1440  }
1441  
1442//skips to the end of line if the rest of the line is (from the current
1443//position), all whitespace till the end. otherwise, does not change 
1444//current position. consumes trailing newlines (if present) when reading 
1445//whitespace.
1446void skipIfWhitespaceToEnd() throws IOException
1447  {
1448  int count = 0;
1449  
1450  while (true) 
1451    {
1452    c = in.read();
1453      count++;
1454
1455    if (c == '\r') {
1456      in.match('\n');
1457      return;
1458      }
1459      
1460    if (c == '\n' || c == EOF)
1461      return;
1462      
1463    if (! Character.isWhitespace(c))
1464      break;
1465      }
1466
1467  in.unread(count);
1468  }
1469
1470//not used anymore but left here for potential future use. does not
1471//consume the newline (if present)
1472void skipToLineEnd() throws IOException 
1473  {
1474    while (true) 
1475      {
1476      int c = in.read();
1477      if (c == EOF) {
1478        in.unread();
1479      break;
1480        }
1481      if (c == '\n' || c == '\r') { 
1482        in.unread();
1483        break;
1484        }
1485      }
1486    }
1487
1488String quote(final char c) 
1489  {
1490    switch (c)
1491      {
1492      case '\r':
1493            return "\\r";
1494            
1495      case '\n':
1496            return "\\n";
1497 
1498    case '\"':   /* this is a quirk, '\"' is same as '"' for char literals, keeping as-is for legacy*/
1499      //can also say: new String(new char[] {'\', '"'})
1500            return "\\\"";    //--> \"
1501 
1502    case '\\':
1503            return "\\\\";
1504    
1505      default:
1506        return String.valueOf(c);
1507      }
1508    }
1509
1510//======= util and debug methods ==========================
1511String methodName(int framenum)
1512  {
1513  StackTraceElement ste[] = new Exception().getStackTrace();
1514  //get method that called us, we are ste[0]
1515  StackTraceElement st = ste[framenum];
1516  String file = st.getFileName();
1517  int line = st.getLineNumber();
1518  String method = st.getMethodName();
1519  String threadname = Thread.currentThread().getName();
1520  return method + "()";   
1521  }
1522
1523void dbgenter() {
1524  System.out.format("%s-->%s\n", StringUtil.repeat('\t', dbgtab++), methodName(2));
1525  }
1526  
1527void dbgexit() {
1528  System.out.format("%s<--%s\n", StringUtil.repeat('\t', --dbgtab), methodName(2));
1529  }
1530
1531void dbgread(String str) {
1532  System.out.format("%s %s\n", StringUtil.repeat('\t', dbgtab), StringUtil.viewableAscii(str));
1533  }
1534
1535void dbgread(String str, List list) {
1536  System.out.format("%s %s: ", StringUtil.repeat('\t', dbgtab), StringUtil.viewableAscii(str));
1537  for (int n = 0; n < list.size(); n++) {
1538    System.out.print( StringUtil.viewableAscii( (String)list.get(n) ) );
1539    }
1540  System.out.println("");
1541  }
1542
1543void dbgread(char c) {
1544  System.out.format("%s %s\n", StringUtil.repeat('\t', dbgtab), StringUtil.viewableAscii(c));
1545  }
1546
1547void dbgread(CharArrayWriter buf) {
1548  System.out.format("%s %s\n", StringUtil.repeat('\t', dbgtab), StringUtil.viewableAscii(buf.toString()));
1549  }
1550
1551void unclosed(String blockname, int startline, int startcol) throws IOException
1552  {
1553  throw new IOException(blockname + " tag not closed.\nThis tag was possibly opened in: \nFile:"
1554    + inputFile + ", line:" 
1555    + startline + " column:" + startcol +
1556    ".\nCurrent line:" + in.getLine() + " column:" + in.getCol());  
1557  }
1558
1559void error(String msg, int line, int col) throws IOException
1560  {
1561  throw new IOException("Error in File:" + inputFile + " Line:" + line + " Col:" + col + " " + msg);  
1562  }
1563
1564void error(String msg) throws IOException
1565  {
1566  throw new IOException("Error in File:" + inputFile + " " + msg);  
1567  }
1568
1569//============== Non Parsing methods ================================
1570void o(Object str) {
1571  out.print(str);
1572  }
1573
1574void ol(Object str) {
1575  out.println(str); 
1576  }
1577
1578void ol() {
1579  out.println();
1580  }
1581  
1582/**
1583Returns the src_encoding directive (if any) defined in this page or <tt>null</tt> if no source encoding
1584has been specified.
1585*/
1586String getSourceEncoding() {
1587  return src_encoding;
1588  }
1589
1590/* 
1591include an external file whose contents will be rendered as part of the page.
1592*/ 
1593void includeFile(CharArrayWriter buf, String option) throws IOException
1594  {
1595  String str;
1596  
1597  if (dbg) dbgread("<new INCLUDE-FILE> "); 
1598  str = removeLeadingTrailingQuote(buf.toString().trim());
1599  
1600  File includeFile = null;
1601  File parentDir = inputFile.getParentFile();
1602  if (parentDir == null) {
1603    parentDir = new File(".");
1604    }
1605
1606  if (str.startsWith("/"))
1607    includeFile = new File(contextRoot, str);
1608  else
1609    includeFile = new File(parentDir, str);
1610        
1611  //System.out.println(">>>>>>>>>> f="+f +";root="+contextRoot);
1612      
1613  if (! includeFile.exists()) {
1614    throw new IOException("Include file does not exist: " + includeFile.getCanonicalPath());
1615    }
1616
1617  if (circularityTrack.contains(includeFile.getAbsolutePath())) {
1618     throw new IOException("Circularity detected when including: " + includeFile.getCanonicalPath() + "\nAlready included the following files: " + circularityTrack);
1619    }
1620
1621  tree.add(new MollyComment(
1622    "//>>>START INCLUDE from: " + includeFile.getAbsolutePath()));
1623    
1624  /*
1625    PageParser pp = new PageParser(contextRoot, includeFile, out, classname, log);
1626    pp.includeMode().parse();  //writes to out
1627  */
1628  
1629  in.insertIntoStream(includeFile);
1630
1631  /* this is printed immediately before the inserted contents can be processed, so don't add this */
1632  /*
1633  tree.add(new MollyComment(
1634    "//>>>END INCLUDE from: " + includeFile.getAbsolutePath()));
1635  */
1636  
1637  circularityTrack.add(includeFile.getAbsolutePath());
1638  }
1639
1640  
1641void writePage() throws IOException
1642  { 
1643  if (! includeMode)
1644    {
1645    if (directives.containsKey(d_src_encoding)) {
1646      this.src_encoding = (String) directives.get(d_src_encoding);
1647      this.src_encoding = removeLeadingTrailingQuote(this.src_encoding);
1648      }
1649  
1650    //create a appropriate PrintWriter based on either the default
1651    //java encoding or the page specified java encoding
1652    //the java source file will be written out in this encoding
1653  
1654    FileOutputStream  fout = new FileOutputStream(outputFile);
1655    OutputStreamWriter  fw   = (src_encoding != null) ?
1656        new OutputStreamWriter(fout, src_encoding) :
1657        new OutputStreamWriter(fout, Page.DEFAULT_SRC_ENCODING);
1658        
1659    out = new PrintWriter(new BufferedWriter(fw));
1660    }
1661    
1662  if (! includeMode) 
1663    {
1664    writePackage();
1665    writeImports();
1666    
1667    o ("public class ");
1668    o (classname);
1669    ol(" extends fc.web.page.PageImpl");
1670    ol("{");
1671    }
1672
1673  writeFields();
1674
1675  if (! includeMode) {
1676    writeConstructor();
1677    }
1678    
1679  writeMethods();
1680  
1681  if (! includeMode) {
1682    ol("}");
1683    }
1684  }
1685
1686void writePackage()
1687  {
1688  o ("package ");
1689  o (packagename);
1690  ol(";");
1691  ol();
1692  }
1693  
1694void writeImports() throws IOException
1695  {
1696  ol("import javax.servlet.*;");
1697  ol("import javax.servlet.http.*;");
1698  ol("import java.io.*;");
1699  ol("import java.util.*;");
1700  //write this in case (very rare) that a page overrides the 
1701  //Page.init()/destory methods [we need pageservlet for init(..)]
1702  ol("import fc.web.page.PageServlet;");
1703  for (int n = 0; n < imps.size(); n++) {
1704    ((Element)imps.get(n)).render();
1705    ol();
1706    }
1707  ol();
1708  }
1709
1710void writeFields()
1711  {
1712  }
1713
1714void writeConstructor()
1715  {
1716  }
1717
1718void writeMethods() throws IOException
1719  {
1720  writeDeclaredMethods();
1721  writeIncludedMethods();
1722  writeRenderMethod();
1723  }
1724  
1725void writeDeclaredMethods() throws IOException
1726  {
1727  for (int n = 0; n < decl.size(); n++) {
1728    ((Element)decl.get(n)).render();
1729    }
1730  
1731  if (decl.size() > 0)
1732    ol();
1733  }
1734
1735void writeIncludedMethods() throws IOException
1736  {
1737  for (int n = 0; n < inc_decl.size(); n++) {
1738    ((Element)inc_decl.get(n)).render();
1739    }
1740    
1741  if (inc_decl.size() > 0)
1742    ol();
1743  }
1744
1745void writeRenderMethod() throws IOException
1746  {
1747  if  (! includeMode) {
1748    writeRenderTop();
1749    }
1750
1751  //if (! (e instanceof Comment || e instanceof Decl || e instanceof MollyComment)) {
1752
1753  boolean removeInitialEmpty = directives.containsKey(d_remove_initial_whitespace);
1754
1755  boolean firstNonWhiteSpaceNodeSeen = false;
1756  
1757  //render entire tree
1758  for (int n = 0; n < tree.size(); n++) 
1759    {
1760    Element elem = (Element)tree.get(n);
1761    
1762    if (elem instanceof Text) {
1763      Text t = (Text) elem;
1764      //System.out.println("before whitespace removal: " + elem);
1765      if (removeInitialEmpty && ! firstNonWhiteSpaceNodeSeen) {
1766        if (t.isOnlyWhiteSpaceNode()) {
1767          t.clear();
1768          }
1769        else{
1770          t.removeInitialEmptyLines();
1771          firstNonWhiteSpaceNodeSeen = true;
1772          }
1773        }
1774      //System.out.println("after whitespace removal: " + elem);
1775      //System.out.println("---------------------------");
1776      }
1777  
1778    elem.render();
1779    }
1780    
1781  if (! includeMode) {
1782    writeRenderBottom();
1783    }
1784      
1785  }
1786  
1787void writeRenderTop() throws IOException
1788  {
1789  ol("public void render(HttpServletRequest req, HttpServletResponse res) throws Exception");
1790  ol("\t{");
1791    ol("  /* for people used to typing 'request/response' */");
1792  ol("  final HttpServletRequest  request = req;");
1793  ol("  final HttpServletResponse response = res;");
1794  ol();
1795  //mime+charset
1796  String content_type = "";
1797  if (directives.containsKey(d_mimetype)) 
1798    {
1799    String mtype = (String) directives.get(d_mimetype);
1800    if (!  (mtype.equals("") || mtype.equals(mimetype_none)) ) 
1801      {
1802      mtype = removeLeadingTrailingQuote(mtype);
1803      content_type += mtype;
1804      }
1805    } 
1806  else{
1807    content_type += Page.DEFAULT_MIME_TYPE;
1808    }
1809
1810    
1811  if (directives.containsKey(d_encoding)) {
1812    String encoding = (String) directives.get(d_encoding);
1813    encoding = removeLeadingTrailingQuote(encoding);
1814    /*an empty encoding means that the encoding is specified in the
1815    html header*/
1816    if (! encoding.trim().equals("")) { 
1817      content_type += "; charset=";
1818      content_type += encoding; 
1819      }
1820    }
1821  else{
1822    content_type += "; charset=";
1823    content_type += Page.DEFAULT_ENCODING;
1824    }
1825
1826  o ("  res.setContentType(\""); o (content_type); ol("\");");
1827
1828  //buffer
1829  if (directives.containsKey(d_buffersize)) {
1830    o ("  res.setBufferSize(");
1831    o (directives.get(d_buffersize));
1832    ol(");");
1833    }
1834    
1835  //stream or writer
1836  boolean stream = false;
1837  if (directives.containsKey(d_out)) 
1838    {
1839    String stream_type = ((String) directives.get(d_out)).toLowerCase().intern();
1840
1841    if (stream_type == d_out_stream1 || stream_type == d_out_stream2) {
1842      stream = true;
1843      }
1844    else if (stream_type == d_out_writer) {
1845      stream = false;
1846      }
1847    else{
1848      error("Did not understand directive [directive name=out, value=" + stream_type + "]. Choose between (" +  d_out_stream1 + ") and (" + d_out_writer + ")");
1849      }
1850    }
1851    
1852  if (stream)
1853    ol("  ServletOutputStream out = res.getOutputStream();");
1854  else
1855    ol("  PrintWriter out = res.getWriter();");
1856
1857  }
1858
1859void writeRenderBottom() throws IOException
1860  {
1861  ol();
1862  ol("\t} //~render end");
1863  }
1864
1865
1866/*
1867int tabcount = 1;
1868String tab = "\t";
1869void tabInc() {
1870  tab = StringUtil.repeat('\t', ++tabcount);
1871  }
1872void tabDec() {
1873  tab = StringUtil.repeat('\t', --tabcount);
1874  }
1875*/
1876
1877abstract class Element {
1878  abstract void render() throws IOException;
1879  //text, include etc., implement this as needed. 
1880  void addExp(Exp e) {  
1881    throw new RuntimeException("Internal error: not implemented by this object"); 
1882    }
1883  }
1884    
1885//this should NOT be added to the tree directly but added to Text or Hash
1886//via the addExp() method. This is because exps must be printed inline
1887class Exp extends Element
1888  {
1889  String str;
1890  
1891  Exp(CharArrayWriter buf) {
1892    this.str = buf.toString();
1893    if (dbg) dbgread("<new EXP> "+ str); 
1894    }
1895
1896  void render() {
1897    o("out.print  (");
1898    o(str);
1899    ol(");");
1900    }
1901    
1902  public String toString() {
1903    return "Exp: [" + str + "]";
1904    }
1905  }
1906
1907//this is of course mofo crazy but that's java regex embedded into java strings for you
1908final String whiteSpaceOnlyPat = "^(\\\\n(?<!\\\\)|\\\\r(?<!\\\\)|\\\\t(?<!\\\\)| )*$";
1909final String whiteSpaceBeginPat = "^(\\\\n(?<!\\\\)|\\\\r(?<!\\\\)|\\\\t(?<!\\\\)| )*";
1910
1911class Text extends Element
1912  {
1913  String  offset_space;
1914  final   List list = new ArrayList();
1915  int   nodeNumber = textNodeCounter++;
1916  
1917  //each text section is parsed by a text node. Within EACH text
1918  //node, we split it's contained text into separate lines and
1919  //generate code to print each line with a "out.println(...)"
1920  //statement. This maintains the same source order as the molly
1921  //page. If we munge together everything and print all of it's
1922  //contents with just one out.println(...)" statement, we would
1923  //get one large line with embedded \n and that would make
1924  //things more difficult to co-relate with the source file.
1925
1926  Text(final String offset, final CharArrayWriter b) 
1927    {
1928    if (offset == null) {
1929      offset_space = "\t";
1930      }
1931    else{
1932      offset_space = "\t" + offset;
1933      }
1934  
1935    final char[] buf = b.toCharArray();
1936
1937    boolean prevWasCR = false;
1938    //jdk default is 32. we say 256. not too large, maybe
1939    //less cache pressure. not too important, gets resized
1940    //as needed anyway.
1941    final CharArrayWriter tmp = new CharArrayWriter(256);
1942    
1943    //intead of tmp.size() > MAX_TEXT_LITERAL_LENGTH  in the first if statement below, make
1944    //things faster by avoiding a method call in a loop and using a local variable tcount
1945    int tcount = 0;
1946  
1947    for (int i=0, j=1; i < buf.length; i++, j++) 
1948      {
1949      char c = buf[i];
1950      tcount++;
1951      if (tcount > MAX_TEXT_LITERAL_LENGTH || j == buf.length) {
1952        tmp.append(quote(c));
1953        list.add(tmp.toString());
1954        tmp.reset();
1955        tcount = 0;
1956        }
1957      else if (c == '\n') {
1958        tmp.append(quote(c));
1959        if (! prevWasCR) {
1960          list.add(tmp.toString());
1961          tmp.reset();
1962          tcount = 0;
1963          }
1964        }
1965      else if (c == '\r') {
1966        tmp.append(quote(c));
1967        list.add(tmp.toString());
1968        tmp.reset();
1969        tcount = 0;
1970        prevWasCR = true;
1971        }
1972      else{
1973        tmp.append(quote(c));
1974        prevWasCR = false;
1975        }
1976      }
1977
1978    if (dbg) {
1979      String classname = getClass().getName();
1980      dbgread("<new " + classname.substring(classname.indexOf("$")+1,classname.length()) + ">",list); 
1981      }
1982    }
1983
1984  Text(CharArrayWriter b) 
1985    {
1986    this(null, b);
1987    }
1988    
1989  void addExp(Exp e)
1990    {
1991    list.add(e);
1992    }
1993
1994  void render() 
1995    {
1996    for (int i=0; i<list.size(); i++) 
1997      {
1998      Object obj = list.get(i); //can be String or Exp
1999      if (obj instanceof Exp) {
2000        o(offset_space);
2001        ((Exp)obj).render();
2002        }
2003      else{
2004        o(offset_space);
2005        o("out.print  (\"");
2006        o(obj);
2007        ol("\");"); 
2008        }
2009      }
2010    } //render
2011
2012  boolean isOnlyWhiteSpaceLine(String s)
2013    {
2014    return s.matches(whiteSpaceOnlyPat);
2015    }
2016
2017  // one or all lines (each line being a separate list item) in this text node are white space
2018  // example 3 lines: ['\n', '\n', ' \t\n'] -> true
2019  // example 3 lines: ['\n', '\n', Exp Node, ' \t\n'] -> false
2020  boolean isOnlyWhiteSpaceNode()
2021    { 
2022    for (int n = 0; n < list.size(); n++) {
2023      Object obj = list.get(n);
2024      if (! (obj instanceof String)) {
2025        return false;
2026        }
2027      if (! isOnlyWhiteSpaceLine((String)obj)) {
2028        return false;
2029        }
2030      } 
2031    return true;
2032    }
2033
2034  // some lines (each line being a separate list item) in this text node are white space
2035  // before: ['\n', '\n', ' \n\thello\n', Exp Node, '\n']
2036  // after:  ['hello\n', Exp Node, '\n']
2037  void removeInitialEmptyLines()
2038    {
2039    Iterator it = list.iterator();  //have to use iterator when removing while transversing
2040    while (it.hasNext()) 
2041      {
2042      Object obj = it.next();
2043      if (! (obj instanceof String)) {
2044        break;
2045        }
2046      String s = (String)obj;
2047      if (isOnlyWhiteSpaceLine(s)) {
2048        it.remove();
2049        }
2050      else{
2051        s.replaceFirst(whiteSpaceBeginPat, "");
2052        break;
2053        }
2054      }
2055    }
2056
2057  //clear all contents of this node - used only for white space removal   
2058  void clear()
2059    {
2060    list.clear();
2061    }
2062  
2063  public String toString() {
2064    StringBuilder buf = new StringBuilder();
2065    buf.append("Text (#" + nodeNumber + "):");
2066    if (list.size() == 0) {
2067      append("<EMPTY>");
2068      }
2069    else{
2070      for (int n = 0; n < list.size(); n++) {
2071        buf.append("[");
2072        buf.append(StringUtil.viewableAscii(String.valueOf(list.get(n))));
2073        buf.append("]");
2074        if (n + 1 < list.size()) {
2075          buf.append(",");
2076          }
2077        }
2078      }
2079    return buf.toString();
2080    }
2081  
2082  }
2083
2084class Hash extends Text
2085  {
2086  Hash(final String offset, final CharArrayWriter b) 
2087    {
2088    super(offset, b);
2089    }
2090
2091  //same as super.render() except for j == list.size() o/ol() below
2092  void render() 
2093    {
2094    for (int i=0, j=1; i<list.size(); i++, j++) 
2095      {
2096      Object obj = list.get(i); //can be String or Exp
2097      if (obj instanceof Exp) {
2098        o(offset_space);
2099        ((Exp)obj).render();
2100        }
2101      else{
2102        o(offset_space);
2103        o("out.print  (\"");
2104        o(obj);
2105        
2106        if (j == list.size()) 
2107          o ("\");");
2108        else
2109          ol("\");"); 
2110        }
2111      }
2112    } //render
2113
2114  public String toString() {
2115    return "Hash: " + list;
2116    }
2117  }
2118
2119class Heredoc extends Text
2120  {
2121  Heredoc(final CharArrayWriter buf) 
2122    {
2123    super(null, buf);
2124    }
2125
2126  //override, exp cannot be added to heredoc sections
2127  void addExp(Exp e)
2128    {
2129    throw new IllegalStateException("Internal implementation error: this method should not be called for a Heredoc object");
2130    }
2131    
2132  void render() 
2133    {
2134    for (int i=0, j=1; i<list.size(); i++, j++) 
2135      {
2136      Object obj = list.get(i); 
2137      o(offset_space);
2138      o("out.print  (\"");
2139      o(obj);
2140      ol("\");"); 
2141      }
2142    } //render
2143
2144  public String toString() {
2145    return "Heredoc: " + list;
2146    }
2147
2148  }
2149
2150class Code extends Element
2151  {
2152  List list = new ArrayList();
2153  
2154  Code(CharArrayWriter b) 
2155    {
2156    //we split the code section into separate lines and 
2157    //print each line with a out.print(...). This maintains
2158    //the same source order as the molly page. If we munge together
2159    //everything, we would get one large line with embedded \n
2160    //and that would make things more difficult to co-relate.
2161    final char[] buf = b.toCharArray();
2162    CharArrayWriter tmp = new CharArrayWriter();
2163    for (int i=0, j=1; i < buf.length; i++, j++) {
2164      char c = buf[i];   
2165      if (j == buf.length) { //end of buffer
2166        tmp.append(c);
2167        list.add(tmp.toString());
2168        tmp.reset();
2169        }
2170      else if (c == '\n') {
2171        tmp.append(c);
2172        list.add(tmp.toString());
2173        tmp.reset();
2174        }
2175      else
2176        tmp.append(c);
2177      }
2178    if (dbg) {
2179      String classname = getClass().getName();
2180      dbgread("<new " + classname.substring(classname.indexOf("$")+1,classname.length()) + ">",list); 
2181      }
2182    }
2183
2184  void render() {
2185    for (int i = 0; i < list.size(); i++) {
2186      o('\t');
2187      o(list.get(i));
2188      }
2189    }
2190    
2191  public String toString() {
2192    return "Code: " + list;
2193    }
2194  }
2195
2196class Comment extends Element
2197  {
2198  String str;
2199  
2200  Comment(CharArrayWriter buf) {
2201    this.str = buf.toString();
2202    if (dbg) dbgread("<new COMMENT> "+ str); 
2203    }
2204
2205  void render() {
2206    //we don't print commented sections
2207    }
2208
2209  public String toString() {
2210    return "Comment: [" + str + "]";
2211    }
2212  }
2213
2214class Decl extends Code
2215  {
2216  Decl(CharArrayWriter buf) {
2217    super(buf);
2218    }
2219
2220  void render() {
2221    for (int i = 0; i < list.size(); i++) {
2222      o (list.get(i));
2223      }
2224    }
2225  }
2226
2227/* base class for Forward and Include */
2228class ForwardIncludeElement extends Element
2229  {
2230  List    parts = new ArrayList();
2231  boolean useBuf = false;
2232  
2233  // the following is for includes with expressions 
2234  // [include foo[=i].html]  
2235  // i could be 1,2,3.. the parser adds the xpression [=i] to this
2236  // object if it's present via the addExp method below
2237  void add(CharArrayWriter buf)
2238    {
2239    parts.add(buf.toString().trim());
2240    if (parts.size() > 1) {
2241      useBuf = true;
2242      }
2243    }
2244
2245  void addExp(Exp e)
2246    {
2247    parts.add(e);
2248    useBuf = true;
2249    }
2250
2251  void render() throws IOException
2252    {
2253    if (parts.size() == 0) {
2254      //log.warn("possible internal error, parts.size()==0 in Forward");
2255      return;
2256      }
2257
2258    ol("\t{ //this code block gives 'rd' its own namespace");
2259  
2260    if (! useBuf) {
2261      o ("\tfinal RequestDispatcher rd = req.getRequestDispatcher(\"");
2262      //only 1 string
2263      o (removeLeadingTrailingQuote(parts.get(0).toString())); 
2264      ol("\");");
2265      }
2266    else{
2267      ol("\tfinal StringBuilder buf = new StringBuilder();");
2268      for (int n = 0; n < parts.size(); n++) {
2269        Object obj = parts.get(n);
2270        if ( n == 0 || (n + 1) == parts.size() ) {
2271          obj = removeLeadingTrailingQuote(obj.toString());
2272          }
2273        if (obj instanceof String) {
2274          o ("\tbuf.append(\"");
2275          o (obj);
2276          ol("\");");
2277          }
2278        else{
2279          o ("\tbuf.append(");
2280          o ( ((Exp)obj).str );
2281          ol(");");
2282          }
2283        } //for
2284      ol("\tfinal RequestDispatcher rd = req.getRequestDispatcher(buf.toString());");
2285      } //else
2286    }
2287
2288
2289  public String toString() {
2290    return "Forward: " + parts;
2291    }
2292  }
2293
2294/* a request dispatcher based include. */
2295class Include extends ForwardIncludeElement
2296  {
2297  Include() {
2298    if (dbg) dbgread("<new INCLUDE> "); 
2299    }
2300    
2301  void render() throws IOException
2302    {
2303    super.render();
2304    ol("\trd.include(req, res);");
2305    ol("\t}   //end rd block");
2306    }
2307
2308  /* uses parent toString */
2309  }
2310
2311/* a request dispatcher based forward */
2312class Forward extends ForwardIncludeElement
2313  {
2314  Forward() {
2315    if (dbg) dbgread("<new FORWARD>"); 
2316    }
2317
2318  void render() throws IOException
2319    {
2320    super.render();
2321    ol("\t//WARNING: any uncommitted page content before this forward will be discarded.");
2322    ol("\t//If the response has already been committed an exception will be thrown. ");
2323
2324    ol("\trd.forward(req, res);");
2325
2326    ol("\t//NOTE: You should 'return' right after this line. There should be no content in your ");
2327    ol("\t//page after the forward statement");
2328    ol("\t}   //end rd block");
2329    }
2330
2331  /* uses parent toString */
2332  }
2333
2334
2335/* a molly mechanism to include an external file containing code and method
2336   declarations. These are typically commom utility methods and global
2337   vars. The included file is not parsed by the molly parser... the contents
2338   are treated as if they were written directly inside a [!....!] block.
2339*/ 
2340class IncludeDecl extends Element
2341  {
2342  String str;
2343  String opt;
2344  
2345  IncludeDecl(CharArrayWriter buf) {
2346    if (dbg) dbgread("<new INCLUDE-DECL> "); 
2347    str = removeLeadingTrailingQuote(buf.toString().trim());
2348    }
2349  
2350  void setOption(String opt) {
2351    this.opt = opt;
2352    }
2353  
2354  void render() throws IOException
2355    {
2356    File f = null;
2357    File parentDir = inputFile.getParentFile();
2358    if (parentDir == null) {
2359      parentDir = new File(".");
2360      }
2361
2362    final int strlen = str.length();
2363    
2364    if (str.startsWith("\"") || str.startsWith("'")) 
2365      {
2366      if (strlen == 1) //just " or ' 
2367        throw new IOException("Bad include file name: " + str);
2368        
2369      str = str.substring(1, strlen);
2370      }
2371
2372    if (str.endsWith("\"") || str.endsWith("'")) 
2373      {
2374      if (strlen == 1) //just " or ' 
2375        throw new IOException("Bad include file name: " + str);
2376        
2377      str = str.substring(0, strlen-1);
2378      }
2379
2380    if (str.startsWith("/"))
2381      f = new File(contextRoot, str);
2382    else
2383      f = new File(parentDir, str);
2384    
2385    /* f = new File(parentDir, str); */
2386    
2387    if (! f.exists()) {
2388      throw new IOException("Include file does not exist: " + f.getCanonicalPath());
2389      }
2390
2391    o("//>>>START INCLUDE DECLARTIONS from: ");
2392    o(f.getAbsolutePath());
2393    ol();
2394        
2395    o(IOUtil.inputStreamToString(new FileInputStream(f)));
2396  
2397    o("//>>>END INCLUDE DECLARATIONS from: ");
2398    o(f.getAbsolutePath());
2399    ol();
2400    
2401    //circularities are tricky, later
2402    //includeMap.put(pageloc, f.getCanonicalPath());
2403    }
2404
2405  public String toString() {
2406    return "IncludeDecl: [" + str + "; options: " + opt + "]";
2407    }
2408  }
2409
2410class Import extends Code
2411  {
2412  Import(CharArrayWriter buf) {
2413    super(buf);
2414    }
2415
2416  void render() {
2417    for (int i = 0; i < list.size(); i++) {
2418      o (list.get(i));
2419      }
2420    }
2421  }
2422
2423class MollyComment extends Element
2424  {
2425  String str;
2426  
2427  MollyComment(String str) {
2428    this.str = str;
2429    if (dbg) dbgread("<new MollyComment> "+ str); 
2430    }
2431
2432  void render() {
2433    ol(str);
2434    }
2435    
2436  public String toString() {
2437    return "MollyComment: [" + str + "]";
2438    }
2439  }
2440  
2441/**
2442removes starting and trailing single/double quotes. used by the
2443include/forward render methods only, NOT used while parsing.
2444*/
2445private static String removeLeadingTrailingQuote(String str)
2446  {
2447  if (str == null)
2448    return str;
2449
2450  if ( str.startsWith("\"") || str.startsWith("'") )  {
2451    str = str.substring(1, str.length());
2452    }
2453
2454  if ( str.endsWith("\"") || str.endsWith("'") ) {
2455    str = str.substring(0, str.length()-1); 
2456    }
2457
2458  return str;
2459  }
2460
2461//===============================================
2462
2463public static void main (String args[]) throws IOException
2464  {
2465  Args myargs = new Args(args);
2466  myargs.setUsage("java " + myargs.getMainClassName() 
2467    + "\n"
2468      + "Required params:\n"
2469    + "     -classname output_class_name\n" 
2470    + "     -in        input_page_file\n"
2471    + "\nOptional params:\n" 
2472    + "     -encoding    <page_encoding>\n"
2473    + "     -contextRoot <webapp root-directory or any other directory>\n"
2474    + "        this directory is used as the starting directory for absolute (starting\n"
2475    + "        with a \"/\") include/forward directives in a page>. If not specified\n"
2476    + "        defaults to the same directory as the page file\n"
2477    + "     -out <output_file_name>\n"
2478    + "        the output file is optional and defaults to the standard out if not specified."
2479    );
2480  //String encoding = myargs.get("encoding", Page.DEFAULT_ENCODING);
2481
2482  File input     = new File(myargs.getRequired("in"));
2483  File contextRoot = null;
2484  
2485  if (myargs.flagExists("contextRoot"))
2486    contextRoot = new File(myargs.get("contextRoot"));
2487  else
2488    contextRoot = input;
2489
2490  PrintWriter output;
2491  
2492  if (myargs.get("out") != null)
2493    output = new PrintWriter(new FileWriter(myargs.get("out")));
2494  else
2495    output = new PrintWriter(new OutputStreamWriter(System.out));
2496    
2497  PageParser parser = new PageParser(contextRoot, input, output, myargs.getRequired("classname"), Log.getDefault());
2498  parser.parse();
2499  }
2500
2501}