001// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 002// The Molly framework is freely distributable under the terms of an 003// MIT-style license. For details, see the molly pages web site at: 004// http://www.mollypages.org/. Use, modify, have fun ! 005 006package fc.web.page; 007 008import java.io.*; 009import java.util.*; 010import java.util.regex.*; 011import fc.io.*; 012import fc.util.*; 013 014/* 015NOTES 016 017Code blocks of the form 018 [...] 019cause problems with java arrays 020 021String[] or foo[4] etc.., craps out. So we need to use 022 [[...]] 023for the molly code blocks 024 0251. If you are hacking this file, start with parseText() 026 0272. Turn the dbg flag to true to see how the parser works 028 0293. Keep in mind that the order of switch'es in a case statement in various 030methods is not always arbitrary (the order matters in this sort 031of recursive descent parsing) 032 0334. Read www.mollypages.org/page/grammar/index.mp for a intro to parsing 034 0355. This parser as shipped has a set of regression tests in the fc/web/page/test directory. 036These consist of a bunch of *.mp files and corresponding *.java files, each of which is 037known to be generated properly. If you change stuff around, run these regression tests 038again by invoking "java fc.web.page.PageParserTest" Note, if you change things such that 039the .java output of the parser is different, then the tests will fail (since the new .java 040files of your parser will be different to the test ones shipped in fc/web/page/test. In 041this case, once you know that your parser works as you like it, then you should create a 042new baseline for your parser by invoking "java fc.web.page.PageParserTest 043-generateExpected" and then you can use *that* as the new baseline for further changes in 044your parser (you may have to modify the *.mp files in /fc/web/page/test to use your new 045page syntax). 046 0476. 
When including files, previous versions of the parser constructed a new IncludeFile element
which would be invoked when the page was written out. When invoked (via render), that element
would create a new PageParser and set includeMode = true on that new parser. This new parser
would then parse/write out the subtree of the included file in-line.

This inline processing had issues since the included file could not contain import statements,
declarations, etc (since those had already been written out by the parent/top level parser).
Another hack was to pass the child parser the parent/top level object and give access to the
top level parse root to the child parser (the child parser would have to be invoked immediately
anyway). Also, since inner classes for parse elements are non-static, the separate parser would
create a parse tree, and although it would add those classes to the top most parse tree, the
classes themselves (when trying to write) would refer to the separate output stream of the child
class (the output stream would also have to be set to the parent class). It was doable but gets
unnecessarily complex.

The only benefit of a separate parser is being able to print start/end sections:
>> start include file
 [..invoke child parser]
>> end include file

In the current/cleaner approach, I simply insert the included file into the character stream.
But there isn't any easy way to track when that stream finishes and the original content starts
again. So we get:

>> start include file
[..include into original stream and continue parsing]
-- no end include file line --

*/

/**
Parses a page and writes out the corresponding java file to the specified output. The
parser and scanner are combined into one class here for simplicity (a separate scanner is
overkill for a simple LL(1) grammar such as molly pages).

@author hursh jain
*/
public final class PageParser
{
//when true, the parse methods trace their entry/exit via dbgenter()/dbgexit()
private static final boolean dbg = false;
//sentinel compared against the value returned by in.read() to detect end of input
private static final int EOF = -1;
private int dbgtab = 0;

String classname;
String packagename = Page.PACKAGE_NAME;
//reader over the input page; created from the input file by the constructors
PageReader in;
PrintWriter out;
Log log;
File inputFile;
File outputFile;
File contextRoot;
boolean includeMode = false;
String src_encoding;
int textNodeCounter = 0;

//Read data
//we use these since stringbuffer/builders do not have a clear/reset function
//buf accumulates the characters of the section currently being parsed (see append(..))
CharArrayWriter buf = new CharArrayWriter(4096);
CharArrayWriter wsbuf = new CharArrayWriter(32); // ^(whitespace)*
//the most recently read character (or EOF)
int c = EOF;

//PageData
List decl = new ArrayList(); //declarations
List inc_decl = new ArrayList(); //external included declarations
List imps = new ArrayList(); //imports
List tree = new ArrayList(); //code, exp, text etc.
Map directives = new HashMap(); //page options
Set circularityTrack = new HashSet(); //track pages already included to stop circular refs

/**
The name ("mimetype") of the [page mimetype=....] directive. The value of <tt>none</tt> or an
empty string will turn off writing any mimetype entirely (the user can then write a
mimetype via the {@link javax.servlet.ServletResponse#setContentType} method manually).
<p>
Note, from {@link
javax.servlet.ServletResponse#setContentType ServletResponse}
<pre>
Note that the character encoding cannot be communicated via HTTP headers if the servlet
does not specify a content type; however, it is still used to encode text written via the
servlet response's writer.
</pre>
*/
public static String d_mimetype = "mimetype";

/*
this value (or an empty string) for mimetype means no mimetype
will be specified (not even the default mimetype)
*/
public static String mimetype_none = "none";

/**
The name ("encoding") of the [page encoding=....] directive.
*/
public static String d_encoding = "encoding";

/**
The name ("src-encoding") of the [page src-encoding=....] directive.
*/
public static String d_src_encoding = "src-encoding";

/** The name ("buffersize") of the [page buffersize=....] directive */
public static String d_buffersize = "buffersize";

/** The name ("out") of the [page out=....] directive */
public static String d_out = "out";
/** A value ("outputstream") of the [page out=outputstream] directive */
public static String d_out_stream1 = "outputstream";
/** A value ("stream") of the [page out=stream] directive */
public static String d_out_stream2 = "stream";
/** A value ("writer") of the [page out=writer] directive */
public static String d_out_writer = "writer";
/** The name of the ("remove-initial-whitespace") directive */
public static String d_remove_initial_whitespace = "remove-initial-whitespace";

/*
Any ".." will be saved as a constant in the constant pool in the class file by javac.
Although java strings, when stored on the heap and referenced by a variable, can be
up to Integer.MAX_VALUE - 1 (2,147,483,647 bytes, ~ 1 GB unicode) in length,
the constant pool strings are way smaller - 65,535 bytes, but this is not 100% clear and
is subject to future change?

We break any long lines beyond 65535 / 2 chars into multiple out("...") statements, where each out
statement contains 32772 chars max. This is used by text nodes for example. Relevant when a massive
chunk of html text exists without any newlines. Each character can double when quoted (" -> \") so to
be absolutely safe, we approximately halve the 65535 number.
*/
private static final int MAX_TEXT_LITERAL_LENGTH = 32772;

/*
This constructor for internal use.

The parser can be invoked recursively to parse included files as
well..that's what the includeMode() does (and this constructor is invoked
when including). 
When including, we already have a output writer 182created, we use that writer (instead of creating a new one based on 183src_encoding as we do for in normal page parsing mode). 184*/ 185private PageParser( 186 File contextRoot, File input, PrintWriter outputWriter, String classname, Log log) 187throws IOException 188 { 189 this.contextRoot = contextRoot; 190 this.inputFile = input; 191 this.in = new PageReader(input); 192 this.out = outputWriter; 193 this.classname = classname; 194 this.log = log; 195 196 circularityTrack.add(input.getAbsolutePath()); 197 } 198 199/** 200Creates a new page parser that will use the default log obtained by 201{@link Log#getDefault} 202 203@param contextRoot absolute path to the webapp context root directory 204@param input absolute path to the input page file 205@param input absolute path to the output file (to be written to). 206@param classname classname to give to the generated java class. 207*/ 208public PageParser(File contextRoot, File input, File output, String classname) 209throws IOException 210 { 211 this(contextRoot, input, output, classname, Log.getDefault()); 212 } 213 214/** 215Creates a new page parser. 216 217@param contextRoot absolute path to the webapp context root directory 218@param input absolute path to the input page file 219@param output absolute path to the output file (to be written to). 220@param classname classname to give to the generated java class. 221@log log destination for internal logging output. 
222*/ 223public PageParser( 224 File contextRoot, File input, File output, String classname, Log log) 225throws IOException 226 { 227 this.contextRoot = contextRoot; 228 this.inputFile = input; 229 this.in = new PageReader(input); 230 this.outputFile = output; 231 this.classname = classname; 232 this.log = log; 233 234 circularityTrack.add(input.getAbsolutePath()); 235 } 236 237void append(final int c) 238 { 239 Argcheck.istrue(c >= 0, "Internal error: recieved c=" + c); 240 buf.append((char)c); 241 } 242 243void append(final char c) 244 { 245 buf.append(c); 246 } 247 248void append(final String str) 249 { 250 buf.append(str); 251 } 252 253/* not used anymore */ 254PageParser includeMode() 255 { 256 includeMode = true; 257 return this; 258 } 259 260/** 261Parses the page. If the parse is successful, the java source will be 262generated. 263 264@throws IOException a parse failure occurred. The java source file 265 may or may not be properly generated or written 266 in this case. 267*/ 268public void parse() throws IOException 269 { 270 parseText(); 271 272 if (! includeMode) 273 { 274 writePage(); 275 out.close(); 276 } 277 else{ 278 out.flush(); 279 } 280 281 in.close(); 282 } 283 284//util method for use in the case '[' branch of parseText below. 285private Text newTextNode() 286 { 287 Text text = new Text(buf); 288 tree.add(text); 289 buf.reset(); 290 return text; 291 } 292 293 /* 294 Things always start off with a text node - all tags will be then processed as/when seen. 295 296 If there is any white space at the beginning (before any other directives/tags are seen), that 297 will become part of the first text node. Any white space *after* a directive/tag will become part 298 of the subsequent text node. 299 300 This is relevant when removing initial whitespace (if a directive to do so exists). 
If such a directive 301 exists, the strategy is to keep going down our list of parsed nodes, remove any text nodes that are 302 only whitespace, and then ignore any leading whitespace from the first non-empty text node we find. 303 304 The textnode itself escapes any whitespace with string escapes (a newline -> "\n") since that is 305 fed to the page out(..) as a string. So, to remove whitespaces once they are already escaped into 306 strings in the textnode data, we have to remove these escaped versions of those whitespaces. 307 308 To remove a newline, we have to search for "\n". However if the user had typed "\n" to begin with, 309 that would be converted to "\\n", so it gets very tricky to distinguish between what was typed 310 in the page and what we escaped. 311 312 In example (1), the page itself has 2 characters typed: [\, n] and so on. Anything not <NL> 313 is actually typed as a character in the page. 314 315 source text node output in browser 316 \n \\n out.print("\\n") \n (1) 317 \<NL> \\\n out.print("\\\n") \ (2) 318 \n<NL> \\n\n out.print("\\n\n") \n<NL> (3) 319 320 Carrying on: 321 \\n<NL> \\\\n\n out.print("\\\\n\n") \\n<NL> (4) 322 \\\n<NL> \\\\\\n\n out.print("\\\\\\n\n") \\\n<NL> (5) 323 324 To eliminate whitespaces, NL in this example, we have to search for: \n *BUT* ignore any \\n 325 326 \n \\n out.print("\\n") \n (1) 327 \\n "\\n" \n 1a: no newline in src! 328 329 \<NL> \\\n out.print("\\n\n") \<NL> (2) 330 \\ [\n] "\\" \ 2a: NL in src gone! 331 332 \n<NL> \\n\n out.print("\\n\n") \n<NL> (3) 333 \\n [\n] "\\n" \n 3a: NL in src gone! 334 335 \\n<NL> \\\\n\n out.print("\\\\n\n") \\n<NL> (4) 336 \\ \\n [\n] "\\\\n" \\n 4a: NL in src gone! 337 338 \\\n<NL> \\\\\\n\n out.print("\\\\\n\n") \\\n<NL> (5) 339 \\\n<NL> \\ \\ \\n [\n] out.print("\\\\\\n") \\\n 5a: NL in src gone! 340 341 It is quite hokey. If we use \s+ regular expression, that will search for actual NL characters. 
342 What we want is to search for '\','n' as long as it is not '\','\','n' 343 344 We could do this before we escape anything in the Text node itself (as the lexical level) but I've 345 done is post lexical - at writeRenderMethod(). Doing in after the parse step does not touch any 346 existing parsing/lexing code so its safer, a one and done deal right now basically. And when parsing, 347 Text nodes break multiple lines (separated by NL) into separate internal items in a list which makes 348 it easier for us - We just have to examine each internal item as a separate line. 349 350 Doing it while lexing - using in.skipWhitespace() - is also tricky because parsing is recursive 351 and we have to know when to invoke the skip and when not too. 352 */ 353void parseText() throws IOException 354 { 355 if (dbg) dbgenter(); 356 357 while (true) 358 { 359 c = in.read(); 360 361 if (c == EOF) { 362 tree.add(new Text(buf)); 363 buf.reset(); 364 break; 365 } 366 367 switch (c) 368 { 369 //Escape start tags 370 case '\\': 371 /* we don't need to do this: previously, expressions 372 were [...] but now they are [=...], previously we needed 373 to escape \[[ entirely (since if we escaped \[ the second 374 [ would start an expression 375 */ 376 /* 377 if (in.match("[[")) 378 append("[["); 379 */ 380 //escape only \[... otherwise leave \ alone 381 if (in.match("[")) 382 append("["); 383 else 384 append(c); 385 break; 386 387 case '[': 388 /* suppose we have 389 \[[ 390 escape handling above will capture \[ 391 then the second '[' drops down here. Good so far. 392 But we must not create a new text object here by 393 default...only if we see another [[ or [= or [include or 394 whatever. 395 */ 396 /* 397 But creating a text object at the top is easier 398 then repeating this code at every if..else branch below 399 but this creates superfluous line breaks. 
400 401 hello[haha]world 402 -->prints as--> 403 hello (text node 1) 404 [haha] (text node 2) 405 world (text node 3) 406 --> we want 407 hello[haha]world (text node 1) 408 */ 409 410 if (in.match('[')) { 411 newTextNode(); 412 parseCode(); 413 } 414 else if (in.match('=')) { 415 Text text = newTextNode(); 416 parseExpression(text); 417 } 418 else if (in.match('!')) { 419 newTextNode(); 420 parseDeclaration(); 421 } 422 else if (in.match("/*")) { 423 newTextNode(); 424 parseComment(); 425 } 426 else if (in.matchIgnoreCase("page")) { 427 newTextNode(); 428 parseDirective(); 429 } 430 //longest match: "include-file" etc., last: "include" 431 else if (in.matchIgnoreCase("include-file")) { 432 newTextNode(); 433 parseIncludeFile(); 434 } 435 else if (in.matchIgnoreCase("include-decl")) { 436 newTextNode(); 437 parseIncludeDecl(); 438 } 439 else if (in.matchIgnoreCase("include")) { 440 newTextNode(); 441 parseInclude(); 442 } 443 else if (in.matchIgnoreCase("forward")) { 444 newTextNode(); 445 parseForward(); 446 } 447 else if (in.matchIgnoreCase("import")) { 448 newTextNode(); 449 parseImport(); 450 } 451 else { 452 //System.out.println("c1=" + (char)c); 453 append(c); 454 } 455 break; 456 457 default: 458 //System.out.println("c2=" + (char)c); 459 append(c); 460 461 } //switch 462 } //while 463 464 if (dbg) dbgexit(); 465 } 466 467void parseCode() throws IOException 468 { 469 if (dbg) dbgenter(); 470 471 int startline = in.getLine(); 472 int startcol = in.getCol(); 473 474 while (true) 475 { 476 c = in.read(); 477 478 switch (c) /* the order of case tags is important. 
*/ 479 { 480 case EOF: 481 unclosed("code", startline, startcol); 482 if (dbg) dbgexit(); 483 return; 484 485 case '/': //Top level: // and /* comments 486 append(c); 487 c = in.read(); 488 append(c); 489 if (c == '/') 490 appendCodeSlashComment(); 491 else if (c == '*') 492 appendCodeStarComment(); 493 break; 494 495 case '"': //strings outside of any comment 496 append(c); 497 appendCodeString(); 498 break; 499 500 case '\'': 501 append(c); 502 appendCodeCharLiteral(); 503 break; 504 505 case ']': 506 if (in.match(']')) { 507 tree.add(new Code(buf)); 508 buf.reset(); 509 if (dbg) dbgexit(); 510 return; 511 } 512 else { 513 append(c); 514 } 515 break; 516 517 /* 518 a hash by itself on a line starts a hash section. 519 whitespace before the # on that line is used as an 520 printing 'out' statements for that hash. 521 522 for (int n = 0; n < ; n++) { 523 ....# foo # 524 | } 525 |=> 4 spaces 526 so nice if generated code looked like: 527 528 for (int n = 0; n < ; n++) { 529 out.print(" foo "); 530 } 531 */ 532 case '\n': 533 case '\r': 534 append(c); //the \n or \r just read 535 readToFirstNonWS(); //won't read past more newlines 536 //is '#' is first non-ws on this line ? 537 c = in.read(); 538 if (c == '#') { 539 tree.add(new Code(buf)); 540 buf.reset(); 541 //whitespace provides indentation offset 542 parseHash(wsbuf.toString()); 543 } 544 else{ 545 append(wsbuf.toString()); //wsbuf contains codetext 546 //let other cases also handle first non-ws or EOF 547 in.unread(); 548 } 549 break; 550 551 /* in this case, hash does not start on a new line, like: 552 for (...) 
{ # 553 */ 554 case '#': 555 tree.add(new Code(buf)); 556 buf.reset(); 557 parseHash(null); 558 break; 559 560 default: 561 append(c); 562 } //switch 563 } //while 564 } 565 566void parseHash(String offset) throws IOException 567 { 568 if (dbg) dbgenter(); 569 570 int startline = in.getLine(); 571 int startcol = in.getCol(); 572 573 while (true) 574 { 575 c = in.read(); 576 577 switch (c) 578 { 579 case EOF: 580 unclosed("hash", startline, startcol); 581 if (dbg) dbgexit(); 582 return; 583 584 //special case: very common and would be a drag to escape 585 //this every time: 586 // # <table bgcolor="#ffffff">.... # 587 //Now, all of: 588 // bgcolor="#xxx" 589 // bgcolor='#xxx' 590 // bgcolor="\#xxx" 591 //will work the same and give: bgcolor="#xxx" 592 //1) 593 //However to get a: 594 // bgcolor=#xxx (no quoted around #xxx) 595 //we still have to say: 596 // bgcolor=\#xxx 597 //2) 598 //Of course, since we special case this, then: 599 // #"bar"# 600 // that ending # is lost and we end up with 601 // #"bar" with no closing hash 602 //So we need to make sure that we write: 603 // #"bar" # 604 // instead 605 606 case '\'': 607 case '"': 608 append(c); 609 if (in.match('#')) 610 append('#'); 611 break; 612 613 case '\\': 614 if (in.match('[')) 615 append('['); 616 else if (in.match('#')) 617 append('#'); 618 else 619 append(c); 620 break; 621 622 case '[': 623 if (in.match('=')) { 624 Hash hash = new Hash(offset, buf); 625 tree.add(hash); 626 buf.reset(); 627 parseExpression(hash); 628 } 629 else{ 630 append(c); 631 } 632 break; 633 634 /* 635 this case is not needed but is a bit of a optimization 636 for (int n = 0; n < 1; n++) { 637 # 638 foo 639 ....#...NL 640 } 641 avoids printing the dots (spaces) and NL in this case 642 (the newline after foo is still printed) 643 */ 644 case '\n': 645 case '\r': 646 append(c); 647 readToFirstNonWS(); 648 c = in.read(); 649 //'#' is first non-ws on the line 650 if (c == '#') { 651 tree.add(new Hash(offset, buf)); 652 buf.reset(); 
653 //skipIfWhitespaceToEnd(); 654 if (dbg) dbgexit(); 655 return; 656 } 657 else { 658 append(wsbuf.toString()); 659 in.unread(); //let other cases also handle first non-ws 660 } 661 break; 662 663 case '#': 664 tree.add(new Hash(offset, buf)); 665 //skipIfWhitespaceToEnd(); 666 buf.reset(); 667 if (dbg) dbgexit(); 668 return; 669 670 default: 671 append(c); 672 } //switch 673 } //while 674 } 675 676/** 677[page <<<FOO] 678...as-is..no parse, no interpolation.. 679FOO 680*/ 681void parseHeredoc(StringBuilder directives_buf) throws IOException 682 { 683 if (dbg) dbgenter(); 684 685 int startline = in.getLine(); 686 int startcol = in.getCol(); 687 688 int i = directives_buf.indexOf("<<<"); /* "<<<".length = 3 */ 689 CharSequence subseq = directives_buf.substring( 690 i+3, 691 /*directives_buf does not have a ending ']' */ 692 directives_buf.length() 693 ); 694 695 final String heredoc = subseq.toString().trim(); 696 final int heredoc_len = heredoc.length(); 697 final CharArrayWriter heredoc_buf = new CharArrayWriter(2048); 698 699 /* 700 the ending heredoc after newline speeds things up a bit 701 which is why is traditionally used i guess, otherwise 702 we have to try a full match every first match. this 703 implementation doesn't care where the ending heredoc 704 appears (can be anywhere)...simplifies the implementation. 705 */ 706 707 while (true) 708 { 709 c = in.read(); 710 711 if (c == EOF) { 712 unclosed("heredoc: <<<"+heredoc, startline, startcol); 713 break; 714 } 715 716 if (c == heredoc.charAt(0)) 717 { 718 boolean matched = true; 719 if (heredoc_len > 1) { 720 matched = in.match(heredoc.substring(1)); 721 } 722 if (matched) { 723 tree.add(new Heredoc(heredoc_buf)); 724 break; 725 } 726 } 727 728 //default action 729 heredoc_buf.append((char)c); 730 } //while 731 732 if (dbg) dbgexit(); 733 } 734 735/* 736Text is the parent node for the expression. 
A new expression is parsed, 737created and added to the text object by this method 738*/ 739void parseExpression(Element parent) throws IOException 740 { 741 if (dbg) dbgenter(); 742 743 int startline = in.getLine(); 744 int startcol = in.getCol(); 745 746 while (true) 747 { 748 c = in.read(); 749 750 switch (c) 751 { 752 case EOF: 753 unclosed("expression", startline, startcol); 754 if (dbg) dbgexit(); 755 return; 756 757 case '\\': 758 if (in.match(']')) 759 append(']'); 760 else 761 append(c); 762 break; 763 764 case ']': 765 if (buf.toString().trim().length() == 0) 766 error("Empty expression not allowed", startline, startcol); 767 parent.addExp(new Exp(buf)); 768 buf.reset(); 769 if (dbg) dbgexit(); 770 return; 771 772 default: 773 append(c); 774 } 775 } 776 } 777 778void parseComment() throws IOException 779 { 780 if (dbg) dbgenter(); 781 782 int startline = in.getLine(); 783 int startcol = in.getCol(); 784 785 while (true) 786 { 787 c = in.read(); 788 789 switch (c) 790 { 791 case EOF: 792 unclosed("comment", startline, startcol); 793 if (dbg) dbgexit(); 794 return; 795 796 case '*': 797 if (in.match("/]")) 798 { 799 tree.add(new Comment(buf)); 800 buf.reset(); 801 if (dbg) dbgexit(); 802 return; 803 } 804 else 805 append(c); 806 break; 807 808 default: 809 append(c); 810 } 811 } 812 } 813 814void parseDeclaration() throws IOException 815 { 816 if (dbg) dbgenter(); 817 int startline = in.getLine(); 818 int startcol = in.getCol(); 819 820 while (true) 821 { 822 c = in.read(); 823 824 switch (c) 825 { 826 case EOF: 827 unclosed("declaration", startline, startcol); 828 if (dbg) dbgexit(); 829 return; 830 831 case '!': 832 if (in.match(']')) { 833 decl.add(new Decl(buf)); 834 buf.reset(); 835 if (dbg) dbgexit(); 836 return; 837 } 838 else{ 839 append(c); 840 } 841 break; 842 843 //top level // and /* comments, ']' (close decl tag) 844 //is ignored within them 845 case '/': 846 append(c); 847 c = in.read(); 848 append(c); 849 if (c == '/') 850 
appendCodeSlashComment(); 851 else if (c == '*') 852 appendCodeStarComment(); 853 break; 854 855 //close tags are ignored within them 856 case '"': //strings outside of any comment 857 append(c); 858 appendCodeString(); 859 break; 860 861 case '\'': 862 append(c); 863 appendCodeCharLiteral(); 864 break; 865 866 default: 867 append(c); 868 } 869 } 870 871 } 872 873void parseDirective() throws IOException 874 { 875 if (dbg) dbgenter(); 876 877 int startline = in.getLine(); 878 int startcol = in.getCol(); 879 880 StringBuilder directives_buf = new StringBuilder(1024); 881 882 while (true) 883 { 884 c = in.read(); 885 886 switch (c) 887 { 888 case EOF: 889 unclosed("directive", startline, startcol); 890 if (dbg) dbgexit(); 891 return; 892 893 case ']': 894 if (directives_buf.indexOf("<<<") >= 0) { 895 parseHeredoc(directives_buf); 896 } 897 else{/* other directives used at page-generation time */ 898 addDirectives(directives_buf); 899 } 900 901 if (dbg) dbgexit(); 902 return; 903 904 default: 905 directives_buf.append((char)c); 906 } 907 } 908 909 } 910 911//[a-zA-Z_\-0-9] == ( \w | - ) 912static final Pattern directive_pat = Pattern.compile( 913 //foo = "bar baz" (embd. spaces) 914 "\\s*([a-zA-Z_\\-0-9]+)\\s*=\\s*\"((?:.|\r|\n)+?)\"" 915 + "|" 916 //foo = "bar$@#$" (no spaces) OR foo = bar (quotes optional) 917 + "\\s*([a-zA-Z_\\-0-9]+)\\s*=\\s*(\\S+)" 918 ); 919 920 921void addDirectives(StringBuilder directives_buf) throws ParseException 922 { 923 if (dbg) { 924 dbgenter(); 925 System.out.println("-------directives section--------"); 926 System.out.println(directives_buf.toString()); 927 System.out.println("-------end directives-------"); 928 } 929 930 String name, value; 931 try { 932 Matcher m = directive_pat.matcher(directives_buf); 933 while (m.find()) 934 { 935 if (dbg) System.out.println(">>>>[0]->" + m.group() 936 + "; [1]->" + m.group(1) 937 + " [2]->" + m.group(2) 938 + " [3]->" + m.group(3) 939 + " [4]->" + m.group(4)); 940 941 name = m.group(1) != null ? 
m.group(1).toLowerCase() : 942 m.group(3).toLowerCase(); 943 value = m.group(2) != null ? m.group(2).toLowerCase() : 944 m.group(4).toLowerCase(); 945 946 if (name.equals(d_buffersize)) 947 { 948 //can throw parse exception 949 directives.put(name, 950 IOUtil.stringToFileSize(value.replace("\"|'",""))); 951 } 952 else if (name.equals(d_encoding)) { 953 directives.put(name, value.replace("\"|'","")); 954 } 955 else if (name.equals(d_src_encoding)) { 956 directives.put(name, value.replace("\"|'","")); 957 } 958 else if (name.equals(d_mimetype)) { 959 directives.put(name, value.replace("\"|'","")); 960 } 961 else if (name.equals(d_out)) { 962 directives.put(name, value.replace("\"|'","")); 963 } 964 else if (name.equals(d_remove_initial_whitespace)) { 965 directives.put(name, value.replace("\"|'","")); 966 } 967 //else if .... other directives here as needed.... 968 else 969 throw new Exception("Do not understand directive: " + m.group()); 970 } 971 if (dbg) System.out.println("Added directives: " + directives); 972 } 973 catch (Exception e) { 974 throw new ParseException("File: " + inputFile.getAbsolutePath() 975 + ";\n" + e.toString()); 976 } 977 978 if (dbg) dbgexit(); 979 } 980 981void parseIncludeFile() throws IOException 982 { 983 if (dbg) dbgenter(); 984 985 int startline = in.getLine(); 986 int startcol = in.getCol(); 987 String option = null; 988 989 while (true) 990 { 991 c = in.read(); 992 993 switch (c) 994 { 995 case EOF: 996 unclosed("include-file", startline, startcol); 997 if (dbg) dbgexit(); 998 return; 999 1000 case '[': 1001 if (in.match('=')) { 1002 //log.warn("Expressions cannot exist in file includes. Ignoring \"[=\" 1003 //in [include-file... section starting at:", startline, startcol); 1004 //instead of warn, we will error out. failing early is better. 1005 //this does preclude having '[=' in the file name, but it's a good 1006 //tradeoff 1007 error("Expressions cannot exist in file includes. 
The offending static-include section starts at:", startline, startcol); 1008 } 1009 append(c); 1010 break; 1011 1012 case ']': 1013 includeFile(buf, option); /* not added in the tree, just included in the stream */ 1014 buf.reset(); 1015 if (dbg) dbgexit(); 1016 return; 1017 1018 case 'o': 1019 if (! in.match("ption")) 1020 append(c); 1021 else{ 1022 skipWS(); 1023 if (! in.match("=")) { 1024 error("bad option parameter in file include: ", startline, startcol); 1025 } 1026 skipWS(); 1027 1028 int c2; 1029 StringBuilder optionbuf = new StringBuilder(); 1030 while (true) { 1031 c2 = in.read(); 1032 if (c2 == ']' || c2 == EOF || Character.isWhitespace(c2)) { 1033 in.unread(); 1034 break; 1035 } 1036 optionbuf.append((char)c2); 1037 } 1038 1039 option = optionbuf.toString(); 1040 //System.out.println(option); 1041 } //else 1042 break; 1043 1044 default: 1045 append(c); 1046 } 1047 } 1048 } 1049 1050void parseIncludeDecl() throws IOException 1051 { 1052 if (dbg) dbgenter(); 1053 1054 int startline = in.getLine(); 1055 int startcol = in.getCol(); 1056 String option = null; 1057 1058 while (true) 1059 { 1060 c = in.read(); 1061 1062 switch (c) 1063 { 1064 case EOF: 1065 unclosed("include-decl", startline, startcol); 1066 if (dbg) dbgexit(); 1067 return; 1068 1069 case '[': 1070 if (in.match('=')) { 1071 //log.warn("Expressions cannot exist in file includes. Ignoring \"[=\" in [include-static... section starting at:", startline, startcol); 1072 //we will throw an exception. failing early is better. this 1073 //does preclude having '[=' in the file name, but it's a good tradeoff 1074 error("Expressions cannot exist in include-decl. The offending static-include section starts at:", startline, startcol); 1075 } 1076 append(c); 1077 break; 1078 1079 case ']': 1080 IncludeDecl i = new IncludeDecl(buf); 1081 if (option != null) 1082 i.setOption(option); 1083 inc_decl.add(i); 1084 buf.reset(); 1085 if (dbg) dbgexit(); 1086 return; 1087 1088 case 'o': 1089 if (! 
in.match("ption")) 1090 append(c); 1091 else{ 1092 skipWS(); 1093 if (! in.match("=")) { 1094 error("bad option parameter in include-code: ", startline, startcol); 1095 } 1096 skipWS(); 1097 1098 int c2; 1099 StringBuilder optionbuf = new StringBuilder(); 1100 while (true) { 1101 c2 = in.read(); 1102 if (c2 == ']' || c2 == EOF || Character.isWhitespace(c2)) { 1103 in.unread(); 1104 break; 1105 } 1106 optionbuf.append((char)c2); 1107 } 1108 1109 option = optionbuf.toString(); 1110 //System.out.println(option); 1111 } //else 1112 break; 1113 1114 default: 1115 append(c); 1116 } 1117 } 1118 } 1119 1120//the filename/url can be optionally double quoted. leading/trailing 1121//double quotes (if any) are ignored when an include is rendered... 1122//this way there isn't any additional parsing needed here...I could 1123//ignore the optional quote here (and that's the formal proper way) 1124//and then not move the ignore quote logic into the render() method but 1125//this way is good too...and simpler.. 1126//same goes for the other parseIncludeXX/ForwardXX functions. 
1127void parseInclude() throws IOException 1128 { 1129 if (dbg) dbgenter(); 1130 1131 int startline = in.getLine(); 1132 int startcol = in.getCol(); 1133 Include include = new Include(); 1134 while (true) 1135 { 1136 c = in.read(); 1137 1138 switch (c) 1139 { 1140 case EOF: 1141 unclosed("include", startline, startcol); 1142 if (dbg) dbgexit(); 1143 return; 1144 1145 case '[': 1146 if (in.match('=')) { 1147 include.add(buf); 1148 buf.reset(); 1149 parseExpression(include); 1150 } 1151 else{ 1152 append(c); 1153 } 1154 break; 1155 1156 case ']': 1157 include.add(buf); 1158 tree.add(include); 1159 buf.reset(); 1160 if (dbg) dbgexit(); 1161 return; 1162 1163 default: 1164 append(c); 1165 } 1166 } 1167 } 1168 1169void parseForward() throws IOException 1170 { 1171 if (dbg) dbgenter(); 1172 1173 int startline = in.getLine(); 1174 int startcol = in.getCol(); 1175 1176 Forward forward = new Forward(); 1177 while (true) 1178 { 1179 c = in.read(); 1180 1181 switch (c) 1182 { 1183 case EOF: 1184 unclosed("forward", startline, startcol); 1185 if (dbg) dbgexit(); 1186 return; 1187 1188 case '[': 1189 if (in.match('=')) { 1190 forward.add(buf); 1191 buf.reset(); 1192 parseExpression(forward); 1193 } 1194 else{ 1195 append(c); 1196 } 1197 break; 1198 1199 case ']': 1200 forward.add(buf); 1201 tree.add(forward); 1202 buf.reset(); 1203 if (dbg) dbgexit(); 1204 return; 1205 1206 default: 1207 append(c); 1208 } 1209 } 1210 } 1211 1212//we need to parse imports seperately because they go outside 1213//a class declaration (and [!...!] goes inside a class) 1214//import XXX.*; 1215//class YYY { 1216//[!....stuff from here ....!] 1217//... 
1218void parseImport() throws IOException 1219 { 1220 if (dbg) dbgenter(); 1221 1222 int startline = in.getLine(); 1223 int startcol = in.getCol(); 1224 1225 while (true) 1226 { 1227 c = in.read(); 1228 1229 switch (c) 1230 { 1231 case EOF: 1232 unclosed("import", startline, startcol); 1233 if (dbg) dbgexit(); 1234 return; 1235 1236 case '\n': 1237 imps.add(new Import(buf)); 1238 buf.reset(); 1239 break; 1240 1241 case ']': 1242 imps.add(new Import(buf)); 1243 buf.reset(); 1244 if (dbg) dbgexit(); 1245 return; 1246 1247 default: 1248 append(c); 1249 } 1250 } 1251 } 1252 1253/* 1254Called when // was read at the top level inside a code block. Appends 1255the contents of a // comment to the buffer (not including the trailing 1256newline) 1257*/ 1258void appendCodeSlashComment() throws IOException 1259 { 1260 if (dbg) dbgenter(); 1261 1262 while (true) 1263 { 1264 c = in.read(); 1265 1266 if (c == EOF) 1267 break; 1268 1269 //do not append \r, \r\n, or \n, that finishes the // comment 1270 //we need that newline to figure out if the next line is a hash 1271 //line 1272 if (c == '\r') { 1273 in.unread(); 1274 break; 1275 } 1276 1277 if (c == '\n') { 1278 in.unread(); 1279 break; 1280 } 1281 1282 append(c); 1283 } 1284 1285 if (dbg) dbgread("CodeSLASHComment Finished: Buffer=" + StringUtil.viewableAscii(buf.toString())); 1286 if (dbg) dbgexit(); 1287 } 1288 1289/* 1290Called when /* was read at the top level inside a code block. Appends 1291the contents of a /*comment to the buffer. 
(not including any trailing 1292newline or spaces) 1293*/ 1294void appendCodeStarComment() throws IOException 1295 { 1296 if (dbg) dbgenter(); 1297 1298 while (true) 1299 { 1300 c = in.read(); 1301 1302 if (c == EOF) 1303 break; 1304 1305 append(c); 1306 1307 if (c == '*') 1308 { 1309 if (in.match('/')) { 1310 append('/'); 1311 break; 1312 } 1313 } 1314 } 1315 1316 if (dbg) dbgread("CodeSTARComment Finished: Buffer=" + StringUtil.viewableAscii(buf.toString())); 1317 if (dbg) dbgexit(); 1318 } 1319 1320/* 1321Called (outside of any comments in the code block) when: 1322--> parseCode() 1323 ... " 1324 ^ (we are here) 1325*/ 1326void appendCodeString() throws IOException 1327 { 1328 if (dbg) dbgenter(); 1329 1330 int startline = in.getLine(); 1331 int startcol = in.getCol(); 1332 1333 while (true) 1334 { 1335 c = in.read(); 1336 1337 if (c == EOF || c == '\r' || c == '\n') 1338 unclosed("string literal", startline, startcol); 1339 1340 append(c); 1341 1342 if (c == '\\') { 1343 c = in.read(); 1344 if (c == EOF) 1345 unclosed("string literal", startline, startcol); 1346 else { 1347 append(c); 1348 continue; //so \" does not hit the if below and break 1349 } 1350 } 1351 1352 if (c == '"') 1353 break; 1354 } 1355 1356 if (dbg) dbgread("appendCodeString Finished: Buffer=" + StringUtil.viewableAscii(buf.toString())); 1357 if (dbg) dbgexit(); 1358 } 1359 1360 1361/* 1362Called (outside of any comments in the code block) when: 1363--> parseCode() 1364 ... 
' 1365 ^ (we are here) 1366*/ 1367void appendCodeCharLiteral() throws IOException 1368 { 1369 if (dbg) dbgenter(); 1370 1371 int startline = in.getLine(); 1372 int startcol = in.getCol(); 1373 1374 while (true) 1375 { 1376 c = in.read(); 1377 1378 if (c == EOF || c == '\r' || c == '\n') 1379 unclosed("char literal", startline, startcol); 1380 1381 append(c); 1382 1383 if (c == '\\') { 1384 c = in.read(); 1385 if (c == EOF) 1386 unclosed("char literal", startline, startcol); 1387 else { 1388 append(c); 1389 continue; //so \' does not hit the if below and break 1390 } 1391 } 1392 1393 if (c == '\'') 1394 break; 1395 } 1396 1397 if (dbg) dbgread("appendCodeCharLiteral Finished: Buffer=" + StringUtil.viewableAscii(buf.toString())); 1398 if (dbg) dbgexit(); 1399 } 1400 1401 1402/* 1403Reads from the current position till the first nonwhitespace char, EOF or 1404newline is encountered. Reads are into the whitespace buffer. does not 1405consume the character past the non-whitespace character and does 1406NOT read multiple lines of whitespace. 1407*/ 1408void readToFirstNonWS() throws IOException 1409 { 1410 wsbuf.reset(); 1411 1412 while (true) 1413 { 1414 c = in.read(); 1415 1416 if (c == '\r' || c == '\n') 1417 break; 1418 1419 if (c == EOF || ! Character.isWhitespace(c)) 1420 break; 1421 1422 wsbuf.append((char)c); 1423 } 1424 1425 in.unread(); 1426 } 1427 1428//skip till end of whitespace or EOF. does not consume any chars past 1429//the whitespace. 1430void skipWS() throws IOException 1431 { 1432 int c2 = EOF; 1433 while (true) { 1434 c2 = in.read(); 1435 if (c2 == EOF || ! Character.isWhitespace(c2)) { 1436 in.unread(); 1437 break; 1438 } 1439 } 1440 } 1441 1442//skips to the end of line if the rest of the line is (from the current 1443//position), all whitespace till the end. otherwise, does not change 1444//current position. consumes trailing newlines (if present) when reading 1445//whitespace. 
1446void skipIfWhitespaceToEnd() throws IOException 1447 { 1448 int count = 0; 1449 1450 while (true) 1451 { 1452 c = in.read(); 1453 count++; 1454 1455 if (c == '\r') { 1456 in.match('\n'); 1457 return; 1458 } 1459 1460 if (c == '\n' || c == EOF) 1461 return; 1462 1463 if (! Character.isWhitespace(c)) 1464 break; 1465 } 1466 1467 in.unread(count); 1468 } 1469 1470//not used anymore but left here for potential future use. does not 1471//consume the newline (if present) 1472void skipToLineEnd() throws IOException 1473 { 1474 while (true) 1475 { 1476 int c = in.read(); 1477 if (c == EOF) { 1478 in.unread(); 1479 break; 1480 } 1481 if (c == '\n' || c == '\r') { 1482 in.unread(); 1483 break; 1484 } 1485 } 1486 } 1487 1488String quote(final char c) 1489 { 1490 switch (c) 1491 { 1492 case '\r': 1493 return "\\r"; 1494 1495 case '\n': 1496 return "\\n"; 1497 1498 case '\"': /* this is a quirk, '\"' is same as '"' for char literals, keeping as-is for legacy*/ 1499 //can also say: new String(new char[] {'\', '"'}) 1500 return "\\\""; //--> \" 1501 1502 case '\\': 1503 return "\\\\"; 1504 1505 default: 1506 return String.valueOf(c); 1507 } 1508 } 1509 1510//======= util and debug methods ========================== 1511String methodName(int framenum) 1512 { 1513 StackTraceElement ste[] = new Exception().getStackTrace(); 1514 //get method that called us, we are ste[0] 1515 StackTraceElement st = ste[framenum]; 1516 String file = st.getFileName(); 1517 int line = st.getLineNumber(); 1518 String method = st.getMethodName(); 1519 String threadname = Thread.currentThread().getName(); 1520 return method + "()"; 1521 } 1522 1523void dbgenter() { 1524 System.out.format("%s-->%s\n", StringUtil.repeat('\t', dbgtab++), methodName(2)); 1525 } 1526 1527void dbgexit() { 1528 System.out.format("%s<--%s\n", StringUtil.repeat('\t', --dbgtab), methodName(2)); 1529 } 1530 1531void dbgread(String str) { 1532 System.out.format("%s %s\n", StringUtil.repeat('\t', dbgtab), 
StringUtil.viewableAscii(str)); 1533 } 1534 1535void dbgread(String str, List list) { 1536 System.out.format("%s %s: ", StringUtil.repeat('\t', dbgtab), StringUtil.viewableAscii(str)); 1537 for (int n = 0; n < list.size(); n++) { 1538 System.out.print( StringUtil.viewableAscii( (String)list.get(n) ) ); 1539 } 1540 System.out.println(""); 1541 } 1542 1543void dbgread(char c) { 1544 System.out.format("%s %s\n", StringUtil.repeat('\t', dbgtab), StringUtil.viewableAscii(c)); 1545 } 1546 1547void dbgread(CharArrayWriter buf) { 1548 System.out.format("%s %s\n", StringUtil.repeat('\t', dbgtab), StringUtil.viewableAscii(buf.toString())); 1549 } 1550 1551void unclosed(String blockname, int startline, int startcol) throws IOException 1552 { 1553 throw new IOException(blockname + " tag not closed.\nThis tag was possibly opened in: \nFile:" 1554 + inputFile + ", line:" 1555 + startline + " column:" + startcol + 1556 ".\nCurrent line:" + in.getLine() + " column:" + in.getCol()); 1557 } 1558 1559void error(String msg, int line, int col) throws IOException 1560 { 1561 throw new IOException("Error in File:" + inputFile + " Line:" + line + " Col:" + col + " " + msg); 1562 } 1563 1564void error(String msg) throws IOException 1565 { 1566 throw new IOException("Error in File:" + inputFile + " " + msg); 1567 } 1568 1569//============== Non Parsing methods ================================ 1570void o(Object str) { 1571 out.print(str); 1572 } 1573 1574void ol(Object str) { 1575 out.println(str); 1576 } 1577 1578void ol() { 1579 out.println(); 1580 } 1581 1582/** 1583Returns the src_encoding directive (if any) defined in this page or <tt>null</tt> if no source encoding 1584has been specified. 1585*/ 1586String getSourceEncoding() { 1587 return src_encoding; 1588 } 1589 1590/* 1591include an external file whose contents will be rendered as part of the page. 
1592*/ 1593void includeFile(CharArrayWriter buf, String option) throws IOException 1594 { 1595 String str; 1596 1597 if (dbg) dbgread("<new INCLUDE-FILE> "); 1598 str = removeLeadingTrailingQuote(buf.toString().trim()); 1599 1600 File includeFile = null; 1601 File parentDir = inputFile.getParentFile(); 1602 if (parentDir == null) { 1603 parentDir = new File("."); 1604 } 1605 1606 if (str.startsWith("/")) 1607 includeFile = new File(contextRoot, str); 1608 else 1609 includeFile = new File(parentDir, str); 1610 1611 //System.out.println(">>>>>>>>>> f="+f +";root="+contextRoot); 1612 1613 if (! includeFile.exists()) { 1614 throw new IOException("Include file does not exist: " + includeFile.getCanonicalPath()); 1615 } 1616 1617 if (circularityTrack.contains(includeFile.getAbsolutePath())) { 1618 throw new IOException("Circularity detected when including: " + includeFile.getCanonicalPath() + "\nAlready included the following files: " + circularityTrack); 1619 } 1620 1621 tree.add(new MollyComment( 1622 "//>>>START INCLUDE from: " + includeFile.getAbsolutePath())); 1623 1624 /* 1625 PageParser pp = new PageParser(contextRoot, includeFile, out, classname, log); 1626 pp.includeMode().parse(); //writes to out 1627 */ 1628 1629 in.insertIntoStream(includeFile); 1630 1631 /* this is printed immediately before the inserted contents can be processed, so don't add this */ 1632 /* 1633 tree.add(new MollyComment( 1634 "//>>>END INCLUDE from: " + includeFile.getAbsolutePath())); 1635 */ 1636 1637 circularityTrack.add(includeFile.getAbsolutePath()); 1638 } 1639 1640 1641void writePage() throws IOException 1642 { 1643 if (! 
includeMode) 1644 { 1645 if (directives.containsKey(d_src_encoding)) { 1646 this.src_encoding = (String) directives.get(d_src_encoding); 1647 this.src_encoding = removeLeadingTrailingQuote(this.src_encoding); 1648 } 1649 1650 //create a appropriate PrintWriter based on either the default 1651 //java encoding or the page specified java encoding 1652 //the java source file will be written out in this encoding 1653 1654 FileOutputStream fout = new FileOutputStream(outputFile); 1655 OutputStreamWriter fw = (src_encoding != null) ? 1656 new OutputStreamWriter(fout, src_encoding) : 1657 new OutputStreamWriter(fout, Page.DEFAULT_SRC_ENCODING); 1658 1659 out = new PrintWriter(new BufferedWriter(fw)); 1660 } 1661 1662 if (! includeMode) 1663 { 1664 writePackage(); 1665 writeImports(); 1666 1667 o ("public class "); 1668 o (classname); 1669 ol(" extends fc.web.page.PageImpl"); 1670 ol("{"); 1671 } 1672 1673 writeFields(); 1674 1675 if (! includeMode) { 1676 writeConstructor(); 1677 } 1678 1679 writeMethods(); 1680 1681 if (! 
includeMode) { 1682 ol("}"); 1683 } 1684 } 1685 1686void writePackage() 1687 { 1688 o ("package "); 1689 o (packagename); 1690 ol(";"); 1691 ol(); 1692 } 1693 1694void writeImports() throws IOException 1695 { 1696 ol("import javax.servlet.*;"); 1697 ol("import javax.servlet.http.*;"); 1698 ol("import java.io.*;"); 1699 ol("import java.util.*;"); 1700 //write this in case (very rare) that a page overrides the 1701 //Page.init()/destory methods [we need pageservlet for init(..)] 1702 ol("import fc.web.page.PageServlet;"); 1703 for (int n = 0; n < imps.size(); n++) { 1704 ((Element)imps.get(n)).render(); 1705 ol(); 1706 } 1707 ol(); 1708 } 1709 1710void writeFields() 1711 { 1712 } 1713 1714void writeConstructor() 1715 { 1716 } 1717 1718void writeMethods() throws IOException 1719 { 1720 writeDeclaredMethods(); 1721 writeIncludedMethods(); 1722 writeRenderMethod(); 1723 } 1724 1725void writeDeclaredMethods() throws IOException 1726 { 1727 for (int n = 0; n < decl.size(); n++) { 1728 ((Element)decl.get(n)).render(); 1729 } 1730 1731 if (decl.size() > 0) 1732 ol(); 1733 } 1734 1735void writeIncludedMethods() throws IOException 1736 { 1737 for (int n = 0; n < inc_decl.size(); n++) { 1738 ((Element)inc_decl.get(n)).render(); 1739 } 1740 1741 if (inc_decl.size() > 0) 1742 ol(); 1743 } 1744 1745void writeRenderMethod() throws IOException 1746 { 1747 if (! includeMode) { 1748 writeRenderTop(); 1749 } 1750 1751 //if (! (e instanceof Comment || e instanceof Decl || e instanceof MollyComment)) { 1752 1753 boolean removeInitialEmpty = directives.containsKey(d_remove_initial_whitespace); 1754 1755 boolean firstNonWhiteSpaceNodeSeen = false; 1756 1757 //render entire tree 1758 for (int n = 0; n < tree.size(); n++) 1759 { 1760 Element elem = (Element)tree.get(n); 1761 1762 if (elem instanceof Text) { 1763 Text t = (Text) elem; 1764 //System.out.println("before whitespace removal: " + elem); 1765 if (removeInitialEmpty && ! 
firstNonWhiteSpaceNodeSeen) { 1766 if (t.isOnlyWhiteSpaceNode()) { 1767 t.clear(); 1768 } 1769 else{ 1770 t.removeInitialEmptyLines(); 1771 firstNonWhiteSpaceNodeSeen = true; 1772 } 1773 } 1774 //System.out.println("after whitespace removal: " + elem); 1775 //System.out.println("---------------------------"); 1776 } 1777 1778 elem.render(); 1779 } 1780 1781 if (! includeMode) { 1782 writeRenderBottom(); 1783 } 1784 1785 } 1786 1787void writeRenderTop() throws IOException 1788 { 1789 ol("public void render(HttpServletRequest req, HttpServletResponse res) throws Exception"); 1790 ol("\t{"); 1791 ol(" /* for people used to typing 'request/response' */"); 1792 ol(" final HttpServletRequest request = req;"); 1793 ol(" final HttpServletResponse response = res;"); 1794 ol(); 1795 //mime+charset 1796 String content_type = ""; 1797 if (directives.containsKey(d_mimetype)) 1798 { 1799 String mtype = (String) directives.get(d_mimetype); 1800 if (! (mtype.equals("") || mtype.equals(mimetype_none)) ) 1801 { 1802 mtype = removeLeadingTrailingQuote(mtype); 1803 content_type += mtype; 1804 } 1805 } 1806 else{ 1807 content_type += Page.DEFAULT_MIME_TYPE; 1808 } 1809 1810 1811 if (directives.containsKey(d_encoding)) { 1812 String encoding = (String) directives.get(d_encoding); 1813 encoding = removeLeadingTrailingQuote(encoding); 1814 /*an empty encoding means that the encoding is specified in the 1815 html header*/ 1816 if (! 
encoding.trim().equals("")) { 1817 content_type += "; charset="; 1818 content_type += encoding; 1819 } 1820 } 1821 else{ 1822 content_type += "; charset="; 1823 content_type += Page.DEFAULT_ENCODING; 1824 } 1825 1826 o (" res.setContentType(\""); o (content_type); ol("\");"); 1827 1828 //buffer 1829 if (directives.containsKey(d_buffersize)) { 1830 o (" res.setBufferSize("); 1831 o (directives.get(d_buffersize)); 1832 ol(");"); 1833 } 1834 1835 //stream or writer 1836 boolean stream = false; 1837 if (directives.containsKey(d_out)) 1838 { 1839 String stream_type = ((String) directives.get(d_out)).toLowerCase().intern(); 1840 1841 if (stream_type == d_out_stream1 || stream_type == d_out_stream2) { 1842 stream = true; 1843 } 1844 else if (stream_type == d_out_writer) { 1845 stream = false; 1846 } 1847 else{ 1848 error("Did not understand directive [directive name=out, value=" + stream_type + "]. Choose between (" + d_out_stream1 + ") and (" + d_out_writer + ")"); 1849 } 1850 } 1851 1852 if (stream) 1853 ol(" ServletOutputStream out = res.getOutputStream();"); 1854 else 1855 ol(" PrintWriter out = res.getWriter();"); 1856 1857 } 1858 1859void writeRenderBottom() throws IOException 1860 { 1861 ol(); 1862 ol("\t} //~render end"); 1863 } 1864 1865 1866/* 1867int tabcount = 1; 1868String tab = "\t"; 1869void tabInc() { 1870 tab = StringUtil.repeat('\t', ++tabcount); 1871 } 1872void tabDec() { 1873 tab = StringUtil.repeat('\t', --tabcount); 1874 } 1875*/ 1876 1877abstract class Element { 1878 abstract void render() throws IOException; 1879 //text, include etc., implement this as needed. 1880 void addExp(Exp e) { 1881 throw new RuntimeException("Internal error: not implemented by this object"); 1882 } 1883 } 1884 1885//this should NOT be added to the tree directly but added to Text or Hash 1886//via the addExp() method. 
This is because exps must be printed inline 1887class Exp extends Element 1888 { 1889 String str; 1890 1891 Exp(CharArrayWriter buf) { 1892 this.str = buf.toString(); 1893 if (dbg) dbgread("<new EXP> "+ str); 1894 } 1895 1896 void render() { 1897 o("out.print ("); 1898 o(str); 1899 ol(");"); 1900 } 1901 1902 public String toString() { 1903 return "Exp: [" + str + "]"; 1904 } 1905 } 1906 1907//this is of course mofo crazy but that's java regex embedded into java strings for you 1908final String whiteSpaceOnlyPat = "^(\\\\n(?<!\\\\)|\\\\r(?<!\\\\)|\\\\t(?<!\\\\)| )*$"; 1909final String whiteSpaceBeginPat = "^(\\\\n(?<!\\\\)|\\\\r(?<!\\\\)|\\\\t(?<!\\\\)| )*"; 1910 1911class Text extends Element 1912 { 1913 String offset_space; 1914 final List list = new ArrayList(); 1915 int nodeNumber = textNodeCounter++; 1916 1917 //each text section is parsed by a text node. Within EACH text 1918 //node, we split it's contained text into separate lines and 1919 //generate code to print each line with a "out.println(...)" 1920 //statement. This maintains the same source order as the molly 1921 //page. If we munge together everything and print all of it's 1922 //contents with just one out.println(...)" statement, we would 1923 //get one large line with embedded \n and that would make 1924 //things more difficult to co-relate with the source file. 1925 1926 Text(final String offset, final CharArrayWriter b) 1927 { 1928 if (offset == null) { 1929 offset_space = "\t"; 1930 } 1931 else{ 1932 offset_space = "\t" + offset; 1933 } 1934 1935 final char[] buf = b.toCharArray(); 1936 1937 boolean prevWasCR = false; 1938 //jdk default is 32. we say 256. not too large, maybe 1939 //less cache pressure. not too important, gets resized 1940 //as needed anyway. 
1941 final CharArrayWriter tmp = new CharArrayWriter(256); 1942 1943 //intead of tmp.size() > MAX_TEXT_LITERAL_LENGTH in the first if statement below, make 1944 //things faster by avoiding a method call in a loop and using a local variable tcount 1945 int tcount = 0; 1946 1947 for (int i=0, j=1; i < buf.length; i++, j++) 1948 { 1949 char c = buf[i]; 1950 tcount++; 1951 if (tcount > MAX_TEXT_LITERAL_LENGTH || j == buf.length) { 1952 tmp.append(quote(c)); 1953 list.add(tmp.toString()); 1954 tmp.reset(); 1955 tcount = 0; 1956 } 1957 else if (c == '\n') { 1958 tmp.append(quote(c)); 1959 if (! prevWasCR) { 1960 list.add(tmp.toString()); 1961 tmp.reset(); 1962 tcount = 0; 1963 } 1964 } 1965 else if (c == '\r') { 1966 tmp.append(quote(c)); 1967 list.add(tmp.toString()); 1968 tmp.reset(); 1969 tcount = 0; 1970 prevWasCR = true; 1971 } 1972 else{ 1973 tmp.append(quote(c)); 1974 prevWasCR = false; 1975 } 1976 } 1977 1978 if (dbg) { 1979 String classname = getClass().getName(); 1980 dbgread("<new " + classname.substring(classname.indexOf("$")+1,classname.length()) + ">",list); 1981 } 1982 } 1983 1984 Text(CharArrayWriter b) 1985 { 1986 this(null, b); 1987 } 1988 1989 void addExp(Exp e) 1990 { 1991 list.add(e); 1992 } 1993 1994 void render() 1995 { 1996 for (int i=0; i<list.size(); i++) 1997 { 1998 Object obj = list.get(i); //can be String or Exp 1999 if (obj instanceof Exp) { 2000 o(offset_space); 2001 ((Exp)obj).render(); 2002 } 2003 else{ 2004 o(offset_space); 2005 o("out.print (\""); 2006 o(obj); 2007 ol("\");"); 2008 } 2009 } 2010 } //render 2011 2012 boolean isOnlyWhiteSpaceLine(String s) 2013 { 2014 return s.matches(whiteSpaceOnlyPat); 2015 } 2016 2017 // one or all lines (each line being a separate list item) in this text node are white space 2018 // example 3 lines: ['\n', '\n', ' \t\n'] -> true 2019 // example 3 lines: ['\n', '\n', Exp Node, ' \t\n'] -> false 2020 boolean isOnlyWhiteSpaceNode() 2021 { 2022 for (int n = 0; n < list.size(); n++) { 2023 Object obj = 
list.get(n); 2024 if (! (obj instanceof String)) { 2025 return false; 2026 } 2027 if (! isOnlyWhiteSpaceLine((String)obj)) { 2028 return false; 2029 } 2030 } 2031 return true; 2032 } 2033 2034 // some lines (each line being a separate list item) in this text node are white space 2035 // before: ['\n', '\n', ' \n\thello\n', Exp Node, '\n'] 2036 // after: ['hello\n', Exp Node, '\n'] 2037 void removeInitialEmptyLines() 2038 { 2039 Iterator it = list.iterator(); //have to use iterator when removing while transversing 2040 while (it.hasNext()) 2041 { 2042 Object obj = it.next(); 2043 if (! (obj instanceof String)) { 2044 break; 2045 } 2046 String s = (String)obj; 2047 if (isOnlyWhiteSpaceLine(s)) { 2048 it.remove(); 2049 } 2050 else{ 2051 s.replaceFirst(whiteSpaceBeginPat, ""); 2052 break; 2053 } 2054 } 2055 } 2056 2057 //clear all contents of this node - used only for white space removal 2058 void clear() 2059 { 2060 list.clear(); 2061 } 2062 2063 public String toString() { 2064 StringBuilder buf = new StringBuilder(); 2065 buf.append("Text (#" + nodeNumber + "):"); 2066 if (list.size() == 0) { 2067 append("<EMPTY>"); 2068 } 2069 else{ 2070 for (int n = 0; n < list.size(); n++) { 2071 buf.append("["); 2072 buf.append(StringUtil.viewableAscii(String.valueOf(list.get(n)))); 2073 buf.append("]"); 2074 if (n + 1 < list.size()) { 2075 buf.append(","); 2076 } 2077 } 2078 } 2079 return buf.toString(); 2080 } 2081 2082 } 2083 2084class Hash extends Text 2085 { 2086 Hash(final String offset, final CharArrayWriter b) 2087 { 2088 super(offset, b); 2089 } 2090 2091 //same as super.render() except for j == list.size() o/ol() below 2092 void render() 2093 { 2094 for (int i=0, j=1; i<list.size(); i++, j++) 2095 { 2096 Object obj = list.get(i); //can be String or Exp 2097 if (obj instanceof Exp) { 2098 o(offset_space); 2099 ((Exp)obj).render(); 2100 } 2101 else{ 2102 o(offset_space); 2103 o("out.print (\""); 2104 o(obj); 2105 2106 if (j == list.size()) 2107 o ("\");"); 2108 else 2109 
ol("\");"); 2110 } 2111 } 2112 } //render 2113 2114 public String toString() { 2115 return "Hash: " + list; 2116 } 2117 } 2118 2119class Heredoc extends Text 2120 { 2121 Heredoc(final CharArrayWriter buf) 2122 { 2123 super(null, buf); 2124 } 2125 2126 //override, exp cannot be added to heredoc sections 2127 void addExp(Exp e) 2128 { 2129 throw new IllegalStateException("Internal implementation error: this method should not be called for a Heredoc object"); 2130 } 2131 2132 void render() 2133 { 2134 for (int i=0, j=1; i<list.size(); i++, j++) 2135 { 2136 Object obj = list.get(i); 2137 o(offset_space); 2138 o("out.print (\""); 2139 o(obj); 2140 ol("\");"); 2141 } 2142 } //render 2143 2144 public String toString() { 2145 return "Heredoc: " + list; 2146 } 2147 2148 } 2149 2150class Code extends Element 2151 { 2152 List list = new ArrayList(); 2153 2154 Code(CharArrayWriter b) 2155 { 2156 //we split the code section into separate lines and 2157 //print each line with a out.print(...). This maintains 2158 //the same source order as the molly page. If we munge together 2159 //everything, we would get one large line with embedded \n 2160 //and that would make things more difficult to co-relate. 
2161 final char[] buf = b.toCharArray(); 2162 CharArrayWriter tmp = new CharArrayWriter(); 2163 for (int i=0, j=1; i < buf.length; i++, j++) { 2164 char c = buf[i]; 2165 if (j == buf.length) { //end of buffer 2166 tmp.append(c); 2167 list.add(tmp.toString()); 2168 tmp.reset(); 2169 } 2170 else if (c == '\n') { 2171 tmp.append(c); 2172 list.add(tmp.toString()); 2173 tmp.reset(); 2174 } 2175 else 2176 tmp.append(c); 2177 } 2178 if (dbg) { 2179 String classname = getClass().getName(); 2180 dbgread("<new " + classname.substring(classname.indexOf("$")+1,classname.length()) + ">",list); 2181 } 2182 } 2183 2184 void render() { 2185 for (int i = 0; i < list.size(); i++) { 2186 o('\t'); 2187 o(list.get(i)); 2188 } 2189 } 2190 2191 public String toString() { 2192 return "Code: " + list; 2193 } 2194 } 2195 2196class Comment extends Element 2197 { 2198 String str; 2199 2200 Comment(CharArrayWriter buf) { 2201 this.str = buf.toString(); 2202 if (dbg) dbgread("<new COMMENT> "+ str); 2203 } 2204 2205 void render() { 2206 //we don't print commented sections 2207 } 2208 2209 public String toString() { 2210 return "Comment: [" + str + "]"; 2211 } 2212 } 2213 2214class Decl extends Code 2215 { 2216 Decl(CharArrayWriter buf) { 2217 super(buf); 2218 } 2219 2220 void render() { 2221 for (int i = 0; i < list.size(); i++) { 2222 o (list.get(i)); 2223 } 2224 } 2225 } 2226 2227/* base class for Forward and Include */ 2228class ForwardIncludeElement extends Element 2229 { 2230 List parts = new ArrayList(); 2231 boolean useBuf = false; 2232 2233 // the following is for includes with expressions 2234 // [include foo[=i].html] 2235 // i could be 1,2,3.. 
the parser adds the xpression [=i] to this 2236 // object if it's present via the addExp method below 2237 void add(CharArrayWriter buf) 2238 { 2239 parts.add(buf.toString().trim()); 2240 if (parts.size() > 1) { 2241 useBuf = true; 2242 } 2243 } 2244 2245 void addExp(Exp e) 2246 { 2247 parts.add(e); 2248 useBuf = true; 2249 } 2250 2251 void render() throws IOException 2252 { 2253 if (parts.size() == 0) { 2254 //log.warn("possible internal error, parts.size()==0 in Forward"); 2255 return; 2256 } 2257 2258 ol("\t{ //this code block gives 'rd' its own namespace"); 2259 2260 if (! useBuf) { 2261 o ("\tfinal RequestDispatcher rd = req.getRequestDispatcher(\""); 2262 //only 1 string 2263 o (removeLeadingTrailingQuote(parts.get(0).toString())); 2264 ol("\");"); 2265 } 2266 else{ 2267 ol("\tfinal StringBuilder buf = new StringBuilder();"); 2268 for (int n = 0; n < parts.size(); n++) { 2269 Object obj = parts.get(n); 2270 if ( n == 0 || (n + 1) == parts.size() ) { 2271 obj = removeLeadingTrailingQuote(obj.toString()); 2272 } 2273 if (obj instanceof String) { 2274 o ("\tbuf.append(\""); 2275 o (obj); 2276 ol("\");"); 2277 } 2278 else{ 2279 o ("\tbuf.append("); 2280 o ( ((Exp)obj).str ); 2281 ol(");"); 2282 } 2283 } //for 2284 ol("\tfinal RequestDispatcher rd = req.getRequestDispatcher(buf.toString());"); 2285 } //else 2286 } 2287 2288 2289 public String toString() { 2290 return "Forward: " + parts; 2291 } 2292 } 2293 2294/* a request dispatcher based include. 
*/ 2295class Include extends ForwardIncludeElement 2296 { 2297 Include() { 2298 if (dbg) dbgread("<new INCLUDE> "); 2299 } 2300 2301 void render() throws IOException 2302 { 2303 super.render(); 2304 ol("\trd.include(req, res);"); 2305 ol("\t} //end rd block"); 2306 } 2307 2308 /* uses parent toString */ 2309 } 2310 2311/* a request dispatcher based forward */ 2312class Forward extends ForwardIncludeElement 2313 { 2314 Forward() { 2315 if (dbg) dbgread("<new FORWARD>"); 2316 } 2317 2318 void render() throws IOException 2319 { 2320 super.render(); 2321 ol("\t//WARNING: any uncommitted page content before this forward will be discarded."); 2322 ol("\t//If the response has already been committed an exception will be thrown. "); 2323 2324 ol("\trd.forward(req, res);"); 2325 2326 ol("\t//NOTE: You should 'return' right after this line. There should be no content in your "); 2327 ol("\t//page after the forward statement"); 2328 ol("\t} //end rd block"); 2329 } 2330 2331 /* uses parent toString */ 2332 } 2333 2334 2335/* a molly mechanism to include an external file containing code and method 2336 declarations. These are typically commom utility methods and global 2337 vars. The included file is not parsed by the molly parser... the contents 2338 are treated as if they were written directly inside a [!....!] block. 
2339*/ 2340class IncludeDecl extends Element 2341 { 2342 String str; 2343 String opt; 2344 2345 IncludeDecl(CharArrayWriter buf) { 2346 if (dbg) dbgread("<new INCLUDE-DECL> "); 2347 str = removeLeadingTrailingQuote(buf.toString().trim()); 2348 } 2349 2350 void setOption(String opt) { 2351 this.opt = opt; 2352 } 2353 2354 void render() throws IOException 2355 { 2356 File f = null; 2357 File parentDir = inputFile.getParentFile(); 2358 if (parentDir == null) { 2359 parentDir = new File("."); 2360 } 2361 2362 final int strlen = str.length(); 2363 2364 if (str.startsWith("\"") || str.startsWith("'")) 2365 { 2366 if (strlen == 1) //just " or ' 2367 throw new IOException("Bad include file name: " + str); 2368 2369 str = str.substring(1, strlen); 2370 } 2371 2372 if (str.endsWith("\"") || str.endsWith("'")) 2373 { 2374 if (strlen == 1) //just " or ' 2375 throw new IOException("Bad include file name: " + str); 2376 2377 str = str.substring(0, strlen-1); 2378 } 2379 2380 if (str.startsWith("/")) 2381 f = new File(contextRoot, str); 2382 else 2383 f = new File(parentDir, str); 2384 2385 /* f = new File(parentDir, str); */ 2386 2387 if (! 
f.exists()) { 2388 throw new IOException("Include file does not exist: " + f.getCanonicalPath()); 2389 } 2390 2391 o("//>>>START INCLUDE DECLARTIONS from: "); 2392 o(f.getAbsolutePath()); 2393 ol(); 2394 2395 o(IOUtil.inputStreamToString(new FileInputStream(f))); 2396 2397 o("//>>>END INCLUDE DECLARATIONS from: "); 2398 o(f.getAbsolutePath()); 2399 ol(); 2400 2401 //circularities are tricky, later 2402 //includeMap.put(pageloc, f.getCanonicalPath()); 2403 } 2404 2405 public String toString() { 2406 return "IncludeDecl: [" + str + "; options: " + opt + "]"; 2407 } 2408 } 2409 2410class Import extends Code 2411 { 2412 Import(CharArrayWriter buf) { 2413 super(buf); 2414 } 2415 2416 void render() { 2417 for (int i = 0; i < list.size(); i++) { 2418 o (list.get(i)); 2419 } 2420 } 2421 } 2422 2423class MollyComment extends Element 2424 { 2425 String str; 2426 2427 MollyComment(String str) { 2428 this.str = str; 2429 if (dbg) dbgread("<new MollyComment> "+ str); 2430 } 2431 2432 void render() { 2433 ol(str); 2434 } 2435 2436 public String toString() { 2437 return "MollyComment: [" + str + "]"; 2438 } 2439 } 2440 2441/** 2442removes starting and trailing single/double quotes. used by the 2443include/forward render methods only, NOT used while parsing. 
2444*/ 2445private static String removeLeadingTrailingQuote(String str) 2446 { 2447 if (str == null) 2448 return str; 2449 2450 if ( str.startsWith("\"") || str.startsWith("'") ) { 2451 str = str.substring(1, str.length()); 2452 } 2453 2454 if ( str.endsWith("\"") || str.endsWith("'") ) { 2455 str = str.substring(0, str.length()-1); 2456 } 2457 2458 return str; 2459 } 2460 2461//=============================================== 2462 2463public static void main (String args[]) throws IOException 2464 { 2465 Args myargs = new Args(args); 2466 myargs.setUsage("java " + myargs.getMainClassName() 2467 + "\n" 2468 + "Required params:\n" 2469 + " -classname output_class_name\n" 2470 + " -in input_page_file\n" 2471 + "\nOptional params:\n" 2472 + " -encoding <page_encoding>\n" 2473 + " -contextRoot <webapp root-directory or any other directory>\n" 2474 + " this directory is used as the starting directory for absolute (starting\n" 2475 + " with a \"/\") include/forward directives in a page>. If not specified\n" 2476 + " defaults to the same directory as the page file\n" 2477 + " -out <output_file_name>\n" 2478 + " the output file is optional and defaults to the standard out if not specified." 2479 ); 2480 //String encoding = myargs.get("encoding", Page.DEFAULT_ENCODING); 2481 2482 File input = new File(myargs.getRequired("in")); 2483 File contextRoot = null; 2484 2485 if (myargs.flagExists("contextRoot")) 2486 contextRoot = new File(myargs.get("contextRoot")); 2487 else 2488 contextRoot = input; 2489 2490 PrintWriter output; 2491 2492 if (myargs.get("out") != null) 2493 output = new PrintWriter(new FileWriter(myargs.get("out"))); 2494 else 2495 output = new PrintWriter(new OutputStreamWriter(System.out)); 2496 2497 PageParser parser = new PageParser(contextRoot, input, output, myargs.getRequired("classname"), Log.getDefault()); 2498 parser.parse(); 2499 } 2500 2501}