001    // Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 
002    // The Molly framework is freely distributable under the terms of an
003    // MIT-style license. For details, see the molly pages web site at:
004    // http://www.mollypages.org/. Use, modify, have fun !
005    
006    package fc.util;
007    
008    import java.util.*;
009    import java.util.regex.*;
010    
/**
HTML utility functions for moving quote characters in and out of HTML
attribute values.
<p>
All methods are static and keep no state, so there is no need to create
an instance of this class.

@author hursh jain
*/
public final class HTMLUtil
{
/** Numeric character reference emitted for a double quote ("). */
private static final String DOUBLE_QUOTE_ENTITY = "&#34;";

/** Numeric character reference emitted for a single quote ('). */
private static final String SINGLE_QUOTE_ENTITY = "&#39;";

/**
Replaces all occurrences of single <i>and</i> double quotes with the
corresponding numeric HTML character references ({@code '} becomes
{@code &#39;} and {@code "} becomes {@code &#34;}).
<p>
This is useful when setting attribute values that contain those
characters, for example when maintaining state for text typed by the
user or when showing a value retrieved from the database:
<pre>{@code
<input type=text value='O'Reilly'>
}</pre>
The embedded single quote trips up the browser because it prematurely
ends the attribute value, so when the form is resubmitted the value is
not sent properly. The same happens with an embedded double quote inside
a double-quoted value:
<pre>{@code
<input type=text value="foo"bar">
}</pre>
One would think that a backslash escape would help:
<pre>{@code
<input type=text value='O\'Reilly'>
<input type=text value="foo\"bar">
}</pre>
Unfortunately, escaping like this does <b>not</b> work reliably in
firefox, safari or IE. To be safe, all embedded quotes must be encoded
using character references:
<pre>{@code
<input type=text value='O&#39;Reilly'>
}</pre>
This works fine and is submitted by the browser as {@code O'Reilly}.
<p>
Only the two quote characters are replaced. Other characters that are
special in HTML (such as {@code &}, {@code <} and {@code >}) are passed
through untouched, so this method is not a general purpose HTML escaper.
<p>
This method is critically useful.
<a href="http://www.imdb.com/title/tt0083929/quotes">Learn it. Live it</a>.

@param	str		the text to encode; may be <code>null</code> or empty
@return	the text with every quote replaced by its character reference;
		a <code>null</code> or empty argument is returned as-is
*/
public static String quoteToEntity(String str)
  {
  if (str == null || str.isEmpty())
    return str;

  // The two replacements cannot interfere with each other: neither
  // replacement text contains a quote character.
  return str
    .replace("\"", DOUBLE_QUOTE_ENTITY)
    .replace("'", SINGLE_QUOTE_ENTITY);
  }

/**
Replaces the character references for single and double quotes
({@code &#39;} and {@code &#34;}) with the corresponding single and
double quote characters. This method is the converse of
{@link #quoteToEntity(String)}.
<p>
Every occurrence of those two references is decoded, including any that
were already present in the text before it was passed to
{@link #quoteToEntity(String)}. Text that contains a literal
{@code &#39;} or {@code &#34;} therefore does not survive an
encode/decode round trip unchanged. Other spellings of the same
characters (for example {@code &quot;}) are left alone.

@param	str		the text to decode; may be <code>null</code> or empty
@return	the text with both character references replaced by the quote
		characters; a <code>null</code> or empty argument is returned
		as-is
*/
public static String entityToQuote(String str)
  {
  if (str == null || str.isEmpty())
    return str;

  return str
    .replace(DOUBLE_QUOTE_ENTITY, "\"")
    .replace(SINGLE_QUOTE_ENTITY, "'");
  }

/**
Encodes and then decodes the specified string and prints one line to
<code>System.out</code> showing the description, the encoded form and
the decoded form, each right-aligned in its own column.

@param	desc	label printed in the first column
@param	s		the string to push through both conversions
*/
private static void test(String desc, String s)
  {
  String encoded = quoteToEntity(s);
  String decoded = entityToQuote(encoded);

  System.out.println(
    String.format("%15s -> %15s -> %10s", desc, encoded, decoded));
  }

/**
Ad-hoc demonstration: prints the encode/decode round trip for a handful
of sample strings.

@param	args	ignored
*/
public static void main (String[] args)
  {
  test("empty", "");
  test("spaces", "  ");
  test("single-quote", "'");
  test("double-quote", "\"");
  test("X'Y\"Z", "X'Y\"Z");
  test("\\'\\'", "\\'\\'");
  }
}     //~class HTMLUtil