001    // Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 
002    // The Molly framework is freely distributable under the terms of an
003    // MIT-style license. For details, see the molly pages web site at:
004    // http://www.mollypages.org/. Use, modify, have fun !
005    
006    package fc.web.page;
007    
008    import java.io.*;
009    import java.util.*;
010    
011    import fc.io.*;
012    import fc.util.*;
013    
014    /**
015    A Reader suitable for lexing. Supports all of: <code>peek</code>,
016    <code>read</code> and <code>unread</code>. (no JDK 1.5 reader class has
017    all of those). Mark/reset is not supported because it's too complex to
018    implement given the current <i>fixed-buffer</i> implementation of this
019    class. (on the flip-side this implementation does allow to read
020    <i>very</i> large files without risk of running out of JDK memory).
021    <p>
022    <xmp>1 4m 31337 h4x0r! ph33r |\/|y m4d sk1llz !!11!</xmp>:-]
023    <p>
024    Note 1: If this class is invoked from the command line, setting the dbg
025    flag in the code to <code>true</code> is useful.
026    
027    @author hursh jain
028    */
029    public final class PageReader extends Reader
030    {
031    /*
032    Old implemenation had a rolling buffer window and a previous spillover buffer.
033    It was too complicated, so that implemenation is pastured.
034    
035    It is MUCH simpler to read the entire file into one character array...(unread,
036    read, mark, reset) etc, become trivial. By contrast, if using a
037    limited-size-read-buffer implementation (as before) all of those become much
038    harder since we loose the existing buffer contents when the buffer is *refilled*
039    (so how do you go back in the buffer if the buffer is gone ?).
040    */
041    static final boolean dbg = false;
042    
043    //our own buf/pos because most/all reader subclasses dont have mark/reset/unread  
044    char[]      buf       = null;
045    int       pos       = 0;
046    int       count       = 0;
047    int       markpos     = 0;
048    //line, col and other tracking 
049    int       line      = 1;
050    int       col       = 0;
051    int       lastcol     = 1;  //for unread past a newline
052    boolean     pushBackNL    = false;
053    boolean     lastWasCR   = false;
054    String      encoding;
055    static String DEFAULT_ENCODING = "UTF-8";
056    
057    /** 
058    Creates a new PageReader wrapping the specified reader
059    */
060    public PageReader(Reader r) throws IOException
061      {
062      Argcheck.notnull(r, "specified reader was null");
063      buf = IOUtil.readerToCharArray(r);
064      this.encoding = DEFAULT_ENCODING;
065      }
066    
067    /**
068    Creates a reader with the specified non-null encoding.
069    */
070    public PageReader(File file, String encoding)  throws IOException
071      {
072      Argcheck.notnull(file, "specified file was null");
073      Argcheck.notnull(encoding, "specified encoding was null");
074      this.encoding = encoding;
075      buf = IOUtil.fileToCharArray(file, encoding);
076      }
077    
078    /**
079    Creates a reader using the UTF-8 encoding.
080    */
081    public PageReader(File file)  throws IOException
082      {
083      this(file, DEFAULT_ENCODING);
084      }
085    
086    public void close() throws IOException
087      {
088      //no underlying stream since everything read into buffer. not much to do.
089      }
090      
091    public int read() throws IOException
092      {
093      if (pos == buf.length) {
094        return -1;
095        }
096        
097      char c = buf[pos++];  
098      
099      if (dbg) System.out.println(">>>>>>>> DEBUG: read() from BUF, c=" + StringUtil.viewableAscii(c));
100      adjustReadLineNum(c);
101    
102      return c;
103      }
104    
105    public int read(char[] buf, int start, int len) throws IOException
106      {
107      throw new IOException("not implemented, use the read() method instead");
108      }
109    
110    /**
111    Unreads the current character (which could be EOF) so that the next read will 
112    return the current character (or EOF) again.
113    */
114    public void unread() throws IOException
115      {
116      char c = 0;
117      
118      if (pos == 0) 
119        {
120        throw new IOException("I am at the beginning of the stream. Cannot unread anything because nothing has been read so far");
121        }
122      else{ 
123        c = buf[--pos];
124        if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from BUF, c=" + StringUtil.viewableAscii(c));
125        }
126      
127      adjustUnreadLineNum(c);
128      }
129    
130    /**
131    Unreads the specified number of characters
132    */
133    public void unread(int count)  throws IOException
134      {
135      for (int n = 0; n < count; n++) {
136        unread();
137        }
138      }
139    
140    /**
141    Useful for inserting included files into the stream and then parsing that content in-line
142    with the rest of the file.
143    */
144    public void insertIntoStream(File file) throws IOException
145      {
146      char[] insert = IOUtil.fileToCharArray(file, encoding);
147    
148      char[] result = new char[buf.length + insert.length];
149      System.arraycopy(buf, 0, result, 0, pos);
150      System.arraycopy(insert, 0, result, pos, insert.length);
151      System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos);
152      
153      buf = result;
154      }
155    
156    /**
157    Useful for inserting included files into the stream and then parsing that content in-line
158    with the rest of the file.
159    */
160    public void insertIntoStream(Reader r) throws IOException
161      {
162      char[] insert = IOUtil.readerToCharArray(r);
163    
164      char[] result = new char[buf.length + insert.length];
165      System.arraycopy(buf, 0, result, 0, pos);
166      System.arraycopy(insert, 0, result, pos, insert.length);
167      System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos);
168      
169      buf = result;
170      }
171    
172    
173    void adjustReadLineNum(char c)
174      {
175      // we can read: \r, \r\n , \n all of which increase line count by exactly 1
176      switch (c) 
177        {
178        case '\n': 
179          if (! lastWasCR) {
180            line++;
181            lastcol=col;
182            col=1;
183            }
184          else {
185            lastWasCR = false;
186            }
187          break;
188        
189        case '\r': 
190          line++;
191          lastcol=col;
192          col=1;
193          lastWasCR = true;
194          break;
195        
196        case '\t':
197          col = col + 4;
198          break;
199        
200        default:
201          col++;
202        }
203      }
204    
205    
206    void adjustUnreadLineNum(char c)
207      {
208      // we can unread: \r, \r\n , \n all of which reduce line count by exactly 1
209      switch (c) {
210        case '\n': 
211          pushBackNL = true;
212          line--;
213          col=lastcol;
214          break;
215        case '\r': 
216          if (! pushBackNL) { 
217            line--;
218            col=lastcol;
219            }
220          else{
221            pushBackNL = false;
222            }
223          break;
224        case '\t':
225          col = col - 4;
226          break;
227        default:
228          col--;
229        }
230      }
231    
232    public int peek() throws IOException
233      {
234      return buf[pos];
235      }
236    
237    
238    /**
239    Skips all whitespace characters such that the next {@link read} will
240    return the <b>next</b> non-whitespace character (or EOF if there are no
241    more characters).
242    */
243    public void skipWhitespace() throws IOException
244      {
245      int c = -1;
246      while (true)
247        {
248        c = read();
249        
250        if (c == -1) {
251          break;
252          }
253        
254        if (! Character.isWhitespace(c)) {
255          unread();
256          break;
257          }
258        }
259      }
260      
261    
262    
263    /**
264    Tries to read/consumes the specified char and returns true
265    if successful. If the specified char is not found, does not
266    consume anything and returns false.
267    */
268    public boolean match(int target) throws IOException
269      {
270      int c = read();
271      
272      if (c == target)
273        return true;
274      else
275        unread();
276      
277      return false;
278      }
279    
280    /**
281    Tries to read/consumes the specified non-null string and returns true
282    if successful. If the specified string is not found, does not
283    consume anything and returns false.
284    */
285    public boolean match(String target) throws IOException
286      {
287      if (target == null)
288        throw new IllegalArgumentException("Specified target string was null");
289      
290      int c = -1;
291      for (int i = 0; i < target.length(); i++)
292        {
293        c = read();
294        
295        if ( c == -1 || c != target.charAt(i)) {
296          unread(i+1);
297          return false;
298          }
299        }
300      
301      return true;
302      }
303    
304    public boolean matchIgnoreCase(String target) throws IOException
305      {
306      if (target == null)
307        throw new IllegalArgumentException("Specified target string was null");
308      
309      int c = -1;
310      for (int i = 0; i < target.length(); i++)
311        {
312        c = read();
313        
314        if ( c == -1 || c != Character.toLowerCase(target.charAt(i))) {
315          unread(i+1);
316          return false;
317          }
318    
319        }
320      
321      return true;
322      }
323    
324    public boolean markSupported()
325      {
326      return false;
327      }
328    
329    public  int getLine() { 
330      return line; 
331      }
332      
333    public  int getCol() { 
334      return col; 
335      }
336    
337    char[] getBuf() { return buf; }
338    int getPos() { return pos; }
339    
340    //other utility methods
341    
342    public static void main (String args[]) throws IOException
343      {
344      //CHANGE CHAR BUFFER TO A SMALL VALUE FOR TESTING */
345      StringReader sr = null;
346      PageReader lex = null;
347      int c = -1;
348      
349      System.out.println("Reading an empty string....."); 
350      sr = new StringReader("");  
351      lex = new PageReader(sr);
352      while ( (c = lex.read()) != -1) {
353        testprint(lex, c);
354        }
355      
356      System.out.println("----------------- TEST 2 --------------");
357      sr = new StringReader("abc");   
358      lex = new PageReader(sr);
359      while ( (c = lex.read()) != -1) {
360        testprint(lex, c);
361        //System.out.print(c + " ");
362        }
363    
364      System.out.println("----------------- TEST 3 --------------");
365      sr = new StringReader("abcde");   
366      lex = new PageReader(sr);
367      try {
368        c = lex.read();
369        testprint(lex, c);
370        lex.unread();
371        testprint(lex, -10);
372        lex.unread();
373        testprint(lex, -10);
374        c = lex.read();
375        testprint(lex, c);
376        }
377      catch (Exception e) {
378        e.printStackTrace();
379        }
380    
381      System.out.println("----------------- TEST 4 --------------");
382      sr = new StringReader("abcd\ne");   
383      lex = new PageReader(sr);
384      try {
385        c = lex.read();
386        testprint(lex, c);
387        lex.unread();
388        testprint(lex, -10);
389    
390        for (int i = 0; i < 5; i++) {
391          c = lex.read();
392          testprint(lex, c);
393          }
394    
395        for (int i = 0; i < 5; i++) {
396          lex.unread();
397          testprint(lex, -10);
398          }
399        
400        for (int i = 0; i < 5; i++) {
401          c = lex.read();
402          testprint(lex, c);
403          }
404        
405        c = lex.read();
406        testprint(lex, c);
407        }
408      catch (Exception e) {
409        e.printStackTrace();
410        }
411    
412      System.out.println("----------------- TEST 5 --------------");
413      sr = new StringReader("abcd\r\ne");   
414      lex = new PageReader(sr);
415      try {
416        c = lex.read();
417        testprint(lex, c, lex.peek());
418        lex.unread();
419        testprint(lex, -10, lex.peek());
420    
421        for (int i = 0; i < 5; i++) {
422          c = lex.read();
423          testprint(lex, c, lex.peek());
424          }
425    
426        for (int i = 0; i < 5; i++) {
427          lex.unread();
428          testprint(lex, -10, lex.peek());
429          }
430        
431        for (int i = 0; i < 5; i++) {
432          c = lex.read();
433          testprint(lex, c, lex.peek());
434          }
435        
436        c = lex.read();
437        testprint(lex, c, lex.peek());
438        }
439      catch (Exception e) {
440        e.printStackTrace();
441        }
442    
443      System.out.println("--------- TEST 6 ---(insert into stream middle)-------");
444      sr = new StringReader("abc"); 
445      lex = new PageReader(sr);
446      
447      try {
448        c = lex.read();
449        testprint(lex, c);
450      
451        StringReader insert = new StringReader("123");
452        System.out.println("inserting \"123\" into the stream\n");
453        lex.insertIntoStream(insert);
454    
455        while ( (c = lex.read()) != -1) {
456          testprint(lex, c);
457          }
458        }
459      catch (Exception e) {
460        e.printStackTrace();
461        }
462    
463    
464      System.out.println("--------- TEST 7 ---(insert into stream begin)-------");
465      sr = new StringReader("abc"); 
466      lex = new PageReader(sr);
467      
468      try {
469        StringReader insert = new StringReader("123");
470        System.out.println("inserting \"123\" into the beginning of stream\n");
471        lex.insertIntoStream(insert);
472    
473        while ( (c = lex.read()) != -1) {
474          testprint(lex, c);
475          }
476        }
477      catch (Exception e) {
478        e.printStackTrace();
479        }
480    
481      System.out.println("--------- TEST 8 ---(insert into stream end)-------");
482      sr = new StringReader("abc"); 
483      lex = new PageReader(sr);
484      
485      try {
486        while ( (c = lex.read()) != -1) {
487          testprint(lex, c);
488          }
489        StringReader insert = new StringReader("123");
490        System.out.println("inserting \"123\" into the end of the stream\n");
491        lex.insertIntoStream(insert);
492    
493        while ( (c = lex.read()) != -1) {
494          testprint(lex, c);
495          }
496        }
497      catch (Exception e) {
498        e.printStackTrace();
499        }
500    
501      }
502    
503    private static void testprint(PageReader lex, int c, int peek)
504      {
505      if (c == -1) {
506        System.out.println("====> recieved EOF (-1) from read().......");
507        }
508    
509      System.out.format(
510        "buf=%s, pos=%d, buflen=%d\nline=%d, col=%d, char=[%s]",
511        StringUtil.arrayToString(lex.getBuf()), lex.getPos(), lex.getBuf().length,
512        lex.getLine(), lex.getCol(), 
513        (c == -10) ? "N/A" : StringUtil.viewableAscii((char)c));  
514    
515      if (peek != -2)  
516        System.out.format(", peek=[%s]", StringUtil.viewableAscii((char)peek));
517    
518      System.out.print("\n\n");
519      }
520    
521    private static void testprint(PageReader lex, int c)
522      {
523      testprint(lex, c, -2);
524      }
525    
526    }