001// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 
002// The Molly framework is freely distributable under the terms of an
003// MIT-style license. For details, see the molly pages web site at:
004// http://www.mollypages.org/. Use, modify, have fun !
005
006package fc.web.page;
007
008import java.io.*;
009import java.util.*;
010
011import fc.io.*;
012import fc.util.*;
013
014/**
A Reader suitable for lexing. Supports all of: <code>peek</code>,
<code>read</code> and <code>unread</code> (no JDK 1.5 reader class has
all of those). Mark/reset is not supported. The entire input is read
into a single in-memory character buffer when the reader is constructed,
so memory use grows with the size of the input. (An earlier
<i>fixed-buffer</i> implementation avoided that cost but made going
backwards in the stream too complicated.)
021<p>
022<xmp>1 4m 31337 h4x0r! ph33r |\/|y m4d sk1llz !!11!</xmp>:-]
023<p>
024Note 1: If this class is invoked from the command line, setting the dbg
025flag in the code to <code>true</code> is useful.
026
027@author hursh jain
028*/
029public final class PageReader extends Reader
030{
031/*
032Old implemenation had a rolling buffer window and a previous spillover buffer.
033It was too complicated, so that implemenation is pastured.
034
035It is MUCH simpler to read the entire file into one character array...(unread,
036read, mark, reset) etc, become trivial. By contrast, if using a
037limited-size-read-buffer implementation (as before) all of those become much
038harder since we loose the existing buffer contents when the buffer is *refilled*
039(so how do you go back in the buffer if the buffer is gone ?).
040*/
041static final boolean dbg = false;
042
043//our own buf/pos because most/all reader subclasses dont have mark/reset/unread  
044char[]      buf       = null;
045int       pos       = 0;
046int       count       = 0;
047int       markpos     = 0;
048//line, col and other tracking 
049int       line      = 1;
050int       col       = 0;
051int       lastcol     = 1;  //for unread past a newline
052boolean     pushBackNL    = false;
053boolean     lastWasCR   = false;
054String      encoding;
055static String DEFAULT_ENCODING = "UTF-8";
056
057/** 
058Creates a new PageReader wrapping the specified reader
059*/
060public PageReader(Reader r) throws IOException
061  {
062  Argcheck.notnull(r, "specified reader was null");
063  buf = IOUtil.readerToCharArray(r);
064  this.encoding = DEFAULT_ENCODING;
065  }
066
067/**
068Creates a reader with the specified non-null encoding.
069*/
070public PageReader(File file, String encoding)  throws IOException
071  {
072  Argcheck.notnull(file, "specified file was null");
073  Argcheck.notnull(encoding, "specified encoding was null");
074  this.encoding = encoding;
075  buf = IOUtil.fileToCharArray(file, encoding);
076  }
077
078/**
079Creates a reader using the UTF-8 encoding.
080*/
081public PageReader(File file)  throws IOException
082  {
083  this(file, DEFAULT_ENCODING);
084  }
085
086public void close() throws IOException
087  {
088  //no underlying stream since everything read into buffer. not much to do.
089  }
090  
091public int read() throws IOException
092  {
093  if (pos == buf.length) {
094    return -1;
095    }
096    
097  char c = buf[pos++];  
098  
099  if (dbg) System.out.println(">>>>>>>> DEBUG: read() from BUF, c=" + StringUtil.viewableAscii(c));
100  adjustReadLineNum(c);
101
102  return c;
103  }
104
105public int read(char[] buf, int start, int len) throws IOException
106  {
107  throw new IOException("not implemented, use the read() method instead");
108  }
109
110/**
111Unreads the current character (which could be EOF) so that the next read will 
112return the current character (or EOF) again.
113*/
114public void unread() throws IOException
115  {
116  char c = 0;
117  
118  if (pos == 0) 
119    {
120    throw new IOException("I am at the beginning of the stream. Cannot unread anything because nothing has been read so far");
121    }
122  else{ 
123    c = buf[--pos];
124    if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from BUF, c=" + StringUtil.viewableAscii(c));
125    }
126  
127  adjustUnreadLineNum(c);
128  }
129
130/**
131Unreads the specified number of characters
132*/
133public void unread(int count)  throws IOException
134  {
135  for (int n = 0; n < count; n++) {
136    unread();
137    }
138  }
139
140/**
141Useful for inserting included files into the stream and then parsing that content in-line
142with the rest of the file.
143*/
144public void insertIntoStream(File file) throws IOException
145  {
146  char[] insert = IOUtil.fileToCharArray(file, encoding);
147
148  char[] result = new char[buf.length + insert.length];
149  System.arraycopy(buf, 0, result, 0, pos);
150  System.arraycopy(insert, 0, result, pos, insert.length);
151  System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos);
152  
153  buf = result;
154  }
155
156/**
157Useful for inserting included files into the stream and then parsing that content in-line
158with the rest of the file.
159*/
160public void insertIntoStream(Reader r) throws IOException
161  {
162  char[] insert = IOUtil.readerToCharArray(r);
163
164  char[] result = new char[buf.length + insert.length];
165  System.arraycopy(buf, 0, result, 0, pos);
166  System.arraycopy(insert, 0, result, pos, insert.length);
167  System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos);
168  
169  buf = result;
170  }
171
172
173void adjustReadLineNum(char c)
174  {
175  // we can read: \r, \r\n , \n all of which increase line count by exactly 1
176  switch (c) 
177    {
178    case '\n': 
179      if (! lastWasCR) {
180        line++;
181        lastcol=col;
182        col=1;
183        }
184      else {
185        lastWasCR = false;
186        }
187      break;
188    
189    case '\r': 
190      line++;
191      lastcol=col;
192      col=1;
193      lastWasCR = true;
194      break;
195    
196    case '\t':
197      col = col + 4;
198      break;
199    
200    default:
201      col++;
202    }
203  }
204
205
206void adjustUnreadLineNum(char c)
207  {
208  // we can unread: \r, \r\n , \n all of which reduce line count by exactly 1
209  switch (c) {
210    case '\n': 
211      pushBackNL = true;
212      line--;
213      col=lastcol;
214      break;
215    case '\r': 
216      if (! pushBackNL) { 
217        line--;
218        col=lastcol;
219        }
220      else{
221        pushBackNL = false;
222        }
223      break;
224    case '\t':
225      col = col - 4;
226      break;
227    default:
228      col--;
229    }
230  }
231
232public int peek() throws IOException
233  {
234  return buf[pos];
235  }
236
237
238/**
239Skips all whitespace characters such that the next {@link read} will
240return the <b>next</b> non-whitespace character (or EOF if there are no
241more characters).
242*/
243public void skipWhitespace() throws IOException
244  {
245  int c = -1;
246  while (true)
247    {
248    c = read();
249    
250    if (c == -1) {
251      break;
252      }
253    
254    if (! Character.isWhitespace(c)) {
255      unread();
256      break;
257      }
258    }
259  }
260  
261
262/**
263Tries to read/consumes the specified char and returns true
264if successful. If the specified char is not found, does not
265consume anything and returns false.
266*/
267public boolean match(int target) throws IOException
268  {
269  int c = read();
270  
271  if (c == target)
272    return true;
273  else
274    unread();
275  
276  return false;
277  }
278
279/**
280Tries to read/consumes the specified non-null string and returns true
281if successful. If the specified string is not found, does not
282consume anything and returns false.
283*/
284public boolean match(String target) throws IOException
285  {
286  if (target == null)
287    throw new IllegalArgumentException("Specified target string was null");
288  
289  int c = -1;
290  for (int i = 0; i < target.length(); i++)
291    {
292    c = read();
293    
294    if ( c == -1 || c != target.charAt(i)) {
295      unread(i+1);
296      return false;
297      }
298    }
299  
300  return true;
301  }
302
303public boolean matchIgnoreCase(String target) throws IOException
304  {
305  if (target == null)
306    throw new IllegalArgumentException("Specified target string was null");
307  
308  int c = -1;
309  for (int i = 0; i < target.length(); i++)
310    {
311    c = read();
312    
313    if ( c == -1 || c != Character.toLowerCase(target.charAt(i))) {
314      unread(i+1);
315      return false;
316      }
317
318    }
319  
320  return true;
321  }
322
323public boolean markSupported()
324  {
325  return false;
326  }
327
328public  int getLine() { 
329  return line; 
330  }
331  
332public  int getCol() { 
333  return col; 
334  }
335
336char[] getBuf() { return buf; }
337int getPos() { return pos; }
338
339//other utility methods
340
341public static void main (String args[]) throws IOException
342  {
343  //CHANGE CHAR BUFFER TO A SMALL VALUE FOR TESTING */
344  StringReader sr = null;
345  PageReader lex = null;
346  int c = -1;
347  
348  System.out.println("Reading an empty string....."); 
349  sr = new StringReader("");  
350  lex = new PageReader(sr);
351  while ( (c = lex.read()) != -1) {
352    testprint(lex, c);
353    }
354  
355  System.out.println("----------------- TEST 2 --------------");
356  sr = new StringReader("abc");   
357  lex = new PageReader(sr);
358  while ( (c = lex.read()) != -1) {
359    testprint(lex, c);
360    //System.out.print(c + " ");
361    }
362
363  System.out.println("----------------- TEST 3 --------------");
364  sr = new StringReader("abcde");   
365  lex = new PageReader(sr);
366  try {
367    c = lex.read();
368    testprint(lex, c);
369    lex.unread();
370    testprint(lex, -10);
371    lex.unread();
372    testprint(lex, -10);
373    c = lex.read();
374    testprint(lex, c);
375    }
376  catch (Exception e) {
377    e.printStackTrace();
378    }
379
380  System.out.println("----------------- TEST 4 --------------");
381  sr = new StringReader("abcd\ne");   
382  lex = new PageReader(sr);
383  try {
384    c = lex.read();
385    testprint(lex, c);
386    lex.unread();
387    testprint(lex, -10);
388
389    for (int i = 0; i < 5; i++) {
390      c = lex.read();
391      testprint(lex, c);
392      }
393
394    for (int i = 0; i < 5; i++) {
395      lex.unread();
396      testprint(lex, -10);
397      }
398    
399    for (int i = 0; i < 5; i++) {
400      c = lex.read();
401      testprint(lex, c);
402      }
403    
404    c = lex.read();
405    testprint(lex, c);
406    }
407  catch (Exception e) {
408    e.printStackTrace();
409    }
410
411  System.out.println("----------------- TEST 5 --------------");
412  sr = new StringReader("abcd\r\ne");   
413  lex = new PageReader(sr);
414  try {
415    c = lex.read();
416    testprint(lex, c, lex.peek());
417    lex.unread();
418    testprint(lex, -10, lex.peek());
419
420    for (int i = 0; i < 5; i++) {
421      c = lex.read();
422      testprint(lex, c, lex.peek());
423      }
424
425    for (int i = 0; i < 5; i++) {
426      lex.unread();
427      testprint(lex, -10, lex.peek());
428      }
429    
430    for (int i = 0; i < 5; i++) {
431      c = lex.read();
432      testprint(lex, c, lex.peek());
433      }
434    
435    c = lex.read();
436    testprint(lex, c, lex.peek());
437    }
438  catch (Exception e) {
439    e.printStackTrace();
440    }
441
442  System.out.println("--------- TEST 6 ---(insert into stream middle)-------");
443  sr = new StringReader("abc"); 
444  lex = new PageReader(sr);
445  
446  try {
447    c = lex.read();
448    testprint(lex, c);
449  
450    StringReader insert = new StringReader("123");
451    System.out.println("inserting \"123\" into the stream\n");
452    lex.insertIntoStream(insert);
453
454    while ( (c = lex.read()) != -1) {
455      testprint(lex, c);
456      }
457    }
458  catch (Exception e) {
459    e.printStackTrace();
460    }
461
462
463  System.out.println("--------- TEST 7 ---(insert into stream begin)-------");
464  sr = new StringReader("abc"); 
465  lex = new PageReader(sr);
466  
467  try {
468    StringReader insert = new StringReader("123");
469    System.out.println("inserting \"123\" into the beginning of stream\n");
470    lex.insertIntoStream(insert);
471
472    while ( (c = lex.read()) != -1) {
473      testprint(lex, c);
474      }
475    }
476  catch (Exception e) {
477    e.printStackTrace();
478    }
479
480  System.out.println("--------- TEST 8 ---(insert into stream end)-------");
481  sr = new StringReader("abc"); 
482  lex = new PageReader(sr);
483  
484  try {
485    while ( (c = lex.read()) != -1) {
486      testprint(lex, c);
487      }
488    StringReader insert = new StringReader("123");
489    System.out.println("inserting \"123\" into the end of the stream\n");
490    lex.insertIntoStream(insert);
491
492    while ( (c = lex.read()) != -1) {
493      testprint(lex, c);
494      }
495    }
496  catch (Exception e) {
497    e.printStackTrace();
498    }
499
500  }
501
502private static void testprint(PageReader lex, int c, int peek)
503  {
504  if (c == -1) {
505    System.out.println("====> recieved EOF (-1) from read().......");
506    }
507
508  System.out.format(
509    "buf=%s, pos=%d, buflen=%d\nline=%d, col=%d, char=[%s]",
510    StringUtil.arrayToString(lex.getBuf()), lex.getPos(), lex.getBuf().length,
511    lex.getLine(), lex.getCol(), 
512    (c == -10) ? "N/A" : StringUtil.viewableAscii((char)c));  
513
514  if (peek != -2)  
515    System.out.format(", peek=[%s]", StringUtil.viewableAscii((char)peek));
516
517  System.out.print("\n\n");
518  }
519
520private static void testprint(PageReader lex, int c)
521  {
522  testprint(lex, c, -2);
523  }
524
525}