001// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 
002// The Molly framework is freely distributable under the terms of an
003// MIT-style license. For details, see the molly pages web site at:
004// http://www.mollypages.org/. Use, modify, have fun !
005
006package fc.web.page;
007
008import java.io.*;
009import java.util.*;
010
011import fc.io.*;
012import fc.util.*;
013
014/**
015A Reader suitable for lexing. Supports all of: <code>peek</code>,
016<code>read</code> and <code>unread</code>. (no JDK 1.5 reader class has
017all of those). Mark/reset is not supported because it's too complex to
018implement given the current <i>fixed-buffer</i> implementation of this
019class. (on the flip-side this implementation does allow to read
020<i>very</i> large files without risk of running out of JDK memory).
021<p>
022<xmp>1 4m 31337 h4x0r! ph33r |\/|y m4d sk1llz !!11!</xmp>:-]
023<p>
024Note 1: If this class is invoked from the command line, setting the dbg
025flag in the code to <code>true</code> is useful.
026
027@author hursh jain
028*/
029public final class PageReader extends Reader
030{
031/*
032Old implemenation had a rolling buffer window and a previous spillover buffer.
033It was too complicated, so that implemenation is pastured.
034
035It is MUCH simpler to read the entire file into one character array...(unread,
036read, mark, reset) etc, become trivial. By contrast, if using a
037limited-size-read-buffer implementation (as before) all of those become much
038harder since we loose the existing buffer contents when the buffer is *refilled*
039(so how do you go back in the buffer if the buffer is gone ?).
040*/
041static final boolean dbg = false;
042
043//our own buf/pos because most/all reader subclasses dont have mark/reset/unread  
044char[]      buf       = null;
045int       pos       = 0;
046int       count       = 0;
047int       markpos     = 0;
048//line, col and other tracking 
049int       line      = 1;
050int       col       = 0;
051int       lastcol     = 1;  //for unread past a newline
052boolean     pushBackNL    = false;
053boolean     lastWasCR   = false;
054String      encoding;
055static String DEFAULT_ENCODING = "UTF-8";
056
057/** 
058Creates a new PageReader wrapping the specified reader
059*/
060public PageReader(Reader r) throws IOException
061  {
062  Argcheck.notnull(r, "specified reader was null");
063  buf = IOUtil.readerToCharArray(r);
064  this.encoding = DEFAULT_ENCODING;
065  }
066
067/**
068Creates a reader with the specified non-null encoding.
069*/
070public PageReader(File file, String encoding)  throws IOException
071  {
072  Argcheck.notnull(file, "specified file was null");
073  Argcheck.notnull(encoding, "specified encoding was null");
074  this.encoding = encoding;
075  buf = IOUtil.fileToCharArray(file, encoding);
076  }
077
078/**
079Creates a reader using the UTF-8 encoding.
080*/
081public PageReader(File file)  throws IOException
082  {
083  this(file, DEFAULT_ENCODING);
084  }
085
086public void close() throws IOException
087  {
088  //no underlying stream since everything read into buffer. not much to do.
089  }
090  
091public int read() throws IOException
092  {
093  if (pos == buf.length) {
094    return -1;
095    }
096    
097  char c = buf[pos++];  
098  
099  if (dbg) System.out.println(">>>>>>>> DEBUG: read() from BUF, c=" + StringUtil.viewableAscii(c));
100  adjustReadLineNum(c);
101
102  return c;
103  }
104
105public int read(char[] buf, int start, int len) throws IOException
106  {
107  throw new IOException("not implemented, use the read() method instead");
108  }
109
110/**
111Unreads the current character (which could be EOF) so that the next read will 
112return the current character (or EOF) again.
113*/
114public void unread() throws IOException
115  {
116  char c = 0;
117  
118  if (pos == 0) 
119    {
120    throw new IOException("I am at the beginning of the stream. Cannot unread anything because nothing has been read so far");
121    }
122  else{ 
123    c = buf[--pos];
124    if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from BUF, c=" + StringUtil.viewableAscii(c));
125    }
126  
127  adjustUnreadLineNum(c);
128  }
129
130/**
131Unreads the specified number of characters
132*/
133public void unread(int count)  throws IOException
134  {
135  for (int n = 0; n < count; n++) {
136    unread();
137    }
138  }
139
140/**
141Useful for inserting included files into the stream and then parsing that content in-line
142with the rest of the file.
143*/
144public void insertIntoStream(File file) throws IOException
145  {
146  char[] insert = IOUtil.fileToCharArray(file, encoding);
147
148  char[] result = new char[buf.length + insert.length];
149  System.arraycopy(buf, 0, result, 0, pos);
150  System.arraycopy(insert, 0, result, pos, insert.length);
151  System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos);
152  
153  buf = result;
154  }
155
156/**
157Useful for inserting included files into the stream and then parsing that content in-line
158with the rest of the file.
159*/
160public void insertIntoStream(Reader r) throws IOException
161  {
162  char[] insert = IOUtil.readerToCharArray(r);
163
164  char[] result = new char[buf.length + insert.length];
165  System.arraycopy(buf, 0, result, 0, pos);
166  System.arraycopy(insert, 0, result, pos, insert.length);
167  System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos);
168  
169  buf = result;
170  }
171
172
173void adjustReadLineNum(char c)
174  {
175  // we can read: \r, \r\n , \n all of which increase line count by exactly 1
176  switch (c) 
177    {
178    case '\n': 
179      if (! lastWasCR) {
180        line++;
181        lastcol=col;
182        col=1;
183        }
184      else {
185        lastWasCR = false;
186        }
187      break;
188    
189    case '\r': 
190      line++;
191      lastcol=col;
192      col=1;
193      lastWasCR = true;
194      break;
195    
196    case '\t':
197      col = col + 4;
198      break;
199    
200    default:
201      col++;
202    }
203  }
204
205
206void adjustUnreadLineNum(char c)
207  {
208  // we can unread: \r, \r\n , \n all of which reduce line count by exactly 1
209  switch (c) {
210    case '\n': 
211      pushBackNL = true;
212      line--;
213      col=lastcol;
214      break;
215    case '\r': 
216      if (! pushBackNL) { 
217        line--;
218        col=lastcol;
219        }
220      else{
221        pushBackNL = false;
222        }
223      break;
224    case '\t':
225      col = col - 4;
226      break;
227    default:
228      col--;
229    }
230  }
231
232public int peek() throws IOException
233  {
234  return buf[pos];
235  }
236
237
238/**
239Skips all whitespace characters such that the next {@link read} will
240return the <b>next</b> non-whitespace character (or EOF if there are no
241more characters).
242*/
243public void skipWhitespace() throws IOException
244  {
245  int c = -1;
246  while (true)
247    {
248    c = read();
249    
250    if (c == -1) {
251      break;
252      }
253    
254    if (! Character.isWhitespace(c)) {
255      unread();
256      break;
257      }
258    }
259  }
260  
261
262
263/**
264Tries to read/consumes the specified char and returns true
265if successful. If the specified char is not found, does not
266consume anything and returns false.
267*/
268public boolean match(int target) throws IOException
269  {
270  int c = read();
271  
272  if (c == target)
273    return true;
274  else
275    unread();
276  
277  return false;
278  }
279
280/**
281Tries to read/consumes the specified non-null string and returns true
282if successful. If the specified string is not found, does not
283consume anything and returns false.
284*/
285public boolean match(String target) throws IOException
286  {
287  if (target == null)
288    throw new IllegalArgumentException("Specified target string was null");
289  
290  int c = -1;
291  for (int i = 0; i < target.length(); i++)
292    {
293    c = read();
294    
295    if ( c == -1 || c != target.charAt(i)) {
296      unread(i+1);
297      return false;
298      }
299    }
300  
301  return true;
302  }
303
304public boolean matchIgnoreCase(String target) throws IOException
305  {
306  if (target == null)
307    throw new IllegalArgumentException("Specified target string was null");
308  
309  int c = -1;
310  for (int i = 0; i < target.length(); i++)
311    {
312    c = read();
313    
314    if ( c == -1 || c != Character.toLowerCase(target.charAt(i))) {
315      unread(i+1);
316      return false;
317      }
318
319    }
320  
321  return true;
322  }
323
324public boolean markSupported()
325  {
326  return false;
327  }
328
329public  int getLine() { 
330  return line; 
331  }
332  
333public  int getCol() { 
334  return col; 
335  }
336
337char[] getBuf() { return buf; }
338int getPos() { return pos; }
339
340//other utility methods
341
342public static void main (String args[]) throws IOException
343  {
344  //CHANGE CHAR BUFFER TO A SMALL VALUE FOR TESTING */
345  StringReader sr = null;
346  PageReader lex = null;
347  int c = -1;
348  
349  System.out.println("Reading an empty string....."); 
350  sr = new StringReader("");  
351  lex = new PageReader(sr);
352  while ( (c = lex.read()) != -1) {
353    testprint(lex, c);
354    }
355  
356  System.out.println("----------------- TEST 2 --------------");
357  sr = new StringReader("abc");   
358  lex = new PageReader(sr);
359  while ( (c = lex.read()) != -1) {
360    testprint(lex, c);
361    //System.out.print(c + " ");
362    }
363
364  System.out.println("----------------- TEST 3 --------------");
365  sr = new StringReader("abcde");   
366  lex = new PageReader(sr);
367  try {
368    c = lex.read();
369    testprint(lex, c);
370    lex.unread();
371    testprint(lex, -10);
372    lex.unread();
373    testprint(lex, -10);
374    c = lex.read();
375    testprint(lex, c);
376    }
377  catch (Exception e) {
378    e.printStackTrace();
379    }
380
381  System.out.println("----------------- TEST 4 --------------");
382  sr = new StringReader("abcd\ne");   
383  lex = new PageReader(sr);
384  try {
385    c = lex.read();
386    testprint(lex, c);
387    lex.unread();
388    testprint(lex, -10);
389
390    for (int i = 0; i < 5; i++) {
391      c = lex.read();
392      testprint(lex, c);
393      }
394
395    for (int i = 0; i < 5; i++) {
396      lex.unread();
397      testprint(lex, -10);
398      }
399    
400    for (int i = 0; i < 5; i++) {
401      c = lex.read();
402      testprint(lex, c);
403      }
404    
405    c = lex.read();
406    testprint(lex, c);
407    }
408  catch (Exception e) {
409    e.printStackTrace();
410    }
411
412  System.out.println("----------------- TEST 5 --------------");
413  sr = new StringReader("abcd\r\ne");   
414  lex = new PageReader(sr);
415  try {
416    c = lex.read();
417    testprint(lex, c, lex.peek());
418    lex.unread();
419    testprint(lex, -10, lex.peek());
420
421    for (int i = 0; i < 5; i++) {
422      c = lex.read();
423      testprint(lex, c, lex.peek());
424      }
425
426    for (int i = 0; i < 5; i++) {
427      lex.unread();
428      testprint(lex, -10, lex.peek());
429      }
430    
431    for (int i = 0; i < 5; i++) {
432      c = lex.read();
433      testprint(lex, c, lex.peek());
434      }
435    
436    c = lex.read();
437    testprint(lex, c, lex.peek());
438    }
439  catch (Exception e) {
440    e.printStackTrace();
441    }
442
443  System.out.println("--------- TEST 6 ---(insert into stream middle)-------");
444  sr = new StringReader("abc"); 
445  lex = new PageReader(sr);
446  
447  try {
448    c = lex.read();
449    testprint(lex, c);
450  
451    StringReader insert = new StringReader("123");
452    System.out.println("inserting \"123\" into the stream\n");
453    lex.insertIntoStream(insert);
454
455    while ( (c = lex.read()) != -1) {
456      testprint(lex, c);
457      }
458    }
459  catch (Exception e) {
460    e.printStackTrace();
461    }
462
463
464  System.out.println("--------- TEST 7 ---(insert into stream begin)-------");
465  sr = new StringReader("abc"); 
466  lex = new PageReader(sr);
467  
468  try {
469    StringReader insert = new StringReader("123");
470    System.out.println("inserting \"123\" into the beginning of stream\n");
471    lex.insertIntoStream(insert);
472
473    while ( (c = lex.read()) != -1) {
474      testprint(lex, c);
475      }
476    }
477  catch (Exception e) {
478    e.printStackTrace();
479    }
480
481  System.out.println("--------- TEST 8 ---(insert into stream end)-------");
482  sr = new StringReader("abc"); 
483  lex = new PageReader(sr);
484  
485  try {
486    while ( (c = lex.read()) != -1) {
487      testprint(lex, c);
488      }
489    StringReader insert = new StringReader("123");
490    System.out.println("inserting \"123\" into the end of the stream\n");
491    lex.insertIntoStream(insert);
492
493    while ( (c = lex.read()) != -1) {
494      testprint(lex, c);
495      }
496    }
497  catch (Exception e) {
498    e.printStackTrace();
499    }
500
501  }
502
503private static void testprint(PageReader lex, int c, int peek)
504  {
505  if (c == -1) {
506    System.out.println("====> recieved EOF (-1) from read().......");
507    }
508
509  System.out.format(
510    "buf=%s, pos=%d, buflen=%d\nline=%d, col=%d, char=[%s]",
511    StringUtil.arrayToString(lex.getBuf()), lex.getPos(), lex.getBuf().length,
512    lex.getLine(), lex.getCol(), 
513    (c == -10) ? "N/A" : StringUtil.viewableAscii((char)c));  
514
515  if (peek != -2)  
516    System.out.format(", peek=[%s]", StringUtil.viewableAscii((char)peek));
517
518  System.out.print("\n\n");
519  }
520
521private static void testprint(PageReader lex, int c)
522  {
523  testprint(lex, c, -2);
524  }
525
526}