001// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 002// The Molly framework is freely distributable under the terms of an 003// MIT-style license. For details, see the molly pages web site at: 004// http://www.mollypages.org/. Use, modify, have fun ! 005 006package fc.web.page; 007 008import java.io.*; 009import java.util.*; 010 011import fc.io.*; 012import fc.util.*; 013 014/** 015A Reader suitable for lexing. Supports all of: <code>peek</code>, 016<code>read</code> and <code>unread</code>. (no JDK 1.5 reader class has 017all of those). Mark/reset is not supported because it's too complex to 018implement given the current <i>fixed-buffer</i> implementation of this 019class. (on the flip-side this implementation does allow to read 020<i>very</i> large files without risk of running out of JDK memory). 021<p> 022<xmp>1 4m 31337 h4x0r! ph33r |\/|y m4d sk1llz !!11!</xmp>:-] 023<p> 024Note 1: If this class is invoked from the command line, setting the dbg 025flag in the code to <code>true</code> is useful. 026 027@author hursh jain 028*/ 029public final class PageReader extends Reader 030{ 031/* 032Old implemenation had a rolling buffer window and a previous spillover buffer. 033It was too complicated, so that implemenation is pastured. 034 035It is MUCH simpler to read the entire file into one character array...(unread, 036read, mark, reset) etc, become trivial. By contrast, if using a 037limited-size-read-buffer implementation (as before) all of those become much 038harder since we loose the existing buffer contents when the buffer is *refilled* 039(so how do you go back in the buffer if the buffer is gone ?). 040*/ 041static final boolean dbg = false; 042 043//our own buf/pos because most/all reader subclasses dont have mark/reset/unread 044char[] buf = null; 045int pos = 0; 046int count = 0; 047int markpos = 0; 048//line, col and other tracking 049int line = 1; 050int col = 0; 051int lastcol = 1; //for unread past a newline 052boolean pushBackNL = false; 053boolean lastWasCR = false; 054String encoding; 055static String DEFAULT_ENCODING = "UTF-8"; 056 057/** 058Creates a new PageReader wrapping the specified reader 059*/ 060public PageReader(Reader r) throws IOException 061 { 062 Argcheck.notnull(r, "specified reader was null"); 063 buf = IOUtil.readerToCharArray(r); 064 this.encoding = DEFAULT_ENCODING; 065 } 066 067/** 068Creates a reader with the specified non-null encoding. 069*/ 070public PageReader(File file, String encoding) throws IOException 071 { 072 Argcheck.notnull(file, "specified file was null"); 073 Argcheck.notnull(encoding, "specified encoding was null"); 074 this.encoding = encoding; 075 buf = IOUtil.fileToCharArray(file, encoding); 076 } 077 078/** 079Creates a reader using the UTF-8 encoding. 080*/ 081public PageReader(File file) throws IOException 082 { 083 this(file, DEFAULT_ENCODING); 084 } 085 086public void close() throws IOException 087 { 088 //no underlying stream since everything read into buffer. not much to do. 089 } 090 091public int read() throws IOException 092 { 093 if (pos == buf.length) { 094 return -1; 095 } 096 097 char c = buf[pos++]; 098 099 if (dbg) System.out.println(">>>>>>>> DEBUG: read() from BUF, c=" + StringUtil.viewableAscii(c)); 100 adjustReadLineNum(c); 101 102 return c; 103 } 104 105public int read(char[] buf, int start, int len) throws IOException 106 { 107 throw new IOException("not implemented, use the read() method instead"); 108 } 109 110/** 111Unreads the current character (which could be EOF) so that the next read will 112return the current character (or EOF) again. 113*/ 114public void unread() throws IOException 115 { 116 char c = 0; 117 118 if (pos == 0) 119 { 120 throw new IOException("I am at the beginning of the stream. Cannot unread anything because nothing has been read so far"); 121 } 122 else{ 123 c = buf[--pos]; 124 if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from BUF, c=" + StringUtil.viewableAscii(c)); 125 } 126 127 adjustUnreadLineNum(c); 128 } 129 130/** 131Unreads the specified number of characters 132*/ 133public void unread(int count) throws IOException 134 { 135 for (int n = 0; n < count; n++) { 136 unread(); 137 } 138 } 139 140/** 141Useful for inserting included files into the stream and then parsing that content in-line 142with the rest of the file. 143*/ 144public void insertIntoStream(File file) throws IOException 145 { 146 char[] insert = IOUtil.fileToCharArray(file, encoding); 147 148 char[] result = new char[buf.length + insert.length]; 149 System.arraycopy(buf, 0, result, 0, pos); 150 System.arraycopy(insert, 0, result, pos, insert.length); 151 System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos); 152 153 buf = result; 154 } 155 156/** 157Useful for inserting included files into the stream and then parsing that content in-line 158with the rest of the file. 159*/ 160public void insertIntoStream(Reader r) throws IOException 161 { 162 char[] insert = IOUtil.readerToCharArray(r); 163 164 char[] result = new char[buf.length + insert.length]; 165 System.arraycopy(buf, 0, result, 0, pos); 166 System.arraycopy(insert, 0, result, pos, insert.length); 167 System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos); 168 169 buf = result; 170 } 171 172 173void adjustReadLineNum(char c) 174 { 175 // we can read: \r, \r\n , \n all of which increase line count by exactly 1 176 switch (c) 177 { 178 case '\n': 179 if (! lastWasCR) { 180 line++; 181 lastcol=col; 182 col=1; 183 } 184 else { 185 lastWasCR = false; 186 } 187 break; 188 189 case '\r': 190 line++; 191 lastcol=col; 192 col=1; 193 lastWasCR = true; 194 break; 195 196 case '\t': 197 col = col + 4; 198 break; 199 200 default: 201 col++; 202 } 203 } 204 205 206void adjustUnreadLineNum(char c) 207 { 208 // we can unread: \r, \r\n , \n all of which reduce line count by exactly 1 209 switch (c) { 210 case '\n': 211 pushBackNL = true; 212 line--; 213 col=lastcol; 214 break; 215 case '\r': 216 if (! pushBackNL) { 217 line--; 218 col=lastcol; 219 } 220 else{ 221 pushBackNL = false; 222 } 223 break; 224 case '\t': 225 col = col - 4; 226 break; 227 default: 228 col--; 229 } 230 } 231 232public int peek() throws IOException 233 { 234 return buf[pos]; 235 } 236 237 238/** 239Skips all whitespace characters such that the next {@link read} will 240return the <b>next</b> non-whitespace character (or EOF if there are no 241more characters). 242*/ 243public void skipWhitespace() throws IOException 244 { 245 int c = -1; 246 while (true) 247 { 248 c = read(); 249 250 if (c == -1) { 251 break; 252 } 253 254 if (! Character.isWhitespace(c)) { 255 unread(); 256 break; 257 } 258 } 259 } 260 261 262 263/** 264Tries to read/consumes the specified char and returns true 265if successful. If the specified char is not found, does not 266consume anything and returns false. 267*/ 268public boolean match(int target) throws IOException 269 { 270 int c = read(); 271 272 if (c == target) 273 return true; 274 else 275 unread(); 276 277 return false; 278 } 279 280/** 281Tries to read/consumes the specified non-null string and returns true 282if successful. If the specified string is not found, does not 283consume anything and returns false. 284*/ 285public boolean match(String target) throws IOException 286 { 287 if (target == null) 288 throw new IllegalArgumentException("Specified target string was null"); 289 290 int c = -1; 291 for (int i = 0; i < target.length(); i++) 292 { 293 c = read(); 294 295 if ( c == -1 || c != target.charAt(i)) { 296 unread(i+1); 297 return false; 298 } 299 } 300 301 return true; 302 } 303 304public boolean matchIgnoreCase(String target) throws IOException 305 { 306 if (target == null) 307 throw new IllegalArgumentException("Specified target string was null"); 308 309 int c = -1; 310 for (int i = 0; i < target.length(); i++) 311 { 312 c = read(); 313 314 if ( c == -1 || c != Character.toLowerCase(target.charAt(i))) { 315 unread(i+1); 316 return false; 317 } 318 319 } 320 321 return true; 322 } 323 324public boolean markSupported() 325 { 326 return false; 327 } 328 329public int getLine() { 330 return line; 331 } 332 333public int getCol() { 334 return col; 335 } 336 337char[] getBuf() { return buf; } 338int getPos() { return pos; } 339 340//other utility methods 341 342public static void main (String args[]) throws IOException 343 { 344 //CHANGE CHAR BUFFER TO A SMALL VALUE FOR TESTING */ 345 StringReader sr = null; 346 PageReader lex = null; 347 int c = -1; 348 349 System.out.println("Reading an empty string....."); 350 sr = new StringReader(""); 351 lex = new PageReader(sr); 352 while ( (c = lex.read()) != -1) { 353 testprint(lex, c); 354 } 355 356 System.out.println("----------------- TEST 2 --------------"); 357 sr = new StringReader("abc"); 358 lex = new PageReader(sr); 359 while ( (c = lex.read()) != -1) { 360 testprint(lex, c); 361 //System.out.print(c + " "); 362 } 363 364 System.out.println("----------------- TEST 3 --------------"); 365 sr = new StringReader("abcde"); 366 lex = new PageReader(sr); 367 try { 368 c = lex.read(); 369 testprint(lex, c); 370 lex.unread(); 371 testprint(lex, -10); 372 lex.unread(); 373 testprint(lex, -10); 374 c = lex.read(); 375 testprint(lex, c); 376 } 377 catch (Exception e) { 378 e.printStackTrace(); 379 } 380 381 System.out.println("----------------- TEST 4 --------------"); 382 sr = new StringReader("abcd\ne"); 383 lex = new PageReader(sr); 384 try { 385 c = lex.read(); 386 testprint(lex, c); 387 lex.unread(); 388 testprint(lex, -10); 389 390 for (int i = 0; i < 5; i++) { 391 c = lex.read(); 392 testprint(lex, c); 393 } 394 395 for (int i = 0; i < 5; i++) { 396 lex.unread(); 397 testprint(lex, -10); 398 } 399 400 for (int i = 0; i < 5; i++) { 401 c = lex.read(); 402 testprint(lex, c); 403 } 404 405 c = lex.read(); 406 testprint(lex, c); 407 } 408 catch (Exception e) { 409 e.printStackTrace(); 410 } 411 412 System.out.println("----------------- TEST 5 --------------"); 413 sr = new StringReader("abcd\r\ne"); 414 lex = new PageReader(sr); 415 try { 416 c = lex.read(); 417 testprint(lex, c, lex.peek()); 418 lex.unread(); 419 testprint(lex, -10, lex.peek()); 420 421 for (int i = 0; i < 5; i++) { 422 c = lex.read(); 423 testprint(lex, c, lex.peek()); 424 } 425 426 for (int i = 0; i < 5; i++) { 427 lex.unread(); 428 testprint(lex, -10, lex.peek()); 429 } 430 431 for (int i = 0; i < 5; i++) { 432 c = lex.read(); 433 testprint(lex, c, lex.peek()); 434 } 435 436 c = lex.read(); 437 testprint(lex, c, lex.peek()); 438 } 439 catch (Exception e) { 440 e.printStackTrace(); 441 } 442 443 System.out.println("--------- TEST 6 ---(insert into stream middle)-------"); 444 sr = new StringReader("abc"); 445 lex = new PageReader(sr); 446 447 try { 448 c = lex.read(); 449 testprint(lex, c); 450 451 StringReader insert = new StringReader("123"); 452 System.out.println("inserting \"123\" into the stream\n"); 453 lex.insertIntoStream(insert); 454 455 while ( (c = lex.read()) != -1) { 456 testprint(lex, c); 457 } 458 } 459 catch (Exception e) { 460 e.printStackTrace(); 461 } 462 463 464 System.out.println("--------- TEST 7 ---(insert into stream begin)-------"); 465 sr = new StringReader("abc"); 466 lex = new PageReader(sr); 467 468 try { 469 StringReader insert = new StringReader("123"); 470 System.out.println("inserting \"123\" into the beginning of stream\n"); 471 lex.insertIntoStream(insert); 472 473 while ( (c = lex.read()) != -1) { 474 testprint(lex, c); 475 } 476 } 477 catch (Exception e) { 478 e.printStackTrace(); 479 } 480 481 System.out.println("--------- TEST 8 ---(insert into stream end)-------"); 482 sr = new StringReader("abc"); 483 lex = new PageReader(sr); 484 485 try { 486 while ( (c = lex.read()) != -1) { 487 testprint(lex, c); 488 } 489 StringReader insert = new StringReader("123"); 490 System.out.println("inserting \"123\" into the end of the stream\n"); 491 lex.insertIntoStream(insert); 492 493 while ( (c = lex.read()) != -1) { 494 testprint(lex, c); 495 } 496 } 497 catch (Exception e) { 498 e.printStackTrace(); 499 } 500 501 } 502 503private static void testprint(PageReader lex, int c, int peek) 504 { 505 if (c == -1) { 506 System.out.println("====> recieved EOF (-1) from read()......."); 507 } 508 509 System.out.format( 510 "buf=%s, pos=%d, buflen=%d\nline=%d, col=%d, char=[%s]", 511 StringUtil.arrayToString(lex.getBuf()), lex.getPos(), lex.getBuf().length, 512 lex.getLine(), lex.getCol(), 513 (c == -10) ? "N/A" : StringUtil.viewableAscii((char)c)); 514 515 if (peek != -2) 516 System.out.format(", peek=[%s]", StringUtil.viewableAscii((char)peek)); 517 518 System.out.print("\n\n"); 519 } 520 521private static void testprint(PageReader lex, int c) 522 { 523 testprint(lex, c, -2); 524 } 525 526}