// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 
// The Molly framework is freely distributable under the terms of an
// MIT-style license. For details, see the molly pages web site at:
// http://www.mollypages.org/. Use, modify, have fun !

package fc.web.page;

import java.io.*;
import java.util.*;

import fc.io.*;
import fc.util.*;

/**
A Reader suitable for lexing. Supports all of: <code>peek</code>,
<code>read</code> and <code>unread</code>. (no JDK 1.5 reader class has
all of those). Mark/reset is not supported because it's too complex to
implement given the current <i>fixed-buffer</i> implementation of this
class. (on the flip-side this implementation does allow to read
<i>very</i> large files without risk of running out of JDK memory).
<p>
<xmp>1 4m 31337 h4x0r! ph33r |\/|y m4d sk1llz !!11!</xmp>:-]
<p>
Only the single character {@link #read()} is implemented; the bulk
{@link #read(char[], int, int)} always throws an <code>IOException</code>.
<p>
Unread depth: everything read so far can be unread until the internal
buffer has been recycled for the first time; after that at least
<code>BUF_SIZE</code> characters can always be unread. A read that
returned EOF (-1) counts as one unreadable "character".
<p>
Line tracking: <code>\r</code>, <code>\n</code> and <code>\r\n</code> each
advance the line number by exactly one. A tab advances the column by 4.
<p>
Note 1: If this class is invoked from the command line, setting the dbg
flag in the code to <code>true</code> is useful.

@author hursh jain
*/
public final class PageReader extends Reader
{
/*
IMPL NOTE: It is MUCH MUCH simpler to read the entire file into one
character array...(unread, read, mark, reset) etc, become trivial. By
contrast, if using a limited-size-read-buffer implementation, all of
those become much harder since we lose the existing buffer contents when
the buffer is *refilled* (so how do you go back in the buffer if the
buffer is gone ?). I have used a fixed size buffer implementation
because it's probably better for avoiding out-of-memory whilst reading
huge files... but really this is unnecessary..

Buffer scheme (see fill()):
 - buf[0..count) holds the most recently read characters, pos is the
   next index to hand out.
 - while buf still has free room, new data is APPENDED to it.
 - once buf is completely full, new data is read into a scratch buffer
   and the buffers are rotated: the full buf becomes prevbuf. Hence
   prevbuf, when non-null, is always completely full.
 - unread() walks back through buf and then through prevbuf; read() and
   peek() drain prevbuf (from prevbuf_pos) before touching buf.
 - hitting EOF never modifies buf, prevbuf, pos or count.
*/
static final boolean dbg = false;

static final int BUF_SIZE;
static {
	if (dbg) 
		BUF_SIZE = 4;
	else 
		BUF_SIZE = 8128;
	}

Reader			reader;
//our own buf/pos because most/all reader subclasses dont have mark/reset/unread  
char[]			prevbuf			= null;
int				prevbuf_pos  	= BUF_SIZE;
char[]			buf 			= new char[BUF_SIZE];
int				pos				= 0;
int				count 			= 0;
int				markpos			= 0;		//unused (mark/reset is not supported)
//line, col and other tracking 
int				line			= 1;
int				col				= 0;
int				lastcol			= 1; 	//for unread past a newline
boolean			pushBackNL		= false;	//last unread char was \n (so a \r unread next is part of the same \r\n)
boolean			lastWasCR		= false;	//last read char was \r (so a \n read next is part of the same \r\n)
boolean 		firstRead		= false;	//latched to true once the underlying reader has been read from
boolean			EOFSeen			= false;	//latched to true once the underlying reader has reported EOF
//scratch array that fill() reads into when buf is full, so that neither 
//buf nor prevbuf is touched if that read turns out to be EOF.
private char[]	sparebuf		= null;
//number of read() calls that returned EOF and have not been unread yet
private int		eofPending		= 0;

/** 
Creates a new PageReader wrapping the specified reader
*/
public PageReader(Reader r)
	{
	Argcheck.notnull(r, "specified reader was null");
	this.reader = r;
	}

/**
Creates a reader with the specified non-null encoding.

@throws IOException	if the file cannot be opened or the encoding is
					not supported
*/
public PageReader(File file, String encoding)  throws IOException
	{
	Argcheck.notnull(file, "specified file was null");
	Argcheck.notnull(encoding, "specified encoding was null");	
	FileInputStream in = new FileInputStream(file);
	try {
		//no need to buffer, we have our own
		reader = new InputStreamReader(in, encoding); 
		}
	catch (UnsupportedEncodingException e) {
		//don't leak the already opened file 
		try { 
			in.close(); 
			}
		catch (IOException ignored) {
			//the encoding failure is the error worth reporting
			}
		throw e;
		}
	}

/**
Creates a reader using the UTF-8 encoding.
*/
public PageReader(File file)  throws IOException
	{
	this(file, "UTF-8");
	}

/**
Closes the underlying reader.
*/
@Override
public void close() throws IOException
	{
	reader.close();
	}

/**
Returns true if characters that were unread back into the previous
buffer are still waiting to be re-read.
*/
private final boolean shouldReadFromPreviousBuffer()
	{
	return prevbuf != null && prevbuf_pos < prevbuf.length;
	}

/**
Gets more characters from the underlying reader. Must only be called
when every character in <code>buf</code> (and <code>prevbuf</code>) has
been handed out, i.e., when <code>pos &gt;= count</code>.

@return	true if at least one new character is now available at 
		<code>buf[pos]</code>, false if the end of the stream has been 
		reached (in which case no buffer state has been modified).
*/
private boolean fill() throws IOException
	{
	//latched: don't hit the underlying reader again after EOF
	if (EOFSeen) {
		return false;
		}
	
	//full buffer => read into scratch and rotate; otherwise append
	final boolean rotate = (count >= buf.length);
	final char[] target;
	final int offset;
	if (rotate) {
		target = (sparebuf != null) ? sparebuf : new char[BUF_SIZE];
		offset = 0;
		}
	else {
		target = buf;
		offset = count;
		}
	
	//a reader should not return 0 for a non-zero length request but if
	//it does, that means "nothing yet", not "a NUL character".
	int n;
	do {
		n = reader.read(target, offset, target.length - offset);
		} 
	while (n == 0);
	firstRead = true;	//latch
	
	if (n < 0) {
		EOFSeen = true;
		if (rotate) {
			sparebuf = target; //keep the scratch array around
			}
		return false;
		}

	if (rotate) {
		sparebuf = prevbuf;  //recycle the oldest array (null the first time)
		prevbuf = buf;
		prevbuf_pos = prevbuf.length;
		buf = target;
		pos = 0;
		count = n;
		}
	else {
		count += n;
		}
	
	return true;
	}

/**
Reads and consumes the next character.

@return	the character read (0 to 65535) or -1 if the end of the stream
		has been reached
*/
@Override
public int read() throws IOException
	{
	char c;
	
	if (shouldReadFromPreviousBuffer()) {
		c = prevbuf[prevbuf_pos++];
		if (dbg) System.out.println(">>>>>>>> DEBUG: read() from PREVBUF, c=" + StringUtil.viewableAscii(c));
		}
	else {
		//count also marks the max readable position in the buffer
		//(anything after that is junk from a previous read).
		if (pos >= count && ! fill()) {
			eofPending++;	//so that unread() can push back this EOF 
			return -1;
			}
		
		c = buf[pos++];
		if (dbg) System.out.println(">>>>>>>> DEBUG: read() from BUF, c=" + StringUtil.viewableAscii(c));
		}

	adjustReadLineNum(c);
	return c;
	}

/**
Not implemented.

@throws IOException	always
*/
@Override
public int read(char[] buf, int start, int len) throws IOException
	{
	throw new IOException("not implemented, use the read() method instead");
	}

/**
Unreads the current character (which could be EOF) so that the next read will 
return the current character (or EOF) again.

@throws IOException	if there is nothing (more) that can be unread
*/
public void unread() throws IOException
	{
	//the last thing read was EOF: pushing that back changes no buffer
	//position, the next read simply reports EOF again.
	if (eofPending > 0) {
		eofPending--;
		if (dbg) System.out.println(">>>>>>>> DEBUG: unread() of EOF");
		return;
		}
		
	char c;
	
	if (pos > 0) {
		c = buf[--pos];
		if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from BUF, c=" + StringUtil.viewableAscii(c));
		}
	else if (prevbuf == null) {
		throw new IOException("I am at the beginning of the stream. Cannot unread anything because nothing has been read so far");
		}
	else if (prevbuf_pos == 0) {
		throw new IOException("Unread limit reached. Cannot unread more characters");
		}
	else {
		c = prevbuf[--prevbuf_pos];
		if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from PREVBUF, c=" + StringUtil.viewableAscii(c));
		}
	
	adjustUnreadLineNum(c);
	}

/**
Unreads the specified number of characters. Does nothing if the 
specified count is zero or negative.
*/
public void unread(int count)  throws IOException
	{
	for (int n = 0; n < count; n++) {
		unread();
		}
	}

/**
Updates the line/column bookkeeping for a character that was just read.
*/
void adjustReadLineNum(char c)
	{
	//reading forward ends any unread-in-progress of a \r\n pair
	pushBackNL = false;
	
	// we can read: \r, \r\n , \n all of which increase line count by exactly 1
	switch (c) 
		{
		case '\n': 
			if (lastWasCR) {
				//second half of \r\n, the \r already counted this line
				lastWasCR = false;
				}
			else {
				line++;
				lastcol=col;
				col=1;
				}
			break;
		
		case '\r': 
			line++;
			lastcol=col;
			col=1;
			lastWasCR = true;
			break;
		
		case '\t':
			lastWasCR = false;  //only a \n directly after \r pairs with it
			col = col + 4;
			break;
		
		default:
			lastWasCR = false;  //only a \n directly after \r pairs with it
			col++;
		}
	}


/**
Updates the line/column bookkeeping for a character that was just unread.
*/
void adjustUnreadLineNum(char c)
	{
	//whatever gets read next is read afresh. (unreading a \n always 
	//decrements the line, so re-reading it must always increment it)
	lastWasCR = false;
	
	// we can unread: \r, \r\n , \n all of which reduce line count by exactly 1
	switch (c) {
		case '\n': 
			pushBackNL = true;
			line--;
			col=lastcol;
			break;
		case '\r': 
			if (pushBackNL) { 
				//first half of \r\n, the \n already un-counted this line
				pushBackNL = false;
				}
			else{
				line--;
				col=lastcol;
				}
			break;
		case '\t':
			pushBackNL = false;  //only a \r directly before \n pairs with it
			col = col - 4;
			break;
		default:
			pushBackNL = false;  //only a \r directly before \n pairs with it
			col--;
		}
	}

/**
Returns the character that the next {@link #read()} will return, without
consuming it (line and column numbers are not affected).

@return	the next character or -1 if the end of the stream has been reached
*/
public int peek() throws IOException
	{
	//unread characters waiting in the previous buffer come first
	if (shouldReadFromPreviousBuffer()) {
		return prevbuf[prevbuf_pos];
		}

	// in the worst case, peek() is called when we are the end of the buffer	
	// which means we have to refill the buffer. fill() keeps the old
	// contents around (in prevbuf) so unread() continues to work.
	if (pos >= count && ! fill()) {
		return -1;
		}
	
	return buf[pos];
	}


/**
Skips all whitespace characters such that the next {@link #read()} will
return the <b>next</b> non-whitespace character (or EOF if there are no
more characters).
*/
public void skipWhitespace() throws IOException
	{
	int c = -1;
	while (true)
		{
		c = read();
		
		if (c == -1) {
			break;
			}
		
		if (! Character.isWhitespace(c)) {
			unread();
			break;
			}
		}
	}
	

/**
Tries to read/consumes the specified char and returns true
if successful. If the specified char is not found, does not
consume anything and returns false.
*/
public boolean match(int target) throws IOException
	{
	int c = read();
	
	if (c == target)
		return true;
	else
		unread();
	
	return false;
	}

/**
Tries to read/consumes the specified non-null string and returns true
if successful. If the specified string is not found, does not
consume anything and returns false.
*/
public boolean match(String target) throws IOException
	{
	return matchImpl(target, false);
	}

/**
Tries to read/consumes the specified non-null string, ignoring case
differences, and returns true if successful. If the specified string is
not found, does not consume anything and returns false.
*/
public boolean matchIgnoreCase(String target) throws IOException
	{
	return matchImpl(target, true);
	}

/**
Shared implementation of {@link #match(String)} and 
{@link #matchIgnoreCase(String)}.
*/
private boolean matchImpl(String target, boolean ignoreCase) throws IOException
	{
	if (target == null)
		throw new IllegalArgumentException("Specified target string was null");
	
	final int len = target.length();
	for (int i = 0; i < len; i++)
		{
		int c = read();
		char wanted = target.charAt(i);
		
		boolean matched = (c != -1) 
			&& (ignoreCase ? sameIgnoringCase((char) c, wanted) : c == wanted);
		
		if (! matched) {
			//push back the i chars that did match plus the read that
			//did not (a char or EOF)
			unread(i+1);
			return false;
			}
		}
	
	return true;
	}

/**
Compares 2 chars ignoring case: equal as is, equal when both are upper
cased or equal when the upper cased forms are lower cased again.
*/
private static boolean sameIgnoringCase(char a, char b)
	{
	if (a == b)
		return true;
	
	char upperA = Character.toUpperCase(a);
	char upperB = Character.toUpperCase(b);
	
	return upperA == upperB 
		|| Character.toLowerCase(upperA) == Character.toLowerCase(upperB);
	}

/**
Always returns false, mark/reset is not supported.
*/
@Override
public boolean markSupported()
	{
	return false;
	}

/**
Returns the current line number. (the first line is line 1)
*/
public 	int getLine() { 
	return line; 
	}
	
/**
Returns the current column. This starts out as 0, is set to 1 when a 
line break is read and is advanced by 1 for every other character read 
(4 for a tab).
*/
public 	int getCol() { 
	return col; 
	}

//package-private peeks at the internals, used by the test code in main()
char[] getBuf() { return buf; }
char[] getPrevBuf() { return prevbuf; }
int getPos() { return pos; }
int getCount() { return count; } 
int getPrevPos() { return prevbuf_pos; }

//other utility methods

/**
Ad-hoc command line test, prints the reader state after various 
read/unread/peek sequences.
*/
public static void main (String args[]) throws IOException
	{
	//CHANGE CHAR BUFFER TO A SMALL VALUE FOR TESTING */
	StringReader sr = null;
	PageReader lex = null;
	int c = -1;
	
	System.out.println("Reading an empty string.....");	
	sr = new StringReader("");	
	lex = new PageReader(sr);
	while ( (c = lex.read()) != -1) {
		testprint(lex, c);
		}
	
	System.out.println("----------------- TEST 2 --------------");
	sr = new StringReader("abc");		
	lex = new PageReader(sr);
	while ( (c = lex.read()) != -1) {
		testprint(lex, c);
		//System.out.print(c + " ");
		}

	System.out.println("----------------- TEST 3 --------------");
	sr = new StringReader("abcde");		
	lex = new PageReader(sr);
	try	{
		c = lex.read();
		testprint(lex, c);
		lex.unread();
		testprint(lex, -10);
		//back at the start of the stream: this unread is expected to throw
		lex.unread();
		testprint(lex, -10);
		c = lex.read();
		testprint(lex, c);
		}
	catch (Exception e) {
		e.printStackTrace();
		}

	System.out.println("----------------- TEST 4 --------------");
	sr = new StringReader("abcd\ne");		
	lex = new PageReader(sr);
	try	{
		c = lex.read();
		testprint(lex, c);
		lex.unread();
		testprint(lex, -10);

		for (int i = 0; i < 5; i++) {
			c = lex.read();
			testprint(lex, c);
			}

		for (int i = 0; i < 5; i++) {
			lex.unread();
			testprint(lex, -10);
			}
		
		for (int i = 0; i < 5; i++) {
			c = lex.read();
			testprint(lex, c);
			}
		
		c = lex.read();
		testprint(lex, c);
		}
	catch (Exception e) {
		e.printStackTrace();
		}

	System.out.println("----------------- TEST 5 --------------");
	sr = new StringReader("abcd\r\ne");		
	lex = new PageReader(sr);
	try	{
		c = lex.read();
		testprint(lex, c, lex.peek());
		lex.unread();
		testprint(lex, -10, lex.peek());

		for (int i = 0; i < 5; i++) {
			c = lex.read();
			testprint(lex, c, lex.peek());
			}

		for (int i = 0; i < 5; i++) {
			lex.unread();
			testprint(lex, -10, lex.peek());
			}
		
		for (int i = 0; i < 5; i++) {
			c = lex.read();
			testprint(lex, c, lex.peek());
			}
		
		c = lex.read();
		testprint(lex, c, lex.peek());
		}
	catch (Exception e) {
		e.printStackTrace();
		}

	}

/**
Prints the reader state. For <code>c</code>, -10 means "no character to
show"; for <code>peek</code>, -2 means "don't show a peek value".
*/
private static void testprint(PageReader lex, int c, int peek)
	{
	if (c == -1) {
		System.out.println("====> recieved EOF (-1) from read().......");
		}

	System.out.format(
		"prevbuf=%s, prevpos=%d\nbuf=%s, pos=%d, count=%d\nline=%d, col=%d, char=[%s]",
		StringUtil.arrayToString(lex.getPrevBuf()), lex.getPrevPos(),
 		StringUtil.arrayToString(lex.getBuf()), lex.getPos(), lex.getCount(),
		lex.getLine(), lex.getCol(), 
		(c == -10) ? "N/A" : StringUtil.viewableAscii((char)c));	

	if (peek != -2)  
		System.out.format(", peek=[%s]", StringUtil.viewableAscii((char)peek));

	System.out.print("\n\n");
	}

private static void testprint(PageReader lex, int c)
	{
	testprint(lex, c, -2);
	}

}
