

#include "Scanner.h"
#include <memory.h>
#include <string.h>

// Duplicates a C string into a buffer allocated with new[].
// A NULL value yields a newly allocated empty string, so the result is
// never NULL.
char* coco_string_create(const char* value) {
	if (!value) {
		char* empty = new char[1];
		empty[0] = 0;
		return empty;
	}

	int len = strlen(value);
	char* copy = new char[len+1];
	strcpy(copy, value);
	return copy;
}

// Copies `length` characters of value, beginning at startIndex, into a new
// NUL-terminated buffer allocated with new[].
// A NULL value yields a newly allocated empty string. startIndex and length
// are not range-checked here.
char* coco_string_create(const char *value , int startIndex, int length) {
	if (!value) {
		char* empty = new char[1];
		empty[0] = 0;
		return empty;
	}

	char* slice = new char[length+1];
	strncpy(slice, value + startIndex, length);
	slice[length] = 0;
	return slice;
}

// Releases a string and resets the caller's pointer to NULL.
// data is expected to come from the coco_string_* helpers in this file,
// which allocate with new[]; a NULL pointer is accepted.
void coco_string_delete(char* &data) {
	// The array form is required to match new char[...]; delete[] on a
	// NULL pointer is a no-op, so no explicit check is needed.
	delete [] data;
	data = NULL;
}

// Replaces data with a freshly allocated copy of value.
// A NULL value produces an empty string. The previous buffer (which may be
// NULL) is released with delete[], matching the new[] allocations used by
// the coco_string_* helpers in this file.
void coco_string_set(char* &data, const char* value) {
	char* newData;
	if (value) {
		int length = strlen(value);
		newData = new char[length+1];
		strcpy(newData, value);
	} else {
		newData = new char[1];
		newData[0] = 0;
	}
	// The copy above is complete before the old buffer is freed, so value
	// may alias data.
	delete [] data;
	data = newData;
}

// Length of data in characters; a NULL pointer counts as length 0.
int coco_string_length(char* data) {
	if (!data)
		return 0;
	return strlen(data);
}

// Reports whether data ends with value.
// Both arguments are passed straight to strlen, so neither may be NULL.
bool coco_string_endswith(char* data, char *value) {
	int dataLen  = strlen(data);
	int valueLen = strlen(value);

	// A suffix longer than the string itself can never match.
	if (valueLen > dataLen)
		return false;

	char* tail = data + dataLen - valueLen;
	return strcmp(tail, value) == 0;
}

// Index of the first occurrence of value in data, or -1 when absent.
// data is handed to strchr unchecked and must not be NULL.
int coco_string_indexof(char* data, char value) {
	char* hit = strchr(data, value);
	if (!hit)
		return -1;
	return (hit-data);
}

// Index of the last occurrence of value in data, or -1 when absent.
// data is handed to strrchr unchecked and must not be NULL.
int coco_string_lastindexof(char* data, char value) {
	char* hit = strrchr(data, value);
	if (!hit)
		return -1;
	return (hit-data);
}

// Concatenates data1 and data2 into a new buffer allocated with new[].
// Either argument may be NULL and is then treated as empty; when both are
// NULL the function returns NULL instead of an empty string.
char* coco_string_append(const char* data1, const char* data2) {
	if (!data1 && !data2)
		return NULL;

	const int len1 = data1 ? (int) strlen(data1) : 0;
	const int len2 = data2 ? (int) strlen(data2) : 0;

	char* joined = new char[len1+len2+1];

	// Copy the character payloads only; the terminator is written once below.
	if (data1)
		memcpy(joined, data1, len1);
	if (data2)
		memcpy(joined+len1, data2, len2);
	joined[len1+len2] = 0;

	return joined;
}

// Appends value to data in place: data is replaced by a newly allocated
// buffer holding the concatenation and the previous buffer is released.
// A NULL value leaves data untouched; a NULL data is treated as empty.
// data is expected to have been allocated with new[] (as the coco_string_*
// helpers in this file do).
void coco_string_merge(char* &data, char* value) {
	if (!value)
		return;

	int dataLen = 0;
	if (data)
		dataLen = strlen(data);
	int valueLen = strlen(value);

	char* newData = new char[dataLen+valueLen+1];

	if (data)
		strcpy(newData, data);
	strcpy(newData+dataLen, value);

	newData[dataLen+valueLen] = 0;

	// Array delete to match new char[...]; the copy is finished before the
	// old buffer is freed, so value may alias data.
	delete [] data;

	data = newData;
}

// Appends a single character to data in place: data is replaced by a newly
// allocated buffer one character longer, and the previous buffer is
// released. A NULL data is treated as empty.
// data is expected to have been allocated with new[] (as the coco_string_*
// helpers in this file do).
void coco_string_merge(char* &data, char value) {
	char* newData;

	int dataLen = 0;
	if (data)
		dataLen = strlen(data);

	newData = new char[dataLen+2];

	if (data)
		strcpy(newData, data);

	newData[ dataLen ] = value;
	newData[dataLen+1] = 0;

	// Free the buffer being replaced, as the char* overload does; without
	// this every call leaked the previous string.
	delete [] data;

	data = newData;
}


// Returns a new[]-allocated, NUL-terminated copy of `length` characters of
// data starting at startIndex. A NULL data yields a newly allocated empty
// string. startIndex and length are not range-checked here.
char* coco_string_substring(char* data, int startIndex, int length) {
	if (!data) {
		char* empty = new char[1];
		empty[0] = 0;
		return empty;
	}

	char* piece = new char[length+1];
	strncpy(piece, data + startIndex, length);
	piece[length] = 0;
	return piece;
}

// Returns a new[]-allocated copy of data with the letters 'a'..'z' mapped
// to 'A'..'Z'; every other character is copied unchanged.
// Returns NULL when data is NULL.
char* coco_string_toupper(char* data) {
	if (!data)
		return NULL;

	const int len = strlen(data);
	char* upper = new char[len+1];

	for (int i = 0; i < len; ++i) {
		const char c = data[i];
		if (c >= 'a' && c <= 'z')
			upper[i] = c + ('A'-'a');
		else
			upper[i] = c;
	}
	upper[len] = '\0';

	return upper;
}

// Returns a new[]-allocated copy of data with the letters 'A'..'Z' mapped
// to 'a'..'z'; every other character is copied unchanged.
// Returns NULL when data is NULL.
char* coco_string_tolower(char* data) {
	if (!data)
		return NULL;

	const int len = strlen(data);
	char* lower = new char[len+1];

	for (int i = 0; i < len; ++i) {
		const char c = data[i];
		if (c >= 'A' && c <= 'Z')
			lower[i] = c - ('A'-'a');
		else
			lower[i] = c;
	}
	lower[len] = '\0';

	return lower;
}

// Returns the character at position subscript, or 0 when data is NULL or
// subscript lies outside [0, strlen(data)).
char coco_string_charat(char* data, int subscript) {
	if (!data || subscript < 0)
		return 0;

	const int len = strlen(data);
	if (subscript >= len)
		return 0;

	return data[subscript];
}

// True when both strings have identical contents (strcmp reports 0).
// Neither argument is checked for NULL.
bool coco_string_equal(char* data1, char* data2) {
	const int order = strcmp(data1, data2);
	return order == 0;
}

// Three-way comparison with strcmp semantics: negative, zero or positive
// when data1 sorts before, equal to, or after data2.
// Neither argument is checked for NULL.
int coco_string_compareto(char* data1, char* data2) {
	const int order = strcmp(data1, data2);
	return order;
}



// Creates an array of `length` bits, every bit initialised to defaultValue.
// Bits are packed eight per byte, so storage is rounded up to whole bytes.
BitArray::BitArray(int length, bool defaultValue)
{
	const int byteCount = (length+7)>>3;

	Count = length;
	Data = new unsigned char[ byteCount ];
	memset(Data, defaultValue ? 0xFF : 0x00, byteCount);
}

// Copy constructor: allocates separate storage and duplicates every byte of
// the source, so the two arrays do not share Data.
BitArray::BitArray(const BitArray &copy) {
	const int byteCount = (copy.Count+7)>>3;

	Count = copy.Count;
	Data = new unsigned char[ byteCount ];
	memcpy(Data, copy.Data, byteCount);
}

// Releases the bit storage; Data is allocated with new[] by the
// constructors and the assignment operator.
BitArray::~BitArray()
{
	delete [] this->Data;
}

int BitArray::getCount() {
	return Count;
}

// Returns the bit at index. Bit i lives in byte i/8 at bit position i%8.
// index is not range-checked.
bool BitArray::Get(int index)
{
	const int byteIndex = index >> 3;
	const int bitMask = 1 << (index & 7);
	return (Data[byteIndex] & bitMask) != 0;
}

// Read-only subscript: returns the bit at index (byte index/8, bit
// position index%8). index is not range-checked.
bool BitArray::operator[]( int index ) const
{
	const int byteIndex = index >> 3;
	const int bitMask = 1 << (index & 7);
	return (Data[byteIndex] & bitMask) != 0;
}

// Sets (value == true) or clears (value == false) the bit at index.
// index is not range-checked.
void BitArray::Set(int index, bool value)
{
	const int byteIndex = index >> 3;
	const unsigned char bit = (unsigned char) (1 << (index & 7));

	if (value) {
		Data[byteIndex] |= bit;
	} else {
		// Keep every bit of the byte except the selected one.
		Data[byteIndex] &= (unsigned char) ~bit;
	}
}

// Sets every bit to value by filling all storage bytes with 0xFF or 0x00.
void BitArray::SetAll(bool value)
{
	const int byteCount = (Count+7)>>3;
	memset(Data, value ? 0xFF : 0x00, byteCount);
}


// Inverts every bit in place, including the unused padding bits of the
// final byte.
void BitArray::Not()
{
	const int byteCount = (Count+7)>>3;
	for (int i = 0; i < byteCount; ++i) {
		Data[i] = (unsigned char) ~Data[i];
	}
}

// In-place bitwise AND with value, applied byte by byte over the bytes
// both arrays have; bytes beyond the shorter array are left unchanged.
// value is received by copy.
void BitArray::And(BitArray value)
{
	const int ownBytes = (Count+7)>>3;
	const int otherBytes = (value.Count+7)>>3;
	const int limit = (ownBytes < otherBytes) ? ownBytes : otherBytes;

	for (int i = 0; i < limit; ++i) {
		Data[i] &= value.Data[i];
	}
}

// In-place bitwise OR with value, applied byte by byte over the bytes
// both arrays have; bytes beyond the shorter array are left unchanged.
// value is received by copy.
void BitArray::Or(BitArray value)
{
	const int ownBytes = (Count+7)>>3;
	const int otherBytes = (value.Count+7)>>3;
	const int limit = (ownBytes < otherBytes) ? ownBytes : otherBytes;

	for (int i = 0; i < limit; ++i) {
		Data[i] |= value.Data[i];
	}
}

// In-place bitwise XOR with value, applied byte by byte over the bytes
// both arrays have; bytes beyond the shorter array are left unchanged.
// value is received by copy.
void BitArray::Xor(BitArray value)
{
	const int ownBytes = (Count+7)>>3;
	const int otherBytes = (value.Count+7)>>3;
	const int limit = (ownBytes < otherBytes) ? ownBytes : otherBytes;

	for (int i = 0; i < limit; ++i) {
		Data[i] ^= value.Data[i];
	}
}

// Returns a reference to a heap-allocated duplicate of this array (same
// Count, same bytes). The object is created with new and nothing here
// releases it, so the caller is responsible for deleting it.
BitArray &BitArray::Clone()
{
	// The copy constructor allocates fresh storage and copies all bytes.
	BitArray *duplicate = new BitArray(*this);
	return *duplicate;
}

// True when both arrays hold the same number of bits and agree on every
// one of them. The comparison is per bit, so padding bits in the final
// byte do not take part.
bool BitArray::Equal( const BitArray &right )
{
	if (Count != right.Count)
		return false;

	for (int i = 0; i < Count; ++i) {
		const int byteIndex = i >> 3;
		const int bitMask = 1 << (i & 7);
		if ((Data[byteIndex] & bitMask) != (right.Data[byteIndex] & bitMask))
			return false;
	}
	return true;
}

// Copy assignment: replaces this array's contents with a private copy of
// right and returns *this so assignments can be chained.
const BitArray &BitArray::operator=( const BitArray &right )
{
	if ( &right != this ) {         // avoid self assignment
		// Allocate and fill the replacement first: if new throws, this
		// object still owns its old, valid storage instead of a dangling
		// pointer.
		const int byteCount = (right.Count+7)>>3;
		unsigned char *fresh = new unsigned char[ byteCount ];
		memcpy(fresh, right.Data, byteCount);

		delete [] Data;              // prevents memory leak
		Data = fresh;
		Count  = right.Count;
	}
	return *this;   // enables cascaded assignments
}

// Creates an empty token: no text, no successor, and kind and all
// position fields set to zero.
Token::Token() {
	val  = NULL;
	next = NULL;
	kind = 0;
	pos  = 0;
	line = 0;
	col  = 0;
}

// Releases the token text through coco_string_delete, which also resets
// val to NULL.
Token::~Token() {
	coco_string_delete(this->val);
}


// Wraps the open stream s in a windowed read buffer.
// isUserStream is stored and later consulted by Close(), which leaves
// user-supplied streams open. EoF is initialised to 256.
Buffer::Buffer(FILE* s, bool isUserStream) :EoF(256) {
	this->isUserStream = isUserStream;
	stream = s;

	// Measure the file by seeking to its end, then rewind to the start.
	fseek(stream, 0, SEEK_END);
	bufLen = ftell(stream);
	fileLen = bufLen;
	fseek(stream, 0, SEEK_SET);

	// Allocate the window. bufStart == INT_MAX marks it as empty, so the
	// setPos(0) below has to load the first chunk from the stream.
	bufLen = MAX_BUFFER_LENGTH;
	buf = new char[bufLen];
	bufStart = INT_MAX;
	setPos(0);

	// setPos stored the number of bytes actually read in bufLen; if that is
	// the whole file the stream is not needed any more.
	if (bufLen == fileLen) {
		Close();
	}
}

// Closes the stream (unless it is user-owned, see Close) and releases the
// window buffer.
Buffer::~Buffer() {
	Close();
	// buf is allocated with new char[] in the constructor and was never
	// freed before; release it with the matching array delete.
	delete [] buf;
	buf = NULL;
}

// Closes the underlying stream when this buffer owns it. User-supplied
// streams are left open. Safe to call repeatedly because stream is reset
// to NULL after closing.
void Buffer::Close() {
	if (isUserStream || stream == NULL)
		return;

	fclose(stream);
	stream = NULL;
}

// Returns the next character and advances the read position, or EoF when
// the end of the file has been reached. When the current window is
// exhausted but the file is not, the window is moved first.
int Buffer::Read() {
	if (pos >= bufLen) {
		if (getPos() >= fileLen) {
			return EoF;
		}
		setPos(getPos()); // shift buffer start to Pos
	}
	// buf holds plain char, which is signed on many platforms. Convert
	// through unsigned char so bytes >= 0x80 come back as 128..255 rather
	// than as negative values, which the scanner would use as array indices.
	return (unsigned char) buf[pos++];
}

// Returns the next character without consuming it, or EoF when the end of
// the file has been reached. When the current window is exhausted but the
// file is not, the window is moved first.
int Buffer::Peek() {
	if (pos >= bufLen) {
		if (getPos() >= fileLen) {
			return EoF;
		}
		setPos(getPos()); // shift buffer start to pos
	}
	// Convert through unsigned char so bytes >= 0x80 are reported as
	// 128..255 instead of negative values on signed-char platforms.
	return (unsigned char) buf[pos];
}

// Returns the characters in [beg, end) as a newly allocated (new[]),
// NUL-terminated string; the read position is restored afterwards.
// An empty or inverted range yields an empty string.
char* Buffer::GetString(int beg, int end) {
	int len = end - beg;
	if (len < 0) len = 0; // inverted range: nothing to copy

	// One extra char for the terminator, so the result is a valid C string.
	// The local is deliberately not called buf, to avoid shadowing the
	// window buffer member that Read() uses.
	char *text = new char[len + 1];

	int oldPos = getPos();
	setPos(beg);
	for (int i = 0; i < len; ++i) text[i] = (char) Read();
	text[len] = 0;
	setPos(oldPos);
	return text;
}

// Current read position: the window start plus the offset inside the
// window.
int Buffer::getPos() {
	return bufStart + pos;
}

// Moves the read position to value, clamped to [0, fileLen].
// If the target lies outside the current window and the stream is still
// open, the window is reloaded from the stream starting at value.
void Buffer::setPos(int value) {
	if (value < 0) {
		value = 0;
	} else if (value > fileLen) {
		value = fileLen;
	}

	const bool inWindow = (value >= bufStart) && (value < bufStart + bufLen);
	if (inWindow) { // already in buffer
		pos = value - bufStart;
		return;
	}

	if (stream == NULL) {
		pos = fileLen - bufStart; // make Pos return fileLen
		return;
	}

	// must be swapped in
	fseek(stream, value, SEEK_SET);
	bufLen = fread(buf, sizeof(char), MAX_BUFFER_LENGTH, stream);
	bufStart = value;
	pos = 0;
}


// Opens fileName in binary mode and scans from it. The stream is passed to
// Buffer with isUserStream == false. If the file cannot be opened, a
// message is printed and the process exits with status 1.
Scanner::Scanner(char* fileName) {
	FILE* stream = fopen(fileName, "rb");
	if (stream == NULL) {
		printf("--- Cannot open file %s\n", fileName);
		exit(1);
	}

	buffer = new Buffer(stream, false);
	Init();
}
	
// Scans from an already opened stream. The stream is passed to Buffer with
// isUserStream == true, so Buffer::Close leaves it open.
Scanner::Scanner(FILE* s) {
	this->buffer = new Buffer(s, true);
	this->Init();
}

// Frees every heap block in the chain, then the start-state table, the
// token text buffer, the ignore set and the input buffer.
Scanner::~Scanner() {
	// Each block is HEAP_BLOCK_SIZE payload bytes followed by a pointer to
	// the next block. Address arithmetic is done on char* rather than via
	// (int), which would truncate pointers wider than int.
	char* cur = (char*) firstHeap;

	while(cur != NULL) {
		cur = (char*) *((void**) (cur + HEAP_BLOCK_SIZE));
		free(firstHeap);
		firstHeap = cur;
	}
	// start and tval are allocated with new[] in Init() (tval may be
	// reallocated by AddCh) and were previously never released.
	delete [] start;
	delete [] tval;
	delete ignore;
	delete buffer;
}

// Initialises scanner state: symbol constants, the start-state table, the
// token text buffer, the first token heap block, position counters, the
// set of ignored (white space) characters and the dummy first token.
// Expects buffer to have been set by the constructor.
void Scanner::Init() {
	EOL    = '\n';
	eofSym = 0;
	charSetSize = 256;
	maxT = 45;
	noSym = 45;
	// Start state per input character (index 0..255); the final entry
	// (index 256, the Buffer EoF value) is -1. The table never changes, so
	// it is kept as a static constant instead of being rebuilt on the stack.
	static const short lstart[] = {
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0, 31,  5,  0,  0, 34, 35,  2,  9, 17, 33, 28, 16, 40, 23, 38,
	 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 41, 15, 37, 42, 44, 39,
	  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 18,  0, 19,  0,  0,
	  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 10, 43, 22,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  -1};
	this->start = new short[257];
	memcpy(this->start, lstart, 257*sizeof(short));

	tvalLength = 128;
	tval = new char[tvalLength]; // text of current token

	// HEAP_BLOCK_SIZE byte heap + pointer to next heap block
	heap = malloc(HEAP_BLOCK_SIZE + sizeof(void*));
	firstHeap = heap;
	// Address arithmetic on char*: casting the pointer to int truncates it
	// where pointers are wider than int.
	heapEnd = (void*) (((char*) heap) + HEAP_BLOCK_SIZE);
	// The link slot holds a pointer and is read back as void*, so it is
	// cleared as a whole pointer rather than as an int.
	*(void**) heapEnd = NULL;
	heapTop = heap;
	if (sizeof(Token) > HEAP_BLOCK_SIZE) {
		printf("--- Too small HEAP_BLOCK_SIZE\n");
		exit(1);
	}

	pos = -1; line = 1; lineStart = 0;
	oldEols = 0;
	NextCh();
	ignore = new BitArray(charSetSize+1);
	ignore->Set(' ', true);  // blanks are always white space
	ignore->Set( 9, true); ignore->Set( 10, true); ignore->Set( 13, true);
	pt = tokens = CreateToken(); // first token is a dummy
}

// Advances to the next input character and stores it in ch.
// Pending end-of-line characters counted in oldEols are replayed first;
// otherwise a character is read from the buffer and the line bookkeeping
// is updated.
void Scanner::NextCh() {
	if (oldEols > 0) {
		ch = EOL;
		oldEols--;
		return;
	}

	ch = buffer->Read();
	pos++;

	// replace isolated '\r' by '\n' in order to make
	// eol handling uniform across Windows, Unix and Mac
	if (ch == '\r' && buffer->Peek() != '\n') {
		ch = EOL;
	}
	if (ch == EOL) {
		line++;
		lineStart = pos + 1;
	}
}

// Appends the current character to the token text buffer tval, doubling
// the buffer when it is full, and then advances to the next character.
void Scanner::AddCh() {
	if (tlen >= tvalLength) {
		tvalLength *= 2;
		char *newBuf = new char[tvalLength];
		memcpy(newBuf, tval, tlen*sizeof(char));
		// tval is allocated with new char[...], so the array form of
		// delete is required.
		delete [] tval;
		tval = newBuf;
	}
	tval[tlen++] = ch;
	NextCh();
}


// Tries to skip a comment that opens with "//" and is closed by the
// character pair 13, 10 (CR LF). Called with ch on the first '/'.
// Returns true when a complete comment was skipped. Returns false when the
// second character is not '/' (the input position is then rewound) or when
// EoF is reached inside the comment.
bool Scanner::Comment0() {
	int level = 1, line0 = line, lineStart0 = lineStart;
	NextCh();

	if (ch != '/') {
		// Not a comment opener: undo the look-ahead.
		if (ch==EOL) {
			line--;
			lineStart = lineStart0;
		}
		pos -= 2;
		buffer->setPos(pos+1);
		NextCh();
		return false;
	}

	NextCh();
	for(;;) {
		if (ch == 13) {
			NextCh();
			if (ch == 10) {
				if (--level == 0) {
					// Remember the line breaks consumed so NextCh can
					// replay them.
					oldEols = line - line0;
					NextCh();
					return true;
				}
				NextCh();
			}
		} else if (ch == buffer->EoF) {
			return false;
		} else {
			NextCh();
		}
	}
}

// Tries to skip a comment delimited by "/*" and "*/"; nested comments are
// tracked with a depth counter. Called with ch on the '/'.
// Returns true when the outermost comment has been closed. Returns false
// when the second character is not '*' (the input position is then
// rewound) or when EoF is reached inside the comment.
bool Scanner::Comment1() {
	int level = 1, line0 = line, lineStart0 = lineStart;
	NextCh();

	if (ch != '*') {
		// Not a comment opener: undo the look-ahead.
		if (ch==EOL) {
			line--;
			lineStart = lineStart0;
		}
		pos -= 2;
		buffer->setPos(pos+1);
		NextCh();
		return false;
	}

	NextCh();
	for(;;) {
		if (ch == '*') {
			NextCh();
			if (ch == '/') {
				if (--level == 0) {
					// Remember the line breaks consumed so NextCh can
					// replay them.
					oldEols = line - line0;
					NextCh();
					return true;
				}
				NextCh();
			}
		} else if (ch == '/') {
			NextCh();
			if (ch == '*') {
				// nested comment opener
				level++;
				NextCh();
			}
		} else if (ch == buffer->EoF) {
			return false;
		} else {
			NextCh();
		}
	}
}


// Re-classifies the current token t when its text is one of the literal
// keywords below; otherwise t->kind is left unchanged.
void Scanner::CheckLiteral() {
	static const struct { const char* text; int kind; } literals[] = {
		{ "over",    10 },
		{ "int",     11 },
		{ "double",  12 },
		{ "char",    13 },
		{ "port",    14 },
		{ "DefVar",  15 },
		{ "length",  42 },
		{ "default", 43 }
	};
	const int literalCount = sizeof(literals) / sizeof(literals[0]);

	for (int i = 0; i < literalCount; ++i) {
		// coco_string_equal takes non-const char*; it only compares.
		if (coco_string_equal(t->val, const_cast<char*>(literals[i].text))) {
			t->kind = literals[i].kind;
			return;
		}
	}
}

// Appends a fresh heap block to the chain and makes it the current one.
// Before that, blocks at the front of the chain are freed until the block
// containing `tokens` becomes the first one.
void Scanner::CreateHeapBlock() {
	void* newHeap;
	// Addresses are handled as char* throughout; the former (int) casts
	// truncate pointers wider than int.
	char* cur = (char*) firstHeap;

	while(((char*) tokens < cur) || ((char*) tokens > (cur + HEAP_BLOCK_SIZE))) {
		cur = (char*) *((void**) (cur + HEAP_BLOCK_SIZE));
		free(firstHeap);
		firstHeap = cur;
	}

	// HEAP_BLOCK_SIZE byte heap + pointer to next heap block
	newHeap = malloc(HEAP_BLOCK_SIZE + sizeof(void*));
	// The link slots are read back as void*, so store whole pointers
	// rather than int-sized values.
	*(void**) heapEnd = newHeap;
	heapEnd = (void*) (((char*) newHeap) + HEAP_BLOCK_SIZE);
	*(void**) heapEnd = NULL;
	heap = newHeap;
	heapTop = heap;
}

// Carves a Token out of the current heap block (starting a new block when
// the current one is full) and returns it with val and next set to NULL.
// No Token constructor runs, so kind/pos/col/line are left unset here.
// NOTE(review): heapTop is also advanced by AppendVal in byte steps, so the
// returned address is not rounded to Token's alignment - confirm this is
// acceptable on the target platforms.
Token* Scanner::CreateToken() {
	Token *t;
	// Pointer arithmetic on char*; the former (int) casts truncate
	// pointers wider than int.
	if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) {
		CreateHeapBlock();
	}
	t = (Token*) heapTop;
	heapTop = (void*) ((char*) heapTop + sizeof(Token));
	t->val = NULL;
	t->next = NULL;
	return t;
}

// Stores the current token text (tlen characters of tval) as a
// NUL-terminated string in the token heap and points t->val at it.
// Starts a new heap block when the text does not fit; exits with status 1
// if the text is longer than a whole block.
void Scanner::AppendVal(Token *t) {
	// Pointer arithmetic on char*; the former (int) casts truncate
	// pointers wider than int.
	if (((char*) heapTop + tlen + 1) >= (char*) heapEnd) {
		if (tlen + 1 > HEAP_BLOCK_SIZE) {
			printf("--- Too long token value\n");
			exit(1);
		}
		CreateHeapBlock();
	}
	t->val = (char*) heapTop;
	heapTop = (void*) ((char*) heapTop + tlen + 1);
	strncpy(t->val, tval, tlen);
	t->val[tlen] = 0;
}

// Scans and returns the next token from the input.
// Characters in the ignore set and comments recognised by Comment0 /
// Comment1 are skipped first. A token is then allocated from the token
// heap, stamped with its position, column and line, and classified by the
// state machine below, which starts in the state given by start[ch].
// Every path that leaves the switch via break stores the collected text
// with AppendVal; state 1 returns directly instead (see the note there).
Token* Scanner::NextToken() {
	// skip white space
	while ((*ignore)[ch]) NextCh();
	// a successfully skipped comment restarts the scan
	if (ch == '/' && Comment0() ||ch == '/' && Comment1()) return NextToken();
	t = CreateToken();
	t->pos = pos; t->col = pos - lineStart + 1; t->line = line; 
	int state = start[ch];
	// the first character is always consumed into tval before dispatching
	tlen = 0; AddCh();

	// Each state either consumes the current character with AddCh() and
	// jumps to a follow-up state, or assigns the token kind and leaves the
	// switch. noSym marks input that matches no token.
	switch (state) {
		case -1: { t->kind = eofSym; break; } // NextCh already done
		case 0: { t->kind = noSym; break; }   // NextCh already done
		case 1:
			case_1:
			if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {AddCh(); goto case_1;}
			// NOTE(review): unlike the other states, the text is copied with
			// coco_string_create (new[]) rather than placed in the token
			// heap by AppendVal; nothing in this file releases it - confirm
			// who owns it. CheckLiteral may then change kind to a keyword.
			else {t->kind = 1; t->val = coco_string_create(tval, 0, tlen); CheckLiteral(); return t;}
		case 2:
			case_2:
			if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {AddCh(); goto case_3;}
			else {t->kind = noSym; break;}
		case 3:
			case_3:
			// 39 is the apostrophe character
			if (ch == 39) {AddCh(); goto case_4;}
			else {t->kind = noSym; break;}
		case 4:
			case_4:
			{t->kind = 3; break;}
		case 5:
			case_5:
			if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {AddCh(); goto case_5;}
			else if (ch == '"') {AddCh(); goto case_6;}
			else {t->kind = noSym; break;}
		case 6:
			case_6:
			{t->kind = 4; break;}
		case 7:
			case_7:
			if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_8;}
			else {t->kind = noSym; break;}
		case 8:
			case_8:
			if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_8;}
			else {t->kind = 5; break;}
		case 9:
			case_9:
			{t->kind = 6; break;}
		case 10:
			case_10:
			{t->kind = 7; break;}
		case 11:
			case_11:
			if (!(ch == '>') && ch != buffer->EoF) {AddCh(); goto case_11;}
			else if (ch == '>') {AddCh(); goto case_12;}
			else {t->kind = noSym; break;}
		case 12:
			case_12:
			{t->kind = 8; break;}
		case 13:
			case_13:
			if ((ch <= '"' || ch >= '$' && ch <= '.' || ch >= '0')) {AddCh(); goto case_13;}
			else {t->kind = 9; break;}
		case 14:
			case_14:
			if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_14;}
			else if (ch == '.') {AddCh(); goto case_7;}
			else {t->kind = 2; break;}
		case 15:
			case_15:
			{t->kind = 16; break;}
		case 16:
			case_16:
			{t->kind = 17; break;}
		case 17:
			case_17:
			{t->kind = 18; break;}
		case 18:
			case_18:
			{t->kind = 19; break;}
		case 19:
			case_19:
			{t->kind = 20; break;}
		case 20:
			case_20:
			{t->kind = 21; break;}
		case 21:
			case_21:
			{t->kind = 22; break;}
		case 22:
			case_22:
			{t->kind = 23; break;}
		case 23:
			case_23:
			if (ch == '.') {AddCh(); goto case_24;}
			else {t->kind = noSym; break;}
		case 24:
			case_24:
			{t->kind = 24; break;}
		case 25:
			case_25:
			{t->kind = 25; break;}
		case 26:
			case_26:
			{t->kind = 26; break;}
		case 27:
			case_27:
			{t->kind = 28; break;}
		case 28:
			case_28:
			{t->kind = 30; break;}
		case 29:
			case_29:
			{t->kind = 34; break;}
		case 30:
			case_30:
			{t->kind = 36; break;}
		case 31:
			case_31:
			if (ch == '=') {AddCh(); goto case_32;}
			else {t->kind = noSym; break;}
		case 32:
			case_32:
			{t->kind = 37; break;}
		case 33:
			case_33:
			{t->kind = 38; break;}
		case 34:
			case_34:
			{t->kind = 40; break;}
		case 35:
			case_35:
			if (ch == '&') {AddCh(); goto case_36;}
			else {t->kind = noSym; break;}
		case 36:
			case_36:
			{t->kind = 41; break;}
		case 37:
			case_37:
			if ((ch <= '<' || ch >= '?')) {AddCh(); goto case_11;}
			else if (ch == '>') {AddCh(); goto case_12;}
			else if (ch == '=') {AddCh(); goto case_45;}
			else {t->kind = 32; break;}
		case 38:
			case_38:
			if (ch == '#') {AddCh(); goto case_13;}
			else {t->kind = 39; break;}
		case 39:
			case_39:
			if (ch == '=') {AddCh(); goto case_20;}
			else {t->kind = 29; break;}
		case 40:
			case_40:
			if (ch == '>') {AddCh(); goto case_21;}
			else {t->kind = 31; break;}
		case 41:
			case_41:
			if (ch == ':') {AddCh(); goto case_25;}
			else if (ch == '=') {AddCh(); goto case_26;}
			else {t->kind = noSym; break;}
		case 42:
			case_42:
			if (ch == '=') {AddCh(); goto case_30;}
			else {t->kind = 27; break;}
		case 43:
			case_43:
			if (ch == '|') {AddCh(); goto case_27;}
			else {t->kind = 44; break;}
		case 44:
			case_44:
			if (ch == '=') {AddCh(); goto case_29;}
			else {t->kind = 33; break;}
		case 45:
			case_45:
			if (!(ch == '>') && ch != buffer->EoF) {AddCh(); goto case_11;}
			else if (ch == '>') {AddCh(); goto case_12;}
			else {t->kind = 35; break;}

	}
	// store the collected text in the token heap and attach it to the token
	AppendVal(t);
	return t;
}

// get the next token (possibly a token already seen during peeking)
// Returns the token following `tokens`: a token cached by an earlier Peek()
// when there is one, otherwise a freshly scanned token. In both cases
// `tokens` and the peek position pt end up on the returned token.
Token* Scanner::Scan() {
	if (tokens->next == NULL) {
		// Move pt together with tokens. Otherwise pt keeps pointing at the
		// previous token, which CreateHeapBlock may free once `tokens` has
		// moved on, and a later Peek() would start from that stale token.
		return pt = tokens = NextToken();
	} else {
		pt = tokens = tokens->next;
		return tokens;
	}
}

// peek for the next token, ignore pragmas
// Advances the peek position pt to the next token whose kind is not above
// maxT and returns it. Tokens already in the list are reused; a new token
// is scanned and linked in whenever the list ends.
Token* Scanner::Peek() {
	do {
		// Fetch on demand at every step, so walking over cached tokens can
		// never follow a NULL next pointer.
		if (pt->next == NULL) {
			pt->next = NextToken();
		}
		pt = pt->next;
	} while (pt->kind > maxT); // skip pragmas
	return pt;
}

// make sure that peeking starts at the current scan position
void Scanner::ResetPeek() {
	pt = tokens;
}



