#include <stdlib.h>
#include <stdio.h>
#include <unistd.h> // options
#include <dirent.h> // directory
#include <dbi/dbi.h> // database
#include <string.h>
#include <algorithm>
#include <iostream>
#include <string>
#include <set>
#include <map>
#include <fstream>
#include <ios>
using namespace std;

// g++ orthy.c -ldbi -O3 -o corthy -Wall
// ./corthy -d -n -p test -g . -b -c all ensGene knownGene refGene vegaGene
// ./corthy -p test -f -c all -m maf_dir -s hg19 taeGut1 ornAna1 allMis0 chrPic0 pytMol0 anoCar2 galGal3
// ./corthy -p test -r export -y 10 -c all -m maf_dir -s hg19 taeGut1 ornAna1 allMis0 chrPic0 pytMol0 anoCar2 galGal3
// -w store also good gene structures

// global variables
// Directory iteration state and the two database connections that main() opens.
struct dirent *dp;
DIR *dirp;
dbi_conn conn, conn2;

// NOTE(review): first/second/third and tempchar are not referenced in the
// visible part of the file - confirm they are still needed.
int first = 0;
int second = 1;
int third = 2;

char tempchar[100];

// option variables
// Defaults; main() overrides them from the command line
// (-r, -g, -m, -s, -c, -x and -p respectively).
char *fasta_dir = (char*)"corthy_res";
char *genes_dir = (char*)".";
char *mafs_dir = (char*)".";
char *ref_genome = (char*)"hg";
char* cluster_group = (char*)"all";
char* config_file = (char*)"config.txt";
char* project = (char*)"temp";

// filtering variables
char* actual_spec;
// Pending multi-row INSERT for the errors table and the number of rows in it;
// built by add_error() and sent by flush_errors().
char* query_list;
int count_queries = 0;

// config variables
// Defaults; main() overrides them from the config file keys db_host, db_user,
// db_passw, db_name, filt_alignment, filt_synteny and filt_frame.
char* DB_HOST = (char*)"localhost";
char* DB_USER = (char*)"root";
char* DB_PASSW = (char*)"passw";
char* DB_NAME = (char*)"turtle";
int FILT_ALIGNMENT = 18;
int FILT_SYNTENY = 12;
int FILT_FRAME = 16;

// Scratch pointers for SQL text; each use allocates with new[] and releases
// with delete[], so they are not valid between uses.
char *query, *subquery;
// Ids of the gene set / cluster group most recently inserted by main().
int geneset, clusterset;

// offset variables
int MAX_SETS = 4096; // appropriate value
// Selected gene set ids live in sets_array[1..sets_count] (1-based);
// sets_list holds the same ids as comma-separated text for SQL IN (...).
int sets_count = 0;
int* sets_array = new int[MAX_SETS];
char* sets_list = new char[5*MAX_SETS];
// offset[] is indexed by gene set id and filled by calculate_offsets();
// resultset[] is indexed by gene_in_set[addr] in add_error().
int *offset = new int[MAX_SETS];
int *resultset = new int[MAX_SETS];

// query variables
dbi_result result;
dbi_result result2;
int qresult;
int qbegin, qend;
long long qcount; 

// species variable
// spec_list is filled 1-based from the trailing command-line arguments.
int spec_count;
char** spec_list;

// result variables
/*int* results_array = new int[MAX_SETS];
int results_count;*/
char** results_list; // = new char[5*MAX_SETS];

// genes and exons variables
// The gene_* arrays are indexed by gene address (see addr_gene()).
int* gene_begin;
int* gene_end;
int* gene_xid;
int* gene_set;
int* gene_sid;
char** gene_name;
char* gene_strand;

int* gene_in_set;

// exon_set/exon_sid are indexed by the value passed to addr_exon().
int* exon_begin;
int* exon_end;
int* exon_set;
int* exon_sid;

int* exon_gene_begin;
int* exon_gene_end;
int* exon_gene_id;
char** exon_gene_name;

// frame_begin/frame_end hold the currently open frameshift per gene address,
// -1 meaning "none" (see finalize_frame()).
int* ref_frame;
int* spec_frame;
int* frame_begin;
int* frame_end;

// nonsense
// NOTE(review): the label above is the original author's; the use of these
// variables is not visible in this part of the file.
int* frame_fix;
int* frame_var;
int ref_frame_actual;

// Address of an exon: the offset[] entry of the exon's set plus the exon's
// id within that set.
int addr_exon(int target) {
	const int set_base = offset[exon_set[target]];
	const int id_in_set = exon_sid[target];
	return set_base + id_in_set;
}

// Address of a gene: the offset[] entry of the gene's set plus the gene's
// id within that set.
int addr_gene(int target) {
	const int set_base = offset[gene_set[target]];
	const int id_in_set = gene_sid[target];
	return set_base + id_in_set;
}

// execute all queries in list
void flush_errors() {
	if (count_queries > 0) {
//		cout << query_list << "\n";
		dbi_conn_query(conn, query_list);
		delete[] query_list;
		count_queries = 0;
	}
}

// Queue one row for the `<project>_errors` table.
//
// Rows are collected into a single multi-row INSERT held in query_list and
// sent through flush_errors() as soon as 100 rows are pending. Rows still
// queued after the last call have to be sent with flush_errors().
//
//   addr      - gene address; the row stores resultset[gene_in_set[addr]] and
//               gene_xid[addr]. A negative value stores 0 in both columns.
//   test      - value for the `test` column
//   err_begin - value for the `begin` column
//   err_end   - value for the `end` column
//   desc      - value for the `desc` column (default: empty)
//   note      - value for the `note` column (default: empty)
//
// NOTE(review): test/desc/note are placed into the SQL text unescaped, so
// they must not contain quote characters.
void add_error(int addr, char* test, int err_begin, int err_end, char* desc = (char*)"", char* note = (char*)"") {
	static const char tuple_format[] = "(%i, %i, '%s', '%s', '%s', %i, %i) ";
	const size_t list_capacity = 2000*200;
	const int batch_limit = 100;

	// A negative address means "no gene": both id columns are stored as 0.
	const int xresult = (addr < 0) ? 0 : resultset[gene_in_set[addr]];
	const int xgene = (addr < 0) ? 0 : gene_xid[addr];

	// Measure the tuple first so its buffer always fits, whatever the length
	// of the text columns.
	const int tuple_len = snprintf(NULL, 0, tuple_format,
		xresult, xgene, test, desc, note, err_begin, err_end);
	if (tuple_len < 0) {
		std::cerr << "Could not format error record, skipping. \n";
		return;
	}

	char* tuple = new char[tuple_len + 1];
	snprintf(tuple, tuple_len + 1, tuple_format,
		xresult, xgene, test, desc, note, err_begin, err_end);

	// Send the current batch early when this tuple (plus ", " and the
	// terminator) would not fit into what is left of query_list.
	if (count_queries > 0 &&
		strlen(query_list) + 2 + (size_t)tuple_len + 1 > list_capacity) {
		flush_errors();
	}

	if (count_queries == 0) {
		// start a new batch with the INSERT header
		query_list = new char[list_capacity];
		const int head_len = snprintf(query_list, list_capacity,
			"INSERT INTO `%s_errors` "
			"(`xresult`, `xgene`, `test`, `desc`, `note`, `begin`, `end`) "
			"VALUES ",
			project);
		if (head_len < 0 || (size_t)head_len + (size_t)tuple_len + 1 > list_capacity) {
			std::cerr << "Error record too long, skipping. \n";
			delete[] query_list;
			query_list = NULL;
			delete[] tuple;
			return;
		}
	} else {
		strcat(query_list, ", ");
	}

	strcat(query_list, tuple);
	delete[] tuple;
	count_queries++;

	if (count_queries >= batch_limit) {
		flush_errors();
	}
}

// Close the frameshift that is currently open for gene `addr`, if any.
//
// A frameshift without an end is closed at `border`, but no further than
// gene_end[addr]+1. One that ends before gene_end[addr]-1 is reported through
// add_error() only when it spans at least FILT_FRAME positions; otherwise it
// is always reported. The open-frameshift markers are reset to -1 afterwards.
void finalize_frame(int addr, int border) {
	// no frameshift open for this gene
	if (frame_begin[addr] < 0) {
		return;
	}

	if (frame_end[addr] < 0) {
		frame_end[addr] = min(border, gene_end[addr]+1);
	}

	const int shift_begin = frame_begin[addr];
	const int shift_end = frame_end[addr];

	const bool ends_before_gene_end = shift_end < gene_end[addr] - 1;
	const bool long_enough = shift_end - shift_begin >= FILT_FRAME;

	if (!ends_before_gene_end || long_enough) {
		add_error(addr, (char*)"frame", shift_begin, shift_end);
	}

	frame_begin[addr] = -1;
	frame_end[addr] = -1;
}

// delete specified table
void drop_table(char* table) {
	query = new char[1000];
	sprintf(query, 
		"DROP TABLE IF EXISTS %s_%s",
		project,
		table);
	dbi_conn_query(conn, query);
	delete[] query;
}

// Same as fgets(), but strips one trailing newline from the line read.
//   string - destination buffer
//   n      - buffer size handed to fgets()
//   stream - stream to read from
// Returns `string`, or NULL when fgets() returns NULL.
char* fgetstr(char *string, int n, FILE *stream) {
	if (!fgets(string, n, stream)) {
		return NULL;
	}

	// fgets() can succeed and still leave an empty string (for example when
	// the line starts with a NUL byte); length - 1 would then index the byte
	// in front of the buffer.
	const size_t length = strlen(string);
	if (length > 0 && string[length - 1] == '\n') {
		string[length - 1] = 0;
	}

	return string;
}

// Convert a sequence to the reverse strand, in place.
// The characters are reversed, then A<->T and C<->G are swapped. Bases are
// matched in either case and the complement is written in upper case; any
// other character is left as it is.
void reverse(char s[])
{
	const int length = strlen(s);

	// mirror the sequence
	std::reverse(s, s + length);

	// complement every base
	for (char* base = s; base != s + length; ++base)
	{
		switch (*base)
		{
			case 'A': case 'a': *base = 'T'; break;
			case 'T': case 't': *base = 'A'; break;
			case 'C': case 'c': *base = 'G'; break;
			case 'G': case 'g': *base = 'C'; break;
			default: break;
		}
	}
}

void calculate_offsets(char* chrom) {
	int counter = 0;
	for (int index = 1; index <= sets_count; index++) {
		query = new char[1000];
		sprintf(query, 
			"SELECT max(`sid`) as `res` FROM `%s_genes` "
			"WHERE `xset` = %i AND `chrom` = '%s';",
			project,
			sets_array[index],
			chrom);
		result = dbi_conn_query(conn, query);
		delete[] query;

		if (result) {
			qresult = 0;
			while (dbi_result_next_row(result)) {
				qresult = dbi_result_get_int(result, "res");
			}	

			offset[sets_array[index]] = counter;
			counter += qresult;
		}
		dbi_result_free(result);
	}
}

int main(int argc, char *argv[]) {

	// constants
	int MAF_FILE_COLS = 7;
	int GP_FILE_COLS = 10;
	int MAF_FILE_LINE = 65536;
	int GP_FILE_LINE = 4096*32; // for genes with many exons
	int GP_FILE_WORD = 64;
	int GP_FILE_EXONS = 4096; // some genes could have thousands

	// switch variables
	bool do_gp = false, do_cluster = false, do_create = false, do_test = false, do_list = false;
	bool do_list_sets = false, do_list_clusters = false, do_delete = false, do_filter = false;
	bool do_result = false, do_extended = false, do_borders = true, do_export = false, do_allexport = false;
	bool do_delimit = false;
	int ommit_range = 0;

	char line[MAF_FILE_LINE];
	char *word = (char*)"";
	char sword[1024] = "";
	char *splits[GP_FILE_COLS*2];
	char *bsplits[GP_FILE_EXONS];
	char *esplits[GP_FILE_EXONS];
	int index, temp;
	int bcount, ecount;
	int q_lastid;
	long int i_line; // line iterator
	int counter; // counter for sums
	char path[1024];
	string fname = "";
	string astring;

	long long count_genes = 0;
	long long count_exons = 0;

	char* actual_chrom;


time_t now = time(0);
tm* localtm = localtime(&now);
cout << "Start: " << asctime(localtm) << endl;

	// parse arguments
	int c;
	while ((c = getopt(argc, argv, "ivwqr:x:s:fm:deunbp:c:g:hlzy:")) != -1) {
		switch (c) {
			case 'h':
				printf("Usage: %s -options \n"
					"	-h : print this help page \n"
					"	-l : list all projects \n"
					"	-x config : config file, default is config.txt \n"
					"	-p project : use selected project \n"
					"	-e : list gene sets in project \n"
					"	-u : list cluster groups in project \n"
					"	-n : create new project \n"
					"	-d : delete selected project \n"
					"	-g directory : load genes from files in selected directory \n"
					"	-c group : work with selected cluster \n"
					"	-b set1 set2 ... : create cluster on selected gene sets \n"
					"	-f group : filter genes in selected clustered group \n"
					"	-m directory : set maf files location (use with -f and -r) \n"
					"	-s reference_species species1 species2 ... : set species to process (use with -f) \n"
					"	-r directory : generate filtered genes alignments \n"
					"	-q : export filtered genes \n"
					"	-y val : ommit borders \n"
					"	-v : do not cluster genes for export \n"
					"	-i : delimit exons in export \n"
					"	-o: aligned regions in mafs are ordered by position \n"
					"	-z : do not check start/stop codon \n"
					" \n"
					"Examples: \n"
					"	%s -n -p test -g genes_dir -b -c test_cluster geneset1 geneset2 - load and cluster genes \n"
					"	%s -p test -f -c test_cluster -m maf_dir -s ref spec1 spec2 - load project and filter genes on cluster \n"
					"	%s -p test -c test_cluster -m maf_dir -r result_dir -s ref spec1 spec2 - make results \n",
					argv[0], argv[0], argv[0], argv[0]);
				return 1;
				break;
			case 'x':
				config_file = strdup(optarg);
				break;
			case 's':
				ref_genome = strdup(optarg);
				break;
			case 'm':
				mafs_dir = strdup(optarg);
				break;
			case 'f':
				do_filter = true;
				break;
			case 'b':
				do_cluster = true;
				break;
			case 'c':
				cluster_group = strdup(optarg);
				break;
			case 'g':
				do_gp = true;
				genes_dir = strdup(optarg);
				break;
			case 'l':
				do_list = true;
				break;
			case 'e':
				do_list_sets = true;
				break;
			case 'u':
				do_list_clusters = true;
				break;
			case 'p':
				project = optarg;
				do_test = true;
				break;
			case 'n':
				do_create = true;
				break;
			case 'd':
				do_delete = true;
				break;
			case 'w':
				do_extended = true;
				break;
			case 'r':
				do_result = true;
				fasta_dir = strdup(optarg);
				break;
			case 'z':
				do_borders = false;
				break;
			case 'q':
				do_export = true;
				break;
			case 'y':
				ommit_range = atoi(optarg);
				break;
			case 'v':
				do_allexport = true;
				break;
			case 'i':
				do_delimit = true;
				break;
			case '?':
				if (optopt == 'x' || optopt == 'p' || optopt == 'g' || optopt == 'c' || optopt == 'f' || optopt == 'm' || optopt == 'r' || optopt == 's' || optopt == 'y')
					cout << "Option -" << (char)optopt << " requires an argument. \n";
				else
					cout << "Unknown argument " << (char)optopt << ". \n";
				return 1;
				break;
		}
	}

/*
	cout << "Configuration: " << config_file << "\n";
	cout << "Project: " << project << "\n";
	cout << "Genes directory: " << genes_dir << "\n";
	cout << "Cluster group: " << cluster_group << "\n";
	cout << "Mafs directory: " << mafs_dir << "\n";
	cout << "Results directory: " << fasta_dir << "\n";
	cout << "Trunc borders: " << ommit_range << "\n";
	cout << "\n";
*/

	// read config file
	FILE *file = fopen(config_file, "r");
	if (file != NULL) {

		// read lines
		while (fgetstr(line, sizeof line, file) != NULL) {

			// skip comments
			if (strncmp(line, "#", 1) == 0) {
				continue;
			}

			index = 0;
			for (word = strtok(line, "="); word; word = strtok(NULL, "=")) {
				splits[index] = new char[1024];
				strcpy(splits[index], word);
				index++;
			}

			if (strcmp(splits[0], "db_name")  == 0) {
				DB_NAME = strdup(splits[1]);
			}
			if (strcmp(splits[0], "db_host")  == 0) {
				DB_HOST = strdup(splits[1]);
			}
			if (strcmp(splits[0], "db_user")  == 0) {
				DB_USER = strdup(splits[1]);
			}
			if (strcmp(splits[0], "db_passw")  == 0) {
				DB_PASSW = strdup(splits[1]);
			}

			if (strcmp(splits[0], "filt_alignment")  == 0) {
				FILT_ALIGNMENT = atoi(splits[1]);
			}
			if (strcmp(splits[0], "filt_synteny")  == 0) {
				FILT_SYNTENY = atoi(splits[1]);
			}
			if (strcmp(splits[0], "filt_frame")  == 0) {
				FILT_FRAME = atoi(splits[1]);
			}

			for (int i=0; i<index; i++) {
				delete splits[i];
			}

		}

		fclose(file);
	}

/* for (index = optind; index < argc; index++) {
	cout << "Unknown argument " << argv[index] << "\n";
	return 2; }*/

	// database initialization
	dbi_initialize(NULL);
	conn = dbi_conn_new("mysql");

	// database connection
	dbi_conn_set_option(conn, "host", DB_HOST);
	dbi_conn_set_option(conn, "username", DB_USER);
	dbi_conn_set_option(conn, "password", DB_PASSW);
	dbi_conn_set_option(conn, "dbname", DB_NAME);
	dbi_conn_set_option(conn, "encoding", "UTF-8");
	dbi_conn_set_option(conn, "mysql_client_multi_statements", "1");
	dbi_conn_set_option(conn, "mysql_client_multi_results", "1");
	dbi_conn_set_option(conn, "mysql_client_interactive", "1");
	dbi_conn_set_option(conn, "mysql_client_compress", "1");

	if (dbi_conn_connect(conn) < 0) {
		printf("Could not connect to database. \n");
		return 3;
	}

	conn2 = dbi_conn_new("mysql");

	// database connection
	dbi_conn_set_option(conn2, "host", DB_HOST);
	dbi_conn_set_option(conn2, "username", DB_USER);
	dbi_conn_set_option(conn2, "password", DB_PASSW);
	dbi_conn_set_option(conn2, "dbname", DB_NAME);
	dbi_conn_set_option(conn2, "encoding", "UTF-8");
	dbi_conn_set_option(conn2, "mysql_client_multi_statements", "1");
	dbi_conn_set_option(conn2, "mysql_client_interactive", "1");
	dbi_conn_set_option(conn2, "mysql_client_compress", "1");

	if (dbi_conn_connect(conn2) < 0) {
		printf("Could not connect to database. \n");
		return 3;
	}

	// create main table
	query = new char[1000];
	sprintf(query, 
		"CREATE TABLE IF NOT EXISTS `projects` ("
		"id INT AUTO_INCREMENT, "
		"`name` VARCHAR(200), "
		"PRIMARY KEY (id)) "
		"CHARACTER SET utf8 COLLATE utf8_general_ci;");
	dbi_conn_query(conn, query);
	delete[] query;

	// list projects
	if (do_list) {
		cout << "Listing projects: \n";

		query = new char[1000];
		sprintf(query,
			"SELECT name FROM projects;");
		result = dbi_conn_query(conn, query);
		delete[] query;

		if (result) {
			while (dbi_result_next_row(result)) {
				word = (char*)dbi_result_get_string(result, "name");
				cout << word << "\n";
		}	}
		dbi_result_free(result);

		return 0;
	}

	// list project sets
	if (do_list_sets) {
		cout << "Listing project gene sets: \n";

		query = new char[1000];
		sprintf(query,
			"SELECT `name` FROM %s_sets;",
			project);
		result = dbi_conn_query(conn, query);
		delete[] query;

		if (result) {
			while (dbi_result_next_row(result)) {
				word = (char*)dbi_result_get_string(result, "name");
				cout << word << "\n";
		}	}
		dbi_result_free(result);

		return 0;
	}

	// list project clusters
	if (do_list_clusters) {
		cout << "Listing project cluster sets: \n";

		query = new char[1000];
		sprintf(query,
			"SELECT `name` FROM %s_groups;",
			project);
		result = dbi_conn_query(conn, query);
		delete[] query;

		if (result) {
			while (dbi_result_next_row(result)) {
				word = (char*)dbi_result_get_string(result, "name");
				cout << word << "\n";
		}	}
		dbi_result_free(result);

		return 0;
	}

	// delete existing project
	if (do_delete) {
		cout << "Deleting project: " << project << "\n";

		query = new char[1000];
		sprintf(query, 
			"DELETE FROM `projects` WHERE `name` = '%s'",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		// drop tables
		drop_table((char*)"genes");
		drop_table((char*)"exons");
		drop_table((char*)"sets");
		drop_table((char*)"groups");
		drop_table((char*)"grouplists");
		drop_table((char*)"clusters");
	}

	// create new project
	if (do_create) {
		cout << "Creating project: " << project << "\n";

		query = new char[1000];
		sprintf(query, 
			"SELECT count(`id`) as `cnt` FROM `projects` "
			"WHERE `name` = '%s';",
			project);
		result = dbi_conn_query(conn, query);
		delete[] query;

		long long count_project = 0;
		if (result) {
			while (dbi_result_next_row(result)) {
			count_project = dbi_result_get_longlong(result, "cnt");
		}	}
		dbi_result_free(result);

		if (count_project > 0) {
			cout << "Project with same name already exists. \n";
			return 5;
		}

		query = new char[1000];
		sprintf(query, 
			"INSERT INTO `projects` "
			"(`name`) "
			"VALUES "
			"('%s');",
			project);
		dbi_conn_query(conn, query);
		delete[] query;
	}

	// test project
	if (do_test) {
		cout << "Loading project " << project << ". \n";

		query = new char[1000];
		sprintf(query, 
			"SELECT count(`id`) as cnt FROM `projects` "
			"WHERE `name` = '%s';",
			project);
		result = dbi_conn_query(conn, query);
		delete[] query;

		long long count_project = 0;
		if (result) {
			while (dbi_result_next_row(result)) {
			count_project = dbi_result_get_longlong(result, "cnt");
		}	}
		dbi_result_free(result);

		if (count_project == 0) {
			cout << "Unknown project '" << project << "'. \n";
			return 4;
		}
		if (count_project > 1) {
			cout << "Duplicate project name, inconsistent database. \n";
			return 5;
		}
	}

	// load genes
	if (do_gp) {
		cout << "Loading genes... \n";

		// create tables
		query = new char[10000];
		sprintf(query, 
			"CREATE TABLE IF NOT EXISTS %s_genes ("
			"id INT AUTO_INCREMENT, "
			"`xset` INT, "
			"`sid` INT, "
			"`name` VARCHAR(200), "
			"`chrom` VARCHAR(200), "
			"`strand` VARCHAR(200), "
			"`begin` INT, "
			"`end` INT, "
			"`exon_count` INT, "
			"`exon_length` INT, "
			"PRIMARY KEY (`id`), "
			"KEY (`name`), "
			"KEY (`begin`), "
			"KEY (`end`), "
			"KEY `dual` (`chrom`,`xset`)) "
			"CHARACTER SET utf8 COLLATE utf8_general_ci;",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		query = new char[1000];
		sprintf(query, 
			"CREATE TABLE IF NOT EXISTS %s_exons ("
			"id INT AUTO_INCREMENT, "
			"`xgene` INT, "
			"`begin` INT, "
			"`end` INT, "
			"PRIMARY KEY (id), "
			"KEY (xgene), "
			"KEY (begin), "
			"KEY (end)) "
			"CHARACTER SET utf8 COLLATE utf8_general_ci;",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		query = new char[1000];
		sprintf(query, 
			"CREATE TABLE IF NOT EXISTS %s_sets ("
			"id INT AUTO_INCREMENT, "
			"`name` VARCHAR(200), "
			"PRIMARY KEY (id)) "
			"CHARACTER SET utf8 COLLATE utf8_general_ci;",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		// variables
		dirp = opendir(genes_dir);
		string fname = "";
		int exon_length, exon_begin, exon_end;

		counter = 0;
		while ((dp = readdir(dirp)) != NULL) {
			fname = dp->d_name;

			// process genepred files
			if(fname.substr(fname.find_last_of(".") + 1) == "gp") {
				cout << "Processing genepred file " << fname << "...\n";

				word = strdup(fname.substr(0, fname.find_last_of(".")).c_str());

				// create gene set record
				query = new char[1000];
				sprintf(query, 
					"INSERT INTO %s_sets " 
					"(`name`) " 
					"VALUES "
					"('%s'); ",
					project,
					word);
				dbi_conn_query(conn, query);
				delete[] query;

				delete word;

				// get geneset id
				geneset = dbi_conn_sequence_last(conn, NULL);

				// parse file
				strcpy(path, genes_dir);
				strcat(path, "/");
				strcat(path, fname.c_str());
				FILE *file = fopen(path, "r");
				if (file != NULL)
				{
					map<string,int> chrom_count;

					i_line = 0;
					while (fgets(line, sizeof line, file) != NULL) {
						i_line++;

						// split gene line
						index = 0;
						for (word = strtok(line, "\t"); word; word = strtok(NULL, "\t")) {
							splits[index] = new char[GP_FILE_LINE];
							strcpy(splits[index], word);
							index++;
						}

						if (index < GP_FILE_COLS) {
							cout << "Incorrect line format, skipping... "
								<< "[line " << i_line <<"] \n";
							continue;
						}

						//split exons
						bcount = 0;
						for (word = strtok(splits[8], ","); word; word = strtok(NULL, ",")) {
							if (atoi(word) > 0) {
								bsplits[bcount] = new char[GP_FILE_WORD];
								strcpy(bsplits[bcount], word);
								bcount++;
							}
						}

						ecount = 0;
						for (word = strtok(splits[9], ","); word; word = strtok(NULL, ",")) {
							if (atoi(word) > 0) {
								esplits[ecount] = new char[GP_FILE_WORD];
								strcpy(esplits[ecount], word);
								ecount++;
							}
						}

						if (bcount != ecount || atoi(splits[7]) != bcount) {
							cout << "Different exons count and exons boundaries count, skipping... "
								<< "[line " << i_line <<"] \n";
							continue;
						}

						// count exon_length
						exon_length = 0;
						int memory = -10;
						for (int i=0; i<bcount; i++) {
							exon_begin = max(atoi(bsplits[i]), atoi(splits[5]));
							exon_end = min(atoi(esplits[i]), atoi(splits[6]));

							if (exon_begin < memory+4) {
								exon_length = -1;
								break;
							}

							if (exon_end > exon_begin) {
								exon_length += exon_end - exon_begin;
							}

							memory = exon_end;
						}

						// skip invalid exon position
						if (exon_length == -1) {
							cout << "Invalid exon position, skipping gene... "
								<< "[line " << i_line <<"] \n";
						}

						// skip zero coding length
						else if (exon_length == 0) {
							cout << "Exons coding length is zero, skipping gene... "
								<< "[line " << i_line <<"] \n";
						}

						else {
							// count in-chrom id
							if (chrom_count.count(splits[1]) > 0) {
								chrom_count.find(splits[1])->second++;
							} else {
								chrom_count.insert(pair<string,int>(splits[1], 1));
							}

							query = new char[1000];
							sprintf(query, 
								"INSERT INTO %s_genes " 
								"(`xset`, `sid`, `name`, `chrom`, `strand`, `begin`, `end`, `exon_count`, `exon_length`) " 
								"VALUES "
								"(%i, %i, '%s', '%s', '%s', %i, %i, %i, %i); ",
								project,
								geneset,
								chrom_count.find(splits[1])->second,
								splits[0], 
								splits[1],
								splits[2],
								atoi(splits[5]),
								atoi(splits[6]),
								atoi(splits[7]),
								exon_length);
							dbi_conn_query(conn, query);
							delete[] query;

							q_lastid = dbi_conn_sequence_last(conn, NULL);

							query = new char[16384];
							sprintf(query,
								"INSERT INTO %s_exons "
								"(`xgene`, `begin`, `end`) "
								"VALUES ",
								project);
							for (int i=0; i<atoi(splits[7]); i++) {
								subquery = new char[1024];
								if (i==0)
									sprintf(subquery, "(%i, %i, %i) ",
									q_lastid, 
									atoi(bsplits[i]), 
									atoi(esplits[i]));
								else
									sprintf(subquery, ", (%i, %i, %i)",
									q_lastid, 
									atoi(bsplits[i]), 
									atoi(esplits[i]));
								strcat(query, subquery);
								delete[] subquery;
							}
							dbi_conn_query(conn, query);
//							cout << query << "\n";
							delete[] query;

							counter++;
						}

						// release memory
						for (int i=0; i<index; i++) {
							delete[] splits[i];
						}
						for (int i=0; i<bcount; i++) {
							delete[] bsplits[i];
						}
						for (int i=0; i<ecount; i++) {
							delete[] esplits[i];
						}
					}
					fclose(file);
				}

				// error while opening the file
				else
				{
					perror(path); 
				}

			}

		}
		(void)closedir(dirp);

		cout << "Loaded " << counter << " genes. \n";

		cout << "Loading done. \n";
	}

	// cluster genes
	if (do_cluster) {

		cout << "Starting clustering... \n";
		strcpy(sets_list, "");

		// create tables
		query = new char[1000];
		sprintf(query, 
			"CREATE TABLE IF NOT EXISTS %s_groups ("
			"`id` INT AUTO_INCREMENT, "
			"`name` VARCHAR(200), "
			"PRIMARY KEY (id)) "
			"CHARACTER SET utf8 COLLATE utf8_general_ci;",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		query = new char[1000];
		sprintf(query, 
			"CREATE TABLE IF NOT EXISTS %s_grouplists ("
			"`id` INT AUTO_INCREMENT, "
			"`xgroup` INT, "
			"`xset` INT, "
			"PRIMARY KEY (`id`)) "
			"CHARACTER SET utf8 COLLATE utf8_general_ci;",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		query = new char[1000];
		sprintf(query, 
			"CREATE TABLE IF NOT EXISTS %s_clusters ("
			"`id` INT AUTO_INCREMENT, "
			"`xgroup` INT, "
			"`xgene` INT, "
			"`cluster` INT, "
			"PRIMARY KEY (`id`), "
			"KEY `dual` (`xgroup`,`xgene`)) "
			"CHARACTER SET utf8 COLLATE utf8_general_ci;",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		// create cluster group
		query = new char[1000];
		sprintf(query,
			"INSERT INTO %s_groups "
			"(`name`) "
			"VALUES "
			"('%s'); ",
			project,
			cluster_group);
		result = dbi_conn_query(conn, query);
		delete[] query;

		clusterset = dbi_conn_sequence_last(conn, NULL);

		int chrom_total = 0;

		// loop on genesets
		for (index = optind; index < argc; index++) {

			// get geneset from input
			query = new char[1000];
			sprintf(query, 
				"SELECT `id` FROM `%s_sets` "
				"WHERE `name` = '%s';",
				project,
				argv[index]);
			result = dbi_conn_query(conn, query);
			delete[] query;

			int geneset = 0;
			if (result) {
				while (dbi_result_next_row(result)) {
				geneset = dbi_result_get_int(result, "id");
			}	}
			dbi_result_free(result);

			// add geneset to clusterset
			query = new char[1000];
			sprintf(query,
				"INSERT INTO `%s_grouplists` "
				"(`xgroup`, `xset`) "
				"VALUES "
				"(%i, %i); ",
				project,
				clusterset,
				geneset);
			result = dbi_conn_query(conn, query);
			delete[] query;

			// add geneset to list
			if (strlen(sets_list) > 0) {
				strcat(sets_list, (char*)",");
			}
			sprintf(sword, "%d", geneset);
			strcat(sets_list, sword);

			sets_count++;
			sets_array[sets_count] = geneset;

//			geneset_list.insert(geneset);
		}

		// count chromosomes
		query = new char[1000];
		sprintf(query,
			"SELECT count(distinct(`chrom`)) AS `cnt` FROM `%s_genes` WHERE `xset` IN (%s);",
			project,
			sets_list);
		result = dbi_conn_query(conn, query);
		delete[] query;

		if (result) {
			while (dbi_result_next_row(result)) {
				chrom_total = dbi_result_get_longlong(result, "cnt");
		}	}
		dbi_result_free(result);

		char** chrom_list = new char*[chrom_total+1];

		query = new char[1000];
		sprintf(query,
			"SELECT distinct(`chrom`) AS `chr` FROM `%s_genes` WHERE `xset` IN (%s) ORDER BY `chr`;",
			project,
			sets_list);
		result = dbi_conn_query(conn, query);
		delete[] query;

		index = 0;
		if (result) {
			while (dbi_result_next_row(result)) {
				index++;
				word = (char*)dbi_result_get_string(result, "chr");
				chrom_list[index] = new char[100];
				strcpy(chrom_list[index], word);
			}
		}
		dbi_result_free(result);

		int c_index = 1; // init cluster counter
		int total;

		// cluster by chromosomes
		for (int chrom_actual=1; chrom_actual<=chrom_total; chrom_actual++) {

			cout << "Clustering gene " << chrom_list[chrom_actual] << "... \n";

			// count offset values
			calculate_offsets(chrom_list[chrom_actual]);

			// cluster variables
			int *gene_id;
			int *gene_number;
			int *exon_number;
			int *exon_xgene;

			// get genes count
			query = new char[1000];
			sprintf(query,
				"SELECT count(`id`) AS `cnt` FROM `%s_genes` WHERE `xset` IN (%s) and `chrom` = '%s';",
				project,
				sets_list,
				chrom_list[chrom_actual]);
			result = dbi_conn_query(conn, query);
			delete[] query;

			if (result) {
				while (dbi_result_next_row(result)) {
				count_genes = dbi_result_get_longlong(result, "cnt");
			}	}
			dbi_result_free(result);

			// get exons count
			query = new char[1000];
			sprintf(query,
				"SELECT count(E.id) AS `cnt` FROM %s_exons E "
				"LEFT JOIN %s_genes G ON E.xgene = G.id "
				"WHERE G.xset IN (%s) and G.chrom = '%s';",
				project,
				project,
				sets_list,
				chrom_list[chrom_actual]);
			result = dbi_conn_query(conn, query);
			delete[] query;

			if (result) {
				while (dbi_result_next_row(result)) {
				count_exons = dbi_result_get_longlong(result, "cnt");
			}	}
			dbi_result_free(result);

			cout << "Clustering " << count_exons << " exons on " << count_genes << " genes. \n";

			// setting arrays
			gene_number = new int[count_genes+1];
			gene_id = new int[count_genes+1];
			exon_number = new int[count_exons+1];
			exon_xgene = new int[count_exons+1];
			
			int* exon_net_up = new int[count_exons+1];
			int* exon_net_down = new int[count_exons+1];
			int* exon_net_left = new int[count_exons+1];
			int* exon_net_right = new int[count_exons+1];
			int* exon_net_return = new int[count_exons+1];

			for (int i=1; i<=count_genes; i++) {
				gene_number[i] = -1;
			}

			for (int i=1; i<=count_exons; i++) {
				exon_number[i] = -1;

				exon_net_up[i] = -1;
				exon_net_down[i] = -1;
				exon_net_left[i] = -1;
				exon_net_right[i] = -1;
				exon_net_return[i] = -1;
			}

			// get all exons from sets
			query = new char[1000];
			sprintf(query,
				"SELECT E.xgene, E.begin, E.end, G.xset, G.sid FROM %s_exons E "
				"LEFT JOIN %s_genes G ON G.id = E.xgene "
				"WHERE G.xset IN (%s) and G.chrom = '%s' "
				"ORDER BY E.begin;",
				project,
				project,
				sets_list,
				chrom_list[chrom_actual]);
			result = dbi_conn_query(conn, query);
			delete[] query;

			int q_gene, q_begin, q_end, q_cluster, q_sid, q_set;
			int actual_gene;
			int right_bound = -1;

			if (result) {
				index = 1;
				while (dbi_result_next_row(result)) {
					// fetch the column values
					q_gene = dbi_result_get_int(result, "xgene");
					q_begin = dbi_result_get_int(result, "begin");
					q_end = dbi_result_get_int(result, "end");
					q_sid = dbi_result_get_int(result, "sid");
					q_set = dbi_result_get_int(result, "xset");

					// map genes by ID
					actual_gene = offset[q_set] + q_sid;
					gene_id[actual_gene] = q_gene;
					exon_xgene[index] = actual_gene;

					// primary clustering
/*					if (q_begin > right_bound) {
						c_index++;
					} */

					// vertical links
					if (index > 1 && q_begin <= right_bound) {
						exon_net_down[index-1] = index;
						exon_net_up[index] = index-1;
					}

					right_bound = max(right_bound, q_end);

					index++;
				}
				dbi_result_free(result);
			}

			cout << "... \n";

			// horizontal links
			for (int i=1; i<=count_exons; i++) {
				actual_gene = exon_xgene[i];
				if (gene_number[actual_gene] > -1) {
					exon_net_right[gene_number[actual_gene]] = i;
					exon_net_left[i] = gene_number[actual_gene];
				}
				gene_number[actual_gene] = i;
			}

			int actual_exon, last_exon, direction;

			// traverse the components
			total = 0;
			for (int i=1; i<=count_exons; i++) {
				if (exon_number[i] > 0) continue;

				exon_number[i] = c_index;
				exon_net_left[i] = 0;
				exon_net_return[i] = 0;

				actual_exon = i;
				last_exon = i;
				direction = 0;

				while (true) {

					exon_number[actual_exon] = c_index;

					temp = 0;
					if (exon_net_left[actual_exon] == 0) temp++;
					if (exon_net_right[actual_exon] == 0) temp++;
					if (exon_net_up[actual_exon] == 0) temp++;
					if (exon_net_down[actual_exon] == 0) temp++;

					if (temp > 1) {
						if (direction == 1) {
							exon_net_right[actual_exon] = -1;
							actual_exon = last_exon;
						}
						if (direction == 2) {
							exon_net_left[actual_exon] = -1;
							actual_exon = last_exon;
						}
						if (direction == 3) {
							exon_net_down[actual_exon] = -1;
							actual_exon = last_exon;
						}
						if (direction == 4) {
							exon_net_up[actual_exon] = -1;
							actual_exon = last_exon;
						}

						actual_exon = last_exon;
					}

					if (temp == 1 || temp == 0) {
						if (exon_net_left[actual_exon] > 0) {
							last_exon = actual_exon;
							actual_exon = exon_net_left[actual_exon];
							exon_net_left[last_exon] = -1;
							exon_net_right[actual_exon] = 0;
							direction = 1;

							if (exon_net_return[actual_exon] == -1) exon_net_return[actual_exon] = last_exon;
						} 
						else if (exon_net_right[actual_exon] > 0) {
							last_exon = actual_exon;
							actual_exon = exon_net_right[actual_exon];
							exon_net_right[last_exon] = -1;
							exon_net_left[actual_exon] = 0;
							direction = 2;

							if (exon_net_return[actual_exon] == -1) exon_net_return[actual_exon] = last_exon;
						} 
						else if (exon_net_up[actual_exon] > 0) {
							last_exon = actual_exon;
							actual_exon = exon_net_up[actual_exon];
							exon_net_up[last_exon] = -1;
							exon_net_down[actual_exon] = 0;
							direction = 3;

							if (exon_net_return[actual_exon] == -1) exon_net_return[actual_exon] = last_exon;
						} 
						else if (exon_net_down[actual_exon] > 0) {
							last_exon = actual_exon;
							actual_exon = exon_net_down[actual_exon];
							exon_net_down[last_exon] = -1;
							exon_net_up[actual_exon] = 0;
							direction = 4;

							if (exon_net_return[actual_exon] == -1) exon_net_return[actual_exon] = last_exon;
						} 
						else {
							last_exon = actual_exon;
							if (exon_net_return[actual_exon] > 0) {
								actual_exon = exon_net_return[actual_exon];
							} 
							else { 
								break;
							}
						}
					}

				}
				c_index++;
				total++;
			}

			cout << "... \n";

			// assign clusters to genes
			for (int i=1; i<=count_exons; i++) {
				gene_number[exon_xgene[i]] = exon_number[i];
			}

			cout << "Created " << total << " clusters. \n";

			cout << "Saving clusters... \n";

			for (int i=1; i<=count_genes; i++)
			{
				q_gene = gene_id[i];
				q_cluster = gene_number[i];
				query = new char[1000];
				sprintf(query, 
					"INSERT INTO %s_clusters " 
					"(`xgroup`, `xgene`, `cluster`) " 
					"VALUES " 
					"(%i, %i, %i); ",
					project,
					clusterset,
					q_gene, 
					q_cluster);
				dbi_conn_query(conn, query);
//				cout << query << "\n";
				delete[] query;
			}

			// unsetting arrays
			delete[] gene_number;
			delete[] exon_number;
			delete[] exon_xgene;
			delete[] gene_id;

			delete[] exon_net_left;
			delete[] exon_net_right;
			delete[] exon_net_up;
			delete[] exon_net_down;
			delete[] exon_net_return;
		}

		delete[] chrom_list;

		cout << "Totally created " << c_index-1 << " clusters. \n";

		cout << "Clustering done. \n";
	}

	// filter genes
	if (do_filter) {
		
		cout << "Filtering genes... \n";
		strcpy(sets_list, "");

		// create tables
		query = new char[1000];
		sprintf(query, 
			"CREATE TABLE IF NOT EXISTS `%s_results` ("
			"`id` INT AUTO_INCREMENT, "
			"`xset` INT, "
			"`species` VARCHAR(200), "
			"`maf` VARCHAR(200), "
			"PRIMARY KEY (`id`), "
			"KEY `dual` (`species`,`maf`)) "
			"CHARACTER SET utf8 COLLATE utf8_general_ci;",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		query = new char[1000];
		sprintf(query, 
			"CREATE TABLE IF NOT EXISTS `%s_errors` ("
			"`id` INT AUTO_INCREMENT, "
			"`xresult` INT, "
			"`xgene` INT, "
			"`test` VARCHAR(200), "
			"`desc` VARCHAR(200), "
			"`note` VARCHAR(200), "
			"`begin` INT, "
			"`end` INT, "
			"PRIMARY KEY (`id`), "
			"KEY `dual` (`xresult`,`xgene`)) "
			"CHARACTER SET utf8 COLLATE utf8_general_ci;",
			project);
		dbi_conn_query(conn, query);
		delete[] query;

		// filtering variables
		int ref_position;
		int old_ref_position;
		int nref_position;

		char* ref_text;
		char* spec_text;
		char spec_triplet[3];

		bool spec_block = false, ref_block = false;
		int addr;

		// count species
		spec_count = 0;
		for (index = optind; index < argc; index++) {
			spec_count++;
		}
		spec_list = new char*[spec_count+1];

		// assign species
		counter = 0;
		for (index = optind; index < argc; index++) {
			counter++;
			spec_list[counter] = argv[index];
		}

		// progress variables
		// *_work: index of the last gene/exon whose begin has been reached,
		// *_done: index of the last gene/exon whose end has been passed
		int gene_work=0, gene_done=0;
		int exon_work=0, exon_done=0;

		// chromosome / position / strand of the examined species in the current
		// ("new") and in the previously processed ("old") aligned block
		char* new_chrom;
		char* old_chrom;
		int new_position;
		int old_position;
		char old_strand;
		char new_strand;

		// Last two non-gap bases of the examined species. The buffer is passed to
		// add_error() as a C string, so a third byte is reserved for the NUL
		// terminator (only [0] and [1] are ever written).
		char spec_duplet[3] = {'N', 'N', '\0'};
		int left_border, right_border;

		// maf file variables
		char f_species[64], f_chrom[64], f_strand, f_text[65536];
		int f_begin;

		// build sets_list
		query = new char[1000];
		sprintf(query, 
			"SELECT S.id as `qset` FROM `%s_groups` R "
			"JOIN %s_grouplists S ON S.xgroup = R.id "
			"WHERE R.name = '%s';",
			project,
			project,
			cluster_group);
		result = dbi_conn_query(conn, query);
		delete[] query;

		sets_count = 0;
		if (result) {
			int qset = 0;
			while (dbi_result_next_row(result)) {
				qset = dbi_result_get_int(result, "qset");

				if (strlen(sets_list) > 0) {
					strcat(sets_list, (char*)",");
				}
				sprintf(sword, "%d", qset);
				strcat(sets_list, sword);

				sets_count++;
				sets_array[sets_count] = qset;
			}
			dbi_result_free(result);
		}

		// get cluster group
		query = new char[1000];
		sprintf(query, 
			"SELECT id FROM `%s_groups` "
			"WHERE `name` = '%s';",
			project,
			cluster_group);
		result = dbi_conn_query(conn, query);
		delete[] query;

		// process species
		for (int sp=1; sp<=spec_count; sp++) {
			actual_spec = spec_list[sp];
			cout << "Processing species " << actual_spec << ": \n";

			// Create one `<project>_results` row per gene set for this species and
			// remember its auto-increment id in resultset[<set id>].
			for (int ii=1; ii<=sets_count; ii++) {
				// insert result, get resultset
				// species and maf directory come from the command line, so the
				// statement is formatted with an explicit size limit
				char insert_sql[1000];
				snprintf(insert_sql, sizeof insert_sql,
					"INSERT INTO `%s_results` "
					"(`xset`, `species`, `maf`) "
					"VALUES "
					"(%i, '%s', '%s'); ",
					project,
					sets_array[ii],
					actual_spec,
					mafs_dir);
				result = dbi_conn_query(conn, insert_sql);

				if (!result) {
					// the id read below would belong to an earlier insert
					cerr << "Warning: could not create result row for set "
						<< sets_array[ii] << ". \n";
				}

				resultset[sets_array[ii]] = dbi_conn_sequence_last(conn, NULL);

				// release the handle; it is not needed once the id has been read
				if (result) {
					dbi_result_free(result);
					result = NULL;
				}
			}

			// preprocess files

			// process chromosomes
			dirp = opendir(mafs_dir);
			while ((dp = readdir(dirp)) != NULL) {
				fname = dp->d_name;

				if(fname.substr(fname.find_last_of(".") + 1) == "maf") {
					actual_chrom = strdup(fname.substr(0, fname.find_last_of(".")).c_str());
					cout << "Processing chromozome " << actual_chrom << "... \n";

					// initialization
					gene_work = gene_done = 0;
					exon_work = exon_done = 0;

					// count offset values
					calculate_offsets(actual_chrom);

					// get genes count
					query = new char[1000];
					sprintf(query,
						"SELECT count(id) AS cnt FROM %s_genes WHERE chrom='%s' and `xset` in (%s);",
						project,
						actual_chrom,
						sets_list);
					result = dbi_conn_query(conn, query);
					delete[] query;

					if (result) {
						while (dbi_result_next_row(result)) {
						count_genes = dbi_result_get_longlong(result, "cnt");
					}	}
					dbi_result_free(result);

					// get exons count
					query = new char[1000];
					sprintf(query,
						"SELECT count(E.id) AS cnt FROM %s_exons E "
						"LEFT JOIN %s_genes G ON E.xgene = G.id "
						"WHERE G.chrom = '%s' and G.xset in (%s)",
						project,
						project,
						actual_chrom,
						sets_list);
					result = dbi_conn_query(conn, query);
					delete[] query;

					if (result) {
						while (dbi_result_next_row(result)) {
						count_exons = dbi_result_get_longlong(result, "cnt");
					}	}
					dbi_result_free(result);

					cout << "Filtering " << count_exons << " exons on " << count_genes << " genes... \n";

					// allocate memory
					gene_begin = new int[count_genes+1];
					gene_end = new int[count_genes+1];
					gene_xid = new int[count_genes+1];
					gene_set = new int[count_genes+1];
					gene_sid = new int[count_genes+1];
					gene_in_set = new int[count_genes+1];
					gene_strand = new char[count_genes+1];

					exon_begin = new int[count_exons+1];
					exon_end = new int[count_exons+1];
					exon_set = new int[count_exons+1];
					exon_sid = new int[count_exons+1];

					spec_frame = new int[count_genes+1];
					ref_frame = new int[count_genes+1];
					frame_begin = new int[count_genes+1];
					frame_end = new int[count_genes+1];

					frame_fix = new int[count_genes+1];
					frame_var = new int[count_genes+1];

					for (int i=0; i<= count_genes; i++) {
						spec_frame[i] = 0;
						ref_frame[i] = 0;
						frame_begin[i] = -1;
						frame_end[i] = -1;

						frame_fix[i] = 0;
						frame_var[i] = 0;
					}

					// load genes
					query = new char[1000];
					sprintf(query,
						"SELECT begin, end, id, sid, xset, strand  FROM %s_genes "
						"WHERE chrom = '%s' AND xset in (%s) "
						"ORDER BY begin;",
						project,
						actual_chrom,
						sets_list);
					result = dbi_conn_query(conn, query);
					delete[] query;

					// Copy the gene rows (ordered by begin) into the per-chromosome arrays.
					if (result) {
						index = 1;
						while (dbi_result_next_row(result)) {
							// fetch values
							// gene_sid / gene_set are stored by row order (index) ...
							gene_sid[index] = dbi_result_get_int(result, "sid");
							gene_set[index] = dbi_result_get_int(result, "xset");
							// ... while all remaining attributes are stored at the slot
							// offset[<set>] + <sid> computed from those two values
							temp = offset[gene_set[index]] + gene_sid[index];

							gene_in_set[temp] = dbi_result_get_int(result, "xset");
							gene_xid[temp] = dbi_result_get_int(result, "id");
							gene_begin[temp] = dbi_result_get_int(result, "begin");
							gene_end[temp] = dbi_result_get_int(result, "end");
							astring = dbi_result_get_string(result, "strand");
							// only the first character of the strand column is kept
							gene_strand[temp] = astring[0];
							index++;
						}
						dbi_result_free(result);
					}

					cout << "... \n";

					// load exons
					query = new char[1000];
					sprintf(query,
						"SELECT E.xgene, E.begin, E.end, G.sid, G.xset  FROM %s_exons E "
						"LEFT JOIN %s_genes G ON E.xgene = G.id "
						"WHERE G.chrom = '%s' AND G.xset in (%s) "
						"ORDER BY E.begin;",
						project,
						project,
						actual_chrom,
						sets_list);
					result = dbi_conn_query(conn, query);
					delete[] query;

					// Copy the exon rows (ordered by E.begin) into the exon arrays;
					// unlike the gene attributes, all of them are stored by row order.
					if (result) {
						index = 1;
						while (dbi_result_next_row(result)) {
							// fetch values
							exon_begin[index] = dbi_result_get_int(result, "begin");
							exon_end[index] = dbi_result_get_int(result, "end");
							// sid / xset come from the joined gene row (G.sid, G.xset)
							exon_sid[index] = dbi_result_get_int(result, "sid");
							exon_set[index] = dbi_result_get_int(result, "xset");

							index++;
						}
						dbi_result_free(result);
					}

					cout << "... \n";

					// parse file
					strcpy(path, mafs_dir);
					strcat(path, "/");
					strcat(path, fname.c_str());
					FILE *file = fopen(path, "r");
					if (file != NULL) {
						i_line = 0;
						spec_triplet[0] = 'N';
						spec_triplet[1] = 'N';
						spec_triplet[2] = 'N';

						new_position = old_position = 0;
						new_chrom = strdup("");
						old_chrom = strdup("");
						ref_position = nref_position = 0;
						old_ref_position = 0;
						new_strand = '.';
						old_strand = '.';

						count_queries = 0;

						// nonsense frame init
						ref_frame_actual = 0;

						// read lines
						while (fgets(line, sizeof line, file) != NULL) {
							i_line++;

							// skip comments
							if (strncmp(line, "#", 1) == 0) {
								continue;
							}

							// read align block
							if (strncmp(line, "a", 1) == 0) {
								spec_block = false;
								ref_block = false;

								spec_text = new char[MAF_FILE_LINE];
								ref_text = new char[MAF_FILE_LINE];

								while (fgets(line, sizeof line, file) != NULL) {
									i_line++;

									// read all S lines
									// format: s species.chrom start ## +- ## text
									if (strncmp(line, "s", 1) == 0) {
										// split line
										index = 0;
										for (word = strtok(line, " "); word; word = strtok(NULL, " ")) {
											splits[index] = new char[MAF_FILE_LINE];
											strcpy(splits[index], word);
											index++;
										}

										// check columns count
										if (index != MAF_FILE_COLS) {
											cout << "Incorrect line format, skipping... "
												<< "[line " << i_line <<"] \n";
											continue;
										}

										// load values
										astring = string(splits[1]);
										strcpy(f_species, (char*)astring.substr(0, astring.find(".")).c_str());
										strcpy(f_chrom, (char*)astring.substr(astring.find(".")+1).c_str());
										f_begin = atoi(splits[2]);
										f_strand = splits[4][0];
										strcpy(f_text, splits[6]);

										// assign values
										if (strcmp(ref_genome, f_species) == 0) {
											strcpy(ref_text, f_text);
											ref_block = true;
											nref_position = f_begin -1; // important
										}

										if (strcmp(actual_spec, f_species) == 0) {
											strcpy(spec_text, f_text);
											spec_block = true;
											new_position = f_begin -1; // important
											new_strand = f_strand;
											strcpy(new_chrom, f_chrom);
										}

										for (int i=0; i<index; i++) {
											delete[] splits[i];
										}

									}

									// process aligned block
									else {

										if (spec_block && !ref_block) {
											cout << "Alignment without reference species, skipping... "
											<< "[line " << i_line <<"] \n";
										}

										else if (spec_block && nref_position < ref_position) {
											cout << "Aligned block out of order, skipping... "
											<< "[line " << i_line <<"] \n";
										}

										else if (spec_block && strlen(ref_text) != strlen (spec_text)) {
											cout << "Aligned strings have different length, skipping... "
											<< "[line " << i_line <<"] \n";
										}

										else if (spec_block) {
											ref_position = nref_position;

											// add working
											for (int i=gene_work+1; i<=count_genes; i++) {
												addr = addr_gene(i);
												if (gene_begin[addr] > ref_position +2) {
													break;
												}
												else {
													gene_work++;
												}
											}
											for (int i=exon_work+1; i<=count_exons; i++) {
												if (exon_begin[i] > ref_position +2) {
													break;
												}
												else {
													exon_work++;
												}
											}

											// initial settings

											// alignment
											if (abs(ref_position - old_ref_position) > FILT_ALIGNMENT) {
												for (int i=gene_done+1; i<=gene_work; i++) {
													// error, alignment 
													addr = addr_gene(i);
													finalize_frame(addr, old_ref_position);
													frame_begin[addr] = -1;
													ref_frame[addr] = 0;
													spec_frame[addr] = 0;

													left_border = max(old_ref_position+1, gene_begin[addr]);
													right_border = min(ref_position, gene_end[addr]);
													if (left_border + 5 <= right_border) {
														add_error(addr, (char*)"alignment", left_border, right_border);
													}
												}
											}

											// position jump, not in first block
											if (abs(old_position - new_position) > FILT_SYNTENY && old_position>0) {
												for (int i=exon_done+1; i<=exon_work; i++) {
													if (exon_end[i] > ref_position && exon_begin[i] < old_ref_position) {
														// error, synteny, position 
														addr = addr_exon(i);
														add_error(addr, (char*)"synteny", old_ref_position, ref_position, (char*)"position");
													}
												}
											}

											// chromosome change, not in first block
											if (strcmp(old_chrom, new_chrom) != 0 && strcmp(old_chrom, "") != 0) {
												for (int i=exon_done+1; i<=exon_work; i++) {
													if (exon_end[i] > ref_position && exon_begin[i] < old_ref_position) {
														// error, synteny, chrom 
														addr = addr_exon(i);
														add_error(addr, (char*)"synteny", old_ref_position, ref_position, (char*)"chrom");
													}
												}
											}

											// strand change, not in first block
											// old_strand is '.' until a block of the examined species
											// has been processed, so the test has to require that it
											// is NOT '.' (the same pattern as the chromosome check,
											// which requires a non-empty old_chrom). With `== '.'`
											// the check could only fire while old_ref_position was
											// still 0, so no exon could ever match.
											if (old_strand != new_strand && old_strand != '.') {
												for (int i=exon_done+1; i<=exon_work; i++) {
													if (exon_end[i] > ref_position && exon_begin[i] < old_ref_position) {
														// error, synteny, strand
														addr = addr_exon(i);
														add_error(addr, (char*)"synteny", old_ref_position, ref_position, (char*)"strand");
													}
												}
											}

											// remove done
											for (int i=gene_done+1; i<=count_genes; i++) {
												addr = addr_gene(i);
												if (gene_end[addr] > ref_position -2) {
													break;
												}
												else {
													finalize_frame(addr, ref_position);
													gene_done++;
												}
											}
											for (int i=exon_done+1; i<=count_exons; i++) {
												if (exon_end[i] > ref_position -2) {
													break;
												}
												else {
													exon_done++;
												}
											}

											// process base by base 
											for (unsigned int pos=0; pos<strlen(ref_text); pos++) {
												if (ref_text[pos] == '\n') {
													continue;
												}

												// move position
												if (ref_text[pos] != '-') {
													ref_position++;
													ref_frame_actual = (ref_frame_actual+1) %3;
												}
												if (spec_text[pos] != '-') {
													new_position++;
												}

												// add working
												for (int i=gene_work+1; i<=count_genes; i++) {
													addr = addr_gene(i);
													if (gene_begin[addr] > ref_position +2) {
														break;
													}
													else {
														gene_work++;
													}
												}

												for (int i=exon_work+1; i<=count_exons; i++) {
													if (exon_begin[i] > ref_position +2) {
														break;
													}
													else {
														exon_work++;
													}
												}

												// update triplet
												if (spec_text[pos] != '-') {
													spec_triplet[0] = spec_triplet[1];
													spec_triplet[1] = spec_triplet[2];
													spec_triplet[2] = toupper(spec_text[pos]);
												}

												if (spec_text[pos] != '-') {
												for (int i=gene_done+1; i<=gene_work; i++) {
													addr = addr_gene(i);

													// test start
													if (ref_position == gene_begin[addr]+2) {
													if (gene_strand[addr] == '+') {
														if (strcmp(spec_triplet, "ATG") != 0) {
															add_error(addr, (char*)"start", ref_position-2, ref_position, (char*)"", (char*)spec_triplet);
														} 
													}
													if (gene_strand[addr] == '-') {
														if (strcmp(spec_triplet, "TTA") != 0
														&& strcmp(spec_triplet, "TCA") != 0
														&& strcmp(spec_triplet, "CTA") != 0) {
															add_error(addr, (char*)"start", ref_position-2, ref_position, (char*)"", (char*)spec_triplet);
														} 
													}
													}

													// test stop
													if (ref_position == gene_end[addr]-1) {
													if (gene_strand[addr] == '+') {
														if (strcmp(spec_triplet, "TAA") != 0
														&& strcmp(spec_triplet, "TGA") != 0
														&& strcmp(spec_triplet, "TAG") != 0) {
															add_error(addr, (char*)"stop", ref_position-2, ref_position, (char*)"", spec_triplet);
														}
													}
													if (gene_strand[addr] == '-') {
														if (strcmp(spec_triplet, "CAT") != 0) {
															add_error(addr, (char*)"stop", ref_position-2, ref_position, (char*)"", spec_triplet);
														}
													}
													}


												} }

												// extended start+stop
												if (do_extended) {
												if (spec_text[pos] != '-') {
													for (int i=gene_done+1; i<=gene_work; i++) {
														addr = addr_gene(i);

														if (ref_position == gene_begin[addr]+2) {
														if (gene_strand[addr] == '+') {
															if (strcmp(spec_triplet, "ATG") == 0) {
																add_error(addr, (char*)"gstart", ref_position-2, ref_position, (char*)"", (char*)spec_triplet);
															}
														}
														if (gene_strand[addr] == '-') {
															if (strcmp(spec_triplet, "TTA") == 0
															|| strcmp(spec_triplet, "TCA") == 0
															|| strcmp(spec_triplet, "CTA") == 0) {
																add_error(addr, (char*)"gstart", ref_position-2, ref_position, (char*)"", (char*)spec_triplet);
															}
														}
														}

														if (ref_position == gene_end[addr]-1) {
														if (gene_strand[addr] == '+') {
															if (strcmp(spec_triplet, "TAA") == 0
															|| strcmp(spec_triplet, "TGA") == 0
															|| strcmp(spec_triplet, "TAG") == 0) {
																add_error(addr, (char*)"gstop", ref_position-2, ref_position, (char*)"", spec_triplet);
															}
														}
														if (gene_strand[addr] == '-') {
															if (strcmp(spec_triplet, "CAT") == 0) {
																add_error(addr, (char*)"gstop", ref_position-2, ref_position, (char*)"", spec_triplet);
															}
														}
														}


													}
												} }

												// test splice sites
												spec_duplet[0] = spec_triplet[1];
												spec_duplet[1] = spec_triplet[2];

												if (spec_text[pos] != '-') {
												for (int i=exon_done+1; i<=exon_work; i++) {
													addr = addr_exon(i);

													if (gene_begin[addr] <= ref_position && gene_end[addr] >= ref_position) {

														if (ref_position == exon_end[i]+1) {
														if (gene_strand[addr] == '+') {
															if ((spec_duplet[0] != 'G' || spec_duplet[1] != 'T')
															&& (spec_duplet[0] != 'G' || spec_duplet[1] != 'C')) {
																add_error(addr, (char*)"splice", ref_position-1, ref_position, (char*)"begin", spec_duplet);
															}
														}
														if (gene_strand[addr] == '-') {
															if (spec_duplet[0] != 'C' || spec_duplet[1] != 'T') {
																add_error(addr, (char*)"splice", ref_position-1, ref_position, (char*)"begin", spec_duplet);
															}
														}
														}

														if (ref_position == exon_begin[i]-1) {
														if (gene_strand[addr] == '+') {
															if (spec_duplet[0] != 'A' || spec_duplet[1] != 'G') {
																add_error(addr, (char*)"splice", ref_position-1, ref_position, (char*)"end", spec_duplet);
															}
														}
														if (gene_strand[addr] == '-') {
															if ((spec_duplet[0] != 'A' || spec_duplet[1] != 'C')
															&& (spec_duplet[0] != 'G' || spec_duplet[1] != 'C')) {
																add_error(addr, (char*)"splice", ref_position-1, ref_position, (char*)"end", spec_duplet);
															}
														}
														}

													}
												} }

												// extended splice sites
												if (do_extended) {
												if (spec_text[pos] != '-') {
													for (int i=exon_done+1; i<=exon_work; i++) {
														addr = addr_exon(i);

														if (gene_begin[addr] <= ref_position && gene_end[addr] >= ref_position) {

															if (ref_position == exon_end[i]+1) {
															if (gene_strand[addr] == '+') {
																if ((spec_duplet[0] == 'G' && spec_duplet[1] == 'T')
																|| (spec_duplet[0] == 'G' && spec_duplet[1] == 'C')) {
																	add_error(addr, (char*)"gsplice", ref_position-1, ref_position, (char*)"begin", spec_duplet);
																}
															}
															if (gene_strand[addr] == '-') {
																if (spec_duplet[0] == 'C' && spec_duplet[1] == 'T') {
																	add_error(addr, (char*)"gsplice", ref_position-1, ref_position, (char*)"begin", spec_duplet);
																}
															}
															}

															if (ref_position == exon_begin[i]-1) {
															if (gene_strand[addr] == '+') {
																if (spec_duplet[0] == 'A' && spec_duplet[1] == 'G') {
																	add_error(addr, (char*)"gsplice", ref_position-1, ref_position, (char*)"end", spec_duplet);
																}
															}
															if (gene_strand[addr] == '-') {
																if ((spec_duplet[0] == 'A' && spec_duplet[1] == 'C')
																|| (spec_duplet[0] == 'G' && spec_duplet[1] == 'C')) {
																	add_error(addr, (char*)"gsplice", ref_position-1, ref_position, (char*)"end", spec_duplet);
																}
															}
															}

														}
													}
												} }

												// move frame
												if (ref_text[pos] != '-' && spec_text[pos] == '-') {
												for (int i=exon_done+1; i<=exon_work; i++) {
													addr = addr_exon(i);
													if (exon_begin[i] <= ref_position && exon_end[i] >= ref_position
													&& gene_begin[addr] <= ref_position && gene_end[addr] >= ref_position) {
														ref_frame[addr] = (ref_frame[addr]+1) % 3;
													}
												} }
												if (ref_text[pos] == '-' && spec_text[pos] != '-') {
												for (int i=exon_done+1; i<=exon_work; i++) {
													addr = addr_exon(i);
													if (exon_begin[i] <= ref_position && exon_end[i] >= ref_position
													&& gene_begin[addr] <= ref_position && gene_end[addr] >= ref_position) {
														spec_frame[addr] = (spec_frame[addr]+1) % 3;
													}
												} }

												// test frame, for in-exons
												if ((ref_text[pos] == '-' || spec_text[pos] == '-')
												&& (ref_text[pos] != '-' || spec_text[pos] != '-')) {

												for (int i=exon_done+1; i<=exon_work; i++) {
													addr = addr_exon(i);

													if (exon_begin[i] <= ref_position && exon_end[i] >= ref_position
													&& gene_begin[addr] <= ref_position && gene_end[addr] >= ref_position) {

														if (frame_begin[addr] == -1 && ref_frame[addr] != spec_frame[addr]) {
															frame_begin[addr] = ref_position;
														}
														if (frame_begin[addr] >= 0 && ref_frame[addr] == spec_frame[addr]) {
															frame_end[addr] = ref_position;
														}

														if (frame_end[addr] >= 0 && ref_frame[addr] != spec_frame[addr]) {
															if (frame_end[addr] + 5 <= ref_position) {
																finalize_frame(addr, ref_position);
																frame_begin[addr] = ref_position;
															} else {
																frame_end[addr] = -1;
															}
														}
													}
												} }

												// synchronize nonsense frames
												if (spec_text[pos] != '-') {
													for (int i=exon_done+1; i<=exon_work; i++) {
														addr = addr_exon(i);

														if (exon_begin[i] <= ref_position && exon_end[i] >= ref_position
														&& gene_begin[addr] <= ref_position && gene_end[addr] >= ref_position) {
															// open frame
															if (gene_begin[addr] == ref_position) {
																frame_var[addr] = ((frame_fix[addr]+3) - ref_frame_actual) %3;
															}
															if (exon_begin[i] == ref_position && gene_begin[addr] <= ref_position) {
																frame_var[addr] = ((frame_fix[addr]+3) - ref_frame_actual) %3;
															}

															// close frame
															if (exon_end[i] == ref_position) {
																frame_fix[addr] = frame_fix[addr] + frame_var[addr] + ref_frame_actual;
																frame_var[addr] = 0;
															}
														}
													}
												}

												// test bad codon, for in-exons
												if (ref_text[pos] != '-') {

												if (strcmp(spec_triplet, "TAA") == 0 || strcmp(spec_triplet, "TGA") == 0 || strcmp(spec_triplet, "TAG") == 0) {
													for (int i=exon_done+1; i<=exon_work; i++) {
														addr = addr_exon(i);

														if (gene_strand[addr] == '+') {
															temp = 3 + spec_frame[addr] - ref_frame[addr];
															temp = temp + frame_fix[addr] + frame_var[addr] + ref_frame_actual;

															if ((temp % 3) == 2) {
															if (exon_begin[i] < ref_position && exon_end[i] > ref_position
															&& gene_begin[addr]+2 < ref_position && gene_end[addr]-1 > ref_position) {
																add_error(addr, (char*)"nonsense", ref_position-2, ref_position, (char*)"", (char*)spec_triplet);
															} }
														}
													}
												}

												if (strcmp(spec_triplet, "TTA") == 0 || strcmp(spec_triplet, "TCA") == 0 || strcmp(spec_triplet, "CTA") == 0) {
													for (int i=exon_done+1; i<=exon_work; i++) {
														addr = addr_exon(i);

														if (gene_strand[addr] == '-') {
															temp = 3 + spec_frame[addr] - ref_frame[addr];
															temp = temp + frame_fix[addr] + frame_var[addr] + ref_frame_actual;

															if ((temp % 3) == 2) {
															if (exon_begin[i] < ref_position && exon_end[i] > ref_position
															&& gene_begin[addr]+2 < ref_position && gene_end[addr]-1 > ref_position) {
																add_error(addr, (char*)"nonsense", ref_position-2, ref_position, (char*)"", (char*)spec_triplet);
															} }
														}
													}
												}

												}

												// remove done
												for (int i=gene_done+1; i<=count_genes; i++) {
													addr = addr_gene(i);
													if (gene_end[addr] > ref_position -2) {
														break;
													}
													else {
														finalize_frame(addr, ref_position);
														gene_done++;
													}
												}

												for (int i=exon_done+1; i<=count_exons; i++) {
													if (exon_end[i] > ref_position -2) {
														break;
													}
													else {
														exon_done++;
													}
												}

											}

										}

										old_strand = new_strand;
										old_position = new_position;
										old_ref_position = ref_position;
										strcpy(old_chrom, new_chrom);

										delete[] spec_text;
										delete[] ref_text;

										break;
									}
								}
							}


						}

//						add_error(-1, (char*)"special", 0, 1, actual_chrom);
//						flush_errors();

						fclose(file);

						// finalize 
						for (int i=gene_done+1; i<=count_genes; i++) {
							addr = addr_gene(i);
							finalize_frame(addr, old_ref_position);

							// error, alignment 
							left_border = max(old_ref_position, gene_begin[addr]);
							right_border = gene_end[addr];
							if (left_border + 5 <= right_border) {
								add_error(addr, (char*)"alignment", left_border, right_border);
							}

						}
						flush_errors();

					}

					// chyba pri otvarani suboru
					else
					{
						perror(path); 
					}

					// release memory
					// every array allocated for this chromosome is released here,
					// including gene_in_set, frame_fix and frame_var, which were
					// previously left allocated after each file
					delete[] gene_begin;
					delete[] gene_end;
					delete[] gene_xid;
					delete[] gene_set;
					delete[] gene_sid;
					delete[] gene_in_set;
					delete[] gene_strand;

					delete[] exon_begin;
					delete[] exon_end;
					delete[] exon_sid;
					delete[] exon_set;

					delete[] ref_frame;
					delete[] spec_frame;
					delete[] frame_begin;
					delete[] frame_end;

					delete[] frame_fix;
					delete[] frame_var;

				}

			}
			(void)closedir(dirp);
		}

		delete[] spec_list;

		cout << "Filtering done. \n";
	}

	// make results
	if (do_result) {
		cout << "Creating results... \n";

		// count species
		spec_count = 0;
		for (index = optind; index < argc; index++) {
			spec_count++;
		}
		spec_list = new char*[spec_count+1];

		// assign species
		counter = 0;
		for (index = optind; index < argc; index++) {
			counter++;
			spec_list[counter] = argv[index];
		}

		// get cluster by input
		query = new char[1000];
		sprintf(query, 
			"SELECT `id` FROM `%s_groups` "
			"WHERE `name` = '%s';",
			project,
			cluster_group);
		result = dbi_conn_query(conn, query);
		delete[] query;

		int clusterset = 0;
		if (result) {
			while (dbi_result_next_row(result)) {
			clusterset = dbi_result_get_int(result, "id");
		}	}
		dbi_result_free(result);

		results_list = new char*[spec_count+1];
		// prepare result lists
		// For every species build a comma separated list of the ids of its
		// `<project>_results` rows for this maf directory; the list is used later
		// inside an SQL "IN (...)" clause.
		for (int sp=1; sp<=spec_count; sp++) {
			actual_spec = spec_list[sp];

			query = new char[1000];
			snprintf(query, 1000,
				"SELECT id FROM %s_results WHERE species='%s' and maf='%s'; ",
				project,
				actual_spec,
				mafs_dir);
			result = dbi_conn_query(conn, query);
			delete[] query;

			const size_t list_cap = 5*MAX_SETS;
			results_list[sp] = new char[list_cap];
			// new char[] does not clear the memory: the list has to start out as
			// an empty string before strlen()/strcat() may be used on it
			results_list[sp][0] = '\0';
			size_t list_len = 0;

			if (result) {
				qresult = 0;
				while (dbi_result_next_row(result)) {
					qresult = dbi_result_get_int(result, "id");
					sprintf(sword, "%d", qresult);

					// room needed: optional comma + digits + terminating NUL
					const size_t extra = strlen(sword) + (list_len > 0 ? 1 : 0);
					if (list_len + extra + 1 > list_cap) {
						cerr << "Warning: too many result sets for species "
							<< actual_spec << ", list truncated. \n";
						break;
					}

					if (list_len > 0) {
						strcat(results_list[sp], (char*)",");
					}
					strcat(results_list[sp], sword);
					list_len += extra;
				}
				dbi_result_free(result);
			}

		}

		ofstream of, cof;
		ofstream of_sel;
		strcpy(path, fasta_dir);
		strcat(path, "/corthy_results");
		of.open(path, ios::out|ios::trunc); 

		strcpy(path, fasta_dir);
		strcat(path, "/corthy_results_cmp");
		cof.open(path, ios::out|ios::trunc); 

		bool selected = false; 

		// matrix of selected genes 
		int** selected_genes;
		int* selected_refs;
		int* selected_strand;
		char** selected_names;
		int selected_count = 0;

		// process chromosomes
		dirp = opendir(mafs_dir);
		while ((dp = readdir(dirp)) != NULL) {
			fname = dp->d_name;

			if(fname.substr(fname.find_last_of(".") + 1) == "maf") {
				actual_chrom = strdup(fname.substr(0, fname.find_last_of(".")).c_str());
				cout << "Processing chromozome " << actual_chrom << "... \n";

				selected_genes = new int*[10000];
				selected_names = new char*[10000];
				selected_refs = new int[10000];
				selected_strand = new int[10000];

				query = new char[1000];
				sprintf(query,
					"SELECT G.name, G.id, G.begin, G.end, G.exon_length, G.strand, C.cluster FROM %s_genes G "
					"LEFT JOIN %s_clusters C on C.xgene = G.id "
					"WHERE chrom='%s' AND C.xgroup = %i AND G.exon_length>5 ORDER BY C.cluster; ",
					project,
					project,
					actual_chrom,
					clusterset);
				result2 = dbi_conn_query(conn, query);
				delete[] query;

				if (result2) {

					int cluster_number, last_cluster_number = 0;
					int codelength, last_codelength = 0;
					int coverage = 0, last_coverage = 0;
					char temp_strand, max_strand = '+';

					int temp_ref = -1, max_ref = -1;
					int* temp_gene = new int[spec_count+1];
					int* max_gene = new int[spec_count+1];
					char* max_name = new char[1000];
					char* temp_name = new char[1000];

					selected_count = 0;
					index = 0;
					while (dbi_result_next_row(result2)) {
						// ziskanie hodnot
						qresult = dbi_result_get_int(result2, "id");
						qbegin = dbi_result_get_int(result2, "begin");
						qend = dbi_result_get_int(result2, "end");
						word = (char*)dbi_result_get_string(result2, "name");
						cluster_number = dbi_result_get_int(result2, "cluster");
						codelength = dbi_result_get_int(result2, "exon_length");
						strcpy(temp_name, word);
						astring = dbi_result_get_string(result2, "strand");
						temp_strand = astring.c_str()[0];

						// Rows arrive ordered by cluster. When the cluster number changes,
						// the best gene of the cluster that just ended (if it had one) is
						// stored, and the per-cluster state is reset.
						// The tracked cluster number is updated on EVERY change, not only
						// when something was saved: otherwise a cluster that follows one
						// without any clean gene would look "changed" on each of its rows
						// and would be stored prematurely and more than once.
						if (cluster_number != last_cluster_number) {
							if (last_coverage > 0) {
								// save previous cluster best
								selected_count++;
								selected_refs[selected_count] = max_ref;
								selected_names[selected_count] = new char[1000];
								strcpy(selected_names[selected_count], max_name);

								selected_genes[selected_count] = new int[spec_count+1];
								for (int ii=1; ii<=spec_count; ii++) {
									selected_genes[selected_count][ii] = max_gene[ii];
								}

								// strand is stored as 0 for '-' and 1 otherwise
								selected_strand[selected_count] = (max_strand == '-') ? 0 : 1;
							}

							// re-init values
							last_cluster_number = cluster_number;
							last_codelength = 0;
							last_coverage = 0;
						}

						selected = false;
						// process by species
						for (int sp=1; sp<=spec_count; sp++) {
							actual_spec = spec_list[sp];

							subquery = new char[1000];
							sprintf(subquery, " AND E.end >= (G.begin + %.2f * (G.end-G.begin)) "
								"AND E.begin <= (G.begin + %.2f * (G.end-G.begin)) ",
								(float)ommit_range/100,
								1-(float)ommit_range/100);

							query = new char[1000];
							if (do_borders) {
							sprintf(query,
								"SELECT count(E.`id`) as cnt FROM `%s_errors` E "
								"LEFT JOIN `%s_genes` G on G.id=E.xgene "
								"WHERE `xgene`=%i %s "
								"AND `xresult` in (%s) "
								"AND (`test`='alignment' OR `test`='frame' OR `test`='splice' OR `test`='start' OR `test`='stop' OR `test`='synteny' OR `test`='nonsense'); ",
								project,
								project,
								qresult,
								subquery,
								results_list[sp]);
							} else {
							sprintf(query,
								"SELECT count(E.`id`) as cnt FROM `%s_errors` E "
								"LEFT JOIN `%s_genes` G on G.id=E.xgene "
								"WHERE `xgene`=%i %s "
								"AND `xresult` in (%s) "
								"AND (`test`='alignment' OR `test`='frame' OR `test`='splice' OR `test`='synteny' OR `test`='nonsense'); ",
								project,
								project,
								qresult,
								subquery,
								results_list[sp]);
							}
//							cout << query << "\n";

							result = dbi_conn_query(conn, query);
							delete[] query;
							delete[] subquery;

							qcount = -1;
							if (result) {
								while (dbi_result_next_row(result)) {
									qcount = dbi_result_get_longlong(result, "cnt");
								}
							}
							dbi_result_free(result);

							if (qcount == 0) {
								index++;

								if (selected == false) {
									selected = true;

									temp_ref = qresult;
									for (int ii=1; ii<=spec_count; ii++) {
										temp_gene[ii] = 0;
									}
								}

								coverage++;
								temp_gene[sp] = 1;
							}

						}
						if (selected) {
							if (do_allexport || coverage > last_coverage ||
							(coverage == last_coverage && codelength > last_codelength)) {
								last_coverage = coverage;
								last_codelength = codelength;

								max_ref = temp_ref;
								max_strand = temp_strand;
								strcpy(max_name, temp_name);
								for (int ii=1; ii<=spec_count; ii++) {
									max_gene[ii] = temp_gene[ii];
								}
							}
							if (do_allexport) {
								last_cluster_number = -1;
							}
						}

					}

					if (last_coverage > 0) {
						selected_count++;
						selected_refs[selected_count] = max_ref;
						selected_names[selected_count] = new char[1000];
						strcpy(selected_names[selected_count], max_name);

						selected_genes[selected_count] = new int[spec_count+1];
						for (int ii=1; ii<=spec_count; ii++) {
							selected_genes[selected_count][ii] = max_gene[ii];
						}
					}

					delete[] temp_gene;
					delete[] max_gene;

				}
				dbi_result_free(result2);

				// fill corthy_results
				for (int i=1; i<=selected_count; i++) {
					of << selected_names[i] << " ";
					for (int ii=1; ii<=spec_count; ii++) {
					if (selected_genes[i][ii] == 1) {
						of << spec_list[ii] << ",";
					} }
					of << "\n";
				}

				cout << "Found " << index << " genes without errors. \n";

				if (do_export) {

				cout << "Exporting filtered genes... \n";

				// SQL to get exons in selected genes
				char* selected_list = new char[2000*200];
				strcpy(selected_list, (char*)"-1");
				for (int i=1; i<=selected_count; i++) {
					strcat(selected_list, ",");
					sprintf(tempchar, "%d", selected_refs[i]);
					strcat(selected_list, tempchar);
				}
//				cout << selected_list << "\n";

				// get exons count
				query = new char[2000*200];
				sprintf(query,
					"SELECT count(E.id) AS cnt FROM %s_exons E "
					"LEFT JOIN %s_genes G ON E.xgene = G.id "
					"WHERE G.id in (%s);",
					project,
					project,
					selected_list);
				result = dbi_conn_query(conn, query);
				delete[] query;

				if (result) {
					while (dbi_result_next_row(result)) {
					count_exons = dbi_result_get_longlong(result, "cnt");
				}	}
				dbi_result_free(result);

				cout << "Exporting " << count_exons << " exons... \n";

				// allocate memory
				exon_begin = new int[count_exons+1];
				exon_end = new int[count_exons+1];
				exon_gene_begin = new int[count_exons+1];
				exon_gene_end = new int[count_exons+1];
				exon_gene_name = new char*[count_exons+1];
				exon_gene_id = new int[count_exons+1];
				gene_name = new char*[selected_count+1];

				map<string,int> genes_map;
				int genes_key;
				map<string,int>::iterator it;

				// store string, will create phy files
				char*** exported_genes = new char**[selected_count+1];
				for (int i=0; i<=selected_count; i++) {
					gene_name[i] = new char[2000];

					exported_genes[i] = new char*[spec_count+1];
					for (int j=0; j<=spec_count; j++) {
						exported_genes[i][j] = new char[200*200];
						strcpy(exported_genes[i][j], (char*)"");
					}
				}

				// load exons
				query = new char[2000*200];
				sprintf(query,
					"SELECT E.xgene, E.begin as nbegin, E.end as nend, G.begin as gbegin, G.end as gend, G.name  FROM %s_exons E "
					"LEFT JOIN %s_genes G ON E.xgene = G.id "
					"WHERE G.id in (%s) "
					"ORDER BY E.begin;",
					project,
					project,
					selected_list);
				result = dbi_conn_query(conn, query);
				delete[] query;

				if (result) {
					index = 1;
					genes_key = 1;
					while (dbi_result_next_row(result)) {
						// fetch values
						exon_begin[index] = dbi_result_get_int(result, "nbegin");
						exon_end[index] = dbi_result_get_int(result, "nend");
						exon_gene_begin[index] = dbi_result_get_int(result, "gbegin");
						exon_gene_end[index] = dbi_result_get_int(result, "gend");
						astring = dbi_result_get_string(result, "name");
						exon_gene_name[index] = new char[1000];
						strcpy(exon_gene_name[index], (char*)astring.c_str());

						it = genes_map.find(astring);
						if (it == genes_map.end()) {
							genes_map.insert(it, pair<string,int>(astring,genes_key)); 
							genes_key++;
						}
						it = genes_map.find(astring);
						exon_gene_id[index] = it->second;
						strcpy(gene_name[exon_gene_id[index]], (char*)astring.c_str());

						index++;
					}
					dbi_result_free(result);
				}

				cout << "... \n";

				// export selected genes from MAF

				// filtering variables
				int ref_position;
				int old_ref_position;
				int nref_position;

				char* ref_text;
				char** spec_text = new char*[spec_count+1];

				// initialization
				int exon_work, exon_done;

				int left_border, right_border;

				bool ref_block;
				bool* spec_block = new bool[spec_count+1];

				exon_work = exon_done = 0;

				char** spec_chrom = new char*[spec_count+1];
				int* spec_position = new int[spec_count+1];
				char spec_strand = '+';
				int spec_length = 0;

				// maf file variables
				char f_species[64], f_chrom[64], f_text[65536];
				int f_begin, f_length;
				char f_strand;

				// comparison vars
				int* exported_begin = new int[selected_count+1];
				int* exported_end = new int[selected_count+1];
				int* exported_strand = new int[selected_count+1];
				char** exported_chrom = new char*[selected_count+1];
				for (int i=0; i<=selected_count; i++) {
					exported_chrom[i] = new char[100];
					strcpy(exported_chrom[i], (char*)"none");
					exported_begin[i] = -1;
					exported_end[i] = -1;
				}

				// parse file
				strcpy(path, mafs_dir);
				strcat(path, "/");
				strcat(path, actual_chrom);
				strcat(path, ".maf");
				FILE *file = fopen(path, "r");
				if (file != NULL) {
					i_line = 0;

					for (int sp=1; sp<=spec_count; sp++) {
						spec_position[sp] = 0;
						spec_chrom[sp] = strdup("");
					}

					ref_position = nref_position = 0;
					old_ref_position = 0;

					// read lines
					while (fgets(line, sizeof line, file) != NULL) {
						i_line++;

						// skip comments
						if (strncmp(line, "#", 1) == 0) {
							continue;
						}

						// read align block
						if (strncmp(line, "a", 1) == 0) {
							for (int sp=1; sp<=spec_count; sp++) {
								spec_block[sp] = false;
							}
							ref_block = false;

							for (int i=1; i<=spec_count; i++) {
								spec_text[i] = new char[MAF_FILE_LINE];
							}
							ref_text = new char[MAF_FILE_LINE];

							while (fgets(line, sizeof line, file) != NULL) {
								i_line++;

								// read all S lines
								// format: s species.chrom start ## +- ## text
								if (strncmp(line, "s", 1) == 0) {
									// split line
									index = 0;
									for (word = strtok(line, " "); word; word = strtok(NULL, " ")) {
										splits[index] = new char[MAF_FILE_LINE];
										strcpy(splits[index], word);
										index++;
									}

									// check columns count
									if (index != MAF_FILE_COLS) {
										cout << "Incorrect line format, skipping... "
											<< "[line " << i_line <<"] \n";
										continue;
									}

									// load values
									astring = string(splits[1]);
									strcpy(f_species, (char*)astring.substr(0, astring.find(".")).c_str());
									strcpy(f_chrom, (char*)astring.substr(astring.find(".")+1).c_str());
									f_begin = atoi(splits[2]);
									f_strand = splits[4][0];
									f_length = atoi(splits[5]);
									strcpy(f_text, splits[6]);

									// assign values
									if (strcmp(ref_genome, f_species) == 0) {
										strcpy(ref_text, f_text);
										ref_block = true;
										nref_position = f_begin -1; // important
									}

									for (int sp=1; sp<=spec_count; sp++) {
										if (strcmp(spec_list[sp], f_species) == 0) {
											spec_block[sp] = true;
											strcpy(spec_text[sp], f_text);

											spec_position[sp] = f_begin -1; // important
											strcpy(spec_chrom[sp], f_chrom);

											spec_length = f_length;
											spec_strand = f_strand;
										}
									}

									for (int i=0; i<index; i++) {
										delete[] splits[i];
									}

								}

								// process aligned block
								else {

									if (ref_block && nref_position < ref_position) {
										cout << "Aligned block out of order, skipping... "
										<< "[line " << i_line <<"] \n";
									}

									else if (ref_block) {
										ref_position = nref_position;

										// add working
										for (int i=exon_work+1; i<=count_exons; i++) {
											if (exon_begin[i] > ref_position +2) {
												break;
											}
											else {
												exon_work++;
											}
										}

										// initial settings

										// alignment
										if (ref_position > old_ref_position + 1) {
											for (int i=exon_done+1; i<=exon_work; i++) {
												// missing alignment 
												left_border = max(old_ref_position+1, exon_gene_begin[i]);
												right_border = min(ref_position-1, exon_gene_end[i]-1);
												if (left_border < right_border) {

													for (int ii=left_border; ii<=right_border; ii++) {
														strcat(exported_genes[exon_gene_id[i]][0], (char*)"N");
														for (int sp=1; sp<=spec_count; sp++) {
															strcat(exported_genes[exon_gene_id[i]][sp], (char*)"N");
														}
													}

												}
											}
										}

										// remove done
										for (int i=exon_done+1; i<=count_exons; i++) {
											if (exon_end[i] > ref_position -2) {
												break;
											}
											else {
												exon_done++;
											}
										}

										// process base by base 
										for (unsigned int pos=0; pos<strlen(ref_text); pos++) {
											if (ref_text[pos] == '\n') {
												continue;
											}

											// move position
											if (ref_text[pos] != '-') {
												ref_position++;
											}

											for (int sp=1; sp<=spec_count; sp++) {
												if (spec_block[sp] && spec_text[sp][pos] != '-') {
													spec_position[sp]++;
												}
											}

											// add working
											for (int i=exon_work+1; i<=count_exons; i++) {
												if (exon_begin[i] > ref_position +2) {
													break;
												}
												else {
													exon_work++;
												}
											}

											for (int i=exon_done+1; i<=exon_work; i++) {
												// test start and stop
												if (ref_position >= exon_gene_begin[i] && ref_position <= exon_gene_end[i]-1) {
												// skip stop codon
												if (ref_position >= exon_gene_begin[i]+3 || selected_strand[exon_gene_id[i]] == 1)
												if (ref_position <= exon_gene_end[i]-4 || selected_strand[exon_gene_id[i]] == 0)
												//test exon borders
												if (ref_position >= exon_begin[i] && ref_position <= exon_end[i]-1) {

													temp = strlen(exported_genes[exon_gene_id[i]][0]);
													exported_genes[exon_gene_id[i]][0][temp] = ref_text[pos];
													exported_genes[exon_gene_id[i]][0][temp+1] = '\0';

													if (do_delimit && ref_position == exon_end[i]-1 && ref_position < exon_gene_end[i]- 4) {
														strcat(exported_genes[exon_gene_id[i]][0], (char*)"|");
													}

													// comparison data
													if (ref_position > exon_gene_begin[i] +
													0.25*(exon_gene_end[i]-exon_gene_begin[i])
													&& exported_begin[exon_gene_id[i]] == -1) {
														strcpy(exported_chrom[exon_gene_id[i]], spec_chrom[1]);
														if (spec_strand == '+') {
															exported_begin[exon_gene_id[i]] = spec_position[1] - (ref_position - exon_gene_begin[i]);
															exported_strand[exon_gene_id[i]] = 1;
															exported_end[exon_gene_id[i]] = spec_position[1] + (exon_gene_end[i] - ref_position);
														}
														if (spec_strand == '-') {
															exported_begin[exon_gene_id[i]] = spec_length - spec_position[1] - (exon_gene_end[i] - ref_position);
															exported_strand[exon_gene_id[i]] = 0;
															exported_end[exon_gene_id[i]] = spec_length - spec_position[1] + (ref_position - exon_gene_begin[i]);
														}
													}

													for (int sp=1; sp<=spec_count; sp++) {
														if (spec_block[sp]) {
															temp = strlen(exported_genes[exon_gene_id[i]][sp]);
															exported_genes[exon_gene_id[i]][sp][temp] = spec_text[sp][pos];
															exported_genes[exon_gene_id[i]][sp][temp+1] = '\0';
														} else {
															if (ref_text[pos] == '-') {
																strcat(exported_genes[exon_gene_id[i]][sp], (char*)"-");
															} else {
																strcat(exported_genes[exon_gene_id[i]][sp], (char*)"N");
															}
														}

														if (do_delimit && ref_position == exon_end[i]-1 && ref_position < exon_gene_end[i]- 4) {
															strcat(exported_genes[exon_gene_id[i]][sp], (char*)"|");
														}

													}
												} }
											} 

											// remove done
											for (int i=exon_done+1; i<=count_exons; i++) {
												if (exon_end[i] > ref_position -2) {
													break;
												}
												else {
													exon_done++;
												}
											}

										}

									}

									old_ref_position = ref_position;

									for (int sp=1; sp<=spec_count; sp++) {
										delete[] spec_text[sp];
									}
									delete[] ref_text;

									break;
								}
							}
						}


					}

					fclose(file);

					// finalize 
					for (int i=exon_done+1; i<=count_exons; i++) {
						// error, alignment 
						left_border = max(old_ref_position+1, exon_gene_begin[i]);
						right_border = exon_gene_end[i]-1;
						if (left_border < right_border) {

							for (int ii=left_border; ii<=right_border; ii++) {
								strcat(exported_genes[exon_gene_id[i]][0], (char*)"N");
								for (int sp=1; sp<=spec_count; sp++) {
									strcat(exported_genes[exon_gene_id[i]][sp], (char*)"N");
								}
							}

						}
					}

				}

				// error opening the file
				else
				{
					perror(path); 
				}

				// save to file
				for (int i=1; i<=selected_count; i++) {
					strcpy(path, fasta_dir);
					strcat(path, "/");
					strcat(path, gene_name[i]);
					strcat(path, ".phy");

					of_sel.open(path, ios::out|ios::trunc);
					of_sel << spec_count+1 << " " << strlen(exported_genes[i][0]) << "\n";

					if (selected_strand[i] == 0) {
						reverse(exported_genes[i][0]);
						of_sel << ref_genome << "\n" << exported_genes[i][0] << "\n";
						for (int sp=1; sp<=spec_count; sp++) {
							reverse(exported_genes[i][sp]);
							of_sel << spec_list[sp] << "\n" << exported_genes[i][sp] << "\n";
						}
					} else { 
						of_sel << ref_genome << "\n" << exported_genes[i][0] << "\n";
						for (int sp=1; sp<=spec_count; sp++) {
							of_sel << spec_list[sp] << "\n" << exported_genes[i][sp] << "\n";
						}
					}

					of_sel.close();

					// save compare data
					cof << gene_name[i] << " ";
//					cout << gene_name[i] << " ";
					cof << exported_begin[i] << " ";
					cof << exported_end[i] << " ";
//					cout << exported_begin[i] << " ";
					cof << exported_chrom[i] << " ";
					cof << exported_strand[i] << "\n";
//					cout << exported_chrom[i] << "\n";
				}

				// free memory
				delete[] exon_begin;
				delete[] exon_end;
				delete[] exon_gene_begin;
				delete[] exon_gene_end;
				delete[] exon_gene_name;
				delete[] exon_gene_id;
				delete[] gene_name;

				delete[] exported_genes;
				delete[] exported_begin;
				delete[] exported_end;
				delete[] exported_chrom;

				}

			}

		}
		of.close() ;
		cof.close() ;
		(void)closedir(dirp);

		delete[] spec_list;
		delete[] results_list;

		cout << "Results done. \n";
	}


	// database finalization
	dbi_conn_close(conn);
	dbi_conn_close(conn2);
	dbi_shutdown();

	cout << "Finished. \n";

now = time(0);
localtm = localtime(&now);
cout << "End: " << asctime(localtm) << endl;

	return 0;
}
