use strict;
use warnings;
use List::Util qw[min max];
use DBI;
use Getopt::Std;

# Connection settings for the MySQL database that holds the working tables.
# NOTE(review): the credentials are hard-coded in the source; consider moving
# them to a config file or environment variables.
my $host = "fs01";
my $database = "visnovectest";
my $user = "visnovec";
my $passwd = "2BhSMjV9GF";
my $connection = "DBI:mysql:".$database.";host=".$host;

# DBI->connect returns undef on failure. Stop here with the driver's message
# instead of failing later with "Can't call method ... on an undefined value".
my $db_handle = DBI->connect($connection, $user, $passwd)
	or die("Could not connect to database '".$database."' on '".$host."': ".$DBI::errstr);

# Rebuild the two working tables from scratch on every run. Both tables use
# exactly the same schema, so it is written once and applied to each name.
# Most INT columns default to -1; later queries in this script test for that
# value (e.g. "cluster = -1", "dir_match = -1"), apparently as "not set yet".
my $gene_table_schema = '(
	id INT AUTO_INCREMENT,
	name VARCHAR(200),
	xgene INT default -1,
	strand INT default -1,
	begin INT,
	end INT,
	chrom VARCHAR(200),
	dir_match INT default -1,
	sub_match INT default -1,
	cluster INT default -1,
	length INT default -1,
	frame INT default -1,
	splice INT default -1,
	nonsense INT default -1,
	alignment INT default -1,
	synteny INT default -1,
	PRIMARY KEY (id),
	KEY (name),
	KEY `dual` (begin, chrom))
	CHARACTER SET utf8 COLLATE utf8_general_ci;
	';

# do() returns undef on error; everything below needs these tables, so a
# failure here is fatal rather than a warning.
for my $gene_table ('ensembl', 'compara') {
	$db_handle->do('DROP TABLE IF EXISTS '.$gene_table)
		or die("Could not drop table ".$gene_table.": ".$db_handle->errstr);
}

for my $gene_table ('ensembl', 'compara') {
	$db_handle->do('CREATE TABLE IF NOT EXISTS '.$gene_table.' '.$gene_table_schema)
		or die("Could not create table ".$gene_table.": ".$db_handle->errstr);
}

# mytrim - strip leading and trailing whitespace from a string.
#
# Works both as a plain function, mytrim($text), and as a method,
# $obj->mytrim($text): when the first argument is not a reference it is
# taken to be the text itself.
#
# Returns the trimmed string, or "" when the text is undefined or empty.
sub mytrim {
	my ($self, $text) = @_;
	# ref(\$self) is "SCALAR" for a plain value and "REF" when $self holds a reference
	$text = $self
	if ref(\$self) =~ m/^SCALAR/i;
	# Test definedness and length, not truth: the string "0" is valid text
	# and must not be turned into "".
	return "" unless defined($text) && length($text);
	$text =~ s/^\s+//;
	$text =~ s/\s+$//;
	return $text;
}

print "start\n";

# Per-species input configuration, keyed by the value of the -s option:
#   cmp_file - name of the compara gene list file
#   ens_file - name of the Ensembl gene list file
#   xresults - comma-separated ids used in the "xresult in (...)" filter
my %species_config = (
	"ornAna" => {
		cmp_file => "corthy_results_cmp_ornAna",
		ens_file => "one_platypus",
		xresults => "29,30,31,32",
	},
	"taeGut" => {
		cmp_file => "corthy_results_cmp_taeGut",
		ens_file => "one_zebrafinch",
		xresults => "5,6,7,8",
	},
	"galGal" => {
		cmp_file => "corthy_results_cmp_galGal",
		ens_file => "one_chicken",
		xresults => "1,2,3,4",
	},
);

my %Options;
getopt('s', \%Options);
my $species = $Options{'s'};

my $cmp_file;
my $ens_file;
my $xresults;

# $species is undef when -s was not given; check that first so the lookup
# does not warn about an uninitialized value.
if (defined($species) && exists($species_config{$species})) {
	$cmp_file = $species_config{$species}{cmp_file};
	$ens_file = $species_config{$species}{ens_file};
	$xresults = $species_config{$species}{xresults};
}
else {
	# NOTE(review): the working tables have already been dropped and
	# recreated by the time this check runs.
	print "unknown species, stopping..\n";
	# non-zero status so a calling script can tell the run did not happen
	exit(1);
}

# Open both gene lists for reading. The explicit '<' mode keeps a file name
# that begins with a mode character (">", "|", ...) from being interpreted as
# an open mode, and $! reports why the open failed.
open(CMP, '<', $cmp_file) or die("Could not open cmp file '".$cmp_file."': ".$!);
open(ENS, '<', $ens_file) or die("Could not open ens file '".$ens_file."': ".$!);


print "loading CMP genes\n";

# Prepare the INSERT once and bind the values for each line. Placeholders
# keep quotes or other SQL metacharacters in the file from breaking (or
# altering) the statement.
my $cmp_insert_sth = $db_handle->prepare("INSERT INTO compara (
		name, begin, end, chrom, strand)
		VALUES
		(?, ?, ?, ?, ?)")
	or die("Could not prepare compara insert: ".$db_handle->errstr);

while (my $line = <CMP>)  {
	# Columns are whitespace separated: name, begin, end, chrom, strand
	my @file_cols = split(' ',mytrim($line));

	# A blank line carries no gene; skip it instead of inserting an empty row
	next unless @file_cols;

	# Columns missing from a short line are undef and are stored as NULL
	my ($gene_name, $gene_begin, $gene_end, $gene_chrom, $gene_strand) = @file_cols[0..4];

	$cmp_insert_sth->execute($gene_name, $gene_begin, $gene_end, $gene_chrom, $gene_strand);
}

print "loading ENS genes\n";

# Prepare the INSERT once and bind the values for each line. Placeholders
# keep quotes or other SQL metacharacters in the file from breaking (or
# altering) the statement.
my $ens_insert_sth = $db_handle->prepare("INSERT INTO ensembl (
		name, begin, end, chrom)
		VALUES
		(?, ?, ?, ?)")
	or die("Could not prepare ensembl insert: ".$db_handle->errstr);

while (my $line = <ENS>)  {
	# Columns are comma separated; column 0 is not used here:
	#   1 = name, 2 = chromosome (without "chr" prefix), 3 = begin, 4 = end
	my @file_cols = split(',',mytrim($line));

	# A blank line carries no gene; skip it instead of inserting an empty row
	next unless @file_cols;

	my $gene_name = $file_cols[1];
	my $gene_begin = $file_cols[3];
	my $gene_end = $file_cols[4];
	# Stored with a "chr" prefix; a missing chromosome column is stored as NULL
	my $gene_chrom = defined($file_cols[2]) ? "chr".$file_cols[2] : undef;

	$ens_insert_sth->execute($gene_name, $gene_begin, $gene_end, $gene_chrom);
}

print "joining...\n";

# Link every compara row to the mono_genes/mono_clusters tables by gene name,
# storing the matched mono_genes id (xgene) and its cluster on the row.
my $query = "SELECT id,name,begin,end,chrom FROM compara";
my $sth = $db_handle->prepare($query);
$sth->execute();

# Both statements are prepared once and re-executed per row. The name is
# bound through a placeholder so quotes in it cannot break the SQL.
# The WHERE condition on C.xgroup discards genes without a cluster row, so
# the LEFT JOIN effectively acts as an inner join.
my $compara_mono_lookup = $db_handle->prepare("SELECT G.id,G.name,G.begin,G.end,G.chrom,C.cluster FROM mono_genes G
		LEFT JOIN mono_clusters C ON C.xgene=G.id 
		WHERE C.xgroup=3 and G.name = ?");
my $compara_link_update = $db_handle->prepare("UPDATE compara 
			SET xgene = ?,
			cluster = ?
			WHERE id = ?");

my @row;
while (@row = $sth->fetchrow_array) {
	my ($compara_id, $compara_name) = @row[0,1];

	$compara_mono_lookup->execute($compara_name);

	# No rows() pre-check: for SELECT it is not reliable across drivers, and
	# the fetch loop simply does nothing when there is no match. When several
	# rows match, each update overwrites the previous one.
	while (my @inrow = $compara_mono_lookup->fetchrow_array) {
		$compara_link_update->execute($inrow[0], $inrow[5], $compara_id);
	}
}


# Link every ensembl row to the mono_genes/mono_clusters tables by gene name,
# storing the matched mono_genes id (xgene) and its cluster on the row.
$query = "SELECT id,name,begin,end,chrom FROM ensembl";
$sth = $db_handle->prepare($query);
$sth->execute();

# Both statements are prepared once and re-executed per row. The name is
# bound through a placeholder so quotes in it cannot break the SQL.
# The WHERE condition on C.xgroup discards genes without a cluster row, so
# the LEFT JOIN effectively acts as an inner join.
my $ensembl_mono_lookup = $db_handle->prepare("SELECT G.id,G.name,G.begin,G.end,G.chrom,C.cluster FROM mono_genes G
		LEFT JOIN mono_clusters C ON C.xgene=G.id 
		WHERE C.xgroup=3 and G.name = ?");
my $ensembl_link_update = $db_handle->prepare("UPDATE ensembl 
			SET xgene = ?,
			cluster = ?
			WHERE id = ?");

while (@row = $sth->fetchrow_array) {
	my ($ensembl_id, $ensembl_name) = @row[0,1];

	$ensembl_mono_lookup->execute($ensembl_name);

	# No rows() pre-check: for SELECT it is not reliable across drivers, and
	# the fetch loop simply does nothing when there is no match. When several
	# rows match, each update overwrites the previous one.
	while (my @inrow = $ensembl_mono_lookup->fetchrow_array) {
		$ensembl_link_update->execute($inrow[0], $inrow[5], $ensembl_id);
	}
}

# For ensembl rows that still have no cluster (-1), borrow the cluster of
# another ensembl row on the same chromosome that shares the same begin or
# the same end coordinate.
$query = "SELECT id,name,begin,end,chrom FROM ensembl WHERE cluster = -1";
$sth = $db_handle->prepare($query);
$sth->execute();

# Prepared once, executed per row. With placeholders a NULL coordinate simply
# matches nothing instead of producing malformed SQL.
# NOTE(review): this lookup requires cluster > 0 while other queries in the
# script use cluster > -1, so cluster 0 is never borrowed - confirm intended.
my $cluster_donor_lookup = $db_handle->prepare("SELECT id,name,begin,end,chrom,cluster FROM ensembl 
		WHERE (begin = ? OR end = ?) AND chrom = ? AND cluster > 0 LIMIT 1");
my $cluster_fill_update = $db_handle->prepare("UPDATE ensembl 
			SET cluster = ?
			WHERE id = ?");

while (@row = $sth->fetchrow_array) {
	my ($gene_id, $gene_begin, $gene_end, $gene_chrom) = @row[0,2,3,4];

	$cluster_donor_lookup->execute($gene_begin, $gene_end, $gene_chrom);

	# LIMIT 1 means at most one pass; the loop also drains the statement
	while (my @inrow = $cluster_donor_lookup->fetchrow_array) {
		$cluster_fill_update->execute($inrow[5], $gene_id);
	}
}

print "comparing...\n";

# Mark every ensembl gene that also appears (by name) in compara:
#   dir_match = 1 - a compara row with the same name exists
#   sub_match = 1 - that row overlaps the ensembl gene on the same
#                   chromosome, 0 otherwise
$query = "SELECT id,name,begin,end,chrom FROM ensembl";
$sth = $db_handle->prepare($query);
$sth->execute();

# Prepared once, executed per row. The name goes through a placeholder so
# quotes in it cannot break the SQL.
my $compara_by_name = $db_handle->prepare("SELECT id,name,begin,end,chrom FROM compara WHERE name = ?");
my $match_flag_update = $db_handle->prepare("UPDATE ensembl 
			SET dir_match = 1, sub_match = ?
			WHERE id = ?");

while (@row = $sth->fetchrow_array) {

	$compara_by_name->execute($row[1]);

	# NOTE(review): when several compara rows share the name, each one
	# overwrites sub_match, so the last fetched row decides - confirm intended.
	while (my @inrow = $compara_by_name->fetchrow_array) {

		my $position = 0;
		# Intervals overlap when each one starts before the other one ends
		if ($row[2]<$inrow[3] and $inrow[2]<$row[3] and $row[4] eq $inrow[4]) {
			$position = 1;
		}
		else {
			# Report same-name genes whose positions disagree
			print $row[1].": ".$row[2]."-".$inrow[2]." ".$row[4]."-".$inrow[4]."\n";
		}

		$match_flag_update->execute($position, $row[0]);
	}
}

# Clustered ensembl genes that got no direct name match inherit dir_match and
# sub_match from one already-decided member of the same cluster.
$query = "SELECT id,name,begin,chrom,cluster FROM ensembl WHERE cluster > -1 and dir_match = -1";
$sth = $db_handle->prepare($query);
$sth->execute();

# Prepared once, executed per row, with values bound through placeholders
my $decided_mate_lookup = $db_handle->prepare("SELECT id,name,begin,chrom,dir_match,sub_match FROM ensembl 
		WHERE cluster = ? AND dir_match > -1 LIMIT 1");
my $inherit_match_update = $db_handle->prepare("UPDATE ensembl 
			SET dir_match = ?,
			sub_match = ?
			WHERE id = ?");

while (@row = $sth->fetchrow_array) {
	my ($gene_id, $gene_cluster) = @row[0,4];

	$decided_mate_lookup->execute($gene_cluster);

	# LIMIT 1 means at most one pass; the loop also drains the statement
	while (my @inrow = $decided_mate_lookup->fetchrow_array) {
		$inherit_match_update->execute($inrow[4], $inrow[5], $gene_id);
	}
}

print "filtering...\n";

# For ensembl genes that are linked to mono_genes (xgene > -1) but still have
# no match decision, count the mono_errors rows of each test type and store
# the counts on the gene, setting dir_match to 0.
$query = "SELECT id,name,begin,chrom,xgene FROM ensembl WHERE xgene > -1 and dir_match = -1";
$sth = $db_handle->prepare($query);
$sth->execute();

# One counting statement serves all five tests; only the gene id and the test
# name vary. An error is counted when it overlaps the span from 10% to 90% of
# the gene's length. $xresults is a list of ids fixed by this script for the
# chosen species, so it is interpolated rather than bound.
my $error_count_sth = $db_handle->prepare("SELECT count(E.id) FROM mono_errors E
		LEFT JOIN mono_genes G on G.id = E.xgene
		WHERE xgene = ?
		AND xresult in (".$xresults.")
		AND E.end > (G.begin + 0.1 * (G.end-G.begin)) 
		AND E.begin < (G.begin + 0.9 * (G.end-G.begin))
		AND test = ?");

# Placeholder order below must match @error_tests
my @error_tests = ('frame', 'nonsense', 'synteny', 'alignment', 'splice');
my $error_count_update = $db_handle->prepare("UPDATE ensembl 
	SET dir_match = 0,
	frame = ?, nonsense = ?, synteny = ?, alignment = ?, splice = ?
	WHERE id = ?");

while (@row = $sth->fetchrow_array) {
	my ($gene_id, $gene_xgene) = @row[0,4];

	my @error_counts;
	for my $error_test (@error_tests) {
		$error_count_sth->execute($gene_xgene, $error_test);
		# count() yields exactly one row
		my ($error_count) = $error_count_sth->fetchrow_array;
		# Only one row is read, so release the statement before reusing it
		$error_count_sth->finish();
		push(@error_counts, $error_count);
	}

	$error_count_update->execute(@error_counts, $gene_id);
}

# Clustered ensembl genes that are still undecided (dir_match = -1) become
# dir_match = 0 when some member of the same cluster has dir_match = 0.
$query = "SELECT id,name,begin,chrom,cluster FROM ensembl WHERE cluster > -1 and dir_match = -1";
$sth = $db_handle->prepare($query);
$sth->execute();

# Prepared once, executed per row, with values bound through placeholders
my $unmatched_mate_lookup = $db_handle->prepare("SELECT id,name,begin,chrom,dir_match FROM ensembl 
		WHERE cluster = ? AND dir_match = 0 LIMIT 1");
# The SET list must not end with a comma: "SET dir_match = 0, WHERE ..." is a
# SQL syntax error, which made this update fail every time.
my $unmatched_update = $db_handle->prepare("UPDATE ensembl 
			SET dir_match = 0
			WHERE id = ?");

while (@row = $sth->fetchrow_array) {
	my ($gene_id, $gene_cluster) = @row[0,4];

	$unmatched_mate_lookup->execute($gene_cluster);

	# LIMIT 1 means at most one pass; the loop also drains the statement
	while (my @inrow = $unmatched_mate_lookup->fetchrow_array) {
		$unmatched_update->execute($gene_id);
	}
}

# Release the two input file handles and the database connection, then
# announce that the run is complete.
for my $input_handle (\*CMP, \*ENS) {
	close($input_handle);
}

$db_handle->disconnect;

print("stop\n");
