#! /usr/bin/perl -w
use strict;

# Runs exonerate (protein2genome model) on a query/target pair, echoes the
# raw exonerate output to STDOUT and derives two things from the target GFF
# lines it contains:
#   * GTF exon/intron records, appended to the GTF output file;
#   * one digit mask per target sequence, appended to the FASTA-style output
#     file, where each position holds
#       0 = not covered by an exon/intron of the alignment
#       1 = exon on the '+' strand      2 = exon on the '-' strand
#       3 = intron on the '+' strand    4 = intron on the '-' strand
#
# Command-line arguments:
#   1. FASTA-style mask file (opened for appending)
#   2. GTF file (opened for appending)
#   3. query file, passed to exonerate via -q
#   4. target file, passed to exonerate via -t
die "Usage: $0 <mask_fasta_out> <gtf_out> <query_file> <target_file>\n"
	unless (@ARGV >= 4);

my $exonerate_fasta_file = $ARGV[0];
my $exonerate_gtf_file = $ARGV[1];
my $query_file = $ARGV[2];
my $target_file = $ARGV[3];

my $exonerate_binary = '../programs/exonerate-2.2.0/bin/exonerate';
my $dummy_splice_file = 'output/dummy_splice.txt';

# Three-argument open with lexical handles; every failure is fatal because
# continuing would silently produce no output at all.
open(my $info_fh, '>>', $exonerate_fasta_file)
	or die "Cannot open '$exonerate_fasta_file' for appending: $!\n";
open(my $gtf_fh, '>>', $exonerate_gtf_file)
	or die "Cannot open '$exonerate_gtf_file' for appending: $!\n";

# Placeholder file handed to exonerate for both --splice3 and --splice5.
open(my $dummy_fh, '>', $dummy_splice_file)
	or die "Cannot create '$dummy_splice_file': $!\n";
print {$dummy_fh} "\nsplice\n";
close($dummy_fh)
	or die "Cannot write '$dummy_splice_file': $!\n";

# Arguments are passed as a list so that no shell is involved and file names
# containing spaces or shell metacharacters are handed over verbatim.
# The --ryo format makes exonerate print a line "@<%tl>" after each alignment;
# the number after '@' is used below as the full length of the target.
my @exonerate_args = (
	'--model', 'protein2genome',
	'--geneticcode', '3',
	'--showcigar', 'false',
	'--showtargetgff', 'yes',
	'--showsugar', 'false',
	'--showalignment', 'false',
	'--showvulgar', 'false',
	'--ryo', "\@\%tl\n",
	'--splice3', $dummy_splice_file,
	'--splice5', $dummy_splice_file,
	'-q', $query_file,
	'-t', $target_file,
	'--exhaustive', 'yes',
);

my $exonerate_pid = open(my $exonerate_out, '-|', $exonerate_binary, @exonerate_args);
unless ($exonerate_pid) {
	my $open_error = $!;
	unlink($dummy_splice_file);
	die "Cannot run '$exonerate_binary': $open_error\n";
}

# Parser state for the alignment currently being collected.
my $currentSeqName = '';
my $currentSequence = '';
my $first_exon_start = -1;
my $frame = 0;

# Targets whose first alignment has already been written; later alignments
# against the same target are ignored.
my %processed_sequences;

while (my $line = <$exonerate_out>) {
	# Echo the raw exonerate output unchanged.
	print $line;
	chomp($line);

	# Only GFF records (at least nine tab-separated fields) and the
	# "@<length>" terminator lines are of interest.
	next unless ($line =~ /(?:.+\t){8}.+/ || $line =~ /^\@/);

	if (substr($line, 0, 1) eq '@') {
		# End of an alignment: nothing to flush if no exon/intron was collected
		# (e.g. because the target had been processed before).
		next unless ($currentSequence);
		$processed_sequences{$currentSeqName} = 1;

		# Left-pad with '0' up to the start of the leftmost collected exon and
		# right-pad up to the reported target length. Non-positive counts are
		# skipped explicitly to avoid "Negative repeat count" warnings.
		my $leading_length = $first_exon_start - 1;
		$currentSequence = ('0' x $leading_length) . $currentSequence
			if ($leading_length > 0);
		my $missing_length = substr($line, 1) - length($currentSequence);
		$currentSequence .= ('0' x $missing_length)
			if ($missing_length > 0);

		print {$info_fh} ">$currentSeqName\n";
		print {$info_fh} "$currentSequence\n\n";

		$currentSequence = '';
		$currentSeqName = '';
		$first_exon_start = -1;
		$frame = 0;
		next;
	}

	my @fields = split("\t", $line);
	my $seq_name = $fields[0];
	next if ($processed_sequences{$seq_name});

	my $element_type = $fields[2];
	my $element_start = $fields[3];
	my $element_end = $fields[4];
	my $element_strand = $fields[6];
	my $element_length = $element_end - $element_start + 1;

	if ($element_type eq 'exon') {
		# On the '-' strand the frame is updated before the record is written,
		# on the '+' strand after it.
		if ($element_strand eq '-') {
			$frame = ($element_length - (3 - $frame) % 3) % 3;
		}

		print {$gtf_fh} "$seq_name\t.\texon\t$element_start\t$element_end\t.\t$element_strand\t$frame\tgene_id \"$seq_name\"; transcript_id \"$seq_name\";\n";

		if ($element_strand eq '+') {
			$frame = (3 - ($element_length - $frame) % 3) % 3;
			$first_exon_start = $element_start if ($first_exon_start == -1);
			$currentSequence .= ('1' x $element_length);
		} else {
			# '-' strand elements are prepended, so the most recent exon is the
			# leftmost one seen so far.
			$first_exon_start = $element_start;
			$currentSequence = ('2' x $element_length) . $currentSequence;
		}
	}
	if ($element_type eq 'intron') {
		print {$gtf_fh} "$seq_name\t.\tintron\t$element_start\t$element_end\t.\t$element_strand\t.\tgene_id \"$seq_name\"; transcript_id \"$seq_name\";\n";
		if ($element_strand eq '+') {
			$currentSequence .= ('3' x $element_length);
		} else {
			$currentSequence = ('4' x $element_length) . $currentSequence;
		}
	}

	$currentSeqName = $seq_name;
}

# close() on a pipe reports both I/O errors ($! set) and a non-zero exit
# status of the child ($? set); neither used to be noticed.
unless (close($exonerate_out)) {
	if ($!) {
		warn "Error while reading exonerate output: $!\n";
	} else {
		warn "exonerate exited with status " . ($? >> 8) . "\n";
	}
}

unlink($dummy_splice_file)
	or warn "Cannot remove '$dummy_splice_file': $!\n";

close($info_fh)
	or die "Cannot finish writing '$exonerate_fasta_file': $!\n";
close($gtf_fh)
	or die "Cannot finish writing '$exonerate_gtf_file': $!\n";
  
  




 
