#! /usr/bin/perl -w

use strict;
use Bio::SeqIO;
use Data::Dumper;
use Getopt::Long;

# ---------------------------------------------------------------------------
# Setup: parse options, open the output files, build the cmsearch command
# lines and create an all-'0' mask for every sequence in the FASTA file.
#
# Options:
#   --gr1 <file>  existing cmsearch tab file for the group I model
#   --gr2 <file>  existing cmsearch tab file for the group II model
# Positional:
#   the FASTA file (first argument left after option parsing)
# ---------------------------------------------------------------------------
my( $grI_tab_file,
    $grII_tab_file,
    $fasta_file
    );

# Stop on malformed options instead of silently ignoring them; a mistyped
# --gr1/--gr2 would otherwise just leave the variable unset.
GetOptions( "gr1=s" => \$grI_tab_file,
            "gr2=s" => \$grII_tab_file,
          )
    or die "usage: $0 [--gr1 <grI.tab>] [--gr2 <grII.tab>] <sequences.fasta>\n";

# NOTE(review): $fasta_file is undef when no positional argument is given;
# that case is not rejected here so existing invocations keep working.
$fasta_file = shift;

# Output files. Every open is checked so that a missing or unwritable
# "output" directory is reported immediately rather than losing all results.
open(GRIOUT, '>', 'output/rfam_grI.fasta')
    or die "can't write output/rfam_grI.fasta: $!";
open(GRIOUTGTF, '>', 'output/rfam_grI.gtf')
    or die "can't write output/rfam_grI.gtf: $!";
open(GRIIOUT, '>', 'output/rfam_grII.fasta')
    or die "can't write output/rfam_grII.fasta: $!";
open(GRIIOUTGTF, '>', 'output/rfam_grII.gtf')
    or die "can't write output/rfam_grII.gtf: $!";

# cmsearch command lines for the two covariance models (RF00028, RF00029).
my $rfam_grI_command = '../programs/rfam/infernal-1.0.2/src/cmsearch --tabfile output/rfam_grI.tab ../programs/rfam/db_inf_latest/RF00028.cm '.$fasta_file;
my $rfam_grII_command = '../programs/rfam/infernal-1.0.2/src/cmsearch --tabfile output/rfam_grII.tab ../programs/rfam/db_inf_latest/RF00029.cm '.$fasta_file;

my $stream = Bio::SeqIO->new(-file => $fasta_file,
			      -format => 'Fasta');

my @names;          # sequence ids in FASTA order
my %rfam_gr1_hits;  # sequence id => mask string, one character per base
my %rfam_gr2_hits;  # same, for the second model

# Start every sequence with a mask of '0' characters as long as the sequence.
while ( my $seq = $stream->next_seq() ) {
    my $id         = $seq->id();
    my $empty_mask = '0' x $seq->length();

    push(@names, $id);
    $rfam_gr1_hits{$id} = $empty_mask;
    $rfam_gr2_hits{$id} = $empty_mask;
}

# ---------------------------------------------------------------------------
# Group II hits: obtain the cmsearch tab file (running cmsearch when none was
# supplied with --gr2), mark each hit in the per-sequence mask, write one GTF
# "intron" record per hit, then write the masks in FASTA-like form.
# ---------------------------------------------------------------------------
if(!$grII_tab_file){
    # Fail loudly if cmsearch did not succeed; otherwise a stale tab file left
    # over from an earlier run could be parsed as if it were fresh.
    system($rfam_grII_command) == 0
	or die "cmsearch failed (status $?): $rfam_grII_command\n";
    $grII_tab_file = 'output/rfam_grII.tab';
}
open(my $grII_fh, '<', $grII_tab_file)
    or die "can't open group II tab file '$grII_tab_file': $!";

while (my $line = <$grII_fh>) {
    # Skip comment/header lines (leading '#') and blank lines.
    next if $line =~ /^\s*(?:#|$)/;

    # Whitespace-separated columns: model name, target name, start, stop, ...
    # Splitting on whitespace keeps target names that contain '.', '|' or '-'
    # intact; a \w+-based pattern would cut them into pieces.
    my (undef, $target, $start, $stop) = split ' ', $line;
    next unless defined $stop && $start =~ /^\d+$/ && $stop =~ /^\d+$/;

    # Use exists rather than truth: the mask of a 1-base sequence is the
    # string '0', which is false in boolean context.
    if (exists $rfam_gr2_hits{$target}) {
	$rfam_gr2_hits{$target} = map_rfam_hits($rfam_gr2_hits{$target}, $start, $stop);
    }

    # NOTE(review): the GTF record is written for every parsed hit, even when
    # the target is not one of the FASTA sequences (the group I section only
    # writes records for known targets) -- confirm which is intended.
    # A hit with start > stop lies on the reverse strand: emit ascending
    # coordinates and report the strand as '-'.
    my ($low, $high, $strand) = $start <= $stop
	? ($start, $stop, '+')
	: ($stop, $start, '-');
    print GRIIOUTGTF join("\t",
	$target, '.', 'intron', $low, $high, '.', $strand, '.',
	"gene_id \"$target\"; transcript_id \"$target\";"), "\n";
}
close($grII_fh);

# One record per input sequence, in FASTA order: header line, then the mask.
foreach my $name (@names){
    print GRIIOUT ">$name\n".$rfam_gr2_hits{$name}."\n";
}

# Nothing else is written to these handles; closing surfaces buffered write errors.
close(GRIIOUTGTF) or die "error writing output/rfam_grII.gtf: $!";
close(GRIIOUT)    or die "error writing output/rfam_grII.fasta: $!";

# ---------------------------------------------------------------------------
# Group I hits: obtain the cmsearch tab file (running cmsearch when none was
# supplied with --gr1), mark each hit on a known sequence in its mask, write
# one GTF "intron" record per such hit, then write the masks in FASTA-like form.
# ---------------------------------------------------------------------------
if(!$grI_tab_file){
    # Fail loudly if cmsearch did not succeed; otherwise a stale tab file left
    # over from an earlier run could be parsed as if it were fresh.
    system($rfam_grI_command) == 0
	or die "cmsearch failed (status $?): $rfam_grI_command\n";
    $grI_tab_file = 'output/rfam_grI.tab';
}
open(my $grI_fh, '<', $grI_tab_file)
    or die "can't open group I tab file '$grI_tab_file': $!";

while (my $line = <$grI_fh>) {
    # Skip comment/header lines (leading '#') and blank lines.
    next if $line =~ /^\s*(?:#|$)/;

    # Whitespace-separated columns: model name, target name, start, stop, ...
    # Splitting on whitespace keeps target names that contain '.', '|' or '-'
    # intact; a \w+-based pattern would cut them into pieces.
    my (undef, $target, $start, $stop) = split ' ', $line;
    next unless defined $stop && $start =~ /^\d+$/ && $stop =~ /^\d+$/;

    # Only hits on sequences read from the FASTA file are recorded. Use exists
    # rather than truth: the mask of a 1-base sequence is the string '0',
    # which is false in boolean context.
    next unless exists $rfam_gr1_hits{$target};

    $rfam_gr1_hits{$target} = map_rfam_hits($rfam_gr1_hits{$target}, $start, $stop);

    # A hit with start > stop lies on the reverse strand: emit ascending
    # coordinates and report the strand as '-'.
    my ($low, $high, $strand) = $start <= $stop
	? ($start, $stop, '+')
	: ($stop, $start, '-');
    print GRIOUTGTF join("\t",
	$target, '.', 'intron', $low, $high, '.', $strand, '.',
	"gene_id \"$target\"; transcript_id \"$target\";"), "\n";
}
close($grI_fh);

# One record per input sequence, in FASTA order: header line, then the mask.
foreach my $name (@names){
    print GRIOUT ">$name\n".$rfam_gr1_hits{$name}."\n";
}

# Nothing else is written to these handles; closing surfaces buffered write errors.
close(GRIOUTGTF) or die "error writing output/rfam_grI.gtf: $!";
close(GRIOUT)    or die "error writing output/rfam_grI.fasta: $!";


# map_rfam_hits($hit_seq, $start, $end)
#
# Marks one hit in a per-base mask string and returns the resulting mask.
#
#   $hit_seq  mask string, one character per base
#   $start    1-based hit start coordinate
#   $end      1-based hit end coordinate
#
# A hit with $start < $end is written as '1' characters; any other hit is
# written as '2' characters over the span between $end and $start. The span
# is only written when every position in it is still '0'; otherwise the mask
# is returned unchanged.
sub map_rfam_hits {
    my ($hit_seq,$start ,$end)  = @_;

    my $is_forward = $start < $end;
    my $mark       = $is_forward ? '1' : '2';
    my $first      = $is_forward ? $start : $end;
    my $span       = ($is_forward ? $end - $start : $start - $end) + 1;
    my $offset     = $first - 1;    # coordinates are 1-based, substr is 0-based

    # Leave the mask alone unless the whole span is still unmarked.
    return $hit_seq
	unless substr($hit_seq, $offset, $span) eq ('0' x $span);

    substr($hit_seq, $offset, $span, $mark x $span);
    return $hit_seq;
}





  
  




 
 
