#! /usr/bin/perl -w
# For each GenBank file $file given as an argument: write every gene (padded with up to 200 bp of flanking sequence on each side) to $file.'.fasta' and to the global file glob.fasta, and write the CDS of each gene, in coordinates relative to that new FASTA record, to $file.'.gtf' and to the global file glob.gtf.
# Arguments: GenBank files to be extracted.
# Note: end boundaries are not compatible with Conrad.

use strict;
use Bio::SeqIO;
use Data::Dumper;

# Strand symbols written to the strand column of the GTF output, indexed by
# (location strand + 1): -1 => '-', 0 => '.', +1 => '+'.
my @strand = ('-','.','+');

# Number of flanking bases kept on each side of a gene in the FASTA output.
my $flank = 200;

# Running state shared across all sequences of all input files.
# $gene_id numbers the genes consecutively over the whole run; $start_gene is
# the (flank-adjusted, 1-based) start of the most recently seen gene and is the
# origin that CDS coordinates are made relative to.
# NOTE: a CDS feature is always attributed to the most recently seen gene
# feature, even if that gene came from an earlier sequence or file.
my $gene_id = 0;
my $start_gene = 0;

my $glob_fasta = 'glob.fasta';
my $glob_gtf = 'glob.gtf';

# Global outputs collect the records of every input file. Three-argument open
# with an explicit check, so an unwritable output aborts the run instead of
# silently discarding all data.
open(my $glob_fasta_fh, '>', $glob_fasta)
    or die "Cannot open $glob_fasta for writing: $!";
open(my $glob_gtf_fh, '>', $glob_gtf)
    or die "Cannot open $glob_gtf for writing: $!";

#arguments - file names
foreach my $file (@ARGV) {
    my $fasta_file = $file.'.fasta';
    my $gtf_file = $file.'.gtf';

    # Per-input outputs; each record is written here and to the global file.
    open(my $fasta_fh, '>', $fasta_file)
        or die "Cannot open $fasta_file for writing: $!";
    open(my $gtf_fh, '>', $gtf_file)
        or die "Cannot open $gtf_file for writing: $!";

    my $stream = Bio::SeqIO->new(-file => $file,
                                 -format => 'GenBank');
    while ( my $seq = $stream->next_seq() ) {
        foreach my $feature ($seq->get_SeqFeatures()) {
            my $tag = $feature->primary_tag();

            if ($tag eq 'gene') {
                $gene_id++;

                # Extend the gene by $flank bases on both sides, clamped to
                # the bounds of the sequence.
                $start_gene = $feature->location->start - $flank;
                $start_gene = 1 if $start_gene < 1;

                my $seq_length = $seq->length();
                my $end_gene = $feature->location->end + $flank;
                $end_gene = $seq_length if $end_gene > $seq_length;

                my $s = $seq->subseq($start_gene, $end_gene);
                my $record = ">refgene_$gene_id\n$s\n";
                for my $fh ($fasta_fh, $glob_fasta_fh) {
                    print {$fh} $record
                        or die "Cannot write FASTA record refgene_$gene_id: $!";
                }
            }
            elsif ($tag eq 'CDS') {
                my $location = $feature->location;

                # A split (multi-exon) location yields one line per
                # sub-location; a simple location yields a single line.
                my @parts = $location->isa('Bio::Location::SplitLocationI')
                          ? $location->sub_Location
                          : ($location);

                # The strand is taken from the feature's overall location.
                # An undefined strand is reported as '.', without triggering
                # an "uninitialized value" warning.
                my $strand_value = $location->strand;
                my $str = (defined $strand_value ? $strand_value : 0) + 1;

                for my $part (@parts) {
                    # Shift to coordinates relative to the extracted gene
                    # region (1-based).
                    my $start_cds = $part->start - $start_gene + 1;
                    my $end_cds = $part->end - $start_gene + 1;

                    # The frame column is always written as 0.
                    my $line = "refgene_$gene_id\t.\tCDS\t$start_cds\t$end_cds\t.\t$strand[$str]\t0\tgene_id \"$gene_id\"; transcript_id \"$gene_id\";\n";
                    for my $fh ($gtf_fh, $glob_gtf_fh) {
                        print {$fh} $line
                            or die "Cannot write GTF line for refgene_$gene_id: $!";
                    }
                }
            }
        }
    }

    # Buffered write errors only surface at close, so check it.
    close($fasta_fh) or die "Cannot close $fasta_file: $!";
    close($gtf_fh) or die "Cannot close $gtf_file: $!";
}

close($glob_fasta_fh) or die "Cannot close $glob_fasta: $!";
close($glob_gtf_fh) or die "Cannot close $glob_gtf: $!";



# Return the value of the single annotation stored under $key.
#
# Arguments:
#   $collection - object providing get_Annotations($key)
#                 (presumably a BioPerl annotation collection - TODO confirm)
#   $key        - annotation key to look up
#
# Returns the result of ->value() on the annotation when exactly one
# annotation is found for $key; otherwise returns nothing (undef in scalar
# context, the empty list in list context).
sub get_value {
    my ($collection, $key) = @_;

    my @matches = $collection->get_Annotations($key);

    # Zero or several annotations: the value is missing or ambiguous.
    if (scalar(@matches) != 1) {
        return;
    }

    my ($annotation) = @matches;
    return $annotation->value();
}

 
