#! /usr/bin/perl -w
use strict;

# Scan the annotation file named on the command line and print one
# tab-separated, GTF-style "CDS" line to STDOUT for every start/end pair of
# coordinate markers found under a ">organism_gene" header.
#
# Recognised input lines (as matched by the patterns below):
#   >ORGANISM_GENE                  header; resets the per-record state
#   G-<gene>_<n> ==> start / end    gene-level boundary markers
#   G-<gene>_<n>-E<m> ==> start/end exon-level boundary markers
#   <spaces><digits><spaces>...     first such line after a marker supplies
#                                   the coordinate for that marker
#
# Usage: script.pl <mfannot_master_file>
my $mfannot_master_file = $ARGV[0];
die "Usage: $0 <mfannot_master_file>\n" unless defined $mfannot_master_file;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and failure is reported instead of silently
# producing empty output.
open(my $info_fh, '<', $mfannot_master_file)
	or die "Cannot open '$mfannot_master_file' for reading: $!\n";

my $current_organism = "";
my $current_gene = "";
my $current_sequence = "";

# Gene name escaped for safe interpolation into the marker patterns.
my $gene_pattern = quotemeta($current_gene);

# -1 means "no coordinate recorded yet for the current feature".
my $start_cds = -1;
my $end_cds = -1;

# Set when a marker has been seen and the next coordinate line is awaited.
my $read_start_cds = 0;
my $read_end_cds = 0;

# NOTE(review): $strand is deliberately NOT reset at a new header (this keeps
# the historical behaviour), so a record that only carries exon-level markers
# inherits the strand of the previous record -- confirm this is intended.
my $strand = "";
my $remove_stop_codon = 0;

while(my $line = <$info_fh>){
	# Header line: everything before the last underscore is the organism,
	# everything after it is the gene. Trailing whitespace (including the
	# carriage return of CRLF files) is kept out of the gene name so that it
	# can still match the marker lines and does not leak into the output.
	if($line =~ /^>(.+)_(.+?)\s*$/){
		$current_organism = $1;
		$current_gene = lc $2;
		$gene_pattern = quotemeta($current_gene);
		$current_sequence = $current_organism."_".uc($current_gene);
		$read_start_cds = 0;
		$read_end_cds = 0;
		$remove_stop_codon = 0;
		$start_cds = -1;
		$end_cds = -1;
	}

	# Gene-level start marker. Seeing the start before any end coordinate
	# means the feature is on the forward strand.
	if($line =~ /G-${gene_pattern}_\d+ [<=]=[=>].* start/i){
		$read_start_cds = 1;
		$strand = "+" if($end_cds == -1);
	}

	# Exon-level start marker: only arms the coordinate reader.
	if($line =~ /G-${gene_pattern}_\d+-E\d+ [<=]=[=>] start/i){
		$read_start_cds = 1;
	}

	# First space-delimited number after a start marker is the start coordinate.
	if($read_start_cds && $line =~ / +(\d+) +/){
		$start_cds = $1;
		$start_cds -= 1 if($strand eq "-");

		$read_start_cds = 0;
		$remove_stop_codon = 0;

		# On the reverse strand the end marker comes first, so the feature is
		# complete once its start coordinate has been read.
		if($strand eq "-"){
			print "$current_sequence\t.\tCDS\t$end_cds\t$start_cds\t.\t-\t0\tgene_id \"$current_sequence\"; transcript_id \"$current_sequence\";\n";

			$start_cds = -1;
			$end_cds = -1;
		}
	}

	# Gene-level end marker. Seeing the end before any start coordinate means
	# the feature is on the reverse strand. The gene-level end also requests
	# the 3-base adjustment applied below.
	if($line =~ /G-${gene_pattern}_\d+ [<=]=[=>] end/i){
		$read_end_cds = 1;
		$strand = "-" if($start_cds == -1);
		$remove_stop_codon = 1;
	}

	# Exon-level end marker: only arms the coordinate reader.
	if($line =~ /G-${gene_pattern}_\d+-E\d+ [<=]=[=>] end/i){
		$read_end_cds = 1;
	}

	# First space-delimited number after an end marker is the end coordinate.
	if($read_end_cds && $line =~ / +(\d+) +/){
		$end_cds = $1;
		$end_cds -= 1 if($strand eq "+");
		# Shrink the feature by three bases at its gene-level end
		# (presumably to drop the stop codon, going by the flag's name).
		$end_cds -= 3 if($strand eq "+" && $remove_stop_codon);
		$end_cds += 3 if($strand eq "-" && $remove_stop_codon);

		$read_end_cds = 0;
		$remove_stop_codon = 0;

		# On the forward strand the feature is complete once its end
		# coordinate has been read.
		if($strand eq "+"){
			print "$current_sequence\t.\tCDS\t$start_cds\t$end_cds\t.\t+\t0\tgene_id \"$current_sequence\"; transcript_id \"$current_sequence\";\n";

			$start_cds = -1;
			$end_cds = -1;
		}
	}
}


close $info_fh;
  
  




 
