package mlproject.stats;

import java.text.DecimalFormat;

import mlproject.hmm.StateModel;

import mlproject.io.TrainingSequence;

import mlproject.util.Util;

/**
 * Accumulates accuracy statistics comparing predicted state paths against the
 * annotated states of training sequences.
 *
 * <p>Two levels are tracked:
 * <ul>
 *   <li><b>State (nucleotide) level</b>: per position, a TP/FP/TN/FN count for
 *       each state class (intergenic, exon+/-, intron+/-).</li>
 *   <li><b>Feature level</b>: per maximal run of exonic (or intronic) positions
 *       on one strand. A predicted run is a TP only if the annotation contains
 *       a run of the same strand with exactly the same start and end; otherwise
 *       it is an FP. An annotated run without such an exact predicted
 *       counterpart is an FN. The TN slot is never incremented at this level.</li>
 * </ul>
 *
 * <p>Instances are mutable and carry no synchronization.
 */
public class Statistics {
    /** Slot indices into every four-element stats array. */
    static final int TP = 0;
    static final int FP = 1;
    static final int TN = 2;
    static final int FN = 3;

    /** Per-position strand labels used while scoring feature runs. */
    private static final int NO_STRAND = 0;
    private static final int PLUS_STRAND = 1;
    private static final int MINUS_STRAND = -1;

    private final StateModel model;

    private int totalLength = 0;
    private int totalPlusExonLength = 0;
    private int totalMinusExonLength = 0;
    private int totalStateMatches = 0;
    private int totalSequences = 0;
    private int perfectlyPredictedSequences = 0;
    private int totalPlusExonStateMatches = 0;
    private int totalMinusExonStateMatches = 0;

    /* "Reverse" counters are measured over positions PREDICTED as exonic. */
    private int totalReversePlusExonLength = 0;
    private int totalReverseMinusExonLength = 0;
    private int totalReversePlusExonStateMatches = 0;
    private int totalReverseMinusExonStateMatches = 0;

    /* State-level confusion counts, indexed by TP/FP/TN/FN. */
    int[] intergenicStateStats = new int[4];
    int[] plusExonStateStats = new int[4];
    int[] minusExonStateStats = new int[4];
    int[] plusIntronStateStats = new int[4];
    int[] minusIntronStateStats = new int[4];

    /* Feature-level counts, indexed by TP/FP/TN/FN. */
    int[] plusExonStats = new int[4];
    int[] minusExonStats = new int[4];

    int[] plusIntronStats = new int[4];
    int[] minusIntronStats = new int[4];

    /**
     * Creates an empty accumulator.
     *
     * @param model model used to classify state ids as intergenic, exonic or
     *              intronic on either strand
     */
    public Statistics(StateModel model) {
        this.model = model;
    }

    /**
     * Adds one observation to a confusion-count array.
     *
     * @param statsArray four-element array indexed by TP/FP/TN/FN
     * @param predicted  whether the prediction falls in the class
     * @param actual     whether the annotation falls in the class
     */
    private static void incrementStats(int[] statsArray, boolean predicted, boolean actual) {
        if (predicted) {
            statsArray[actual ? TP : FP]++;
        } else {
            statsArray[actual ? FN : TN]++;
        }
    }

    private boolean isIntergenic(int state) {
        return Util.containsElement(model.getIntergenicStates(), state);
    }

    private boolean isPlusExon(int state) {
        return Util.containsElement(model.getPlusExonicStates(), state);
    }

    private boolean isMinusExon(int state) {
        return Util.containsElement(model.getMinusExonicStates(), state);
    }

    private boolean isPlusIntron(int state) {
        return Util.containsElement(model.getPlusIntronicStates(), state);
    }

    private boolean isMinusIntron(int state) {
        return Util.containsElement(model.getMinusIntronicStates(), state);
    }

    /** Maps class membership to a strand label; plus wins if both flags are set. */
    private static int strandOf(boolean plus, boolean minus) {
        if (plus) {
            return PLUS_STRAND;
        }
        return minus ? MINUS_STRAND : NO_STRAND;
    }

    /**
     * Folds one sequence and its predicted state path into the statistics.
     *
     * <p>Positions {@code 0 .. seq.length()-1} are compared; {@code states[i]}
     * is the predicted state and {@code seq.getY(i)} the annotated state.
     * Features that touch the first or last position are scored like any
     * other: the missing neighbour counts as a matching boundary.
     *
     * @param seq    annotated sequence
     * @param states predicted state per position
     * @throws IllegalArgumentException if {@code states} has fewer entries than
     *                                  the sequence has positions
     */
    public void addTrainingSequence(TrainingSequence seq, int[] states) {
        int length = seq.length();
        if (states.length < length) {
            // Fail before touching any counter so the totals stay consistent.
            throw new IllegalArgumentException(
                    "states has " + states.length + " entries but the sequence has " + length);
        }

        totalLength += states.length;
        totalSequences++;
        boolean allMatch = true;

        int[] predictedExons = new int[length];
        int[] actualExons = new int[length];
        int[] predictedIntrons = new int[length];
        int[] actualIntrons = new int[length];

        for (int i = 0; i < length; i++) {
            int actual = seq.getY(i);
            int predicted = states[i];
            boolean match = predicted == actual;
            if (match) {
                totalStateMatches++;
            } else {
                allMatch = false;
            }

            // Classify each state once; the results feed every counter below.
            boolean predPlusExon = isPlusExon(predicted);
            boolean predMinusExon = isMinusExon(predicted);
            boolean predPlusIntron = isPlusIntron(predicted);
            boolean predMinusIntron = isMinusIntron(predicted);
            boolean actPlusExon = isPlusExon(actual);
            boolean actMinusExon = isMinusExon(actual);
            boolean actPlusIntron = isPlusIntron(actual);
            boolean actMinusIntron = isMinusIntron(actual);

            incrementStats(intergenicStateStats, isIntergenic(predicted), isIntergenic(actual));
            incrementStats(plusExonStateStats, predPlusExon, actPlusExon);
            incrementStats(minusExonStateStats, predMinusExon, actMinusExon);
            incrementStats(plusIntronStateStats, predPlusIntron, actPlusIntron);
            incrementStats(minusIntronStateStats, predMinusIntron, actMinusIntron);

            // Exact state matches over positions annotated as exonic.
            if (actPlusExon) {
                totalPlusExonLength++;
                if (match) {
                    totalPlusExonStateMatches++;
                }
            }
            if (actMinusExon) {
                totalMinusExonLength++;
                if (match) {
                    totalMinusExonStateMatches++;
                }
            }

            // Exact state matches over positions predicted as exonic.
            if (predPlusExon) {
                totalReversePlusExonLength++;
                if (match) {
                    totalReversePlusExonStateMatches++;
                }
            }
            if (predMinusExon) {
                totalReverseMinusExonLength++;
                if (match) {
                    totalReverseMinusExonStateMatches++;
                }
            }

            predictedExons[i] = strandOf(predPlusExon, predMinusExon);
            actualExons[i] = strandOf(actPlusExon, actMinusExon);
            predictedIntrons[i] = strandOf(predPlusIntron, predMinusIntron);
            actualIntrons[i] = strandOf(actPlusIntron, actMinusIntron);
        }

        // Exons and introns are scored from independent label arrays, so the
        // strand of one feature type can never leak into the other.
        scoreFeatures(predictedExons, actualExons, plusExonStats, minusExonStats);
        scoreFeatures(predictedIntrons, actualIntrons, plusIntronStats, minusIntronStats);

        if (allMatch) {
            perfectlyPredictedSequences++;
        }
    }

    /**
     * Scores feature runs: every predicted run becomes a TP or FP, every
     * annotated run without an exact predicted counterpart becomes an FN.
     *
     * @param predicted  strand label per position for the prediction
     * @param actual     strand label per position for the annotation
     * @param plusStats  counts to update for plus-strand runs
     * @param minusStats counts to update for minus-strand runs
     */
    private static void scoreFeatures(int[] predicted, int[] actual, int[] plusStats, int[] minusStats) {
        int length = predicted.length;

        for (int start = 0; start < length; ) {
            int end = runEnd(predicted, start);
            int strand = predicted[start];
            if (strand != NO_STRAND) {
                int[] stats = strand == PLUS_STRAND ? plusStats : minusStats;
                stats[isExactRun(actual, start, end, strand) ? TP : FP]++;
            }
            start = end;
        }

        for (int start = 0; start < length; ) {
            int end = runEnd(actual, start);
            int strand = actual[start];
            if (strand != NO_STRAND && !isExactRun(predicted, start, end, strand)) {
                int[] stats = strand == PLUS_STRAND ? plusStats : minusStats;
                stats[FN]++;
            }
            start = end;
        }
    }

    /**
     * Returns the exclusive end of the maximal run of equal labels beginning
     * at {@code start}. A change of strand ends a run just like a gap does.
     */
    private static int runEnd(int[] labels, int start) {
        int end = start + 1;
        while (end < labels.length && labels[end] == labels[start]) {
            end++;
        }
        return end;
    }

    /**
     * Tells whether {@code labels} holds a run of {@code strand} covering
     * exactly {@code [start, end)}: every position inside carries the strand
     * and neither neighbour does. A neighbour outside the array counts as a
     * valid boundary.
     */
    private static boolean isExactRun(int[] labels, int start, int end, int strand) {
        if (start > 0 && labels[start - 1] == strand) {
            return false;
        }
        if (end < labels.length && labels[end] == strand) {
            return false;
        }
        for (int j = start; j < end; j++) {
            if (labels[j] != strand) {
                return false;
            }
        }
        return true;
    }

    /** Returns a new array holding the element-wise sum of two stats arrays. */
    private static int[] sum(int[] first, int[] second) {
        int[] total = new int[first.length];
        for (int k = 0; k < total.length; k++) {
            total[k] = first[k] + second[k];
        }
        return total;
    }

    /**
     * Formats {@code matches / total} as a percentage value (without the sign).
     * A zero {@code total} yields a non-finite ratio, which is handed to the
     * formatter unchanged.
     */
    private static String percent(DecimalFormat format, int matches, int total) {
        return format.format((double) matches / total * 100);
    }

    /**
     * Builds the multi-line, human-readable report of all statistics.
     *
     * @return the report text
     */
    @Override
    public String toString() {
        int[] codingNucleotidsStats = sum(plusExonStateStats, minusExonStateStats);
        int[] exonStats = sum(plusExonStats, minusExonStats);
        int[] intronStats = sum(plusIntronStats, minusIntronStats);
        int[] intronStateStats = sum(plusIntronStateStats, minusIntronStateStats);

        DecimalFormat format = new DecimalFormat("#.###");

        return "nucleotide matches:" + percent(format, totalStateMatches, totalLength) + "%\n" +
        "perfectly predicted sequences:" + perfectlyPredictedSequences + "/" + totalSequences + "\n" +
        "---Exon statistics----\n" +
        "exon :" + Util.printStats2(exonStats) +
        "exon+ :" + Util.printStats2(plusExonStats) +
        "exon- :" + Util.printStats2(minusExonStats) +
        "---Intron statistics----\n" +
        "intron :" + Util.printStats2(intronStats) +
        "intron+ :" + Util.printStats2(plusIntronStats) +
        "intron- :" + Util.printStats2(minusIntronStats) +
        "---Matches against real annotation----\n" +
        "exon+ state matches:" + percent(format, totalPlusExonStateMatches, totalPlusExonLength) + "%\n" +
        "exon- state matches:" + percent(format, totalMinusExonStateMatches, totalMinusExonLength) + "%\n" +
        "---Matches against predicted annotation----\n" +
        "exon+ state matches:" + percent(format, totalReversePlusExonStateMatches, totalReversePlusExonLength) + "%\n" +
        "exon- state matches:" + percent(format, totalReverseMinusExonStateMatches, totalReverseMinusExonLength) + "%\n" +
        "---State statistics----\n" +
        "coding nucleotides:" + Util.printStats(codingNucleotidsStats) +
        "intron stats:" + Util.printStats(intronStateStats) +
        "intergenic stats:" + Util.printStats(intergenicStateStats) +
        "exon+ stats:" + Util.printStats(plusExonStateStats) +
        "exon- stats:" + Util.printStats(minusExonStateStats) +
        "intron+ stats:" + Util.printStats(plusIntronStateStats) +
        "intron- stats:" + Util.printStats(minusIntronStateStats);
    }

    /**
     * Builds a single tab-separated summary line: overall nucleotide match
     * percentage, perfectly predicted sequence ratio, feature-level exon
     * stats, state-level coding stats and state-level intron stats.
     *
     * @return the tab-separated summary
     */
    public String toTabbedInfoString() {
        int[] codingNucleotidsStats = sum(plusExonStateStats, minusExonStateStats);
        int[] exonStats = sum(plusExonStats, minusExonStats);
        int[] intronStateStats = sum(plusIntronStateStats, minusIntronStateStats);

        DecimalFormat format = new DecimalFormat("#.###");

        return "" + percent(format, totalStateMatches, totalLength) + "%\t" +
        perfectlyPredictedSequences + "/" + totalSequences + "\t" +
        Util.printTabbedStats2(exonStats) + "\t" +
        Util.printTabbedStats3(codingNucleotidsStats) + "\t" +
        Util.printTabbedStats3(intronStateStats);
    }
}
