/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.classifier.train.validation.distance;

import edu.msu.cme.rdp.alignment.AlignmentMode;
import edu.msu.cme.rdp.alignment.pairwise.PairwiseAligner;
import edu.msu.cme.rdp.alignment.pairwise.PairwiseAlignment;
import edu.msu.cme.rdp.alignment.pairwise.ScoringMatrix;
import edu.msu.cme.rdp.alignment.pairwise.rna.DistanceModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.IdentityDistanceModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.OverlapCheckFailedException;
import edu.msu.cme.rdp.classifier.train.LineageSequence;
import edu.msu.cme.rdp.classifier.train.LineageSequenceParser;
import edu.msu.cme.rdp.classifier.train.validation.HierarchyTree;
import edu.msu.cme.rdp.classifier.train.validation.TreeFactory;
import edu.msu.cme.rdp.classifier.train.validation.distance.BoxPlotUtils;
import edu.msu.cme.rdp.readseq.readers.Sequence;
import edu.msu.cme.rdp.readseq.utils.kmermatch.KmerMatchCore;
import edu.msu.cme.rdp.readseq.utils.kmermatch.NuclSeqMatch;
import edu.msu.cme.rdp.readseq.utils.orientation.GoodWordIterator;
import java.awt.BasicStroke;
import java.awt.Font;
import java.awt.Stroke;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.TreeSet;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtilities;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.NumberAxis;
import org.jfree.chart.axis.NumberTickUnit;
import org.jfree.chart.axis.ValueAxis;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.chart.plot.XYPlot;
import org.jfree.data.statistics.BoxAndWhiskerItem;
import org.jfree.data.statistics.DefaultBoxAndWhiskerCategoryDataset;
import org.jfree.data.xy.XYDataset;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;

public class TaxaSimilarityMain {
    /** Rank names listed from "norank" through "genus"; not referenced elsewhere in this class. */
    public static String[] RANKS = new String[]{"norank", "domain", "phylum", "class", "order", "family", "genus"};
    /**
     * For each query handled by {@link #calSabSimilarity}, the highest Sab percentage against a
     * training sequence that shares the query's taxon at the last selected rank.
     */
    private ArrayList<Short> withinLowestRankSabSet = new ArrayList<Short>();
    /**
     * For each query handled by {@link #calSabSimilarity}, the highest Sab percentage against a
     * training sequence that does not share the query's taxon at the last selected rank.
     */
    private ArrayList<Short> diffLowestRankSabSet = new ArrayList<Short>();
    /** Selected rank names, lower-cased, in the order they were supplied to the constructor. */
    private List<String> ranks = new ArrayList<String>();
    /** Formats the mean similarity written to the box-chart text file. */
    private DecimalFormat format = new DecimalFormat("#.###");
    /**
     * Lower-cased rank name to similarity histogram. The array index is an integer similarity
     * percentage and the value is the number of comparisons that fell into that bin.
     */
    private HashMap<String, long[]> sabCoutMap = new HashMap<String, long[]>();
    /** Number of histogram bins: one per integer percentage from 0 to 100 inclusive. */
    private final int BINSIZE = 101;
    /** Scoring matrix handed to the pairwise aligner. */
    private ScoringMatrix scoringMatrix = ScoringMatrix.getDefaultNuclMatrix();
    /** Alignment mode handed to the pairwise aligner. */
    private AlignmentMode mode = AlignmentMode.overlap_trim;
    // NOTE(review): the meaning of the boolean constructor flag is not visible here; confirm against IdentityDistanceModel.
    /** Distance model used to turn a pairwise alignment into a distance. */
    private static DistanceModel dist = new IdentityDistanceModel(true);

    /**
     * Creates an analyzer that tracks the given ranks.
     *
     * <p>Each rank name is lower-cased, appended to the rank list in the order supplied, and
     * given a zero-filled similarity histogram with one bin per integer percentage.
     *
     * @param selectedRanks rank names to track
     */
    public TaxaSimilarityMain(List<String> selectedRanks) {
        for (String selected : selectedRanks) {
            String key = selected.toLowerCase();
            this.ranks.add(key);
            this.sabCoutMap.put(key, new long[this.BINSIZE]);
        }
    }

    /**
     * Reads rank names from a text file, one rank per line.
     *
     * <p>Each line is trimmed. Lines that are empty after trimming are skipped so that stray
     * blank lines (for example a trailing newline) do not become an empty rank name.
     *
     * @param rankFile path of the file to read
     * @return the rank names in file order
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> readRanks(String rankFile) throws IOException {
        ArrayList<String> ranks = new ArrayList<String>();
        BufferedReader reader = new BufferedReader(new FileReader(new File(rankFile)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String rank = line.trim();
                if (rank.isEmpty()) {
                    continue;
                }
                ranks.add(rank);
            }
        } finally {
            // Always release the file handle, even when reading fails part-way.
            reader.close();
        }
        return ranks;
    }

    /**
     * Resolves a sequence's lineage to the matching nodes of the taxonomy tree.
     *
     * <p>The first ancestor must be the root itself; every following ancestor is looked up by
     * name among the children of the previously resolved node.
     *
     * @param root root node of the taxonomy tree
     * @param seqName name of the sequence, used only in error messages
     * @param ancestors lineage names starting with the root taxon
     * @return map from lower-cased hierarchy level to the node at that level
     * @throws IllegalArgumentException if the lineage is null or empty, does not start at
     *     {@code root}, or names a taxon that is not found under its parent
     */
    public HashMap<String, HierarchyTree> getAncestorNodes(HierarchyTree root, String seqName, List<String> ancestors) {
        if (ancestors == null || ancestors.isEmpty()) {
            throw new IllegalArgumentException("Sequence " + seqName + " does not have any lineage information");
        }
        if (!ancestors.get(0).equals(root.getName())) {
            throw new IllegalArgumentException("Sequence " + seqName + " does not have the same root taxon " + root.getName());
        }
        HashMap<String, HierarchyTree> ancestorNodes = new HashMap<String, HierarchyTree>();
        // Key the root in lower case like every other level, because lookups use lower-cased rank names.
        ancestorNodes.put(root.getTaxonomy().getHierLevel().toLowerCase(), root);
        HierarchyTree curParent = root;
        for (int i = 1; i < ancestors.size(); ++i) {
            String ancestorName = ancestors.get(i);
            HierarchyTree node = curParent.getSubclassbyName(ancestorName);
            if (node == null) {
                throw new IllegalArgumentException("Sequence " + seqName + " cannot find ancestor node: " + ancestorName);
            }
            ancestorNodes.put(node.getTaxonomy().getHierLevel().toLowerCase(), node);
            curParent = node;
        }
        return ancestorNodes;
    }

    /**
     * Fills the per-rank similarity histograms using k-mer based Sab scores.
     *
     * <p>Every test sequence is matched against the training set. Each match, except a match to
     * a training sequence with the same name, is binned by its Sab score expressed as a rounded
     * percentage. The best score inside and outside the query's taxon at the last selected rank
     * is also recorded per query. Progress is printed to standard output every 100 queries.
     *
     * @param taxonFile taxonomy file used to build the hierarchy tree
     * @param trainSeqFile training sequences with lineage information
     * @param testSeqFile query sequences with lineage information
     * @throws IOException if any input cannot be read
     * @throws IllegalArgumentException if a lineage cannot be resolved in the taxonomy tree
     */
    public void calSabSimilarity(String taxonFile, String trainSeqFile, String testSeqFile) throws IOException {
        FileReader taxonReader = new FileReader(taxonFile);
        try {
            TreeFactory factory = new TreeFactory(taxonReader);
            factory.buildTree();

            // Remember the lineage of every training sequence so matches can be placed in the tree.
            HashMap<String, List<String>> lineageMap = new HashMap<String, List<String>>();
            LineageSequenceParser trainParser = new LineageSequenceParser(new File(trainSeqFile));
            try {
                while (trainParser.hasNext()) {
                    LineageSequence trainSeq = trainParser.next();
                    lineageMap.put(trainSeq.getSeqName(), trainSeq.getAncestors());
                }
            } finally {
                trainParser.close();
            }

            NuclSeqMatch sabCal = new NuclSeqMatch(trainSeqFile);
            LineageSequenceParser parser = new LineageSequenceParser(new File(testSeqFile));
            try {
                int count = 0;
                while (parser.hasNext()) {
                    LineageSequence seq = parser.next();
                    HashMap<String, HierarchyTree> queryAncestorNodes = this.getAncestorNodes(factory.getRoot(), seq.getSeqName(), seq.getAncestors());
                    TreeSet<KmerMatchCore.BestMatch> matchResults = sabCal.findAllMatches((Sequence) seq);
                    short withinLowestRankSab = -1;
                    short diffLowestRankSab = -1;
                    for (KmerMatchCore.BestMatch match : matchResults) {
                        String matchName = match.getBestMatch().getSeqName();
                        if (matchName.equals(seq.getSeqName())) {
                            // Skip the query's own entry in the training set.
                            continue;
                        }
                        short sab = (short) Math.round(100.0f * match.getSab());
                        HashMap<String, HierarchyTree> matchAncestorNodes = this.getAncestorNodes(factory.getRoot(), matchName, lineageMap.get(matchName));

                        // Walk the selected ranks from last to first. The match is counted at a rank
                        // where both sequences share a taxon, unless the previously compared rank
                        // also matched. A mismatch resets the flag, so a later match counts again.
                        boolean withinTaxon = false;
                        for (int i = this.ranks.size() - 1; i >= 0; --i) {
                            String rank = this.ranks.get(i);
                            HierarchyTree queryTaxon = queryAncestorNodes.get(rank);
                            HierarchyTree matchTaxon = matchAncestorNodes.get(rank);
                            if (queryTaxon == null || matchTaxon == null) {
                                continue;
                            }
                            if (queryTaxon.getName().equals(matchTaxon.getName())) {
                                if (!withinTaxon) {
                                    this.sabCoutMap.get(rank)[sab]++;
                                }
                                withinTaxon = true;
                            } else {
                                withinTaxon = false;
                            }
                        }

                        // Track the best score inside and outside the taxon at the last selected rank.
                        String lowestRank = this.ranks.get(this.ranks.size() - 1);
                        HierarchyTree lowestQueryTaxon = queryAncestorNodes.get(lowestRank);
                        HierarchyTree lowestMatchTaxon = matchAncestorNodes.get(lowestRank);
                        boolean sameLowestTaxon = lowestQueryTaxon != null
                                && lowestMatchTaxon != null
                                && lowestQueryTaxon.getName().equals(lowestMatchTaxon.getName());
                        if (sameLowestTaxon) {
                            if (sab > withinLowestRankSab) {
                                withinLowestRankSab = sab;
                            }
                        } else if (sab >= diffLowestRankSab) {
                            diffLowestRankSab = sab;
                        }
                    }
                    if (withinLowestRankSab > 0) {
                        this.withinLowestRankSabSet.add(withinLowestRankSab);
                    }
                    if (diffLowestRankSab > 0) {
                        this.diffLowestRankSabSet.add(diffLowestRankSab);
                    }
                    if (++count % 100 == 0) {
                        System.out.println(count);
                    }
                }
            } finally {
                parser.close();
            }
        } finally {
            // Closed last because it is not visible here when TreeFactory stops reading from it.
            taxonReader.close();
        }
    }

    /**
     * Fills the per-rank similarity histograms using pairwise alignment identity.
     *
     * <p>Every test sequence is compared with every training sequence of a different name. For a
     * pair that shares a taxon at one of the selected ranks, the two sequences are aligned and
     * the resulting similarity percentage is counted in the histogram of the lowest common rank
     * found by the rank walk.
     *
     * @param taxonFile taxonomy file used to build the hierarchy tree
     * @param trainSeqFile training sequences with lineage information
     * @param testSeqFile query sequences with lineage information
     * @throws IOException if any input cannot be read
     * @throws OverlapCheckFailedException propagated from the distance model
     * @throws IllegalArgumentException if a lineage cannot be resolved in the taxonomy tree
     */
    public void calPairwiseSimilaritye(String taxonFile, String trainSeqFile, String testSeqFile) throws IOException, OverlapCheckFailedException {
        FileReader taxonReader = new FileReader(taxonFile);
        try {
            TreeFactory factory = new TreeFactory(taxonReader);
            factory.buildTree();

            ArrayList<LineageSequence> trainSeqList = new ArrayList<LineageSequence>();
            LineageSequenceParser trainParser = new LineageSequenceParser(new File(trainSeqFile));
            try {
                while (trainParser.hasNext()) {
                    trainSeqList.add(trainParser.next());
                }
            } finally {
                trainParser.close();
            }

            LineageSequenceParser parser = new LineageSequenceParser(new File(testSeqFile));
            try {
                while (parser.hasNext()) {
                    LineageSequence seq = parser.next();
                    HashMap<String, HierarchyTree> queryAncestorNodes = this.getAncestorNodes(factory.getRoot(), seq.getSeqName(), seq.getAncestors());
                    // Convert the query once instead of once per training sequence.
                    String querySeqString = seq.getSeqString().replaceAll("U", "T");
                    for (LineageSequence trainSeq : trainSeqList) {
                        if (trainSeq.getSeqName().equals(seq.getSeqName())) {
                            continue;
                        }
                        HashMap<String, HierarchyTree> matchAncestorNodes = this.getAncestorNodes(factory.getRoot(), trainSeq.getSeqName(), trainSeq.getAncestors());

                        // Walk the selected ranks from last to first. The rank is recorded where
                        // both sequences share a taxon, unless the previously compared rank also
                        // matched. A mismatch resets the flag, so a later match overwrites it.
                        boolean withinTaxon = false;
                        String lowestCommonRank = null;
                        for (int i = this.ranks.size() - 1; i >= 0; --i) {
                            String rank = this.ranks.get(i);
                            HierarchyTree queryTaxon = queryAncestorNodes.get(rank);
                            HierarchyTree matchTaxon = matchAncestorNodes.get(rank);
                            if (queryTaxon == null || matchTaxon == null) {
                                continue;
                            }
                            if (queryTaxon.getName().equals(matchTaxon.getName())) {
                                if (!withinTaxon) {
                                    lowestCommonRank = rank;
                                }
                                withinTaxon = true;
                            } else {
                                withinTaxon = false;
                            }
                        }
                        if (lowestCommonRank == null) {
                            // No shared taxon at any selected rank: skip the expensive alignment.
                            continue;
                        }

                        PairwiseAlignment result = PairwiseAligner.align(querySeqString, trainSeq.getSeqString().replaceAll("U", "T"), this.scoringMatrix, this.mode);
                        double similarity = 100.0 - 100.0 * dist.getDistance(result.getAlignedSeqj().getBytes(), result.getAlignedSeqi().getBytes(), 0);
                        // Round rather than truncate: floating-point error otherwise drops values such
                        // as 92.99999999999999 into the bin below, and the Sab histogram also rounds.
                        int bin = (int) Math.round(similarity);
                        // Keep the index inside the histogram even if the distance falls outside [0, 1].
                        bin = Math.max(0, Math.min(this.BINSIZE - 1, bin));
                        this.sabCoutMap.get(lowestCommonRank)[bin]++;
                    }
                }
            } finally {
                parser.close();
            }
        } finally {
            // Closed last because it is not visible here when TreeFactory stops reading from it.
            taxonReader.close();
        }
    }

    /**
     * Writes the summary table and the two charts for the collected similarity histograms.
     *
     * <p>Three files are created in {@code outdir}, each named after {@code plotTitle}:
     * {@code .boxchart.txt} (tab-separated summary per rank), {@code .linechart.png}
     * (cumulative percentage of comparisons per similarity bin) and {@code .boxchart.png}
     * (box-and-whisker plot per rank). Ranks without any comparison are omitted.
     *
     * @param plotTitle chart title and base name of the output files
     * @param outdir directory that receives the output files
     * @throws IOException if an output file cannot be created or written
     */
    public void createPlot(String plotTitle, File outdir) throws IOException {
        XYSeriesCollection dataset = new XYSeriesCollection();
        DefaultBoxAndWhiskerCategoryDataset scatterDataset = new DefaultBoxAndWhiskerCategoryDataset();
        PrintStream dataOut = new PrintStream(new File(outdir, plotTitle + ".boxchart.txt"));
        try {
            dataOut.println("#\tkmer\trank\tmax\tavg\tmin\tQ1\tmedian\tQ3\t98Pct\t2Pct\tcomparisons\tsum");
            for (String rank : this.ranks) {
                long[] countArray = this.sabCoutMap.get(rank);
                if (countArray == null) {
                    continue;
                }

                // First pass: totals and the lowest/highest occupied bins.
                double sum = 0.0;
                long comparisons = 0L;
                int max = 0;
                int min = 100;
                for (int c = 0; c < countArray.length; ++c) {
                    if (countArray[c] == 0L) {
                        continue;
                    }
                    comparisons += countArray[c];
                    sum += (double)(countArray[c] * (long)c);
                    if (c < min) {
                        min = c;
                    }
                    if (c > max) {
                        max = c;
                    }
                }

                // Second pass: cumulative curve and the first bin reaching each threshold.
                // NOTE(review): Q3 is captured at cumulative >= 25% and Q1 at >= 75%, the reverse
                // of the usual quartile naming, and pct_2 uses a 5% threshold although the column
                // is labelled 2Pct. Left unchanged; confirm the intent before altering the output.
                int Q1 = -1;
                int median = -1;
                int Q3 = -1;
                int pct_98 = -1;
                int pct_2 = -1;
                XYSeries series = new XYSeries(rank);
                double cum = 0.0;
                for (int c = 0; c < countArray.length; ++c) {
                    if (countArray[c] == 0L) {
                        continue;
                    }
                    cum += (double)countArray[c];
                    int pct = (int)Math.floor(100.0 * cum / (double)comparisons);
                    series.add((double)c, (double)pct);
                    if (pct_2 == -1 && pct >= 5) {
                        pct_2 = c;
                    }
                    if (Q3 == -1 && pct >= 25) {
                        Q3 = c;
                    }
                    if (median == -1 && pct >= 50) {
                        median = c;
                    }
                    if (Q1 == -1 && pct >= 75) {
                        Q1 = c;
                    }
                    if (pct_98 == -1 && pct >= 98) {
                        pct_98 = c;
                    }
                }
                if (series.isEmpty()) {
                    // No comparisons for this rank; this also avoids dividing by zero below.
                    continue;
                }
                dataset.addSeries(series);

                double mean = sum / (double)comparisons;
                // Both outlier bounds are passed as 0 and the outlier list is empty.
                BoxAndWhiskerItem item = new BoxAndWhiskerItem(mean, (double)median, (double)Q1, (double)Q3, (double)pct_2, (double)pct_98, 0.0, 0.0, new ArrayList<Number>());
                scatterDataset.add(item, rank, "");
                dataOut.println("#\t" + GoodWordIterator.getWordsize() + "\t" + rank + "\t" + max + "\t" + this.format.format(mean) + "\t" + min + "\t" + Q1 + "\t" + median + "\t" + Q3 + "\t" + pct_98 + "\t" + pct_2 + "\t" + comparisons + "\t" + sum);
            }
        } finally {
            dataOut.close();
        }

        Font lableFont = new Font("Helvetica", Font.BOLD, 28);
        JFreeChart chart = ChartFactory.createXYLineChart(plotTitle, "Similarity%", "Percent Comparisions", dataset, PlotOrientation.VERTICAL, true, true, false);
        XYPlot plot = (XYPlot)chart.getPlot();
        plot.getRenderer().setStroke(new BasicStroke(2.0f));
        chart.getLegend().setItemFont(new Font("Helvetica", Font.BOLD, 24));
        chart.getTitle().setFont(lableFont);
        plot.getDomainAxis().setLabelFont(lableFont);
        plot.getDomainAxis().setTickLabelFont(lableFont);
        ValueAxis rangeAxis = plot.getRangeAxis();
        rangeAxis.setRange(0.0, 100.0);
        rangeAxis.setTickLabelFont(lableFont);
        rangeAxis.setLabelFont(lableFont);
        ((NumberAxis)rangeAxis).setTickUnit(new NumberTickUnit(5.0));

        // Close both image streams explicitly so the file handles are released.
        PrintStream lineChartOut = new PrintStream(new File(outdir, plotTitle + ".linechart.png"));
        try {
            ChartUtilities.writeScaledChartAsPNG(lineChartOut, chart, 800, 1000, 3, 3);
        } finally {
            lineChartOut.close();
        }
        PrintStream boxChartOut = new PrintStream(new File(outdir, plotTitle + ".boxchart.png"));
        try {
            BoxPlotUtils.createBoxplot(scatterDataset, boxChartOut, plotTitle, "Rank", "Similarity%", lableFont);
        } finally {
            boxChartOut.close();
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Expects seven arguments: taxonomy file, training FASTA, query FASTA, output directory,
     * k-mer size, rank file and the similarity method ({@code sab} or {@code pw}, case
     * insensitive). The plot title is the query file name up to its first dot. The process exits
     * with status 1 when the arguments are invalid.
     *
     * @param args command-line arguments as described above
     * @throws IOException if an input cannot be read or an output cannot be written
     * @throws OverlapCheckFailedException propagated from the pairwise similarity calculation
     */
    public static void main(String[] args) throws IOException, OverlapCheckFailedException {
        String usage = "Usage: taxonfile trainset.fasta query.fasta outdir kmersize rankFile sab|pw \n  This program calculates the average similarity (Sab score, or pairwise alignment) within taxa\n  and plot the box and whisker plot and accumulation curve plot. \n  rankFile: a file contains a list of ranks to be calculated and plotted. One rank per line, no particular order required. \n  Note pw is extremely slower, recommended only for lower ranks such as species, genus and family. ";
        if (args.length != 7) {
            System.err.println(usage);
            System.exit(1);
        }
        // Reject unknown methods up front so a typo cannot silently start the slow pairwise mode.
        String method = args[6];
        boolean useSab = method.equalsIgnoreCase("sab");
        if (!useSab && !method.equalsIgnoreCase("pw")) {
            System.err.println("unknown similarity method: " + method);
            System.err.println(usage);
            System.exit(1);
        }
        List<String> ranks = TaxaSimilarityMain.readRanks(args[5]);
        File outdir = new File(args[3]);
        if (!outdir.isDirectory()) {
            System.err.println("outdir must be a directory");
            System.exit(1);
        }
        int kmer = Integer.parseInt(args[4]);
        GoodWordIterator.setWordSize(kmer);
        TaxaSimilarityMain theObj = new TaxaSimilarityMain(ranks);

        // Plot title: query file name with everything from the first dot removed.
        String plotTitle = new File(args[2]).getName();
        int index = plotTitle.indexOf(".");
        if (index != -1) {
            plotTitle = plotTitle.substring(0, index);
        }
        if (useSab) {
            theObj.calSabSimilarity(args[0], args[1], args[2]);
        } else {
            theObj.calPairwiseSimilaritye(args[0], args[1], args[2]);
        }
        theObj.createPlot(plotTitle, outdir);
    }
}

