/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.classifier.train.validation.crossvalidate;

import edu.msu.cme.rdp.classifier.train.LineageSequence;
import edu.msu.cme.rdp.classifier.train.LineageSequenceParser;
import edu.msu.cme.rdp.classifier.train.validation.HierarchyTree;
import edu.msu.cme.rdp.classifier.train.validation.TreeFactory;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

/**
 * Randomly picks a fraction of the taxonomy nodes at a given rank and gathers the names of
 * the sequences that were filed under the "GENUS" nodes found beneath the picked nodes.
 */
public class RdmSelectTaxon {
    /**
     * Builds a hierarchy tree from {@code tax_file}, adds every sequence of
     * {@code source_file} to it, randomly picks (without replacement) a fraction of the
     * nodes reported for {@code rank}, and returns the names of all sequences recorded
     * under the "GENUS" nodes of the picked nodes.
     *
     * @param tax_file    taxonomy file handed to {@link TreeFactory}
     * @param source_file sequence file read through {@link LineageSequenceParser}
     * @param fraction    fraction of the nodes at {@code rank} to pick; the number picked is
     *                    {@code (int) (nodeCount * fraction)}, capped at the number of
     *                    available nodes
     * @param rank        rank name passed to {@code HierarchyTree.getNodeList}
     * @return the names of the sequences belonging to the picked nodes; never {@code null}
     * @throws IOException if reading either input file fails
     */
    public static Set<String> randomSelectTaxon(File tax_file, File source_file, float fraction, String rank) throws IOException {
        FileReader taxReader = new FileReader(tax_file);
        try {
            TreeFactory factory = new TreeFactory(taxReader);

            // Key: name of the node returned by factory.addSequence (presumably the genus
            // node -- confirm against TreeFactory); value: names of the sequences added to it.
            HashMap<String, HashSet<String>> genusTrainSeqMap = new HashMap<String, HashSet<String>>();
            LineageSequenceParser parser = new LineageSequenceParser(source_file);
            try {
                while (parser.hasNext()) {
                    LineageSequence pSeq = parser.next();
                    HierarchyTree genusNode = factory.addSequence(pSeq);
                    HashSet<String> genusSeqSet = genusTrainSeqMap.get(genusNode.getName());
                    if (genusSeqSet == null) {
                        genusSeqSet = new HashSet<String>();
                        genusTrainSeqMap.put(genusNode.getName(), genusSeqSet);
                    }
                    genusSeqSet.add(pSeq.getSeqName());
                }
            } finally {
                // Release the parser even when parsing fails part-way through.
                parser.close();
            }

            ArrayList<HierarchyTree> candidates = new ArrayList<HierarchyTree>();
            factory.getRoot().getNodeList(rank, candidates);

            // Draw without replacement: each picked node is removed from the candidate list.
            // The emptiness guard keeps an over-sized fraction from indexing an empty list.
            HashSet<HierarchyTree> selectedNodes = new HashSet<HierarchyTree>();
            int testCount = (int)((float)candidates.size() * fraction);
            while (selectedNodes.size() < testCount && !candidates.isEmpty()) {
                int rdmIndex = (int)Math.floor(Math.random() * (double)candidates.size());
                selectedNodes.add(candidates.remove(rdmIndex));
            }

            HashSet<String> selectedSeqIDs = new HashSet<String>();
            for (HierarchyTree node : selectedNodes) {
                ArrayList<HierarchyTree> genusNodes = new ArrayList<HierarchyTree>();
                node.getNodeList("GENUS", genusNodes);
                for (HierarchyTree genusNode : genusNodes) {
                    HashSet<String> seqNames = genusTrainSeqMap.get(genusNode.getName());
                    // A "GENUS" node that never received a sequence has no map entry.
                    if (seqNames != null) {
                        selectedSeqIDs.addAll(seqNames);
                    }
                }
            }
            return selectedSeqIDs;
        } finally {
            taxReader.close();
        }
    }

    /**
     * Command-line entry point. Expects exactly four arguments:
     * {@code seqfile fraction tax_file rank}.
     *
     * @param args {@code [seqfile, fraction, tax_file, rank]}
     * @throws IllegalArgumentException if the argument count is not 4 or the fraction is
     *                                  not in the range (0, 1]
     * @throws Exception                if the selection itself fails
     */
    public static void main(String[] args) throws Exception {
        String usage = "seqfile fraction tax_file rank \n Without tax_file and rank option, sequences will be randomly selected without replacement\n with tax_file and rank option, only taxa at the certain rank level will be randomly selected";
        if (args.length != 4) {
            throw new IllegalArgumentException(usage);
        }
        File seq_file = new File(args[0]);
        float fraction = Float.parseFloat(args[1]);
        // Accept only 0 < fraction <= 1; written as a negation so NaN is rejected as well.
        if (!(fraction > 0.0f && fraction <= 1.0f)) {
            throw new IllegalArgumentException("number of fraction should be between 0 and 1");
        }
        File tax_file = new File(args[2]);
        // NOTE(review): the returned set of sequence names is discarded here, as in the
        // original; confirm whether it was meant to be printed or written out.
        RdmSelectTaxon.randomSelectTaxon(tax_file, seq_file, fraction, args[3]);
    }
}

