/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.classifier.train.validation.distance;

import edu.msu.cme.rdp.classifier.train.LineageSequence;
import edu.msu.cme.rdp.classifier.train.LineageSequenceParser;
import edu.msu.cme.rdp.classifier.train.validation.HierarchyTree;
import edu.msu.cme.rdp.classifier.train.validation.TreeFactory;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Compares several training sets (each a taxonomy file plus a sequence file) and
 * writes a summary of their taxonomic composition per rank.
 *
 * <p>When exactly two or three training sets are loaded, the summary additionally
 * contains the counts needed to draw Venn diagrams (shared / unique taxa per rank and
 * shared / unique sequence IDs), and a detail file lists the taxa and sequences that
 * are unique to each training set.
 */
public class CompareTrainingSets {
    /** Loaded training sets, in the order their file pairs were supplied. */
    private final List<HierarchyTreeExtend> trainsets = new ArrayList<>();
    /** Rank names to report on, in the order they appear in the rank file. */
    private final List<String> ranks = new ArrayList<>();

    /**
     * Loads the rank list and every training set.
     *
     * @param rankFile path of a text file holding one rank name per line; lines are
     *                 trimmed and blank lines are ignored
     * @param files    alternating taxonomy-file / sequence-file paths, i.e.
     *                 {@code tax1, seq1, tax2, seq2, ...}
     * @throws IOException              if any of the files cannot be read
     * @throws IllegalArgumentException if {@code files} has an odd number of entries
     */
    public CompareTrainingSets(String rankFile, String[] files) throws IOException {
        // Fail fast with a clear message instead of an ArrayIndexOutOfBoundsException
        // on the dangling taxonomy file.
        if (files.length % 2 != 0) {
            throw new IllegalArgumentException(
                    "files must contain taxonomy/sequence file pairs, but has " + files.length + " entries");
        }
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(rankFile)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String rank = line.trim();
                // A blank line would otherwise be reported as a rank with an empty name.
                if (!rank.isEmpty()) {
                    this.ranks.add(rank);
                }
            }
        }
        for (int i = 0; i < files.length; i += 2) {
            this.trainsets.add(this.parseOneTraining(files[i], files[i + 1], i / 2, "", ""));
        }
    }

    /**
     * Builds the hierarchy tree for one training set and records the description of
     * every sequence under its sequence ID.
     *
     * <p>The training set is named after the taxonomy file name up to (not including)
     * its first {@code '.'}. The sequence ID used as map key is derived from the
     * sequence name: names containing {@code "|S00"} use the text before the first
     * {@code '|'}, names containing {@code "|SH"} use the second {@code '|'}-separated
     * field, and all other names are used unchanged.
     *
     * @param taxFile      path of the taxonomy file
     * @param seqFile      path of the sequence file
     * @param trainset_no  unused by this method
     * @param version      unused by this method
     * @param modification unused by this method
     * @return the tree root, training set name and sequence map bundled together
     * @throws IOException if either file cannot be read
     */
    private HierarchyTreeExtend parseOneTraining(String taxFile, String seqFile, int trainset_no, String version, String modification) throws IOException {
        File taxonomy = new File(taxFile);
        String trainsetName = taxonomy.getName();
        int dot = trainsetName.indexOf(".");
        if (dot != -1) {
            trainsetName = trainsetName.substring(0, dot);
        }
        // The taxonomy reader stays open until the tree is fully built: SOURCE does not
        // show whether TreeFactory consumes it eagerly in its constructor.
        try (FileReader tax = new FileReader(taxonomy)) {
            TreeFactory factory = new TreeFactory(tax);
            HashMap<String, String> seqMap = new HashMap<>();
            LineageSequenceParser parser = new LineageSequenceParser(new File(seqFile));
            try {
                while (parser.hasNext()) {
                    LineageSequence seq = parser.next();
                    factory.addSequence(seq, false);
                    String seqName = seq.getSeqName();
                    String seqId;
                    if (seqName.contains("|S00")) {
                        seqId = seqName.split("\\|")[0];
                    } else if (seqName.contains("|SH")) {
                        seqId = seqName.split("\\|")[1];
                    } else {
                        seqId = seqName;
                    }
                    seqMap.put(seqId, seq.getDesc());
                }
            } finally {
                // Release the sequence file even when parsing fails part-way.
                parser.close();
            }
            HierarchyTreeExtend retVal = new HierarchyTreeExtend(factory.getRoot(), trainsetName);
            retVal.seqMap = seqMap;
            return retVal;
        }
    }

    /**
     * Writes the comparison report.
     *
     * <p>The summary always contains the number of taxa per rank and the total number
     * of sequences for each training set. For two or three training sets it also
     * contains the Venn diagram counts, and the detail file receives the taxa and
     * sequences unique to each set. For any other number of sets the detail file is
     * created but left empty.
     *
     * @param summaryOutFile path of the summary file to create
     * @param detailOutFile  path of the detail file to create
     * @throws IOException if an output file cannot be created
     */
    public void compare(String summaryOutFile, String detailOutFile) throws IOException {
        try (PrintStream outStream = new PrintStream(summaryOutFile);
             PrintStream detailOutStream = new PrintStream(detailOutFile)) {
            this.writeComposition(outStream);
            int setCount = this.trainsets.size();
            if (setCount == 2 || setCount == 3) {
                outStream.println("\n## data for Venn Diagram");
                for (String rank : this.ranks) {
                    this.writeTaxaVenn(outStream, detailOutStream, rank);
                }
                this.writeSequenceVenn(outStream, detailOutStream);
            }
        }
    }

    /** Writes the per-rank taxon counts and the total sequence count of every training set. */
    private void writeComposition(PrintStream summary) {
        summary.println("## data for Taxonomic Composition");
        summary.print("Rank");
        for (HierarchyTreeExtend trainset : this.trainsets) {
            summary.print("\t" + trainset.getTrainsetName());
        }
        summary.println();
        for (String rank : this.ranks) {
            summary.print(rank);
            for (HierarchyTreeExtend trainset : this.trainsets) {
                HashMap<String, HierarchyTree> nodeMap = new HashMap<String, HierarchyTree>();
                trainset.getRoot().getNodeMap(rank, nodeMap);
                summary.print("\t" + nodeMap.size());
            }
            summary.println();
        }
        summary.print("All Seqs");
        for (HierarchyTreeExtend trainset : this.trainsets) {
            summary.print("\t" + trainset.getRoot().getTotalSeqs());
        }
        summary.println();
    }

    /** Writes the Venn counts for the taxa at one rank and lists each set's unique taxa in the detail file. */
    private void writeTaxaVenn(PrintStream summary, PrintStream detail, String rank) {
        summary.println("\n## Rank " + rank);
        List<Set<String>> taxaBySet = new ArrayList<>();
        for (HierarchyTreeExtend trainset : this.trainsets) {
            HashMap<String, HierarchyTree> nodeMap = new HashMap<String, HierarchyTree>();
            trainset.getRoot().getNodeMap(rank, nodeMap);
            taxaBySet.add(nodeMap.keySet());
        }
        // One scratch set is reused for every count so the detail listing keeps the
        // same iteration order as before.
        Set<String> scratch = new HashSet<>();
        keepCommon(scratch, taxaBySet);
        summary.println("Shared by all:\t" + scratch.size());
        for (int t = 0; t < taxaBySet.size(); ++t) {
            keepUnique(scratch, taxaBySet, t);
            String name = this.trainsets.get(t).getTrainsetName();
            summary.println("Unique to " + name + ":\t" + scratch.size());
            this.print(detail, rank + " unique to " + name, scratch);
        }
        this.writePairwiseShared(summary, scratch, taxaBySet);
    }

    /** Writes the Venn counts for sequence IDs and lists each set's unique sequences in the detail file. */
    private void writeSequenceVenn(PrintStream summary, PrintStream detail) {
        summary.println("\n## Shared Sequences (by seqID)");
        List<Set<String>> idsBySet = new ArrayList<>();
        for (HierarchyTreeExtend trainset : this.trainsets) {
            idsBySet.add(trainset.seqMap.keySet());
        }
        Set<String> scratch = new HashSet<>();
        keepCommon(scratch, idsBySet);
        summary.println("Shared seqs by all:\t" + scratch.size());
        for (int t = 0; t < idsBySet.size(); ++t) {
            keepUnique(scratch, idsBySet, t);
            HierarchyTreeExtend trainset = this.trainsets.get(t);
            summary.println("Unique to " + trainset.getTrainsetName() + ":\t" + scratch.size());
            this.printSeqs(detail, "Unique seqs to " + trainset.getTrainsetName(), scratch, trainset);
        }
        this.writePairwiseShared(summary, scratch, idsBySet);
    }

    /**
     * For every set {@code t}, writes how many elements it shares with the next set
     * (wrapping around) and, when there are three sets, with that set only.
     * {@code sets} must be index-aligned with the loaded training sets.
     */
    private void writePairwiseShared(PrintStream summary, Set<String> scratch, List<Set<String>> sets) {
        int count = sets.size();
        for (int t = 0; t < count; ++t) {
            int next = (t + 1) % count;
            scratch.clear();
            scratch.addAll(sets.get(t));
            scratch.retainAll(sets.get(next));
            if (count == 3) {
                // With three sets, exclude what the third set also contains.
                scratch.removeAll(sets.get((t + 2) % count));
            }
            summary.println("Shared only by " + this.trainsets.get(t).getTrainsetName() + " and " + this.trainsets.get(next).getTrainsetName() + ":\t" + scratch.size());
        }
    }

    /** Replaces the content of {@code scratch} with the elements present in every set. */
    private static void keepCommon(Set<String> scratch, List<Set<String>> sets) {
        scratch.clear();
        scratch.addAll(sets.get(0));
        for (int t = 1; t < sets.size(); ++t) {
            scratch.retainAll(sets.get(t));
        }
    }

    /** Replaces the content of {@code scratch} with the elements found only in {@code sets.get(index)}. */
    private static void keepUnique(Set<String> scratch, List<Set<String>> sets, int index) {
        scratch.clear();
        scratch.addAll(sets.get(index));
        for (int k = 1; k < sets.size(); ++k) {
            scratch.removeAll(sets.get((k + index) % sets.size()));
        }
    }

    /** Writes a {@code ##}-prefixed heading, one element per line, then a blank line. */
    private void print(PrintStream out, String message, Set<String> tempSet) {
        out.println("##" + message);
        for (String s : tempSet) {
            out.println(s);
        }
        out.println();
    }

    /**
     * Writes a {@code ##}-prefixed heading, then each sequence ID followed by a tab and
     * the description stored for it in {@code trainset}, then a blank line.
     */
    private void printSeqs(PrintStream out, String message, Set<String> tempSet, HierarchyTreeExtend trainset) {
        out.println("##" + message);
        for (String s : tempSet) {
            out.println(s + "\t" + trainset.seqMap.get(s));
        }
        out.println();
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code rank.txt summary_out.txt detail_out.txt} followed by one or more
     *             {@code taxon.txt seq.fasta} pairs; the usage text is printed and the
     *             JVM exits with status 1 when the argument count does not fit
     * @throws Exception if loading the inputs or writing the report fails
     */
    public static void main(String[] args) throws Exception {
        String usage = "Usage: rank.txt summary_out.txt detail_out.txt set1_taxon.txt set1_seq.fasta set2_taxon.txt set2_seq.fasta ...\nrank.txt contains an ordered list of ranks to be compared, from the highest rank down to lowest rank. one per line\nEach input training set requires a taxonomy file and a sequences file with lineage information as the description\n  This program compares multiple training sets and generates the taxonomic composition data at each rank.\n  For two or three sets, it produces an summary output data suitable to generate Venn diagrams\n  and a detailed output contained the detailed list of taxa or sequences unique to each training set";
        // Three fixed arguments plus file pairs means the total count is odd and at least 5.
        if (args.length < 5 || args.length % 2 != 1) {
            System.err.println(usage);
            System.exit(1);
        }
        CompareTrainingSets theObj = new CompareTrainingSets(args[0], Arrays.copyOfRange(args, 3, args.length));
        theObj.compare(args[1], args[2]);
    }

    /**
     * A loaded training set: the root of its hierarchy tree, its display name and the
     * map from sequence ID to sequence description.
     */
    public class HierarchyTreeExtend {
        private final HierarchyTree root;
        private final String trainsetName;
        /** Sequence ID to description; assigned by the enclosing class after construction. */
        private HashMap<String, String> seqMap;

        /**
         * @param root root node of the training set's hierarchy tree
         * @param name display name of the training set
         */
        public HierarchyTreeExtend(HierarchyTree root, String name) {
            this.root = root;
            this.trainsetName = name;
        }

        /** @return the root node of the hierarchy tree */
        public HierarchyTree getRoot() {
            return this.root;
        }

        /** @return the display name of the training set */
        public String getTrainsetName() {
            return this.trainsetName;
        }
    }
}

