/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.classifier.train;

import edu.msu.cme.rdp.classifier.train.CopyNumberParser;
import edu.msu.cme.rdp.classifier.train.LineageSequence;
import edu.msu.cme.rdp.classifier.train.LineageSequenceParser;
import edu.msu.cme.rdp.classifier.train.NameRankDupException;
import edu.msu.cme.rdp.classifier.train.RawGenusWordConditionalProb;
import edu.msu.cme.rdp.classifier.train.RawHierarchyTree;
import edu.msu.cme.rdp.classifier.train.Taxonomy;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Builds the raw training hierarchy from a taxonomy description and a set of
 * training sequences, derives the per-word prior and per-node conditional
 * probabilities, and writes the resulting training files.
 *
 * <p>Typical call order, as required by the state each step reads:
 * construct (parses the taxonomy), {@link #parseSequenceFile}, optionally
 * {@link #parseCopyNumberFile}, {@code createGenusWordConditionalProb()},
 * then the {@code print*} methods.
 *
 * <p>The class keeps mutable state and performs no synchronization.
 */
public class TreeFactory {
    /** Format applied to the {@code cpNumber} attribute of the training tree file. */
    private static final String DFORMAT = "%1$.2f";
    /** Number of distinct word indices (presumably 4^8, i.e. all 8-mers - TODO confirm). */
    private static final int WORD_COUNT = 65536;
    /** Depth value that marks the single root taxon in the taxonomy file. */
    private static final int ROOT_DEPTH = 0;
    /** Pseudo-count added to the word occurrence when computing the word prior. */
    private static final float WF1 = 0.5f;
    /** Pseudo-count added to the sequence/leave count denominators. */
    private static final float WF2 = 1.0f;

    /** Nodes at the trained rank; built lazily by {@link #getGenusNodeList()}. */
    private List<RawHierarchyTree> genusNodeList = null;
    private final List<RawGenusWordConditionalProb> genus_wordConditionalProbList = new ArrayList<RawGenusWordConditionalProb>();
    /** Entry i is the start offset of word i in the conditional prob list; the extra last entry is the total size. */
    private final int[] wordProbPointerArr = new int[WORD_COUNT + 1];
    private float[] logArr;
    private RawHierarchyTree rootTree;
    private final Map<String, List<Taxonomy>> taxnameMap = new HashMap<String, List<Taxonomy>>();
    private final Map<Integer, Taxonomy> taxidMap = new HashMap<Integer, Taxonomy>();
    private final Map<String, Integer> taxnameRankMap = new HashMap<String, Integer>();
    /** Raw per-word values while parsing; replaced by log priors in createGenusWordConditionalProb(). */
    private final float[] wordPriorArr = new float[WORD_COUNT];
    private int totalSequences = 0;
    private final String trainingVersion;
    /** Rank of the lowest taxon of the first sequence read; all later sequences must match it. */
    private String trained_rank = null;

    /**
     * Creates a factory and parses the taxonomy.
     *
     * @param taxReader source of the taxonomy lines; it is read to the end but not closed here
     * @param trainsetNo training set number written into every output file header
     * @param version version string written into every output file header
     * @param modification modification string written into every output file header
     * @throws IOException if reading the taxonomy fails
     * @throws NameRankDupException if a taxid, or a name/rank combination, occurs more than once
     * @throws IllegalArgumentException if a line is malformed or the root taxon is missing or duplicated
     */
    public TreeFactory(Reader taxReader, int trainsetNo, String version, String modification) throws IOException, NameRankDupException {
        this.trainingVersion = "<trainsetNo>" + trainsetNo + "</trainsetNo><version>" + version + "</version><modversion>" + modification + "</modversion>";
        this.creatTaxidMap(taxReader);
    }

    /**
     * Parses the taxonomy, one taxon per non-empty line, with at least five
     * {@code *}-separated tokens: taxid, name, parent taxid, depth and a fifth
     * token handed to the {@code Taxonomy} constructor (presumably the rank).
     * Fills the name, taxid and name/rank lookup maps and creates the root node
     * from the single taxon whose depth is {@code 0}.
     */
    private void creatTaxidMap(Reader taxReader) throws IOException, NameRankDupException {
        BufferedReader reader = new BufferedReader(taxReader);
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.length() == 0) {
                continue;
            }
            StringTokenizer st = new StringTokenizer(line, "*");
            if (st.countTokens() < 5) {
                throw new IllegalArgumentException("\nIllegal taxonomy format at " + line);
            }

            int taxid;
            String taxname;
            int pid;
            int depth;
            try {
                taxid = Integer.parseInt(st.nextToken().trim());
                taxname = st.nextToken().trim();
                pid = Integer.parseInt(st.nextToken().trim());
                depth = Integer.parseInt(st.nextToken().trim());
            } catch (NumberFormatException e) {
                // Keep the cause so the offending token is visible in the stack trace.
                throw new IllegalArgumentException("\nError: The value for taxid, parentid and the depth should be integer in : " + line, e);
            }

            Integer taxidKey = Integer.valueOf(taxid);
            if (this.taxidMap.containsKey(taxidKey)) {
                throw new NameRankDupException("Error: duplicate taxid found : " + taxid);
            }

            Taxonomy tax = new Taxonomy(taxid, taxname, pid, depth, st.nextToken().trim());
            this.taxidMap.put(taxidKey, tax);

            List<Taxonomy> sameNameTaxa = this.taxnameMap.get(taxname);
            if (sameNameTaxa == null) {
                sameNameTaxa = new ArrayList<Taxonomy>();
                this.taxnameMap.put(taxname, sameNameTaxa);
            }
            sameNameTaxa.add(tax);

            // Count name/rank pairs case-insensitively; duplicates are reported after the whole file is read.
            String nameRank = (taxname + "\t" + tax.hierLevel).toLowerCase();
            Integer seen = this.taxnameRankMap.get(nameRank);
            this.taxnameRankMap.put(nameRank, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));

            if (tax.depth != ROOT_DEPTH) {
                continue;
            }
            if (this.rootTree != null) {
                throw new IllegalArgumentException("Error: taxon " + tax.getTaxID() + " has the depth set to '0'. Only the root taxon can have the depth set to '0'");
            }
            this.rootTree = new RawHierarchyTree(taxname, null, tax);
        }

        if (this.rootTree == null) {
            throw new IllegalArgumentException("Error: no root taxon with depth '0' defined in the taxonomy file.");
        }
        String errors = this.nameRankSanityCheck();
        if (errors != null) {
            throw new NameRankDupException("Error: duplicate taxon name and rank in the taxonomy file.\n" + errors);
        }
    }

    /**
     * Lists every name/rank pair that occurred more than once.
     *
     * @return one {@code name<TAB>rank<TAB>count} line per duplicate, or {@code null} if there is none
     */
    private String nameRankSanityCheck() {
        StringBuilder buf = new StringBuilder();
        for (Map.Entry<String, Integer> entry : this.taxnameRankMap.entrySet()) {
            int count = entry.getValue().intValue();
            if (count > 1) {
                buf.append(entry.getKey()).append("\t").append(count).append("\n");
            }
        }
        return buf.length() == 0 ? null : buf.toString();
    }

    /**
     * Adds every sequence delivered by the parser to the tree. A sequence with
     * exactly one ancestor entry is treated as carrying a taxid; any other
     * sequence is treated as carrying a full lineage of taxon names.
     * The parser is closed when parsing ends, including when a sequence is rejected.
     *
     * @param parser source of the training sequences
     * @throws IOException if reading a sequence or closing the parser fails
     * @throws IllegalArgumentException if a sequence does not fit the taxonomy
     */
    public void parseSequenceFile(LineageSequenceParser parser) throws IOException {
        try {
            while (parser.hasNext()) {
                LineageSequence pSeq = parser.next();
                if (pSeq.getAncestors().size() == 1) {
                    this.addSequencewithTaxid(pSeq);
                } else {
                    this.addSequencewithLineage(pSeq);
                }
            }
        } finally {
            parser.close();
        }
    }

    /**
     * Reads a copy number file, assigns the copy numbers to the nodes at the
     * trained rank, and then propagates values to the rest of the tree: first
     * bottom-up (a parent gets the mean of its children that have a value),
     * then top-down (a node without a value inherits its parent's).
     *
     * @param cnFile path handed to {@code CopyNumberParser.parse}
     * @throws IOException if the copy number file cannot be read
     * @throws IllegalArgumentException if no sequence file has been read yet, if the file names a
     *         taxon at the trained rank that is not in the tree, or if no copy number reached the root
     */
    public void parseCopyNumberFile(String cnFile) throws IOException {
        HashMap<String, CopyNumberParser.TaxonCopyNumber> cnMap = new CopyNumberParser().parse(cnFile);
        this.getGenusNodeList();
        for (RawHierarchyTree aTree : this.genusNodeList) {
            String key = (aTree.getName() + aTree.getTaxonomy().getHierLevel()).toLowerCase();
            CopyNumberParser.TaxonCopyNumber cn = cnMap.get(key);
            if (cn == null) {
                continue;
            }
            aTree.setCopyNumber(cn.getCopyNumber());
            // Whatever is left in the map afterwards was not matched to a tree node.
            cnMap.remove(key);
        }

        StringBuilder msg = new StringBuilder();
        for (CopyNumberParser.TaxonCopyNumber cn : cnMap.values()) {
            if (cn.rank.equalsIgnoreCase(this.trained_rank)) {
                msg.append(cn.name).append("\n");
            }
        }
        if (msg.length() > 0) {
            throw new IllegalArgumentException("Error: The following taxon names at rank " + this.trained_rank + " in the copynumber file are not found in the input taxonomy file\n" + msg);
        }

        this.bottomUpSetCopyNumber(this.rootTree);
        if (!this.rootTree.hasCopyNumber()) {
            throw new IllegalArgumentException("Error: Problem setting copy number, no taxon at rank " + this.trained_rank + " found in the copynumber file");
        }
        this.topDownFillCopyNumber(this.rootTree);
    }

    /**
     * Adds a sequence whose single ancestor entry is the taxid of its lowest taxon.
     * The lineage is rebuilt by following parent ids up to the root, then the
     * matching path is created in the tree from the root down.
     * A sequence whose taxid is the root itself adds nothing.
     */
    private void addSequencewithTaxid(LineageSequence pSeq) throws IOException {
        int rootId = this.rootTree.getTaxonomy().getTaxID();
        List<Taxonomy> lineage = new ArrayList<Taxonomy>();
        Taxonomy tax = this.taxidMap.get(Integer.valueOf(pSeq.getAncestors().get(0)));
        while (tax != null) {
            lineage.add(tax);
            if (tax.getTaxID() == rootId) {
                break;
            }
            // A valid chain visits each taxon at most once; anything longer is a parent-id cycle.
            if (lineage.size() > this.taxidMap.size()) {
                throw new IllegalArgumentException("Problem retrieving ancestor taxon for Sequence " + pSeq.getSeqName() + ": cyclic parent ids found at taxon " + tax.getTaxID());
            }
            tax = this.taxidMap.get(Integer.valueOf(tax.getParentID()));
        }
        if (tax == null) {
            throw new IllegalArgumentException("Problem retrieving ancestor taxon for Sequence " + pSeq.getSeqName());
        }

        // lineage is ordered leaf -> root; the last entry is the root, which already exists.
        RawHierarchyTree curTree = this.rootTree;
        for (int i = lineage.size() - 2; i >= 0; --i) {
            Taxonomy taxon = lineage.get(i);
            RawHierarchyTree child = curTree.getSubclassbyName(taxon.getTaxName());
            if (child == null) {
                child = new RawHierarchyTree(taxon.getTaxName(), curTree, taxon);
            }
            curTree = child;
        }
        if (lineage.size() > 1) {
            this.registerLeafSequence(curTree, pSeq);
        }
    }

    /**
     * Adds a sequence whose ancestor list is a lineage of taxon names starting
     * at the root. Missing nodes along the path are created from the taxonomy.
     *
     * @throws IllegalArgumentException if the lineage is empty, starts at a different root, or
     *         names a taxon that is not in the taxonomy at that depth under that parent
     */
    private void addSequencewithLineage(LineageSequence pSeq) throws IOException {
        List<String> ancestors = pSeq.getAncestors();
        if (ancestors.isEmpty()) {
            throw new IllegalArgumentException("Error: No ancestors found for sequence: " + pSeq.getSeqName() + "! Please check the source file.");
        }
        if (!ancestors.get(0).equalsIgnoreCase(this.rootTree.getName())) {
            throw new IllegalArgumentException("Sequence " + pSeq.getSeqName() + " has conflicting root name: " + ancestors.get(0));
        }

        int size = ancestors.size();
        RawHierarchyTree curTree = this.rootTree;
        for (int i = 1; i < size; ++i) {
            RawHierarchyTree child = curTree.getSubclassbyName(ancestors.get(i));
            if (child == null) {
                Taxonomy tax = this.getTaxonomy(pSeq, curTree.getTaxonomy().taxID, i);
                child = new RawHierarchyTree(ancestors.get(i), curTree, tax);
            }
            curTree = child;
        }
        if (size > 1) {
            this.registerLeafSequence(curTree, pSeq);
        }
    }

    /**
     * Records a sequence on its lowest node: passes it to the node's
     * {@code initWordOccurrence} together with the shared word array, checks
     * that its rank matches the rank being trained on (the first sequence
     * defines that rank), and counts the sequence.
     */
    private void registerLeafSequence(RawHierarchyTree leaf, LineageSequence pSeq) throws IOException {
        leaf.initWordOccurrence(pSeq, this.wordPriorArr);
        String rank = leaf.getTaxonomy().getHierLevel();
        if (this.trained_rank == null) {
            this.trained_rank = rank;
        } else if (!this.trained_rank.equalsIgnoreCase(rank)) {
            throw new IllegalArgumentException("Sequence " + pSeq.getSeqName() + " has different lowest rank: " + rank + " from the previous lowest rank: " + this.trained_rank);
        }
        ++this.totalSequences;
    }

    /**
     * Finds the taxon named by entry {@code index} of the sequence's lineage
     * that has the given parent id and whose depth equals {@code index}.
     *
     * @throws IllegalArgumentException if the lineage is empty or no such taxon exists
     */
    private Taxonomy getTaxonomy(LineageSequence pSeq, int pid, int index) {
        List<String> ancestor = pSeq.getAncestors();
        if (ancestor.isEmpty()) {
            throw new IllegalArgumentException("Error: No ancestors found for sequence: " + pSeq.getSeqName() + "! Please check the source file.");
        }
        String name = ancestor.get(index);
        List<Taxonomy> candidates = this.taxnameMap.get(name);
        if (candidates != null) {
            // Several taxa may share a name; parent id and depth pick the right one.
            for (Taxonomy tax : candidates) {
                if (tax.parentID == pid && tax.depth == index) {
                    return tax;
                }
            }
        }
        throw new IllegalArgumentException("\nThe taxID for ancestor: " + name + " of sequence: " + pSeq.getSeqName() + " at depth: " + index + " with parent id: " + pid + " is not found!");
    }

    /** @return the root node of the training tree */
    public RawHierarchyTree getRoot() {
        return this.rootTree;
    }

    /**
     * Converts the collected word values into log priors and builds, for every
     * word, the list of (node index, log conditional probability) pairs of the
     * trained-rank nodes in which the word occurs. Also releases the per-node
     * word occurrences and fills the log leave-count table.
     *
     * <p>This rewrites {@code wordPriorArr} in place and appends to the
     * conditional probability list, so it is meant to be called once.
     *
     * @throws IllegalArgumentException if no sequence has been read or no node exists at the trained rank
     */
    void createGenusWordConditionalProb() {
        this.getGenusNodeList();
        if (this.genusNodeList.isEmpty()) {
            throw new IllegalArgumentException("\nThere is no node at " + this.trained_rank);
        }

        // Leave counts do not depend on the word, so read them once instead of once per word.
        int nodeCount = this.genusNodeList.size();
        int[] leaveCounts = new int[nodeCount];
        int maxNumOfLeaves = 0;
        for (int index = 0; index < nodeCount; ++index) {
            leaveCounts[index] = this.genusNodeList.get(index).getLeaveCount();
            if (leaveCounts[index] > maxNumOfLeaves) {
                maxNumOfLeaves = leaveCounts[index];
            }
        }

        for (int i = 0; i < this.wordPriorArr.length; ++i) {
            this.wordPriorArr[i] = (this.wordPriorArr[i] + WF1) / ((float) this.totalSequences + WF2);
            this.wordProbPointerArr[i] = this.genus_wordConditionalProbList.size();
            for (int index = 0; index < nodeCount; ++index) {
                int wordOccurrence = this.genusNodeList.get(index).getWordOccurrence(i);
                if (wordOccurrence > 0) {
                    float prob = (float) Math.log(((float) wordOccurrence + this.wordPriorArr[i]) / ((float) leaveCounts[index] + WF2));
                    this.genus_wordConditionalProbList.add(new RawGenusWordConditionalProb(index, prob));
                }
            }
            // The linear prior was needed above; from here on only its log is kept.
            this.wordPriorArr[i] = (float) Math.log(this.wordPriorArr[i]);
        }
        // Sentinel so that getStopIndex() works for the last word.
        this.wordProbPointerArr[this.wordProbPointerArr.length - 1] = this.genus_wordConditionalProbList.size();

        for (RawHierarchyTree node : this.genusNodeList) {
            node.releaseWordOccurrence();
        }

        this.logArr = new float[maxNumOfLeaves + 1];
        for (int i = 0; i < this.logArr.length; ++i) {
            this.logArr[i] = (float) Math.log((float) i + 1.0f);
        }
    }

    /** @return the stored value for the word; a log prior once createGenusWordConditionalProb() has run */
    float getLogWordPrior(int wordIndex) {
        return this.wordPriorArr[wordIndex];
    }

    /**
     * Returns the nodes at the trained rank, collecting them on first use.
     * The list is cached, so nodes added to the tree afterwards are not included.
     *
     * @throws IllegalArgumentException if no sequence has been read yet
     */
    @SuppressWarnings("rawtypes") // raw return type kept so existing callers compile unchanged
    List getGenusNodeList() {
        if (this.trained_rank == null) {
            throw new IllegalArgumentException("Need to read a sequence file to set the rank to train on");
        }
        if (this.genusNodeList == null) {
            this.genusNodeList = new ArrayList<RawHierarchyTree>();
            this.createNodeList(this.getRoot(), this.trained_rank, this.genusNodeList);
        }
        return this.genusNodeList;
    }

    /** @return {@code log(i + 1)} from the table built by createGenusWordConditionalProb() */
    float getLogLeaveCount(int i) {
        return this.logArr[i];
    }

    /** @return index of the first conditional probability entry of the word */
    int getStartIndex(int wordIndex) {
        return this.wordProbPointerArr[wordIndex];
    }

    /** @return index one past the last conditional probability entry of the word */
    int getStopIndex(int wordIndex) {
        return this.wordProbPointerArr[wordIndex + 1];
    }

    /** @return the conditional probability entry at the given position of the list */
    RawGenusWordConditionalProb getWordConditionalProb(int posIndex) {
        return this.genus_wordConditionalProbList.get(posIndex);
    }

    /**
     * Depth-first collects every node whose rank equals {@code level}
     * (ignoring case) into {@code nodeList} and stores the node's list position
     * as its genus index. The search does not descend below a matching node.
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // raw List parameter kept so existing callers compile unchanged
    void createNodeList(RawHierarchyTree node, String level, List nodeList) {
        if (node == null) {
            return;
        }
        if (node.getTaxonomy().hierLevel.equalsIgnoreCase(level)) {
            nodeList.add(node);
            node.setGenusIndex(nodeList.size() - 1);
            return;
        }
        Collection<RawHierarchyTree> children = node.getSubclasses();
        for (RawHierarchyTree child : children) {
            this.createNodeList(child, level, nodeList);
        }
    }

    /**
     * Sets the copy number of every node above the trained rank to the mean of
     * its children that have a copy number, processing children first.
     * A node none of whose children has a copy number is left without one.
     */
    void bottomUpSetCopyNumber(RawHierarchyTree node) {
        // Ranks are compared case-insensitively everywhere else in this class.
        if (node.getTaxonomy().hierLevel.equalsIgnoreCase(this.trained_rank)) {
            return;
        }
        Collection<RawHierarchyTree> children = node.getSubclasses();
        if (children.isEmpty()) {
            return;
        }
        for (RawHierarchyTree child : children) {
            this.bottomUpSetCopyNumber(child);
        }

        float sum = 0.0f;
        int childwithcn = 0;
        for (RawHierarchyTree child : children) {
            if (child.hasCopyNumber()) {
                sum += child.getCopyNumber();
                ++childwithcn;
            }
        }
        // Dividing by zero here would store NaN as the node's copy number.
        if (childwithcn > 0) {
            node.setCopyNumber(sum / (float) childwithcn);
        }
    }

    /**
     * Gives every node that has no copy number the copy number of its parent,
     * working from the given node downwards. A node without a parent is left unchanged.
     */
    void topDownFillCopyNumber(RawHierarchyTree node) {
        if (!node.hasCopyNumber() && node.getParent() != null) {
            node.setCopyNumber(node.getParent().getCopyNumber());
        }
        Collection<RawHierarchyTree> children = node.getSubclasses();
        for (RawHierarchyTree child : children) {
            this.topDownFillCopyNumber(child);
        }
    }

    /**
     * Writes {@code bergeyTrainingTree.xml}: the version header followed by
     * one {@code TreeNode} line per tree node.
     *
     * @param outdir prefix prepended as-is to the file name, so it must end with a path separator
     * @throws IOException if the file cannot be written
     */
    void printTrainingFiles(String outdir) throws IOException {
        BufferedWriter treeFile = new BufferedWriter(new FileWriter(outdir + "bergeyTrainingTree.xml"));
        try {
            treeFile.write(this.trainingVersion + "<file>bergeyTrainingTree</file>\n");
            this.displayTrainingTree(this.rootTree, treeFile);
        } finally {
            treeFile.close();
        }
    }

    /**
     * Writes the given node and then, recursively, its children.
     * The {@code cpNumber} attribute is written for every node if and only if
     * the tree's root has a copy number.
     */
    private void displayTrainingTree(RawHierarchyTree root, BufferedWriter out) throws IOException {
        Taxonomy taxon = root.getTaxonomy();
        // NOTE(review): '&' is dropped rather than escaped; kept as-is because readers of this file may rely on it.
        String name = root.getName().replace("&", "").replace("\"", "&quot;");
        out.write("<TreeNode name=\"" + name + "\" taxid=\"" + taxon.taxID + "\" rank=\"" + taxon.hierLevel + "\" parentTaxid=\"" + taxon.parentID + "\" leaveCount=\"" + root.getLeaveCount() + "\" genusIndex=\"" + root.getGenusIndex() + "\"");
        if (this.rootTree.hasCopyNumber()) {
            out.write(" cpNumber=\"" + String.format(DFORMAT, Float.valueOf(root.getCopyNumber())) + "\"");
        }
        out.write("></TreeNode>\n");

        Collection<RawHierarchyTree> children = root.getSubclasses();
        for (RawHierarchyTree child : children) {
            this.displayTrainingTree(child, out);
        }
    }

    /**
     * Writes {@code logWordPrior.txt}: the version header followed by one
     * {@code index<TAB>value} line per word.
     *
     * @param outdir prefix prepended as-is to the file name
     * @throws IOException if the file cannot be written
     */
    void printWordPriors(String outdir) throws IOException {
        BufferedWriter outfile = new BufferedWriter(new FileWriter(outdir + "logWordPrior.txt"));
        try {
            outfile.write(this.trainingVersion + "<file>logWordPrior</file>\n");
            for (int i = 0; i < this.wordPriorArr.length; ++i) {
                outfile.write(i + "\t" + this.wordPriorArr[i] + "\n");
            }
        } finally {
            outfile.close();
        }
    }

    /**
     * Writes {@code wordConditionalProbIndexArr.txt}: the version header
     * followed by one {@code index<TAB>offset} line per pointer array entry.
     *
     * @param outdir prefix prepended as-is to the file name
     * @throws IOException if the file cannot be written
     */
    void printWordConditionalProbIndexArr(String outdir) throws IOException {
        BufferedWriter outfile = new BufferedWriter(new FileWriter(outdir + "wordConditionalProbIndexArr.txt"));
        try {
            outfile.write(this.trainingVersion + "<file>wordConditionalProbIndexArr</file>\n");
            for (int i = 0; i < this.wordProbPointerArr.length; ++i) {
                outfile.write(i + "\t" + this.wordProbPointerArr[i] + "\n");
            }
        } finally {
            outfile.close();
        }
    }

    /**
     * Writes {@code genus_wordConditionalProbList.txt}: the version header
     * followed by one {@code genusIndex<TAB>probability} line per list entry.
     *
     * @param outdir prefix prepended as-is to the file name
     * @throws IOException if the file cannot be written
     */
    void printGenusIndex_WordProbArr(String outdir) throws IOException {
        BufferedWriter outfile = new BufferedWriter(new FileWriter(outdir + "genus_wordConditionalProbList.txt"));
        try {
            outfile.write(this.trainingVersion + "<file>genus_wordConditionalProbList</file>\n");
            for (RawGenusWordConditionalProb prob : this.genus_wordConditionalProbList) {
                outfile.write(prob.getGenusIndex() + "\t" + prob.getProbability() + "\n");
            }
        } finally {
            outfile.close();
        }
    }
}

