/*
 * Decompiled with CFR 0.152.
 */
package com.gkano.bioinfo.javautils;

import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.gkano.bioinfo.var.GeneralTools;
import com.gkano.bioinfo.var.Logger;
import com.gkano.bioinfo.vcf.SNPEncoder;
import com.gkano.bioinfo.vcf.VCFManager;
import com.gkano.bioinfo.vcf.VariantEmbeddingLoader;
import com.gkano.bioinfo.vcf.VariantKeyExtractor;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

@Parameters(commandDescription="VCF2DIST")
public class UtilVCF2DIST {
    @Parameter(names={"--help"}, help=true)
    private boolean help;
    @Parameter(names={"-v", "--verbose"})
    private boolean verbose = false;
    @Parameter(description="<positional input files>")
    private List<String> positionalInputFiles = new ArrayList<String>();
    @Parameter(names={"-i", "--input"}, description="VCF input file(s)", variableArity=true)
    private List<String> namedInputFiles = new ArrayList<String>();
    @Parameter(names={"-o", "--output"}, description="Distance output file")
    private String outputFile;
    @Parameter(names={"--numberOfThreads", "-t"})
    private int numOfThreads = 1;
    @Parameter(names={"-e", "--embeddings"}, description="Variant embeddings file for embedding-based distance calculation")
    private String embeddingsFile;
    @Parameter(names={"--embeddings-format"}, description="Embeddings file format: TSV or HUGGINGFACE (auto-detected if not specified)")
    private String embeddingsFormat;
    @Parameter(names={"--variant-key"}, description="Variant key format for embedding lookup: CHROM_POS, CHROM_POS_REF_ALT, or VCF_ID")
    private String variantKeyFormat = "CHROM_POS_REF_ALT";

    public static String getUtilName() {
        return "VCF2DIST";
    }

    public void go() {
        try (PrintStream ops = GeneralTools.getPrintStreamOrExit(this.outputFile, this);){
            VCFManager vcfm = new VCFManager(Stream.concat(this.positionalInputFiles.stream(), this.namedInputFiles.stream()).collect(Collectors.toList()), this.numOfThreads, SNPEncoder.StringToStringParser, this.verbose);
            if (this.embeddingsFile != null && !this.embeddingsFile.isEmpty()) {
                Map<String, double[]> embeddings;
                Logger.info(this, "Loading embeddings from: " + this.embeddingsFile);
                if (this.embeddingsFormat != null && !this.embeddingsFormat.isEmpty()) {
                    VariantEmbeddingLoader.EmbeddingFormat format = VariantEmbeddingLoader.EmbeddingFormat.valueOf(this.embeddingsFormat.toUpperCase());
                    embeddings = VariantEmbeddingLoader.loadEmbeddings(this.embeddingsFile, format);
                } else {
                    embeddings = VariantEmbeddingLoader.loadEmbeddings(this.embeddingsFile);
                }
                VariantKeyExtractor.KeyFormat keyFormat = VariantKeyExtractor.parseFormat(this.variantKeyFormat);
                vcfm.setEmbeddings(embeddings, keyFormat);
            }
            vcfm.init();
            new Thread(vcfm).start();
            vcfm.awaitFinalization();
            double[][] distances = vcfm.reduceDotProdToDistances();
            List<String> sampleNames = vcfm.getSampleNames();
            int N = sampleNames.size();
            int numVariantsUsed = vcfm.isEmbeddingMode() ? vcfm.getNumVariants() - vcfm.getSkippedVariants() : vcfm.getNumVariants();
            ops.println(N + "\t" + numVariantsUsed);
            for (int i = 0; i < N; ++i) {
                ops.print(sampleNames.get(i));
                for (int j = 0; j < N; ++j) {
                    ops.print("\t" + GeneralTools.decimalFormat.format(distances[i][j]));
                }
                ops.println();
            }
            ops.close();
        }
        catch (Exception e) {
            Logger.error(this, e.getMessage());
        }
    }
}

