Search in sources :

Example 1 with GenotypeLikelihoodCalculator

use of org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator in project gatk by broadinstitute.

the class AlleleFrequencyCalculatorUnitTest method PLsForObviousCall.

// make PLs that correspond to an obvious call i.e. one PL is relatively big and the rest are zero
// alleleCounts is the GenotypeAlleleCounts format for the obvious genotype, with repeats but in no particular order
private static int[] PLsForObviousCall(final int ploidy, final int numAlleles, final int[] alleleCounts, final int PL) {
    final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);
    final int[] result = Collections.nCopies(glCalc.genotypeCount(), PL).stream().mapToInt(n -> n).toArray();
    result[glCalc.alleleCountsToIndex(alleleCounts)] = 0;
    return result;
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) htsjdk.variant.variantcontext(htsjdk.variant.variantcontext) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeLikelihoodCalculators(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator)

Example 2 with GenotypeLikelihoodCalculator

use of org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator in project gatk by broadinstitute.

the class AlleleFrequencyCalculator method getLog10PNonRef.

//TODO: this should be a class of static methods once the old AFCalculator is gone.
/**
     * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
     *
     * @param vc the VariantContext holding the alleles and sample information.  The VariantContext
     *           must have at least 1 alternative allele
     * @param refSnpIndelPseudocounts a total hack.  A length-3 vector containing Dirichlet prior pseudocounts to
     *                                be given to ref, alt SNP, and alt indel alleles.  Hack won't be necessary when we destroy the old AF calculators
     * @return result (for programming convenience)
     */
@Override
public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] refSnpIndelPseudocounts) {
    Utils.nonNull(vc, "VariantContext cannot be null");
    final int numAlleles = vc.getNAlleles();
    final List<Allele> alleles = vc.getAlleles();
    Utils.validateArg(numAlleles > 1, () -> "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);
    final double[] priorPseudocounts = alleles.stream().mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() > 1 ? snpPseudocount : indelPseudocount)).toArray();
    double[] alleleCounts = new double[numAlleles];
    // log10(1/numAlleles)
    final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles);
    double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);
    double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY;
    while (alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE) {
        final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
        alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
        alleleCounts = newAlleleCounts;
        final double[] posteriorPseudocounts = MathArrays.ebeAdd(priorPseudocounts, alleleCounts);
        // first iteration uses flat prior in order to avoid local minimum where the prior + no pseudocounts gives such a low
        // effective allele frequency that it overwhelms the genotype likelihood of a real variant
        // basically, we want a chance to get non-zero pseudocounts before using a prior that's biased against a variant
        log10AlleleFrequencies = new Dirichlet(posteriorPseudocounts).log10MeanWeights();
    }
    double[] log10POfZeroCountsByAllele = new double[numAlleles];
    double log10PNoVariant = 0;
    for (final Genotype g : vc.getGenotypes()) {
        if (!g.hasLikelihoods()) {
            continue;
        }
        final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy();
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);
        final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);
        //the total probability
        log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
        // per allele non-log space probabilities of zero counts for this sample
        // for each allele calculate the total probability of genotypes containing at least one copy of the allele
        final double[] log10ProbabilityOfNonZeroAltAlleles = new double[numAlleles];
        Arrays.fill(log10ProbabilityOfNonZeroAltAlleles, Double.NEGATIVE_INFINITY);
        for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) {
            final double log10GenotypePosterior = log10GenotypePosteriors[genotype];
            glCalc.genotypeAlleleCountsAt(genotype).forEachAlleleIndexAndCount((alleleIndex, count) -> log10ProbabilityOfNonZeroAltAlleles[alleleIndex] = MathUtils.log10SumLog10(log10ProbabilityOfNonZeroAltAlleles[alleleIndex], log10GenotypePosterior));
        }
        for (int allele = 0; allele < numAlleles; allele++) {
            // if prob of non hom ref == 1 up to numerical precision, short-circuit to avoid NaN
            if (log10ProbabilityOfNonZeroAltAlleles[allele] >= 0) {
                log10POfZeroCountsByAllele[allele] = Double.NEGATIVE_INFINITY;
            } else {
                log10POfZeroCountsByAllele[allele] += MathUtils.log10OneMinusPow10(log10ProbabilityOfNonZeroAltAlleles[allele]);
            }
        }
    }
    // unfortunately AFCalculationResult expects integers for the MLE.  We really should emit the EM no-integer values
    // which are valuable (eg in CombineGVCFs) as the sufficient statistics of the Dirichlet posterior on allele frequencies
    final int[] integerAlleleCounts = Arrays.stream(alleleCounts).mapToInt(x -> (int) Math.round(x)).toArray();
    final int[] integerAltAlleleCounts = Arrays.copyOfRange(integerAlleleCounts, 1, numAlleles);
    //skip the ref allele (index 0)
    final Map<Allele, Double> log10PRefByAllele = IntStream.range(1, numAlleles).boxed().collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a]));
    // we compute posteriors here and don't have the same prior that AFCalculationResult expects.  Therefore, we
    // give it our posterior as its "likelihood" along with a flat dummy prior
    //TODO: HACK must be negative for AFCalcResult
    final double[] dummyFlatPrior = { -1e-10, -1e-10 };
    final double[] log10PosteriorOfNoVariantYesVariant = { log10PNoVariant, MathUtils.log10OneMinusPow10(log10PNoVariant) };
    return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PosteriorOfNoVariantYesVariant, dummyFlatPrior, log10PRefByAllele);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) MathArrays(org.apache.commons.math3.util.MathArrays) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts) Collectors(java.util.stream.Collectors) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeLikelihoodCalculators(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Map(java.util.Map) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Genotype(htsjdk.variant.variantcontext.Genotype) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Allele(htsjdk.variant.variantcontext.Allele) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator)

Example 3 with GenotypeLikelihoodCalculator

use of org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator in project gatk by broadinstitute.

the class AlleleFrequencyCalculator method log10NormalizedGenotypePosteriors.

private static double[] log10NormalizedGenotypePosteriors(final Genotype g, final GenotypeLikelihoodCalculator glCalc, final double[] log10AlleleFrequencies) {
    final double[] log10Likelihoods = g.getLikelihoods().getAsVector();
    final double[] log10Posteriors = new IndexRange(0, glCalc.genotypeCount()).mapToDouble(genotypeIndex -> {
        final GenotypeAlleleCounts gac = glCalc.genotypeAlleleCountsAt(genotypeIndex);
        return gac.log10CombinationCount() + log10Likelihoods[genotypeIndex] + gac.sumOverAlleleIndicesAndCounts((index, count) -> count * log10AlleleFrequencies[index]);
    });
    return MathUtils.normalizeLog10(log10Posteriors);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) MathArrays(org.apache.commons.math3.util.MathArrays) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts) Collectors(java.util.stream.Collectors) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeLikelihoodCalculators(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Map(java.util.Map) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts)

Example 4 with GenotypeLikelihoodCalculator

use of org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator in project gatk by broadinstitute.

the class GeneralPloidyExactAFCalculator method computeLofK.

/**
     * Compute likelihood of a particular AC conformation and update AFresult object
     * @param set                     Set of AC counts to compute
     * @param firstGLs                  Original pool likelihoods before combining
     * @param secondGL                  New GL vector with additional pool
     * @param log10AlleleFrequencyPriors     Allele frequency priors
     * @param numAlleles                Number of alleles (including ref)
     * @param ploidy1                   Ploidy of original pool (combined)
     * @param ploidy2                   Ploidy of new pool
     * @return                          log-likelihood of requested conformation
     */
private double computeLofK(final ExactACset set, final CombinedPoolLikelihoods firstGLs, final double[] secondGL, final double[] log10AlleleFrequencyPriors, final int numAlleles, final int ploidy1, final int ploidy2, final StateTracker stateTracker) {
    final int newPloidy = ploidy1 + ploidy2;
    // sanity check
    int totalAltK = set.getACsum();
    if (newPloidy != totalAltK) {
        throw new GATKException("BUG: inconsistent sizes of set.getACsum and passed ploidy values");
    }
    totalAltK -= set.getACcounts().getCounts()[0];
    // special case for k = 0 over all k
    if (totalAltK == 0) {
        // all-ref case
        final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
        set.getLog10Likelihoods()[0] = log10Lof0;
        stateTracker.setLog10LikelihoodOfAFzero(log10Lof0);
        stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
        return log10Lof0;
    } else {
        // initialize result with denominator
        // ExactACset holds by convention the conformation of all alleles, and the sum of all allele count is just the ploidy.
        // To compute n!/k1!k2!k3!... we need to compute first n!/(k2!k3!...) and then further divide by k1! where k1=ploidy-sum_k_i
        final int[] currentCount = set.getACcounts().getCounts();
        final double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy2, numAlleles);
        for (int PLIndex = 0; PLIndex < glCalc.genotypeCount(); PLIndex++) {
            final GenotypeAlleleCounts alleleCounts = glCalc.genotypeAlleleCountsAt(PLIndex);
            final int[] acCount2 = alleleCounts.alleleCountsByIndex(numAlleles - 1);
            final int[] acCount1 = MathUtils.vectorDiff(currentCount, acCount2);
            // for conformation to be valid, all elements of g2 have to be <= elements of current AC set
            if (isValidConformation(acCount1, ploidy1) && firstGLs.hasConformation(acCount1)) {
                final double gl2 = secondGL[PLIndex];
                if (!Double.isInfinite(gl2)) {
                    final double firstGL = firstGLs.getLikelihoodOfConformation(acCount1);
                    final double num1 = MathUtils.log10MultinomialCoefficient(ploidy1, acCount1);
                    final double num2 = MathUtils.log10MultinomialCoefficient(ploidy2, acCount2);
                    final double sum = firstGL + gl2 + num1 + num2;
                    set.getLog10Likelihoods()[0] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[0], sum);
                }
            }
        }
        set.getLog10Likelihoods()[0] += denom;
    }
    double log10LofK = set.getLog10Likelihoods()[0];
    // update the MLE if necessary
    final int[] altCounts = Arrays.copyOfRange(set.getACcounts().getCounts(), 1, set.getACcounts().getCounts().length);
    // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
    stateTracker.updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
    // apply the priors over each alternate allele
    for (final int ACcount : altCounts) {
        if (ACcount > 0) {
            log10LofK += log10AlleleFrequencyPriors[ACcount];
        }
    }
    // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
    stateTracker.updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
    return log10LofK;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts)

Example 5 with GenotypeLikelihoodCalculator

use of org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator in project gatk by broadinstitute.

the class AlleleFrequencyCalculator method effectiveAlleleCounts.

// effectiveAlleleCounts[allele a] = SUM_{genotypes g} (posterior_probability(g) * num_copies of a in g), which we denote as SUM [n_g p_g]
// for numerical stability we will do this in log space:
// count = SUM 10^(log (n_g p_g)) = SUM 10^(log n_g + log p_g)
// thanks to the log-sum-exp trick this lets us work with log posteriors alone
private double[] effectiveAlleleCounts(final VariantContext vc, final double[] log10AlleleFrequencies) {
    final int numAlleles = vc.getNAlleles();
    Utils.validateArg(numAlleles == log10AlleleFrequencies.length, "number of alleles inconsistent");
    final double[] log10Result = new double[numAlleles];
    Arrays.fill(log10Result, Double.NEGATIVE_INFINITY);
    for (final Genotype g : vc.getGenotypes()) {
        if (!g.hasLikelihoods()) {
            continue;
        }
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(g.getPloidy(), numAlleles);
        final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);
        new IndexRange(0, glCalc.genotypeCount()).forEach(genotypeIndex -> glCalc.genotypeAlleleCountsAt(genotypeIndex).forEachAlleleIndexAndCount((alleleIndex, count) -> log10Result[alleleIndex] = MathUtils.log10SumLog10(log10Result[alleleIndex], log10GenotypePosteriors[genotypeIndex] + MathUtils.log10(count))));
    }
    return MathUtils.applyToArrayInPlace(log10Result, x -> Math.pow(10.0, x));
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) MathArrays(org.apache.commons.math3.util.MathArrays) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts) Collectors(java.util.stream.Collectors) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeLikelihoodCalculators(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Map(java.util.Map) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator)

Aggregations

GenotypeLikelihoodCalculator (org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator)5 Collectors (java.util.stream.Collectors)4 IntStream (java.util.stream.IntStream)4 GenotypeAlleleCounts (org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts)4 GenotypeLikelihoodCalculators (org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators)4 Allele (htsjdk.variant.variantcontext.Allele)3 Genotype (htsjdk.variant.variantcontext.Genotype)3 VariantContext (htsjdk.variant.variantcontext.VariantContext)3 Arrays (java.util.Arrays)3 List (java.util.List)3 Map (java.util.Map)3 MathArrays (org.apache.commons.math3.util.MathArrays)3 Dirichlet (org.broadinstitute.hellbender.utils.Dirichlet)3 IndexRange (org.broadinstitute.hellbender.utils.IndexRange)3 MathUtils (org.broadinstitute.hellbender.utils.MathUtils)3 Utils (org.broadinstitute.hellbender.utils.Utils)3 htsjdk.variant.variantcontext (htsjdk.variant.variantcontext)1 java.util (java.util)1 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)1 Pair (org.apache.commons.lang3.tuple.Pair)1