Search in sources :

Example 6 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class AFCalculatorProvider method getInstance.

/**
     * Returns a AF calculator capable to handle a particular variant-context.
     * @param variantContext the target context build.
     * @param defaultPloidy the assumed ploidy in case that there is no a GT call present to determine it.
     * @return never {@code null}
     */
public AFCalculator getInstance(final VariantContext variantContext, final int defaultPloidy, final int maximumAltAlleles) {
    Utils.nonNull(variantContext, "variant context cannot be null");
    final int sampleCount = variantContext.getNSamples();
    if (sampleCount == 0) {
        return getInstance(defaultPloidy, maximumAltAlleles);
    }
    final GenotypesContext genotypes = variantContext.getGenotypes();
    final Genotype firstGenotype = genotypes.get(0);
    int ploidy = firstGenotype.getPloidy();
    if (ploidy <= 0) {
        ploidy = defaultPloidy;
    }
    for (int i = 1; i < sampleCount; i++) {
        final Genotype genotype = genotypes.get(i);
        final int declaredPloidy = genotype.getPloidy();
        final int actualPloidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
        if (actualPloidy != ploidy) {
            ploidy = AFCalculatorImplementation.UNBOUND_PLOIDY;
            break;
        }
    }
    return getInstance(ploidy, Math.min(variantContext.getNAlleles() - 1, maximumAltAlleles));
}
Also used : GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) Genotype(htsjdk.variant.variantcontext.Genotype)

Example 7 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class AlleleFrequencyCalculator method getLog10PNonRef.

//TODO: this should be a class of static methods once the old AFCalculator is gone.
/**
     * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
     *
     * @param vc the VariantContext holding the alleles and sample information.  The VariantContext
     *           must have at least 1 alternative allele
     * @param refSnpIndelPseudocounts a total hack.  A length-3 vector containing Dirichlet prior pseudocounts to
     *                                be given to ref, alt SNP, and alt indel alleles.  Hack won't be necessary when we destroy the old AF calculators
     * @return result (for programming convenience)
     */
@Override
public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] refSnpIndelPseudocounts) {
    Utils.nonNull(vc, "VariantContext cannot be null");
    final int numAlleles = vc.getNAlleles();
    final List<Allele> alleles = vc.getAlleles();
    Utils.validateArg(numAlleles > 1, () -> "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);
    final double[] priorPseudocounts = alleles.stream().mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() > 1 ? snpPseudocount : indelPseudocount)).toArray();
    double[] alleleCounts = new double[numAlleles];
    // log10(1/numAlleles)
    final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles);
    double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);
    double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY;
    while (alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE) {
        final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
        alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
        alleleCounts = newAlleleCounts;
        final double[] posteriorPseudocounts = MathArrays.ebeAdd(priorPseudocounts, alleleCounts);
        // first iteration uses flat prior in order to avoid local minimum where the prior + no pseudocounts gives such a low
        // effective allele frequency that it overwhelms the genotype likelihood of a real variant
        // basically, we want a chance to get non-zero pseudocounts before using a prior that's biased against a variant
        log10AlleleFrequencies = new Dirichlet(posteriorPseudocounts).log10MeanWeights();
    }
    double[] log10POfZeroCountsByAllele = new double[numAlleles];
    double log10PNoVariant = 0;
    for (final Genotype g : vc.getGenotypes()) {
        if (!g.hasLikelihoods()) {
            continue;
        }
        final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy();
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);
        final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);
        //the total probability
        log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
        // per allele non-log space probabilities of zero counts for this sample
        // for each allele calculate the total probability of genotypes containing at least one copy of the allele
        final double[] log10ProbabilityOfNonZeroAltAlleles = new double[numAlleles];
        Arrays.fill(log10ProbabilityOfNonZeroAltAlleles, Double.NEGATIVE_INFINITY);
        for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) {
            final double log10GenotypePosterior = log10GenotypePosteriors[genotype];
            glCalc.genotypeAlleleCountsAt(genotype).forEachAlleleIndexAndCount((alleleIndex, count) -> log10ProbabilityOfNonZeroAltAlleles[alleleIndex] = MathUtils.log10SumLog10(log10ProbabilityOfNonZeroAltAlleles[alleleIndex], log10GenotypePosterior));
        }
        for (int allele = 0; allele < numAlleles; allele++) {
            // if prob of non hom ref == 1 up to numerical precision, short-circuit to avoid NaN
            if (log10ProbabilityOfNonZeroAltAlleles[allele] >= 0) {
                log10POfZeroCountsByAllele[allele] = Double.NEGATIVE_INFINITY;
            } else {
                log10POfZeroCountsByAllele[allele] += MathUtils.log10OneMinusPow10(log10ProbabilityOfNonZeroAltAlleles[allele]);
            }
        }
    }
    // unfortunately AFCalculationResult expects integers for the MLE.  We really should emit the EM no-integer values
    // which are valuable (eg in CombineGVCFs) as the sufficient statistics of the Dirichlet posterior on allele frequencies
    final int[] integerAlleleCounts = Arrays.stream(alleleCounts).mapToInt(x -> (int) Math.round(x)).toArray();
    final int[] integerAltAlleleCounts = Arrays.copyOfRange(integerAlleleCounts, 1, numAlleles);
    //skip the ref allele (index 0)
    final Map<Allele, Double> log10PRefByAllele = IntStream.range(1, numAlleles).boxed().collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a]));
    // we compute posteriors here and don't have the same prior that AFCalculationResult expects.  Therefore, we
    // give it our posterior as its "likelihood" along with a flat dummy prior
    //TODO: HACK must be negative for AFCalcResult
    final double[] dummyFlatPrior = { -1e-10, -1e-10 };
    final double[] log10PosteriorOfNoVariantYesVariant = { log10PNoVariant, MathUtils.log10OneMinusPow10(log10PNoVariant) };
    return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PosteriorOfNoVariantYesVariant, dummyFlatPrior, log10PRefByAllele);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) MathArrays(org.apache.commons.math3.util.MathArrays) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts) Collectors(java.util.stream.Collectors) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeLikelihoodCalculators(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Map(java.util.Map) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Genotype(htsjdk.variant.variantcontext.Genotype) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Allele(htsjdk.variant.variantcontext.Allele) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator)

Example 8 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class AlleleFrequencyCalculator method log10NormalizedGenotypePosteriors.

private static double[] log10NormalizedGenotypePosteriors(final Genotype g, final GenotypeLikelihoodCalculator glCalc, final double[] log10AlleleFrequencies) {
    final double[] log10Likelihoods = g.getLikelihoods().getAsVector();
    final double[] log10Posteriors = new IndexRange(0, glCalc.genotypeCount()).mapToDouble(genotypeIndex -> {
        final GenotypeAlleleCounts gac = glCalc.genotypeAlleleCountsAt(genotypeIndex);
        return gac.log10CombinationCount() + log10Likelihoods[genotypeIndex] + gac.sumOverAlleleIndicesAndCounts((index, count) -> count * log10AlleleFrequencies[index]);
    });
    return MathUtils.normalizeLog10(log10Posteriors);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) MathArrays(org.apache.commons.math3.util.MathArrays) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts) Collectors(java.util.stream.Collectors) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeLikelihoodCalculators(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Map(java.util.Map) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts)

Example 9 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class GenotypeConcordanceTest method testGenotypeConcordanceDetermineState.

@Test(dataProvider = "genotypeConcordanceDetermineStateDataProvider")
public void testGenotypeConcordanceDetermineState(final Allele truthAllele1, final Allele truthAllele2, final TruthState expectedTruthState, final Allele callAllele1, final Allele callAllele2, final CallState expectedCallState) throws Exception {
    final List<Allele> truthAlleles = makeUniqueListOfAlleles(truthAllele1, truthAllele2);
    final Genotype truthGt = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(truthAllele1, truthAllele2));
    final VariantContext truthVariantContext = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, truthAlleles).genotypes(truthGt).make();
    final List<Allele> callAlleles = makeUniqueListOfAlleles(callAllele1, callAllele2);
    final Genotype callGt = GenotypeBuilder.create(CALL_SAMPLE_NAME, Arrays.asList(callAllele1, callAllele2));
    final VariantContext callVariantContext = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, callAlleles).genotypes(callGt).make();
    testGenotypeConcordanceDetermineState(truthVariantContext, expectedTruthState, callVariantContext, expectedCallState, 0, 0);
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VariantContext(htsjdk.variant.variantcontext.VariantContext) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 10 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class GenotypeConcordanceTest method testGenotypeConcordanceDetermineStateFilter.

@Test
public void testGenotypeConcordanceDetermineStateFilter() throws Exception {
    final Set<String> filters = new HashSet<>(Arrays.asList("BAD!"));
    // Filtering on the variant context
    final List<Allele> alleles1 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles1).genotypes(gt1).filters(filters).make();
    final List<Allele> alleles2 = makeUniqueListOfAlleles(Aref, T);
    final Genotype gt2 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, T));
    final VariantContext vcNotFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles2).genotypes(gt2).make();
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcFiltered, CallState.VC_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcFiltered, CallState.VC_FILTERED, 0, 0);
    // Filtering on the genotype
    final List<String> gtFilters = new ArrayList<>(Arrays.asList("WICKED"));
    final List<Allele> alleles3 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt3 = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).filters(gtFilters).make();
    final VariantContext vcGtFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles3).genotypes(gt3).make();
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VariantContext(htsjdk.variant.variantcontext.VariantContext) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Aggregations

Genotype (htsjdk.variant.variantcontext.Genotype)150 VariantContext (htsjdk.variant.variantcontext.VariantContext)97 Allele (htsjdk.variant.variantcontext.Allele)82 ArrayList (java.util.ArrayList)54 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)52 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)51 File (java.io.File)48 VCFHeader (htsjdk.variant.vcf.VCFHeader)46 IOException (java.io.IOException)45 Collectors (java.util.stream.Collectors)42 Test (org.testng.annotations.Test)37 HashSet (java.util.HashSet)35 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)29 List (java.util.List)29 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)27 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)25 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)23 HashMap (java.util.HashMap)23 java.util (java.util)22 Set (java.util.Set)22