Search in sources :

Example 6 with ReadLikelihoods

use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

the class AssemblyBasedCallerUtils method realignReadsToTheirBestHaplotype.

/**
     * Returns a map with the original read as a key and the realigned read as the value.
     * <p>
     *     Missing keys or equivalent key and value pairs mean that the read was not realigned.
     * </p>
     * @return never {@code null}
     */
public static Map<GATKRead, GATKRead> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final Haplotype refHaplotype, final Locatable paddedReferenceLoc) {
    final Collection<ReadLikelihoods<Haplotype>.BestAllele<Haplotype>> bestAlleles = originalReadLikelihoods.bestAlleles();
    final Map<GATKRead, GATKRead> result = new HashMap<>(bestAlleles.size());
    for (final ReadLikelihoods<Haplotype>.BestAllele<Haplotype> bestAllele : bestAlleles) {
        final GATKRead originalRead = bestAllele.read;
        final Haplotype bestHaplotype = bestAllele.allele;
        final boolean isInformative = bestAllele.isInformative();
        final GATKRead realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead, bestHaplotype, refHaplotype, paddedReferenceLoc.getStart(), isInformative);
        result.put(originalRead, realignedRead);
    }
    return result;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods)

Example 7 with ReadLikelihoods

use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

the class StrandArtifact method annotate.

@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb);
    Utils.nonNull(vc);
    Utils.nonNull(likelihoods);
    // do not annotate the genotype fields for normal
    if (g.isHomRef()) {
        return;
    }
    pi.put(NO_ARTIFACT, 0.95);
    pi.put(ART_FWD, 0.025);
    pi.put(ART_REV, 0.025);
    // We use the allele with highest LOD score
    final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, -1);
    final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
    final Allele altAlelle = vc.getAlternateAllele(indexOfMaxTumorLod);
    final Collection<ReadLikelihoods<Allele>.BestAllele<Allele>> bestAlleles = likelihoods.bestAlleles(g.getSampleName());
    final int numFwdAltReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAlelle)).count();
    final int numRevAltReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAlelle)).count();
    final int numFwdReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative()).count();
    final int numRevReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative()).count();
    final int numAltReads = numFwdAltReads + numRevAltReads;
    final int numReads = numFwdReads + numRevReads;
    final EnumMap<StrandArtifactZ, Double> unnormalized_posterior_probabilities = new EnumMap<>(StrandArtifactZ.class);
    final EnumMap<StrandArtifactZ, Double> maximum_a_posteriori_allele_fraction_estimates = new EnumMap<>(StrandArtifactZ.class);
    /*** Compute the posterior probability of ARTIFACT_FWD and ARTIFACT_REV; it's a double integral over f and epsilon ***/
    // the integrand is a polynomial of degree n, where n is the number of reads at the locus
    // thus to integrate exactly with Gauss-Legendre we need (n/2)+1 points
    final int numIntegPointsForAlleleFraction = numReads / 2 + 1;
    final int numIntegPointsForEpsilon = (numReads + ALPHA + BETA - 2) / 2 + 1;
    final double likelihoodForArtifactFwd = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
    final double likelihoodForArtifactRev = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
    unnormalized_posterior_probabilities.put(ART_FWD, pi.get(ART_FWD) * likelihoodForArtifactFwd);
    unnormalized_posterior_probabilities.put(ART_REV, pi.get(ART_REV) * likelihoodForArtifactRev);
    /*** Compute the posterior probability of NO_ARTIFACT; evaluate a single integral over the allele fraction ***/
    final double likelihoodForNoArtifact = IntegrationUtils.integrate(f -> getIntegrandGivenNoArtifact(f, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction);
    unnormalized_posterior_probabilities.put(NO_ARTIFACT, pi.get(NO_ARTIFACT) * likelihoodForNoArtifact);
    final double[] posterior_probabilities = MathUtils.normalizeFromRealSpace(unnormalized_posterior_probabilities.values().stream().mapToDouble(Double::doubleValue).toArray());
    /*** Compute the maximum a posteriori estimate for allele fraction given strand artifact ***/
    // For a fixed f, integrate the double integral over epsilons. This gives us the likelihood p(x^+, x^- | f, z) for a fixed f, which is proportional to
    // the posterior probability of p(f | x^+, x^-, z)
    final int numSamplePoints = 100;
    final double[] samplePoints = GATKProtectedMathUtils.createEvenlySpacedPoints(0.0, 1.0, numSamplePoints);
    double[] likelihoodsGivenForwardArtifact = new double[numSamplePoints];
    double[] likelihoodsGivenReverseArtifact = new double[numSamplePoints];
    for (int i = 0; i < samplePoints.length; i++) {
        final double f = samplePoints[i];
        likelihoodsGivenForwardArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
        likelihoodsGivenReverseArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
    }
    final int maxAlleleFractionIndexFwd = MathUtils.maxElementIndex(likelihoodsGivenForwardArtifact);
    final int maxAlleleFractionIndexRev = MathUtils.maxElementIndex(likelihoodsGivenReverseArtifact);
    maximum_a_posteriori_allele_fraction_estimates.put(ART_FWD, samplePoints[maxAlleleFractionIndexFwd]);
    maximum_a_posteriori_allele_fraction_estimates.put(ART_REV, samplePoints[maxAlleleFractionIndexRev]);
    // In the absence of strand artifact, MAP estimate for f reduces to the sample alt allele fraction
    maximum_a_posteriori_allele_fraction_estimates.put(NO_ARTIFACT, (double) numAltReads / numReads);
    gb.attribute(POSTERIOR_PROBABILITIES_KEY, posterior_probabilities);
    gb.attribute(MAP_ALLELE_FRACTIONS_KEY, maximum_a_posteriori_allele_fraction_estimates.values().stream().mapToDouble(Double::doubleValue).toArray());
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) java.util(java.util) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) StrandArtifactZ(org.broadinstitute.hellbender.tools.walkers.annotator.StrandArtifact.StrandArtifactZ) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils) Allele(htsjdk.variant.variantcontext.Allele) StrandArtifactZ(org.broadinstitute.hellbender.tools.walkers.annotator.StrandArtifact.StrandArtifactZ)

Example 8 with ReadLikelihoods

use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

the class OxoGReadCounts method annotate.

@Override
public void annotate(final ReferenceContext refContext, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");
    if (g == null || !g.isCalled() || likelihoods == null || !vc.isSNP()) {
        return;
    }
    final Allele ref = vc.getReference();
    final Allele alt = vc.getAlternateAllele(0);
    int alt_F1R2 = 0;
    int alt_F2R1 = 0;
    int ref_F1R2 = 0;
    int ref_F2R1 = 0;
    for (final ReadLikelihoods<Allele>.BestAllele<Allele> bestAllele : likelihoods.bestAlleles(g.getSampleName())) {
        final GATKRead read = bestAllele.read;
        if (bestAllele.isInformative() && isUsableRead(read) && read.isPaired()) {
            final Allele allele = bestAllele.allele;
            if (allele.equals(ref, true)) {
                if (read.isReverseStrand() == read.isFirstOfPair()) {
                    ref_F2R1++;
                } else {
                    ref_F1R2++;
                }
            } else if (allele.equals(alt, true)) {
                if (read.isReverseStrand() == read.isFirstOfPair()) {
                    alt_F2R1++;
                } else {
                    alt_F1R2++;
                }
            }
        }
    }
    final double numerator;
    if (ref.equals(REF_C) || ref.equals(REF_A)) {
        numerator = alt_F2R1;
    } else {
        numerator = alt_F1R2;
    }
    final double denominator = alt_F1R2 + alt_F2R1;
    final double fraction = numerator / denominator;
    gb.attribute(GATKVCFConstants.OXOG_ALT_F1R2_KEY, alt_F1R2);
    gb.attribute(GATKVCFConstants.OXOG_ALT_F2R1_KEY, alt_F2R1);
    gb.attribute(GATKVCFConstants.OXOG_REF_F1R2_KEY, ref_F1R2);
    gb.attribute(GATKVCFConstants.OXOG_REF_F2R1_KEY, ref_F2R1);
    gb.attribute(GATKVCFConstants.OXOG_FRACTION_KEY, fraction);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Allele(htsjdk.variant.variantcontext.Allele) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods)

Example 9 with ReadLikelihoods

use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

the class MappingQualityRankSumTestUnitTest method testMQ.

@Test
public void testMQ() {
    final InfoFieldAnnotation ann = new MappingQualityRankSumTest();
    final String key = GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY;
    final MannWhitneyU mannWhitneyU = new MannWhitneyU();
    final int[] altMappingQualities = { 10, 20 };
    final int[] refMappingQualities = { 100, 110 };
    final List<GATKRead> refReads = Arrays.stream(refMappingQualities).mapToObj(i -> makeRead(i)).collect(Collectors.toList());
    final List<GATKRead> altReads = Arrays.stream(altMappingQualities).mapToObj(i -> makeRead(i)).collect(Collectors.toList());
    final ReadLikelihoods<Allele> likelihoods = AnnotationArtificialData.makeLikelihoods(sample1, refReads, altReads, -100.0, -100.0, REF, ALT);
    final VariantContext vc = makeVC(REF, ALT);
    final Map<String, Object> annotate = ann.annotate(null, vc, likelihoods);
    final double zScore = mannWhitneyU.test(new double[] { altMappingQualities[0], altMappingQualities[1] }, new double[] { refMappingQualities[0], refMappingQualities[1] }, MannWhitneyU.TestType.FIRST_DOMINATES).getZ();
    final String zScoreStr = String.format("%.3f", zScore);
    Assert.assertEquals(annotate.get(key), zScoreStr);
    Assert.assertEquals(ann.getDescriptions().size(), 1);
    Assert.assertEquals(ann.getDescriptions().get(0).getID(), key);
    Assert.assertEquals(ann.getKeyNames().size(), 1);
    Assert.assertEquals(ann.getKeyNames().get(0), key);
}
Also used : TextCigarCodec(htsjdk.samtools.TextCigarCodec) Cigar(htsjdk.samtools.Cigar) AS_MappingQualityRankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_MappingQualityRankSumTest) Arrays(java.util.Arrays) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) Test(org.testng.annotations.Test) htsjdk.variant.variantcontext(htsjdk.variant.variantcontext) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Collectors(java.util.stream.Collectors) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) AS_RankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RankSumTest) List(java.util.List) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) Map(java.util.Map) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) MannWhitneyU(org.broadinstitute.hellbender.utils.MannWhitneyU) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) AS_MappingQualityRankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_MappingQualityRankSumTest) MannWhitneyU(org.broadinstitute.hellbender.utils.MannWhitneyU) AS_MappingQualityRankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_MappingQualityRankSumTest) Test(org.testng.annotations.Test) AS_RankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RankSumTest)

Example 10 with ReadLikelihoods

use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

the class MappingQualityRankSumTestUnitTest method testAS_MQRaw.

@Test
public void testAS_MQRaw() {
    final AS_RankSumTest ann = new AS_MappingQualityRankSumTest();
    final String key1 = GATKVCFConstants.AS_RAW_MAP_QUAL_RANK_SUM_KEY;
    final String key2 = GATKVCFConstants.AS_MAP_QUAL_RANK_SUM_KEY;
    final int[] altMappingQualities = { 10, 20 };
    final int[] refMappingQualities = { 100, 110 };
    final List<GATKRead> refReads = Arrays.stream(refMappingQualities).mapToObj(i -> makeRead(i)).collect(Collectors.toList());
    final List<GATKRead> altReads = Arrays.stream(altMappingQualities).mapToObj(i -> makeRead(i)).collect(Collectors.toList());
    final ReadLikelihoods<Allele> likelihoods = AnnotationArtificialData.makeLikelihoods(sample1, refReads, altReads, -100.0, -100.0, REF, ALT);
    final ReferenceContext ref = null;
    final VariantContext vc = makeVC(REF, ALT);
    final Map<String, Object> annotateRaw = ann.annotateRawData(ref, vc, likelihoods);
    final Map<String, Object> annotate = ann.annotate(ref, vc, likelihoods);
    final String expected = refMappingQualities[0] + ",1," + refMappingQualities[1] + ",1" + AS_RankSumTest.PRINT_DELIM + altMappingQualities[0] + ",1," + altMappingQualities[1] + ",1";
    Assert.assertEquals(annotate.get(key1), expected);
    Assert.assertEquals(annotateRaw.get(key1), expected);
    Assert.assertEquals(ann.getDescriptions().size(), 1);
    Assert.assertEquals(ann.getDescriptions().get(0).getID(), key1);
    Assert.assertEquals(ann.getKeyNames().size(), 1);
    Assert.assertEquals(ann.getKeyNames().get(0), key2);
}
Also used : TextCigarCodec(htsjdk.samtools.TextCigarCodec) Cigar(htsjdk.samtools.Cigar) AS_MappingQualityRankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_MappingQualityRankSumTest) Arrays(java.util.Arrays) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) Test(org.testng.annotations.Test) htsjdk.variant.variantcontext(htsjdk.variant.variantcontext) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Collectors(java.util.stream.Collectors) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) AS_RankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RankSumTest) List(java.util.List) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) Map(java.util.Map) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) MannWhitneyU(org.broadinstitute.hellbender.utils.MannWhitneyU) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) AS_RankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RankSumTest) AS_MappingQualityRankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_MappingQualityRankSumTest) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) AS_MappingQualityRankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_MappingQualityRankSumTest) Test(org.testng.annotations.Test) AS_RankSumTest(org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RankSumTest)

Aggregations

ReadLikelihoods (org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods)22 ReferenceContext (org.broadinstitute.hellbender.engine.ReferenceContext)17 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)17 Allele (htsjdk.variant.variantcontext.Allele)11 Test (org.testng.annotations.Test)11 GATKVCFConstants (org.broadinstitute.hellbender.utils.variant.GATKVCFConstants)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)9 java.util (java.util)9 Collectors (java.util.stream.Collectors)9 AlleleList (org.broadinstitute.hellbender.utils.genotyper.AlleleList)9 IndexedAlleleList (org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList)9 IndexedSampleList (org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList)9 Assert (org.testng.Assert)7 htsjdk.variant.variantcontext (htsjdk.variant.variantcontext)6 VCFConstants (htsjdk.variant.vcf.VCFConstants)6 List (java.util.List)6 GenotypesContext (htsjdk.variant.variantcontext.GenotypesContext)5 ImmutableMap (com.google.common.collect.ImmutableMap)4 TextCigarCodec (htsjdk.samtools.TextCigarCodec)4 Genotype (htsjdk.variant.variantcontext.Genotype)4