use of org.broadinstitute.hellbender.engine.ReferenceContext in project gatk by broadinstitute.
the class StrandArtifact method annotate.
@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
Utils.nonNull(gb);
Utils.nonNull(vc);
Utils.nonNull(likelihoods);
// do not annotate the genotype fields for normal
if (g.isHomRef()) {
return;
}
pi.put(NO_ARTIFACT, 0.95);
pi.put(ART_FWD, 0.025);
pi.put(ART_REV, 0.025);
// We use the allele with highest LOD score
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, -1);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
final Allele altAlelle = vc.getAlternateAllele(indexOfMaxTumorLod);
final Collection<ReadLikelihoods<Allele>.BestAllele<Allele>> bestAlleles = likelihoods.bestAlleles(g.getSampleName());
final int numFwdAltReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAlelle)).count();
final int numRevAltReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAlelle)).count();
final int numFwdReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative()).count();
final int numRevReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative()).count();
final int numAltReads = numFwdAltReads + numRevAltReads;
final int numReads = numFwdReads + numRevReads;
final EnumMap<StrandArtifactZ, Double> unnormalized_posterior_probabilities = new EnumMap<>(StrandArtifactZ.class);
final EnumMap<StrandArtifactZ, Double> maximum_a_posteriori_allele_fraction_estimates = new EnumMap<>(StrandArtifactZ.class);
/*** Compute the posterior probability of ARTIFACT_FWD and ARTIFACT_REV; it's a double integral over f and epsilon ***/
// the integrand is a polynomial of degree n, where n is the number of reads at the locus
// thus to integrate exactly with Gauss-Legendre we need (n/2)+1 points
final int numIntegPointsForAlleleFraction = numReads / 2 + 1;
final int numIntegPointsForEpsilon = (numReads + ALPHA + BETA - 2) / 2 + 1;
final double likelihoodForArtifactFwd = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
final double likelihoodForArtifactRev = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
unnormalized_posterior_probabilities.put(ART_FWD, pi.get(ART_FWD) * likelihoodForArtifactFwd);
unnormalized_posterior_probabilities.put(ART_REV, pi.get(ART_REV) * likelihoodForArtifactRev);
/*** Compute the posterior probability of NO_ARTIFACT; evaluate a single integral over the allele fraction ***/
final double likelihoodForNoArtifact = IntegrationUtils.integrate(f -> getIntegrandGivenNoArtifact(f, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction);
unnormalized_posterior_probabilities.put(NO_ARTIFACT, pi.get(NO_ARTIFACT) * likelihoodForNoArtifact);
final double[] posterior_probabilities = MathUtils.normalizeFromRealSpace(unnormalized_posterior_probabilities.values().stream().mapToDouble(Double::doubleValue).toArray());
/*** Compute the maximum a posteriori estimate for allele fraction given strand artifact ***/
// For a fixed f, integrate the double integral over epsilons. This gives us the likelihood p(x^+, x^- | f, z) for a fixed f, which is proportional to
// the posterior probability of p(f | x^+, x^-, z)
final int numSamplePoints = 100;
final double[] samplePoints = GATKProtectedMathUtils.createEvenlySpacedPoints(0.0, 1.0, numSamplePoints);
double[] likelihoodsGivenForwardArtifact = new double[numSamplePoints];
double[] likelihoodsGivenReverseArtifact = new double[numSamplePoints];
for (int i = 0; i < samplePoints.length; i++) {
final double f = samplePoints[i];
likelihoodsGivenForwardArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
likelihoodsGivenReverseArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
}
final int maxAlleleFractionIndexFwd = MathUtils.maxElementIndex(likelihoodsGivenForwardArtifact);
final int maxAlleleFractionIndexRev = MathUtils.maxElementIndex(likelihoodsGivenReverseArtifact);
maximum_a_posteriori_allele_fraction_estimates.put(ART_FWD, samplePoints[maxAlleleFractionIndexFwd]);
maximum_a_posteriori_allele_fraction_estimates.put(ART_REV, samplePoints[maxAlleleFractionIndexRev]);
// In the absence of strand artifact, MAP estimate for f reduces to the sample alt allele fraction
maximum_a_posteriori_allele_fraction_estimates.put(NO_ARTIFACT, (double) numAltReads / numReads);
gb.attribute(POSTERIOR_PROBABILITIES_KEY, posterior_probabilities);
gb.attribute(MAP_ALLELE_FRACTIONS_KEY, maximum_a_posteriori_allele_fraction_estimates.values().stream().mapToDouble(Double::doubleValue).toArray());
}
use of org.broadinstitute.hellbender.engine.ReferenceContext in project gatk by broadinstitute.
the class MappingQualityRankSumTestUnitTest method testAS_MQRaw.
@Test
public void testAS_MQRaw() {
final AS_RankSumTest ann = new AS_MappingQualityRankSumTest();
final String key1 = GATKVCFConstants.AS_RAW_MAP_QUAL_RANK_SUM_KEY;
final String key2 = GATKVCFConstants.AS_MAP_QUAL_RANK_SUM_KEY;
final int[] altMappingQualities = { 10, 20 };
final int[] refMappingQualities = { 100, 110 };
final List<GATKRead> refReads = Arrays.stream(refMappingQualities).mapToObj(i -> makeRead(i)).collect(Collectors.toList());
final List<GATKRead> altReads = Arrays.stream(altMappingQualities).mapToObj(i -> makeRead(i)).collect(Collectors.toList());
final ReadLikelihoods<Allele> likelihoods = AnnotationArtificialData.makeLikelihoods(sample1, refReads, altReads, -100.0, -100.0, REF, ALT);
final ReferenceContext ref = null;
final VariantContext vc = makeVC(REF, ALT);
final Map<String, Object> annotateRaw = ann.annotateRawData(ref, vc, likelihoods);
final Map<String, Object> annotate = ann.annotate(ref, vc, likelihoods);
final String expected = refMappingQualities[0] + ",1," + refMappingQualities[1] + ",1" + AS_RankSumTest.PRINT_DELIM + altMappingQualities[0] + ",1," + altMappingQualities[1] + ",1";
Assert.assertEquals(annotate.get(key1), expected);
Assert.assertEquals(annotateRaw.get(key1), expected);
Assert.assertEquals(ann.getDescriptions().size(), 1);
Assert.assertEquals(ann.getDescriptions().get(0).getID(), key1);
Assert.assertEquals(ann.getKeyNames().size(), 1);
Assert.assertEquals(ann.getKeyNames().get(0), key2);
}
use of org.broadinstitute.hellbender.engine.ReferenceContext in project gatk by broadinstitute.
the class MappingQualityZeroUnitTest method testEmptyLikelihoods.
@Test
public void testEmptyLikelihoods() throws Exception {
final List<GATKRead> reads = Collections.emptyList();
final Map<String, List<GATKRead>> readsBySample = ImmutableMap.of("sample1", reads);
final org.broadinstitute.hellbender.utils.genotyper.SampleList sampleList = new IndexedSampleList(Arrays.asList("sample1"));
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(Arrays.asList(Allele.create("A")));
final ReadLikelihoods<Allele> likelihoods = new ReadLikelihoods<>(sampleList, alleleList, readsBySample);
final VariantContext vc = makeVC();
final ReferenceContext referenceContext = null;
final Map<String, Object> annotate = new MappingQualityZero().annotate(referenceContext, vc, likelihoods);
//strangely, MappingQualityZero returns 0 if likelihoods is empty
final int n = 0;
Assert.assertEquals(annotate.get(VCFConstants.MAPPING_QUALITY_ZERO_KEY), String.valueOf(n));
}
use of org.broadinstitute.hellbender.engine.ReferenceContext in project gatk by broadinstitute.
the class ChromosomeCountsUnitTest method testVC.
@Test
public void testVC() throws Exception {
final VariantContext vc = makeVC();
final ReferenceContext referenceContext = null;
final InfoFieldAnnotation ann = new ChromosomeCounts();
final Map<String, Object> annotate = ann.annotate(referenceContext, vc, null);
//two hets
Assert.assertEquals(annotate.get(VCFConstants.ALLELE_NUMBER_KEY), 4);
Assert.assertEquals(annotate.get(VCFConstants.ALLELE_COUNT_KEY), 2);
Assert.assertEquals(annotate.get(VCFConstants.ALLELE_FREQUENCY_KEY), 0.5);
Assert.assertEquals(ann.getDescriptions(), Arrays.asList(ChromosomeCounts.descriptions));
Assert.assertEquals(ann.getKeyNames(), Arrays.asList(ChromosomeCounts.keyNames));
}
use of org.broadinstitute.hellbender.engine.ReferenceContext in project gatk by broadinstitute.
the class ClippingRankSumTestUnitTest method testClipping.
@Test
public void testClipping() {
final int[] refHardClips = { 10, 0 };
final int[] altHardClips = { 1, 2 };
final List<GATKRead> refReads = Arrays.stream(refHardClips).mapToObj(i -> makeRead(i)).collect(Collectors.toList());
final List<GATKRead> altReads = Arrays.stream(altHardClips).mapToObj(i -> makeRead(i)).collect(Collectors.toList());
final ReadLikelihoods<Allele> likelihoods = AnnotationArtificialData.makeLikelihoods(SAMPLE_1, refReads, altReads, -100.0, -100.0, REF, ALT);
final ReferenceContext ref = null;
final VariantContext vc = makeVC(REF, ALT);
final InfoFieldAnnotation ann = new ClippingRankSumTest();
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
final Map<String, Object> annotate = ann.annotate(ref, vc, likelihoods);
final double zScore = mannWhitneyU.test(new double[] { altHardClips[0], altHardClips[1] }, new double[] { refHardClips[0], refHardClips[1] }, MannWhitneyU.TestType.FIRST_DOMINATES).getZ();
final String zScoreStr = String.format("%.3f", zScore);
Assert.assertEquals(annotate.get(GATKVCFConstants.CLIPPING_RANK_SUM_KEY), zScoreStr);
Assert.assertEquals(ann.getDescriptions().size(), 1);
Assert.assertEquals(ann.getDescriptions().get(0).getID(), GATKVCFConstants.CLIPPING_RANK_SUM_KEY);
Assert.assertEquals(ann.getKeyNames().size(), 1);
Assert.assertEquals(ann.getKeyNames().get(0), GATKVCFConstants.CLIPPING_RANK_SUM_KEY);
}
Aggregations