Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class HomRefBlockUnitTest, method testToVariantContext.
@Test
public void testToVariantContext() {
    final VariantContext vc = getVariantContext();
    final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME, vc.getAlleles());
    final Genotype genotype1 = gb.GQ(15).DP(6).PL(new int[] { 0, 10, 100 }).make();
    final Genotype genotype2 = gb.GQ(17).DP(10).PL(new int[] { 0, 5, 80 }).make();
    final HomRefBlock block = getHomRefBlock(vc);
    block.add(vc.getEnd(), genotype1);
    block.add(vc.getEnd() + 1, genotype2);

    final VariantContext newVc = block.toVariantContext(SAMPLE_NAME);
    Assert.assertEquals(newVc.getGenotypes().size(), 1);
    final Genotype genotype = newVc.getGenotypes().get(0);

    // DP should be the median of the added DPs
    Assert.assertEquals(genotype.getDP(), 8);
    // GQ should have been recalculated from the min PLs
    Assert.assertEquals(genotype.getGQ(), 5);
    Assert.assertTrue(genotype.getAlleles().stream().allMatch(a -> a.equals(REF)));
}
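The two assertions encode how the block merges per-site genotypes: DP becomes the median of the added DPs (the median of {6, 10} is 8), and GQ is recomputed from the element-wise minimum of the added PL arrays. A minimal standalone sketch of that min-PL combination, using illustrative names rather than HomRefBlock's actual internals:

// Illustrative sketch only: minPLs is a hypothetical helper, not GATK API.
import java.util.Arrays;

final class MinPLSketch {
    // element-wise minimum of two PL arrays of equal length
    static int[] minPLs(final int[] a, final int[] b) {
        final int[] min = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            min[i] = Math.min(a[i], b[i]);
        }
        return min;
    }

    public static void main(final String[] args) {
        // the PLs added in the test above
        final int[] combined = minPLs(new int[] { 0, 10, 100 }, new int[] { 0, 5, 80 });
        System.out.println(Arrays.toString(combined)); // [0, 5, 80]
        // GQ = second-smallest PL minus smallest PL = 5 - 0 = 5, matching the assertion
    }
}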
Use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
The class HaplotypeCallerSparkIntegrationTest, method testFastGenotypeIsSerializable.
@Test
public void testFastGenotypeIsSerializable() {
    final Genotype genotype = GenotypeBuilder.create("sample1", Collections.nCopies(2, Allele.create("C", false)));
    SparkTestUtils.roundTripInKryo(genotype, genotype.getClass(), SparkContextFactory.getTestSparkContext().getConf());
}
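The test serializes the Genotype with Kryo and deserializes it again, asserting nothing is lost; it exists because Spark ships records between executors this way. A rough, Spark-free sketch of such a round trip, assuming only the stock Kryo API (the real roundTripInKryo helper also wires in the Spark conf's registrator):

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

final class KryoRoundTripSketch {
    // Serialize and immediately deserialize an object; a Kryo-friendly type survives unchanged.
    @SuppressWarnings("unchecked")
    static <T> T roundTrip(final T obj) {
        final Kryo kryo = new Kryo();
        kryo.setRegistrationRequired(false); // sketch: accept unregistered classes
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (final Output out = new Output(bytes)) {
            kryo.writeClassAndObject(out, obj);
        }
        try (final Input in = new Input(new ByteArrayInputStream(bytes.toByteArray()))) {
            return (T) kryo.readClassAndObject(in);
        }
    }
}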
Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class QualByDepth, method annotate.
@Override
public Map<String, Object> annotate(final ReferenceContext ref, final VariantContext vc, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(vc);
    if (!vc.hasLog10PError()) {
        return Collections.emptyMap();
    }

    final GenotypesContext genotypes = vc.getGenotypes();
    if (genotypes == null || genotypes.isEmpty()) {
        return Collections.emptyMap();
    }

    int depth = 0;
    int ADrestrictedDepth = 0;
    for (final Genotype genotype : genotypes) {
        // we care only about variant calls with likelihoods
        if (!genotype.isHet() && !genotype.isHomVar()) {
            continue;
        }

        // if we have the AD values for this sample, make sure the variant depth is greater than 1
        if (genotype.hasAD()) {
            final int[] AD = genotype.getAD();
            final int totalADdepth = (int) MathUtils.sum(AD);
            if (totalADdepth != 0) {
                if (totalADdepth - AD[0] > 1) {
                    ADrestrictedDepth += totalADdepth;
                }
                depth += totalADdepth;
            }
        } else if (likelihoods != null) {
            depth += likelihoods.sampleReadCount(likelihoods.indexOfSample(genotype.getSampleName()));
        } else if (genotype.hasDP()) {
            depth += genotype.getDP();
        }
    }

    // if the AD-restricted depth is a usable value (i.e. not zero), use it going forward
    if (ADrestrictedDepth > 0) {
        depth = ADrestrictedDepth;
    }
    if (depth == 0) {
        return Collections.emptyMap();
    }

    final double qual = -10.0 * vc.getLog10PError();
    double QD = qual / depth;

    // Hack: see the note in the fixTooHighQD method below
    QD = fixTooHighQD(QD);

    return Collections.singletonMap(getKeyNames().get(0), String.format("%.2f", QD));
}
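QD is QUAL scaled by the informative depth: with vc.getLog10PError() == -5.0 the qual is 50.0, and a depth of 25 gives QD = 2.0. The fixTooHighQD body is not shown in this snippet; it guards against inflated QD when depth is tiny relative to QUAL. A hypothetical stand-in with an illustrative threshold (the project's actual method and constants may well differ, e.g. by adjusting rather than capping):

// Hypothetical stand-in for the unshown fixTooHighQD; the 35.0 constant is illustrative.
final class QDFixSketch {
    private static final double MAX_REASONABLE_QD = 35.0;

    static double fixTooHighQD(final double QD) {
        // very high QD is treated as an artifact of low depth and squashed to the threshold
        return Math.min(QD, MAX_REASONABLE_QD);
    }

    public static void main(final String[] args) {
        System.out.println(fixTooHighQD(2.0));  // 2.0  (QUAL 50 over depth 25)
        System.out.println(fixTooHighQD(80.0)); // 35.0 (implausibly high, capped)
    }
}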
Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class SampleList, method annotate.
@Override
public Map<String, Object> annotate(final ReferenceContext ref, final VariantContext vc, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(vc);
    if (vc.isMonomorphicInSamples() || !vc.hasGenotypes()) {
        return Collections.emptyMap();
    }

    final StringBuilder samples = new StringBuilder();
    for (final Genotype genotype : vc.getGenotypesOrderedByName()) {
        if (genotype.isCalled() && !genotype.isHomRef()) {
            if (samples.length() > 0) {
                samples.append(",");
            }
            samples.append(genotype.getSampleName());
        }
    }

    if (samples.length() == 0) {
        return Collections.emptyMap();
    }
    return Collections.singletonMap(getKeyNames().get(0), samples.toString());
}
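The StringBuilder loop simply joins the called, non-hom-ref sample names with commas. The same method body can be written with streams (a sketch, not the project's code; note that getGenotypesOrderedByName returns an Iterable, hence the StreamSupport bridge):

// drop-in fragment for the body above, with java.util.stream added to the class imports
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

final String joined = StreamSupport.stream(vc.getGenotypesOrderedByName().spliterator(), false)
        .filter(g -> g.isCalled() && !g.isHomRef())
        .map(Genotype::getSampleName)
        .collect(Collectors.joining(","));
return joined.isEmpty()
        ? Collections.emptyMap()
        : Collections.singletonMap(getKeyNames().get(0), joined);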
Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class StrandArtifact, method annotate.
@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb);
    Utils.nonNull(vc);
    Utils.nonNull(likelihoods);

    // do not annotate the genotype fields for the normal sample
    if (g.isHomRef()) {
        return;
    }

    pi.put(NO_ARTIFACT, 0.95);
    pi.put(ART_FWD, 0.025);
    pi.put(ART_REV, 0.025);

    // use the alt allele with the highest LOD score
    final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, -1);
    final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
    final Allele altAllele = vc.getAlternateAllele(indexOfMaxTumorLod);

    final Collection<ReadLikelihoods<Allele>.BestAllele> bestAlleles = likelihoods.bestAlleles(g.getSampleName());
    final int numFwdAltReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAllele)).count();
    final int numRevAltReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAllele)).count();
    final int numFwdReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative()).count();
    final int numRevReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative()).count();
    final int numAltReads = numFwdAltReads + numRevAltReads;
    final int numReads = numFwdReads + numRevReads;

    final EnumMap<StrandArtifactZ, Double> unnormalizedPosteriorProbabilities = new EnumMap<>(StrandArtifactZ.class);
    final EnumMap<StrandArtifactZ, Double> mapAlleleFractionEstimates = new EnumMap<>(StrandArtifactZ.class);

    /*** Compute the posterior probabilities of ARTIFACT_FWD and ARTIFACT_REV; each is a double integral over f and epsilon ***/
    // the integrand is a polynomial of degree n, where n is the number of reads at the locus;
    // to integrate it exactly with Gauss-Legendre we need (n/2)+1 points
    final int numIntegPointsForAlleleFraction = numReads / 2 + 1;
    final int numIntegPointsForEpsilon = (numReads + ALPHA + BETA - 2) / 2 + 1;

    final double likelihoodForArtifactFwd = IntegrationUtils.integrate2d(
            (f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads),
            0.0, 1.0, numIntegPointsForAlleleFraction,
            0.0, 1.0, numIntegPointsForEpsilon);
    final double likelihoodForArtifactRev = IntegrationUtils.integrate2d(
            (f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads),
            0.0, 1.0, numIntegPointsForAlleleFraction,
            0.0, 1.0, numIntegPointsForEpsilon);
    unnormalizedPosteriorProbabilities.put(ART_FWD, pi.get(ART_FWD) * likelihoodForArtifactFwd);
    unnormalizedPosteriorProbabilities.put(ART_REV, pi.get(ART_REV) * likelihoodForArtifactRev);

    /*** Compute the posterior probability of NO_ARTIFACT; evaluate a single integral over the allele fraction ***/
    final double likelihoodForNoArtifact = IntegrationUtils.integrate(
            f -> getIntegrandGivenNoArtifact(f, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads),
            0.0, 1.0, numIntegPointsForAlleleFraction);
    unnormalizedPosteriorProbabilities.put(NO_ARTIFACT, pi.get(NO_ARTIFACT) * likelihoodForNoArtifact);

    final double[] posteriorProbabilities = MathUtils.normalizeFromRealSpace(
            unnormalizedPosteriorProbabilities.values().stream().mapToDouble(Double::doubleValue).toArray());

    /*** Compute the maximum a posteriori estimate of the allele fraction given a strand artifact ***/
    // For a fixed f, integrate out epsilon. This gives the likelihood p(x^+, x^- | f, z) for that f,
    // which is proportional to the posterior p(f | x^+, x^-, z)
    final int numSamplePoints = 100;
    final double[] samplePoints = GATKProtectedMathUtils.createEvenlySpacedPoints(0.0, 1.0, numSamplePoints);
    final double[] likelihoodsGivenForwardArtifact = new double[numSamplePoints];
    final double[] likelihoodsGivenReverseArtifact = new double[numSamplePoints];
    for (int i = 0; i < samplePoints.length; i++) {
        final double f = samplePoints[i];
        likelihoodsGivenForwardArtifact[i] = IntegrationUtils.integrate(
                epsilon -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads),
                0.0, 1.0, numIntegPointsForEpsilon);
        likelihoodsGivenReverseArtifact[i] = IntegrationUtils.integrate(
                epsilon -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads),
                0.0, 1.0, numIntegPointsForEpsilon);
    }
    final int maxAlleleFractionIndexFwd = MathUtils.maxElementIndex(likelihoodsGivenForwardArtifact);
    final int maxAlleleFractionIndexRev = MathUtils.maxElementIndex(likelihoodsGivenReverseArtifact);
    mapAlleleFractionEstimates.put(ART_FWD, samplePoints[maxAlleleFractionIndexFwd]);
    mapAlleleFractionEstimates.put(ART_REV, samplePoints[maxAlleleFractionIndexRev]);
    // In the absence of a strand artifact, the MAP estimate of f reduces to the sample alt allele fraction
    mapAlleleFractionEstimates.put(NO_ARTIFACT, (double) numAltReads / numReads);

    gb.attribute(POSTERIOR_PROBABILITIES_KEY, posteriorProbabilities);
    gb.attribute(MAP_ALLELE_FRACTIONS_KEY, mapAlleleFractionEstimates.values().stream().mapToDouble(Double::doubleValue).toArray());
}
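normalizeFromRealSpace turns the three unnormalized posteriors (prior times integrated likelihood) into probabilities that sum to 1 by dividing each by their total. A standalone sketch of that step, assuming only that the inputs are non-negative reals (the example values are made up):

final class PosteriorNormalizationSketch {
    // divide each term by the sum so the outputs form a probability distribution
    static double[] normalizeFromRealSpace(final double[] unnormalized) {
        double sum = 0.0;
        for (final double v : unnormalized) {
            sum += v;
        }
        final double[] normalized = new double[unnormalized.length];
        for (int i = 0; i < unnormalized.length; i++) {
            normalized[i] = unnormalized[i] / sum;
        }
        return normalized;
    }

    public static void main(final String[] args) {
        // prior * integrated likelihood for NO_ARTIFACT, ART_FWD, ART_REV (illustrative numbers)
        final double[] p = normalizeFromRealSpace(new double[] { 0.95 * 0.002, 0.025 * 0.010, 0.025 * 0.004 });
        System.out.println(java.util.Arrays.toString(p)); // approx [0.84, 0.11, 0.04]
    }
}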