Search in sources :

Example 1 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.

the class HomRefBlockUnitTest method testToVariantContext.

@Test
public void testToVariantContext() {
    final VariantContext vc = getVariantContext();
    final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME, vc.getAlleles());
    final Genotype genotype1 = gb.GQ(15).DP(6).PL(new int[] { 0, 10, 100 }).make();
    final Genotype genotype2 = gb.GQ(17).DP(10).PL(new int[] { 0, 5, 80 }).make();
    final HomRefBlock block = getHomRefBlock(vc);
    block.add(vc.getEnd(), genotype1);
    block.add(vc.getEnd() + 1, genotype2);
    final VariantContext newVc = block.toVariantContext(SAMPLE_NAME);
    Assert.assertEquals(newVc.getGenotypes().size(), 1);
    final Genotype genotype = newVc.getGenotypes().get(0);
    //dp should be median of the added DPs
    Assert.assertEquals(genotype.getDP(), 8);
    //GQ should have been recalculated with the minPls
    Assert.assertEquals(genotype.getGQ(), 5);
    Assert.assertTrue(genotype.getAlleles().stream().allMatch(a -> a.equals(REF)));
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) ArrayList(java.util.ArrayList) List(java.util.List) Assert(org.testng.Assert) VariantContext(htsjdk.variant.variantcontext.VariantContext) HomoSapiensConstants(org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFConstants(htsjdk.variant.vcf.VCFConstants) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 2 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.

the class HomRefBlockUnitTest method testMinMedian.

@Test
public void testMinMedian() {
    final VariantContext vc = getVariantContext();
    final HomRefBlock band = getHomRefBlock(vc);
    final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME);
    gb.alleles(vc.getAlleles());
    int pos = band.getStart();
    band.add(pos++, gb.DP(10).GQ(11).PL(new int[] { 0, 11, 100 }).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 10);
    band.add(pos++, gb.DP(11).GQ(10).PL(new int[] { 0, 10, 100 }).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 11);
    band.add(pos++, gb.DP(12).GQ(12).PL(new int[] { 0, 12, 100 }).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 11);
    band.add(pos++, gb.DP(13).GQ(15).PL(new int[] { 0, 15, 100 }).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(14).GQ(16).PL(new int[] { 0, 16, 100 }).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(15).GQ(17).PL(new int[] { 0, 17, 100 }).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(16).GQ(18).PL(new int[] { 0, 18, 100 }).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 13);
    Assert.assertEquals(band.getSize(), pos - vc.getStart());
    Assert.assertEquals(band.getMinPLs(), new int[] { 0, 10, 100 });
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 3 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.

the class StrandArtifact method annotate.

@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb);
    Utils.nonNull(vc);
    Utils.nonNull(likelihoods);
    // do not annotate the genotype fields for normal
    if (g.isHomRef()) {
        return;
    }
    pi.put(NO_ARTIFACT, 0.95);
    pi.put(ART_FWD, 0.025);
    pi.put(ART_REV, 0.025);
    // We use the allele with highest LOD score
    final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, -1);
    final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
    final Allele altAlelle = vc.getAlternateAllele(indexOfMaxTumorLod);
    final Collection<ReadLikelihoods<Allele>.BestAllele<Allele>> bestAlleles = likelihoods.bestAlleles(g.getSampleName());
    final int numFwdAltReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAlelle)).count();
    final int numRevAltReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAlelle)).count();
    final int numFwdReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative()).count();
    final int numRevReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative()).count();
    final int numAltReads = numFwdAltReads + numRevAltReads;
    final int numReads = numFwdReads + numRevReads;
    final EnumMap<StrandArtifactZ, Double> unnormalized_posterior_probabilities = new EnumMap<>(StrandArtifactZ.class);
    final EnumMap<StrandArtifactZ, Double> maximum_a_posteriori_allele_fraction_estimates = new EnumMap<>(StrandArtifactZ.class);
    /*** Compute the posterior probability of ARTIFACT_FWD and ARTIFACT_REV; it's a double integral over f and epsilon ***/
    // the integrand is a polynomial of degree n, where n is the number of reads at the locus
    // thus to integrate exactly with Gauss-Legendre we need (n/2)+1 points
    final int numIntegPointsForAlleleFraction = numReads / 2 + 1;
    final int numIntegPointsForEpsilon = (numReads + ALPHA + BETA - 2) / 2 + 1;
    final double likelihoodForArtifactFwd = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
    final double likelihoodForArtifactRev = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
    unnormalized_posterior_probabilities.put(ART_FWD, pi.get(ART_FWD) * likelihoodForArtifactFwd);
    unnormalized_posterior_probabilities.put(ART_REV, pi.get(ART_REV) * likelihoodForArtifactRev);
    /*** Compute the posterior probability of NO_ARTIFACT; evaluate a single integral over the allele fraction ***/
    final double likelihoodForNoArtifact = IntegrationUtils.integrate(f -> getIntegrandGivenNoArtifact(f, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction);
    unnormalized_posterior_probabilities.put(NO_ARTIFACT, pi.get(NO_ARTIFACT) * likelihoodForNoArtifact);
    final double[] posterior_probabilities = MathUtils.normalizeFromRealSpace(unnormalized_posterior_probabilities.values().stream().mapToDouble(Double::doubleValue).toArray());
    /*** Compute the maximum a posteriori estimate for allele fraction given strand artifact ***/
    // For a fixed f, integrate the double integral over epsilons. This gives us the likelihood p(x^+, x^- | f, z) for a fixed f, which is proportional to
    // the posterior probability of p(f | x^+, x^-, z)
    final int numSamplePoints = 100;
    final double[] samplePoints = GATKProtectedMathUtils.createEvenlySpacedPoints(0.0, 1.0, numSamplePoints);
    double[] likelihoodsGivenForwardArtifact = new double[numSamplePoints];
    double[] likelihoodsGivenReverseArtifact = new double[numSamplePoints];
    for (int i = 0; i < samplePoints.length; i++) {
        final double f = samplePoints[i];
        likelihoodsGivenForwardArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
        likelihoodsGivenReverseArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
    }
    final int maxAlleleFractionIndexFwd = MathUtils.maxElementIndex(likelihoodsGivenForwardArtifact);
    final int maxAlleleFractionIndexRev = MathUtils.maxElementIndex(likelihoodsGivenReverseArtifact);
    maximum_a_posteriori_allele_fraction_estimates.put(ART_FWD, samplePoints[maxAlleleFractionIndexFwd]);
    maximum_a_posteriori_allele_fraction_estimates.put(ART_REV, samplePoints[maxAlleleFractionIndexRev]);
    // In the absence of strand artifact, MAP estimate for f reduces to the sample alt allele fraction
    maximum_a_posteriori_allele_fraction_estimates.put(NO_ARTIFACT, (double) numAltReads / numReads);
    gb.attribute(POSTERIOR_PROBABILITIES_KEY, posterior_probabilities);
    gb.attribute(MAP_ALLELE_FRACTIONS_KEY, maximum_a_posteriori_allele_fraction_estimates.values().stream().mapToDouble(Double::doubleValue).toArray());
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) java.util(java.util) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) StrandArtifactZ(org.broadinstitute.hellbender.tools.walkers.annotator.StrandArtifact.StrandArtifactZ) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils) Allele(htsjdk.variant.variantcontext.Allele) StrandArtifactZ(org.broadinstitute.hellbender.tools.walkers.annotator.StrandArtifact.StrandArtifactZ)

Example 4 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.

the class GenotypeConcordanceTest method testGenotypeConcordanceDetermineStateFilter.

@Test
public void testGenotypeConcordanceDetermineStateFilter() throws Exception {
    final Set<String> filters = new HashSet<>(Arrays.asList("BAD!"));
    // Filtering on the variant context
    final List<Allele> alleles1 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles1).genotypes(gt1).filters(filters).make();
    final List<Allele> alleles2 = makeUniqueListOfAlleles(Aref, T);
    final Genotype gt2 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, T));
    final VariantContext vcNotFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles2).genotypes(gt2).make();
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcFiltered, CallState.VC_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcFiltered, CallState.VC_FILTERED, 0, 0);
    // Filtering on the genotype
    final List<String> gtFilters = new ArrayList<>(Arrays.asList("WICKED"));
    final List<Allele> alleles3 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt3 = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).filters(gtFilters).make();
    final VariantContext vcGtFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles3).genotypes(gt3).make();
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VariantContext(htsjdk.variant.variantcontext.VariantContext) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 5 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.

the class GenotypeConcordanceTest method testGenotypeConcordanceDetermineStateGq.

@Test
public void testGenotypeConcordanceDetermineStateGq() throws Exception {
    final List<Allele> allelesNormal = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtNormal = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcNormal = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesNormal).genotypes(gtNormal).make();
    final List<Allele> allelesLowGq = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtLowGq = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).GQ(4).make();
    final VariantContext vcLowGq = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesLowGq).genotypes(gtLowGq).make();
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.LOW_GQ, vcNormal, CallState.HET_REF_VAR1, 20, 0);
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);
    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowGq, CallState.LOW_GQ, 20, 0);
    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.LOW_GQ, vcLowGq, CallState.LOW_GQ, 20, 0);
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VariantContext(htsjdk.variant.variantcontext.VariantContext) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Aggregations

GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)51 Genotype (htsjdk.variant.variantcontext.Genotype)43 VariantContext (htsjdk.variant.variantcontext.VariantContext)37 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)37 Allele (htsjdk.variant.variantcontext.Allele)34 ArrayList (java.util.ArrayList)30 VCFHeader (htsjdk.variant.vcf.VCFHeader)27 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)23 HashSet (java.util.HashSet)22 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)21 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)20 File (java.io.File)17 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)15 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)14 Collectors (java.util.stream.Collectors)14 HashMap (java.util.HashMap)13 List (java.util.List)13 Set (java.util.Set)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)12 VCFHeaderLineType (htsjdk.variant.vcf.VCFHeaderLineType)11