Search in sources :

Example 66 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.

the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.

private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final List<VariantContext> callsVariants = readVCFFile(callsFile);
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
    for (final VariantContext call : callsVariants) {
        final List<Target> overlappingTargets = targets.targets(call);
        final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
        final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
        if (!overlappingTruth.isEmpty()) {
            continue;
        }
        @SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
        final VariantContext matchingOutput = matchingOutputOptional.get();
        final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final Genotype callGenotype = call.getGenotype(sample);
            final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
            final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
            Assert.assertEquals(expectedCall, actualCall);
            final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            Assert.assertEquals(actualDiscovered, expectedDiscovered);
            final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
            if (expectedCall.isCalled() && actualDiscovered) {
                expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
            }
            final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
            Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
            if (expectedCall.isCalled()) {
                sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
        }
        final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
        final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
        Assert.assertEquals(observedAN, expectedAN);
        final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
        final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
        Assert.assertNotNull(observedAF);
        assertEquals(observedAF, expectedAF, 0.01);
        Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) java.util(java.util) DataProvider(org.testng.annotations.DataProvider) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) GATKProtectedVariantContextUtils(org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) VCFConstants(htsjdk.variant.vcf.VCFConstants) IOException(java.io.IOException) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Field(java.lang.reflect.Field) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) XHMMSegmentGenotyper(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentGenotyper) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Target(org.broadinstitute.hellbender.tools.exome.Target) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) Target(org.broadinstitute.hellbender.tools.exome.Target) Allele(htsjdk.variant.variantcontext.Allele) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 67 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.

the class ReferenceConfidenceModelUnitTest method testRefConfidencePartialReads.

@Test
public void testRefConfidencePartialReads() {
    final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples, 2);
    final IndependentSampleGenotypesModel genotypingModel = new IndependentSampleGenotypesModel();
    final String ref = "ACGTAACCGGTT";
    for (int readLen = 3; readLen < ref.length(); readLen++) {
        for (int start = 0; start < ref.length() - readLen; start++) {
            final RefConfData data = new RefConfData(ref, 0);
            final List<Haplotype> haplotypes = Arrays.asList(data.getRefHap());
            final List<VariantContext> calls = Collections.emptyList();
            data.getActiveRegion().add(data.makeRead(start, readLen));
            final ReadLikelihoods<Haplotype> likelihoods = createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
            final List<Integer> expectedDPs = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getSpan().size(), 0));
            for (int i = start; i < readLen + start; i++) expectedDPs.set(i, 1);
            final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, calls);
            checkReferenceModelResult(data, contexts, expectedDPs, calls);
        }
    }
}
Also used : IndependentSampleGenotypesModel(org.broadinstitute.hellbender.tools.walkers.genotyper.IndependentSampleGenotypesModel) VariantContext(htsjdk.variant.variantcontext.VariantContext) HomogeneousPloidyModel(org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel) PloidyModel(org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel) HomogeneousPloidyModel(org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 68 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.

the class ReferenceConfidenceModelUnitTest method checkReferenceModelResult.

private void checkReferenceModelResult(final RefConfData data, final List<VariantContext> contexts, final List<Integer> expectedDPs, final List<VariantContext> calls) {
    Assert.assertNotNull(contexts);
    final SimpleInterval loc = data.getActiveRegion().getExtendedSpan();
    final List<Boolean> seenBP = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getSpan().size(), false));
    for (int i = 0; i < loc.size(); i++) {
        final GenomeLoc curPos = parser.createGenomeLoc(loc.getContig(), loc.getStart() + i);
        final VariantContext call = model.getOverlappingVariantContext(curPos, calls);
        final VariantContext refModel = model.getOverlappingVariantContext(curPos, contexts);
        if (!data.getActiveRegion().getSpan().contains(curPos)) {
            // part of the extended interval, but not the full interval
            Assert.assertNull(refModel);
            continue;
        }
        if (call != null) {
            if (call.isVariant() && refModel.getType() == VariantContext.Type.SYMBOLIC) {
                //Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
                // must be a deletion.
                Assert.assertTrue(call.getReference().length() > 1);
                // the deletion must not start at the same position
                Assert.assertTrue(call.getStart() < refModel.getStart());
                Assert.assertEquals(call.getReference().getBaseString().substring(refModel.getStart() - call.getStart(), refModel.getStart() - call.getStart() + 1), refModel.getReference().getBaseString(), // the reference must be the same.
                "" + data.getRefHap());
                // No confidence in the reference hom-ref call across the deletion
                Assert.assertTrue(refModel.getGenotype(0).getGQ() <= 0);
                // the reference and the lonelly <NON_REF>
                Assert.assertEquals(refModel.getAlleles().size(), 2);
                Assert.assertEquals(refModel.getAlleles().get(1), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
            } else {
                Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
            }
        } else {
            final int expectedDP = expectedDPs.get(curPos.getStart() - data.getActiveRegion().getSpan().getStart());
            Assert.assertEquals(refModel.getStart(), loc.getStart() + i);
            Assert.assertEquals(refModel.getEnd(), loc.getStart() + i);
            Assert.assertFalse(refModel.hasLog10PError());
            Assert.assertEquals(refModel.getAlternateAlleles().size(), 1);
            Assert.assertEquals(refModel.getAlternateAllele(0), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
            Assert.assertTrue(refModel.hasGenotype(sample));
            final Genotype g = refModel.getGenotype(sample);
            Assert.assertTrue(g.hasAD());
            Assert.assertTrue(g.hasDP());
            Assert.assertEquals(g.getDP(), expectedDP);
            Assert.assertTrue(g.hasGQ());
            Assert.assertTrue(g.hasPL());
        }
        final VariantContext vc = call == null ? refModel : call;
        if (curPos.getStart() == vc.getStart()) {
            for (int pos = vc.getStart(); pos <= vc.getEnd(); pos++) {
                final int j = pos - data.getActiveRegion().getSpan().getStart();
                Assert.assertFalse(seenBP.get(j));
                seenBP.set(j, true);
            }
        }
    }
    for (int i = 0; i < seenBP.size(); i++) {
        Assert.assertEquals((boolean) seenBP.get(i), true);
    }
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) GenomeLoc(org.broadinstitute.hellbender.utils.GenomeLoc)

Example 69 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.

the class ReferenceConfidenceModelUnitTest method testOverlappingVariantContext.

@Test
public void testOverlappingVariantContext() {
    final VariantContext vc10 = GATKVariantContextUtils.makeFromAlleles("test", "1", 10, Arrays.asList("A", "C"));
    final VariantContext vc13 = GATKVariantContextUtils.makeFromAlleles("test", "1", 13, Arrays.asList("A", "C"));
    final VariantContext vc12_15 = GATKVariantContextUtils.makeFromAlleles("test", "1", 12, Arrays.asList("ACAT", "A"));
    final VariantContext vc18 = GATKVariantContextUtils.makeFromAlleles("test", "1", 18, Arrays.asList("A", "ACAT"));
    final List<VariantContext> calls = Arrays.asList(vc13, vc12_15, vc18, vc10);
    checkOverlapping(8, calls, null);
    checkOverlapping(9, calls, null);
    checkOverlapping(10, calls, vc10);
    checkOverlapping(11, calls, null);
    checkOverlapping(12, calls, vc12_15);
    checkOverlapping(13, calls, vc13);
    checkOverlapping(14, calls, vc12_15);
    checkOverlapping(15, calls, vc12_15);
    checkOverlapping(16, calls, null);
    checkOverlapping(17, calls, null);
    checkOverlapping(18, calls, vc18);
    checkOverlapping(19, calls, null);
    checkOverlapping(20, calls, null);
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 70 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.

the class ReferenceConfidenceModelUnitTest method checkOverlapping.

private void checkOverlapping(final int pos, Collection<VariantContext> calls, final VariantContext expected) {
    final GenomeLoc loc = parser.createGenomeLoc(parser.getSequenceDictionary().getSequences().get(0).getSequenceName(), pos, pos);
    final VariantContext actual = model.getOverlappingVariantContext(loc, calls);
    Assert.assertEquals(actual, expected);
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) GenomeLoc(org.broadinstitute.hellbender.utils.GenomeLoc)

Aggregations

VariantContext (htsjdk.variant.variantcontext.VariantContext)237 Test (org.testng.annotations.Test)119 File (java.io.File)98 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)68 Allele (htsjdk.variant.variantcontext.Allele)55 Genotype (htsjdk.variant.variantcontext.Genotype)49 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)43 Collectors (java.util.stream.Collectors)43 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)43 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)41 FeatureDataSource (org.broadinstitute.hellbender.engine.FeatureDataSource)36 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)35 java.util (java.util)33 IOException (java.io.IOException)25 Assert (org.testng.Assert)25 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)22 UserException (org.broadinstitute.hellbender.exceptions.UserException)22 Target (org.broadinstitute.hellbender.tools.exome.Target)22 DataProvider (org.testng.annotations.DataProvider)22