Search in sources :

Example 31 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.

private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final List<VariantContext> callsVariants = readVCFFile(callsFile);
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
    for (final VariantContext call : callsVariants) {
        final List<Target> overlappingTargets = targets.targets(call);
        final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
        final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
        if (!overlappingTruth.isEmpty()) {
            continue;
        }
        @SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
        final VariantContext matchingOutput = matchingOutputOptional.get();
        final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final Genotype callGenotype = call.getGenotype(sample);
            final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
            final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
            Assert.assertEquals(expectedCall, actualCall);
            final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            Assert.assertEquals(actualDiscovered, expectedDiscovered);
            final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
            if (expectedCall.isCalled() && actualDiscovered) {
                expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
            }
            final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
            Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
            if (expectedCall.isCalled()) {
                sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
        }
        final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
        final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
        Assert.assertEquals(observedAN, expectedAN);
        final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
        final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
        Assert.assertNotNull(observedAF);
        assertEquals(observedAF, expectedAF, 0.01);
        Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) java.util(java.util) DataProvider(org.testng.annotations.DataProvider) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) GATKProtectedVariantContextUtils(org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) VCFConstants(htsjdk.variant.vcf.VCFConstants) IOException(java.io.IOException) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Field(java.lang.reflect.Field) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) XHMMSegmentGenotyper(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentGenotyper) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Target(org.broadinstitute.hellbender.tools.exome.Target) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) Target(org.broadinstitute.hellbender.tools.exome.Target) Allele(htsjdk.variant.variantcontext.Allele) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 32 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

the class ConvertGSVariantsToSegmentsIntegrationTest method composeExpectedSegments.

private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final VCFFileReader reader = new VCFFileReader(vcf, false);
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    reader.iterator().forEachRemaining(vc -> {
        final int targetCount = targets.indexRange(vc).size();
        for (final Genotype genotype : vc.getGenotypes()) {
            final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
            final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(",")).mapToDouble(Double::parseDouble).toArray();
            final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
            final CopyNumberTriState call = expectedCall(cn);
            final double exactLog10Prob = expectedExactLog10(call, cnp);
            final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()), 0.000, call, -10.0 * exactLog10Prob, Double.NaN, Double.NaN, Double.NaN, -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum));
            result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
        }
    });
    return result;
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) ArrayList(java.util.ArrayList) Genotype(htsjdk.variant.variantcontext.Genotype) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState) HiddenStateSegment(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment) HiddenStateSegmentRecord(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 33 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

the class XHMMModelUnitTest method testLogTransitionProbabilityOverlappingTargets.

@Test(dataProvider = "testData", dependsOnMethods = "testInstantiation")
public void testLogTransitionProbabilityOverlappingTargets(final double eventRate, final double meanEventSize, final double deletionMean, final double duplicationMean) {
    final XHMMModel model = new XHMMModel(eventRate, meanEventSize, deletionMean, duplicationMean);
    final Target fromTarget = new Target("target1", new SimpleInterval("1", 1, 100));
    final Target toTarget = new Target("target2", new SimpleInterval("1", 50, 150));
    assertTransitionProbabilities(eventRate, meanEventSize, model, fromTarget, toTarget);
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 34 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

the class XHMMSegmentCallerIntegrationTest method assertOutputIsInOrder.

private void assertOutputIsInOrder(final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> outputRecords, final TargetCollection<Target> targets) {
    for (int i = 1; i < outputRecords.size(); i++) {
        final HiddenStateSegmentRecord<CopyNumberTriState, Target> nextRecord = outputRecords.get(i);
        final HiddenStateSegmentRecord<CopyNumberTriState, Target> previousRecord = outputRecords.get(i - 1);
        final IndexRange nextRange = targets.indexRange(nextRecord.getSegment());
        final IndexRange previousRange = targets.indexRange(previousRecord.getSegment());
        Assert.assertTrue(nextRange.from >= previousRange.from);
    }
}
Also used : IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)

Example 35 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

the class XHMMSegmentCallerIntegrationTest method testRunCommandLine.

//TODO: this test used to contain a test of concordance with XHMM.  It no longer does that because our model has
//TODO: diverged from XHMM's.  Eventually the right thing to do is use the simulateChain() method to generate
//TODO: simulated data for some artificial set of CNV segments and to test concordance with those segments.
//TODO: however we still use XHMM's emission model, which is both not generative and quite silly.  Once we
//TODO: have a generative model of coverage we can modify simulateChain() accordingly and then write a concordance
//TODO: test here.  Until then, we do not have an integration test but we do have our ongoing evaluations, which
//TODO: show the superiority of our modifications versus the original XHMM model.
@Test(dataProvider = "simulatedChainData")
public File testRunCommandLine(final XHMMData chain) {
    final File inputFile = writeChainInTempFile(chain);
    final File outputFile = createTempFile("output", ".tab");
    runCommandLine(chain, inputFile, outputFile);
    Assert.assertTrue(outputFile.exists());
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(REALISTIC_TARGETS_FILE);
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> outputRecords = readOutputRecords(outputFile);
    assertOutputIsInOrder(outputRecords, targets);
    assertOutputHasConsistentNumberOfTargets(outputRecords, targets);
    final Map<String, List<HiddenStateSegmentRecord<CopyNumberTriState, Target>>> outputRecordsBySample = splitOutputRecordBySample(outputRecords);
    assertSampleNames(outputRecordsBySample.keySet(), chain);
    for (final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> sampleRecords : outputRecordsBySample.values()) {
        assertSampleSegmentsCoverAllTargets(sampleRecords, targets);
        assertSampleSegmentsCoordinates(sampleRecords, targets);
    }
    return outputFile;
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState) HiddenStateSegmentRecord(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

Target (org.broadinstitute.hellbender.tools.exome.Target)110 Test (org.testng.annotations.Test)56 File (java.io.File)52 Collectors (java.util.stream.Collectors)42 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)42 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)38 IOException (java.io.IOException)32 java.util (java.util)32 IntStream (java.util.stream.IntStream)32 Assert (org.testng.Assert)32 Pair (org.apache.commons.lang3.tuple.Pair)26 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)26 UserException (org.broadinstitute.hellbender.exceptions.UserException)26 Genotype (htsjdk.variant.variantcontext.Genotype)22 List (java.util.List)22 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)22 CopyNumberTriState (org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)22 DataProvider (org.testng.annotations.DataProvider)22 VariantContext (htsjdk.variant.variantcontext.VariantContext)20 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)20