Search in sources :

Example 41 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.

private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final List<VariantContext> callsVariants = readVCFFile(callsFile);
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
    for (final VariantContext call : callsVariants) {
        final List<Target> overlappingTargets = targets.targets(call);
        final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
        final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
        if (!overlappingTruth.isEmpty()) {
            continue;
        }
        @SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
        final VariantContext matchingOutput = matchingOutputOptional.get();
        final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final Genotype callGenotype = call.getGenotype(sample);
            final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
            final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
            Assert.assertEquals(expectedCall, actualCall);
            final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            Assert.assertEquals(actualDiscovered, expectedDiscovered);
            final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
            if (expectedCall.isCalled() && actualDiscovered) {
                expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
            }
            final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
            Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
            if (expectedCall.isCalled()) {
                sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
        }
        final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
        final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
        Assert.assertEquals(observedAN, expectedAN);
        final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
        final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
        Assert.assertNotNull(observedAF);
        assertEquals(observedAF, expectedAF, 0.01);
        Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) java.util(java.util) DataProvider(org.testng.annotations.DataProvider) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) GATKProtectedVariantContextUtils(org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) VCFConstants(htsjdk.variant.vcf.VCFConstants) IOException(java.io.IOException) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Field(java.lang.reflect.Field) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) XHMMSegmentGenotyper(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentGenotyper) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Target(org.broadinstitute.hellbender.tools.exome.Target) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) Target(org.broadinstitute.hellbender.tools.exome.Target) Allele(htsjdk.variant.variantcontext.Allele) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 42 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class XHMMModelUnitTest method testLogEmissionProbability.

@Test(dataProvider = "testData", dependsOnMethods = "testInstantiation")
public void testLogEmissionProbability(final double eventStartProbability, final double meanEventSize, final double deletionMean, final double duplicationMean) {
    final XHMMModel model = new XHMMModel(eventStartProbability, meanEventSize, deletionMean, duplicationMean);
    final Target target = new Target("NAME");
    final double logDenominator = Math.log(Math.sqrt(2 * Math.PI));
    final Function<Double, Double> neutralEmission = x -> -.5 * (x * x) - logDenominator;
    final Function<Double, Double> deletionEmission = x -> -.5 * Math.pow(x - deletionMean, 2) - logDenominator;
    final Function<Double, Double> duplicationEmission = x -> -.5 * Math.pow(x - duplicationMean, 2) - logDenominator;
    for (final double coverage : TEST_COVERAGE_VALUES) {
        final double neutralObserved = model.logEmissionProbability(new XHMMEmissionData(coverage), CopyNumberTriState.NEUTRAL, target);
        final double deletionObserved = model.logEmissionProbability(new XHMMEmissionData(coverage), CopyNumberTriState.DELETION, target);
        final double duplicationObserved = model.logEmissionProbability(new XHMMEmissionData(coverage), CopyNumberTriState.DUPLICATION, target);
        final double neutralExpected = neutralEmission.apply(coverage);
        final double deletionExpected = deletionEmission.apply(coverage);
        final double duplicationExpected = duplicationEmission.apply(coverage);
        Assert.assertEquals(neutralObserved, neutralExpected, "neutral emission for " + coverage);
        Assert.assertEquals(deletionObserved, deletionExpected);
        Assert.assertEquals(duplicationObserved, duplicationExpected);
    }
}
Also used : HashSet(java.util.HashSet) Arrays(java.util.Arrays) List(java.util.List) Assert(org.testng.Assert) DataProvider(org.testng.annotations.DataProvider) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState) CopyNumberTriStateTransitionProbabilityCache(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateTransitionProbabilityCache) Test(org.testng.annotations.Test) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Function(java.util.function.Function) Target(org.broadinstitute.hellbender.tools.exome.Target) Test(org.testng.annotations.Test)

Example 43 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class XHMMModelUnitTest method testLogTransitionProbabilityInDifferentChromosomes.

// different chromosomes are independent Markov chains; hence the transition probabilities should equal the prior
// probabilities
@Test(dataProvider = "testData", dependsOnMethods = { "testInstantiation", "testLogPrior" })
public void testLogTransitionProbabilityInDifferentChromosomes(final double eventStartProbability, final double meanEventSize, final double deletionMean, final double duplicationMean) {
    final XHMMModel model = new XHMMModel(eventStartProbability, meanEventSize, deletionMean, duplicationMean);
    final Target fromTarget = new Target("target1", new SimpleInterval("1", 1, 100));
    final Target toTarget = new Target("target2", new SimpleInterval("2", 1, 100));
    assertTransitionProbabilities(eventStartProbability, meanEventSize, model, fromTarget, toTarget);
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 44 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class XHMMModelUnitTest method testDistance.

@Test(dependsOnMethods = "testInstantiation")
public void testDistance() {
    final XHMMModel model = new XHMMModel(0.5, 10, -1, 1);
    // different chromosomes
    Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 1, 100)), new Target("target2", new SimpleInterval("2", 1, 100))), Double.POSITIVE_INFINITY);
    // commutativity
    Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 1, 100)), new Target("target2", new SimpleInterval("1", 200, 300))), XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 200, 300)), new Target("target2", new SimpleInterval("1", 1, 100))));
    Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 100, 200)), new Target("target2", new SimpleInterval("1", 200, 300))), 100, EPSILON);
    Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 100, 200)), new Target("target2", new SimpleInterval("1", 100, 110))), 45, EPSILON);
    Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 100, 200)), new Target("target2", new SimpleInterval("1", 150, 250))), 50, EPSILON);
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 45 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

the class TargetCoverageSexGenotypeCalculator method processReadCountsAndTargets.

/**
     * Processes raw read counts and targets:
     * <dl>
     *     <dt> If more than one sample is present in the collection, filters out fully uncovered targets
     *     from read counts and removes the uncovered targets from the target list</dt>
     *
     *     <dt> Otherwise, does nothing and warns the user
     *     </dt>
     * </dl>
     *
     * @param rawReadCounts raw read count collection
     * @param targetList user provided target list
     * @return pair of processed read counts and targets
     */
private ImmutablePair<ReadCountCollection, List<Target>> processReadCountsAndTargets(@Nonnull final ReadCountCollection rawReadCounts, @Nonnull final List<Target> targetList) {
    final ReadCountCollection finalReadCounts;
    final List<Target> finalTargetList;
    /* remove totally uncovered targets */
    if (rawReadCounts.columnNames().size() > 1) {
        finalReadCounts = ReadCountCollectionUtils.removeTotallyUncoveredTargets(rawReadCounts, logger);
        final Set<Target> targetSetFromProcessedReadCounts = new HashSet<>(finalReadCounts.targets());
        finalTargetList = targetList.stream().filter(targetSetFromProcessedReadCounts::contains).collect(Collectors.toList());
    } else {
        final long numUncoveredTargets = rawReadCounts.records().stream().filter(rec -> (int) rec.getDouble(0) == 0).count();
        final long numAllTargets = rawReadCounts.targets().size();
        logger.info("Since only one sample is given for genotyping, the user is responsible for asserting" + " the aptitude of targets. Fully uncovered (irrelevant) targets can not be automatically" + " identified (total targets: " + numAllTargets + ", uncovered targets: " + numUncoveredTargets + ")");
        finalReadCounts = rawReadCounts;
        finalTargetList = targetList;
    }
    return ImmutablePair.of(finalReadCounts, finalTargetList);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Sets(com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) UserException(org.broadinstitute.hellbender.exceptions.UserException) Target(org.broadinstitute.hellbender.tools.exome.Target) Median(org.apache.commons.math3.stat.descriptive.rank.Median) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) LogManager(org.apache.logging.log4j.LogManager) Nonnull(javax.annotation.Nonnull) Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection)

Aggregations

Target (org.broadinstitute.hellbender.tools.exome.Target)110 Test (org.testng.annotations.Test)56 File (java.io.File)52 Collectors (java.util.stream.Collectors)42 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)42 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)38 IOException (java.io.IOException)32 java.util (java.util)32 IntStream (java.util.stream.IntStream)32 Assert (org.testng.Assert)32 Pair (org.apache.commons.lang3.tuple.Pair)26 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)26 UserException (org.broadinstitute.hellbender.exceptions.UserException)26 Genotype (htsjdk.variant.variantcontext.Genotype)22 List (java.util.List)22 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)22 CopyNumberTriState (org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)22 DataProvider (org.testng.annotations.DataProvider)22 VariantContext (htsjdk.variant.variantcontext.VariantContext)20 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)20