Search in sources :

Example 31 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk-protected by broadinstitute.

the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.

private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
    final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream().map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0))).collect(Collectors.toSet());
    final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
    final GenotypeBuilder builder = new GenotypeBuilder(sample);
    // Set the call allele.
    builder.alleles(Collections.singletonList(calledAllele));
    // Set the truth allele.
    builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
    // Annotate the genotype with the number of calls.
    builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
    // When there is more than one qualified type of event we indicate how many.
    builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, CopyNumberTriStateAllele.ALL_ALLELES.stream().mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count()).toArray());
    // Calculate the length in targets of the call as the sum across all calls.
    builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
    // Calculate call quality-- if there is more than one overlapping call we take the maximum qual one.
    builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
    // Calculate the truth copy fraction.
    builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
    // Calculate the truth call quality.
    final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
    builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
    // Set genotype filters:
    final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
    builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
    // Calculate the evaluation class (TP, FN, etc.). Only if there is actually either a truth or a call that is not ref.
    if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || truthAllele != CopyNumberTriStateAllele.REF) {
        final EvaluationClass evaluationClass;
        if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
            evaluationClass = EvaluationClass.FALSE_NEGATIVE;
        } else if (calledAlleles.size() == 1) {
            evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : /* else */
            EvaluationClass.DISCORDANT_POSITIVE;
        } else {
            evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
        }
        builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
    }
    return builder.make();
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) Allele(htsjdk.variant.variantcontext.Allele) htsjdk.variant.vcf(htsjdk.variant.vcf) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) Argument(org.broadinstitute.barclay.argparser.Argument) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) ArgumentCollection(org.broadinstitute.barclay.argparser.ArgumentCollection) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) Function(java.util.function.Function) Pair(org.apache.commons.lang3.tuple.Pair) StreamSupport(java.util.stream.StreamSupport) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils) Locatable(htsjdk.samtools.util.Locatable) CommandLineProgram(org.broadinstitute.hellbender.cmdline.CommandLineProgram) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) IOException(java.io.IOException) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) HMMPostProcessor(org.broadinstitute.hellbender.utils.hmm.segmentation.HMMPostProcessor) XHMMSegmentGenotyper(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentGenotyper) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Target(org.broadinstitute.hellbender.tools.exome.Target) XHMMSegmentCaller(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentCaller) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Allele(htsjdk.variant.variantcontext.Allele) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder)

Example 32 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk-protected by broadinstitute.

the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.

private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final VariantContext truth, final List<VariantContext> calls, final TargetCollection<Target> targets) {
    final Genotype truthGenotype = truth.getGenotype(sample);
    // if there is no truth genotype for that sample, we output the "empty" genotype.
    if (truthGenotype == null) {
        return GenotypeBuilder.create(sample, Collections.emptyList());
    }
    final int truthCopyNumber = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, GS_COPY_NUMBER_FORMAT_KEY, truthNeutralCopyNumber);
    final CopyNumberTriStateAllele truthAllele = copyNumberToTrueAllele(truthCopyNumber);
    final List<Pair<VariantContext, Genotype>> allCalls = calls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(pair -> pair.getRight() != null).filter(pair -> GATKProtectedVariantContextUtils.getAttributeAsString(pair.getRight(), XHMMSegmentGenotyper.DISCOVERY_KEY, XHMMSegmentGenotyper.DISCOVERY_FALSE).equals(XHMMSegmentGenotyper.DISCOVERY_TRUE)).collect(Collectors.toList());
    final List<Pair<VariantContext, Genotype>> qualifiedCalls = composeQualifyingCallsList(targets, allCalls);
    return buildAndAnnotateTruthOverlappingGenotype(sample, targets, truthGenotype, truthCopyNumber, truthAllele, qualifiedCalls);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) Allele(htsjdk.variant.variantcontext.Allele) htsjdk.variant.vcf(htsjdk.variant.vcf) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) Argument(org.broadinstitute.barclay.argparser.Argument) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) ArgumentCollection(org.broadinstitute.barclay.argparser.ArgumentCollection) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) Function(java.util.function.Function) Pair(org.apache.commons.lang3.tuple.Pair) StreamSupport(java.util.stream.StreamSupport) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils) Locatable(htsjdk.samtools.util.Locatable) CommandLineProgram(org.broadinstitute.hellbender.cmdline.CommandLineProgram) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) IOException(java.io.IOException) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) HMMPostProcessor(org.broadinstitute.hellbender.utils.hmm.segmentation.HMMPostProcessor) XHMMSegmentGenotyper(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentGenotyper) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Target(org.broadinstitute.hellbender.tools.exome.Target) XHMMSegmentCaller(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentCaller) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) Genotype(htsjdk.variant.variantcontext.Genotype) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 33 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk-protected by broadinstitute.

the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithOverlappingTruthConcordance.

private void checkOutputCallsWithOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final List<VariantContext> callsVariants = readVCFFile(callsFile);
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
    for (final VariantContext truth : truthVariants) {
        final List<Target> overlappingTargets = targets.targets(truth);
        final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
        final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
        if (overlappingTargets.isEmpty()) {
            Assert.assertTrue(overlappingOutput.isEmpty());
            continue;
        }
        @SuppressWarnings("all") final VariantContext matchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findAny().get();
        final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final List<Pair<VariantContext, Genotype>> sampleCalls = overlappingCalls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(p -> XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(p.getRight().getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY))).filter(p -> callPassFilters(p.getLeft(), p.getRight(), targets, filteringOptions)).collect(Collectors.toList());
            final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
            sampleCalls.forEach(p -> {
                expectedCounts[CopyNumberTriStateAllele.valueOf(p.getRight().getAllele(0)).index()]++;
            });
            final int[] actualCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0);
            Assert.assertEquals(actualCounts, expectedCounts, Arrays.toString(actualCounts) + " " + Arrays.toString(expectedCounts));
            final int expectedTotalCount = (int) MathUtils.sum(expectedCounts);
            final int actualTotalCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1);
            Assert.assertEquals(actualTotalCount, expectedTotalCount);
            final int expectedTargetCount = sampleCalls.stream().mapToInt(p -> targets.targetCount(p.getLeft())).sum();
            final int observedTargetCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, -1);
            Assert.assertEquals(observedTargetCount, expectedTargetCount);
            final Allele truthCallAllele = outputTruthAllele(outputGenotype);
            final boolean isMixed = IntStream.of(actualCounts).filter(i -> i > 0).count() > 1;
            final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
            if (sampleCalls.size() > 0 && !isMixed) {
                final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
                final CopyNumberTriStateAllele expectedCall = CopyNumberTriStateAllele.valueOf(bestCall.getRight().getAllele(0));
                final CopyNumberTriStateAllele actualCall = CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0));
                Assert.assertEquals(actualCall, expectedCall);
                sampleCallsCount[expectedCall.index()]++;
                if (!truthCallAllele.isReference()) {
                    if (truthCallAllele.equals(actualCall)) {
                        Assert.assertEquals(evalClass, EvaluationClass.TRUE_POSITIVE.acronym);
                    } else if (!truthCallAllele.isNoCall()) {
                        Assert.assertEquals(evalClass, EvaluationClass.DISCORDANT_POSITIVE.acronym);
                    } else {
                        Assert.assertNull(evalClass);
                    }
                } else if (truthCallAllele.isReference()) {
                    Assert.assertEquals(evalClass, EvaluationClass.FALSE_POSITIVE.acronym);
                }
            } else {
                Assert.assertEquals(Allele.NO_CALL, outputGenotype.getAllele(0));
                if (sampleCalls.isEmpty()) {
                    Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.FALSE_NEGATIVE.acronym : null);
                    Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), 0.0);
                } else {
                    Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.MIXED_POSITIVE.acronym : EvaluationClass.FALSE_POSITIVE.acronym);
                    final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
                    Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), callGQ(bestCall.getRight()));
                }
            }
        }
        final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
        final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
        Assert.assertEquals(observedAN, expectedAN);
        final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
        final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
        Assert.assertNotNull(observedAF);
        assertEquals(observedAF, expectedAF, 0.01);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) java.util(java.util) DataProvider(org.testng.annotations.DataProvider) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) GATKProtectedVariantContextUtils(org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) VCFConstants(htsjdk.variant.vcf.VCFConstants) IOException(java.io.IOException) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Field(java.lang.reflect.Field) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) XHMMSegmentGenotyper(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentGenotyper) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Target(org.broadinstitute.hellbender.tools.exome.Target) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Target(org.broadinstitute.hellbender.tools.exome.Target) Allele(htsjdk.variant.variantcontext.Allele) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 34 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.

the class AssemblyRegionUnitTest method testCreateFromReadShard.

@Test
public void testCreateFromReadShard() {
    final Path testBam = IOUtils.getPath(NA12878_20_21_WGS_bam);
    final File reference = new File(b37_reference_20_21);
    final SimpleInterval shardInterval = new SimpleInterval("20", 10000000, 10001000);
    final SimpleInterval paddedShardInterval = new SimpleInterval(shardInterval.getContig(), shardInterval.getStart() - 100, shardInterval.getEnd() + 100);
    // Traversal settings to match the GATK 3/4 HaplotypeCaller settings when the expected output was generated:
    final int minRegionSize = 50;
    final int maxRegionSize = 300;
    final int regionPadding = 100;
    final double activeProbThreshold = 0.002;
    final int maxProbPropagationDistance = 50;
    // This mock evaluator returns exactly the values that the GATK 4 HaplotypeCallerEngine's isActive()
    // method returns for this region. We don't have direct access to the HaplotypeCallerEngine since
    // we're in public, so we need to use a mock.
    final AssemblyRegionEvaluator mockEvaluator = new AssemblyRegionEvaluator() {

        private final List<SimpleInterval> activeSites = Arrays.asList(new SimpleInterval("20", 9999996, 9999996), new SimpleInterval("20", 9999997, 9999997), new SimpleInterval("20", 10000117, 10000117), new SimpleInterval("20", 10000211, 10000211), new SimpleInterval("20", 10000439, 10000439), new SimpleInterval("20", 10000598, 10000598), new SimpleInterval("20", 10000694, 10000694), new SimpleInterval("20", 10000758, 10000758), new SimpleInterval("20", 10001019, 10001019));

        @Override
        public ActivityProfileState isActive(AlignmentContext locusPileup, ReferenceContext referenceContext, FeatureContext featureContext) {
            final SimpleInterval pileupInterval = new SimpleInterval(locusPileup);
            return activeSites.contains(pileupInterval) ? new ActivityProfileState(pileupInterval, 1.0) : new ActivityProfileState(pileupInterval, 0.0);
        }
    };
    final List<Pair<SimpleInterval, Boolean>> expectedResults = Arrays.asList(// GATK 3.4's results).
    Pair.of(new SimpleInterval("20", 9999902, 9999953), false), Pair.of(new SimpleInterval("20", 9999954, 10000039), true), Pair.of(new SimpleInterval("20", 10000040, 10000079), false), Pair.of(new SimpleInterval("20", 10000080, 10000154), true), Pair.of(new SimpleInterval("20", 10000155, 10000173), false), Pair.of(new SimpleInterval("20", 10000174, 10000248), true), Pair.of(new SimpleInterval("20", 10000249, 10000401), false), Pair.of(new SimpleInterval("20", 10000402, 10000476), true), Pair.of(new SimpleInterval("20", 10000477, 10000560), false), Pair.of(new SimpleInterval("20", 10000561, 10000635), true), Pair.of(new SimpleInterval("20", 10000636, 10000656), false), Pair.of(new SimpleInterval("20", 10000657, 10000795), true), Pair.of(new SimpleInterval("20", 10000796, 10000981), false), Pair.of(new SimpleInterval("20", 10000982, 10001056), true), Pair.of(new SimpleInterval("20", 10001057, 10001100), false));
    try (final ReadsDataSource readsSource = new ReadsDataSource(testBam);
        final ReferenceDataSource refSource = new ReferenceFileSource(reference)) {
        // Set the shard's read filter to match the GATK 3/4 HaplotypeCaller settings when the expected output was generated:
        final LocalReadShard shard = new LocalReadShard(shardInterval, paddedShardInterval, readsSource);
        shard.setReadFilter(new CountingReadFilter(new MappingQualityReadFilter(20)).and(new CountingReadFilter(ReadFilterLibrary.MAPPING_QUALITY_AVAILABLE)).and(new CountingReadFilter(ReadFilterLibrary.MAPPED)).and(new CountingReadFilter(ReadFilterLibrary.PRIMARY_ALIGNMENT)).and(new CountingReadFilter(ReadFilterLibrary.NOT_DUPLICATE)).and(new CountingReadFilter(ReadFilterLibrary.PASSES_VENDOR_QUALITY_CHECK)).and(new CountingReadFilter(ReadFilterLibrary.GOOD_CIGAR)).and(new CountingReadFilter(new WellformedReadFilter(readsSource.getHeader()))));
        final Iterable<AssemblyRegion> assemblyRegions = AssemblyRegion.createFromReadShard(shard, readsSource.getHeader(), new ReferenceContext(refSource, paddedShardInterval), new FeatureContext(null, paddedShardInterval), mockEvaluator, minRegionSize, maxRegionSize, regionPadding, activeProbThreshold, maxProbPropagationDistance);
        int regionCount = 0;
        for (final AssemblyRegion region : assemblyRegions) {
            Assert.assertTrue(regionCount < expectedResults.size(), "Too many regions returned from AssemblyRegion.createFromReadShard()");
            Assert.assertEquals(region.getSpan(), expectedResults.get(regionCount).getLeft(), "Wrong interval for region");
            Assert.assertEquals(region.isActive(), expectedResults.get(regionCount).getRight().booleanValue(), "Region incorrectly marked as active/inactive");
            ++regionCount;
        }
        Assert.assertEquals(regionCount, expectedResults.size(), "Too few regions returned from AssemblyRegion.createFromReadShard()");
    }
}
Also used : Path(java.nio.file.Path) WellformedReadFilter(org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) ActivityProfileState(org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState) CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Pair(org.apache.commons.lang3.tuple.Pair) MappingQualityReadFilter(org.broadinstitute.hellbender.engine.filters.MappingQualityReadFilter) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 35 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.

the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithOverlappingTruthConcordance.

private void checkOutputCallsWithOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final List<VariantContext> callsVariants = readVCFFile(callsFile);
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
    for (final VariantContext truth : truthVariants) {
        final List<Target> overlappingTargets = targets.targets(truth);
        final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
        final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
        if (overlappingTargets.isEmpty()) {
            Assert.assertTrue(overlappingOutput.isEmpty());
            continue;
        }
        @SuppressWarnings("all") final VariantContext matchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findAny().get();
        final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final List<Pair<VariantContext, Genotype>> sampleCalls = overlappingCalls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(p -> XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(p.getRight().getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY))).filter(p -> callPassFilters(p.getLeft(), p.getRight(), targets, filteringOptions)).collect(Collectors.toList());
            final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
            sampleCalls.forEach(p -> {
                expectedCounts[CopyNumberTriStateAllele.valueOf(p.getRight().getAllele(0)).index()]++;
            });
            final int[] actualCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0);
            Assert.assertEquals(actualCounts, expectedCounts, Arrays.toString(actualCounts) + " " + Arrays.toString(expectedCounts));
            final int expectedTotalCount = (int) MathUtils.sum(expectedCounts);
            final int actualTotalCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1);
            Assert.assertEquals(actualTotalCount, expectedTotalCount);
            final int expectedTargetCount = sampleCalls.stream().mapToInt(p -> targets.targetCount(p.getLeft())).sum();
            final int observedTargetCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, -1);
            Assert.assertEquals(observedTargetCount, expectedTargetCount);
            final Allele truthCallAllele = outputTruthAllele(outputGenotype);
            final boolean isMixed = IntStream.of(actualCounts).filter(i -> i > 0).count() > 1;
            final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
            if (sampleCalls.size() > 0 && !isMixed) {
                final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
                final CopyNumberTriStateAllele expectedCall = CopyNumberTriStateAllele.valueOf(bestCall.getRight().getAllele(0));
                final CopyNumberTriStateAllele actualCall = CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0));
                Assert.assertEquals(actualCall, expectedCall);
                sampleCallsCount[expectedCall.index()]++;
                if (!truthCallAllele.isReference()) {
                    if (truthCallAllele.equals(actualCall)) {
                        Assert.assertEquals(evalClass, EvaluationClass.TRUE_POSITIVE.acronym);
                    } else if (!truthCallAllele.isNoCall()) {
                        Assert.assertEquals(evalClass, EvaluationClass.DISCORDANT_POSITIVE.acronym);
                    } else {
                        Assert.assertNull(evalClass);
                    }
                } else if (truthCallAllele.isReference()) {
                    Assert.assertEquals(evalClass, EvaluationClass.FALSE_POSITIVE.acronym);
                }
            } else {
                Assert.assertEquals(Allele.NO_CALL, outputGenotype.getAllele(0));
                if (sampleCalls.isEmpty()) {
                    Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.FALSE_NEGATIVE.acronym : null);
                    Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), 0.0);
                } else {
                    Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.MIXED_POSITIVE.acronym : EvaluationClass.FALSE_POSITIVE.acronym);
                    final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
                    Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), callGQ(bestCall.getRight()));
                }
            }
        }
        final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
        final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
        Assert.assertEquals(observedAN, expectedAN);
        final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
        final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
        Assert.assertNotNull(observedAF);
        assertEquals(observedAF, expectedAF, 0.01);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) java.util(java.util) DataProvider(org.testng.annotations.DataProvider) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) GATKProtectedVariantContextUtils(org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) VCFConstants(htsjdk.variant.vcf.VCFConstants) IOException(java.io.IOException) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Field(java.lang.reflect.Field) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) XHMMSegmentGenotyper(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentGenotyper) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Target(org.broadinstitute.hellbender.tools.exome.Target) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Target(org.broadinstitute.hellbender.tools.exome.Target) Allele(htsjdk.variant.variantcontext.Allele) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Aggregations

Pair (org.apache.commons.lang3.tuple.Pair)685 ArrayList (java.util.ArrayList)209 List (java.util.List)154 Test (org.junit.Test)150 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)142 HashMap (java.util.HashMap)123 Collectors (java.util.stream.Collectors)123 Map (java.util.Map)112 Message (com.microsoft.azure.sdk.iot.device.Message)71 IOException (java.io.IOException)70 MutablePair (org.apache.commons.lang3.tuple.MutablePair)64 java.util (java.util)55 IotHubTransportMessage (com.microsoft.azure.sdk.iot.device.transport.IotHubTransportMessage)52 Set (java.util.Set)49 StringUtils (org.apache.commons.lang3.StringUtils)48 File (java.io.File)46 Optional (java.util.Optional)45 Arrays (java.util.Arrays)44 HashSet (java.util.HashSet)40 Test (org.junit.jupiter.api.Test)39