Search in sources :

Example 1 with ActivityProfileState

use of org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState in project gatk by broadinstitute.

the class AssemblyRegion method determineAssemblyRegionBounds.

/**
     * Helper method for {@link #createFromReadShard} that uses the provided activity profile and locus iterator
     * to generate a set of assembly regions covering the fully-padded span of the provided Shard. The returned
     * assembly regions will not contain any reads.
     *
     * @param shard Shard to divide into assembly regions
     * @param locusIterator Iterator over pileups to be fed to the AssemblyRegionEvaluator
     * @param activityProfile Activity profile to generate the assembly regions
     * @param readsHeader header for the reads
     * @param referenceContext reference data overlapping the shard's extended span (including padding)
     * @param features features overlapping the shard's extended span (including padding)
     * @param evaluator AssemblyRegionEvaluator used to label each locus as either active or inactive
     * @param minRegionSize minimum size for each assembly region
     * @param maxRegionSize maximum size for each assembly region
     * @param assemblyRegionPadding each assembly region will be padded by this amount on each side
     * @return A list of AssemblyRegions covering
     */
private static List<AssemblyRegion> determineAssemblyRegionBounds(final Shard<GATKRead> shard, final LocusIteratorByState locusIterator, final ActivityProfile activityProfile, final SAMFileHeader readsHeader, final ReferenceContext referenceContext, final FeatureContext features, final AssemblyRegionEvaluator evaluator, final int minRegionSize, final int maxRegionSize, final int assemblyRegionPadding) {
    // Use the provided activity profile to determine the bounds of each assembly region:
    List<AssemblyRegion> assemblyRegions = new ArrayList<>();
    locusIterator.forEachRemaining(pileup -> {
        if (!activityProfile.isEmpty()) {
            final boolean forceConversion = pileup.getLocation().getStart() != activityProfile.getEnd() + 1;
            assemblyRegions.addAll(activityProfile.popReadyAssemblyRegions(assemblyRegionPadding, minRegionSize, maxRegionSize, forceConversion));
        }
        if (shard.getPaddedInterval().contains(pileup.getLocation())) {
            final SimpleInterval pileupInterval = new SimpleInterval(pileup.getLocation());
            final ReferenceBases refBase = new ReferenceBases(new byte[] { referenceContext.getBases()[pileup.getLocation().getStart() - referenceContext.getWindow().getStart()] }, pileupInterval);
            final ReferenceContext pileupRefContext = new ReferenceContext(new ReferenceMemorySource(refBase, readsHeader.getSequenceDictionary()), pileupInterval);
            final ActivityProfileState profile = evaluator.isActive(pileup, pileupRefContext, features);
            activityProfile.add(profile);
        }
    });
    assemblyRegions.addAll(activityProfile.popReadyAssemblyRegions(assemblyRegionPadding, minRegionSize, maxRegionSize, true));
    return assemblyRegions;
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ActivityProfileState(org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 2 with ActivityProfileState

use of org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState in project gatk by broadinstitute.

the class AssemblyRegion method checkStates.

private void checkStates(final SimpleInterval activeRegionLoc) {
    if (!this.supportingStates.isEmpty()) {
        Utils.validateArg(this.supportingStates.size() == activeRegionLoc.size(), () -> "Supporting states wasn't empty but it doesn't have exactly one state per bp in the active region: states " + this.supportingStates.size() + " vs. bp in region = " + activeRegionLoc.size());
        SimpleInterval lastStateLoc = null;
        for (final ActivityProfileState state : this.supportingStates) {
            if (lastStateLoc != null) {
                if (state.getLoc().getStart() != lastStateLoc.getStart() + 1 || !state.getLoc().getContig().equals(lastStateLoc.getContig())) {
                    throw new IllegalArgumentException("Supporting state has an invalid sequence: last state was " + lastStateLoc + " but next state was " + state);
                }
            }
            lastStateLoc = state.getLoc();
        }
    }
}
Also used : ActivityProfileState(org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 3 with ActivityProfileState

use of org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState in project gatk by broadinstitute.

the class AssemblyRegionUnitTest method testSplitAssemblyRegion.

@Test(dataProvider = "SplitAssemblyRegion")
public void testSplitAssemblyRegion(final SimpleInterval regionLoc, final List<SimpleInterval> intervalLocs, final List<SimpleInterval> expectedRegionLocs) {
    for (final boolean addSubstates : Arrays.asList(true, false)) {
        final List<ActivityProfileState> states;
        if (addSubstates) {
            states = new LinkedList<>();
            for (int i = 0; i < regionLoc.size(); i++) states.add(new ActivityProfileState(new SimpleInterval(regionLoc.getContig(), regionLoc.getStart() + i, regionLoc.getStart() + i), 1.0));
        } else {
            states = null;
        }
        final AssemblyRegion region = new AssemblyRegion(regionLoc, states, true, 0, header);
        final List<AssemblyRegion> regions = region.splitAndTrimToIntervals(new LinkedHashSet<>(intervalLocs));
        Assert.assertEquals(regions.size(), expectedRegionLocs.size(), "Wrong number of split locations");
        for (int i = 0; i < expectedRegionLocs.size(); i++) {
            final SimpleInterval expected = expectedRegionLocs.get(i);
            final AssemblyRegion actual = regions.get(i);
            Assert.assertEquals(actual.getSpan(), expected, "Bad region after split");
            Assert.assertEquals(actual.isActive(), region.isActive());
            Assert.assertEquals(actual.getExtension(), region.getExtension());
        }
    }
}
Also used : ActivityProfileState(org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 4 with ActivityProfileState

use of org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState in project gatk by broadinstitute.

the class AssemblyRegionUnitTest method testCreateFromReadShard.

@Test
public void testCreateFromReadShard() {
    final Path testBam = IOUtils.getPath(NA12878_20_21_WGS_bam);
    final File reference = new File(b37_reference_20_21);
    final SimpleInterval shardInterval = new SimpleInterval("20", 10000000, 10001000);
    final SimpleInterval paddedShardInterval = new SimpleInterval(shardInterval.getContig(), shardInterval.getStart() - 100, shardInterval.getEnd() + 100);
    // Traversal settings to match the GATK 3/4 HaplotypeCaller settings when the expected output was generated:
    final int minRegionSize = 50;
    final int maxRegionSize = 300;
    final int regionPadding = 100;
    final double activeProbThreshold = 0.002;
    final int maxProbPropagationDistance = 50;
    // This mock evaluator returns exactly the values that the GATK 4 HaplotypeCallerEngine's isActive()
    // method returns for this region. We don't have direct access to the HaplotypeCallerEngine since
    // we're in public, so we need to use a mock.
    final AssemblyRegionEvaluator mockEvaluator = new AssemblyRegionEvaluator() {

        private final List<SimpleInterval> activeSites = Arrays.asList(new SimpleInterval("20", 9999996, 9999996), new SimpleInterval("20", 9999997, 9999997), new SimpleInterval("20", 10000117, 10000117), new SimpleInterval("20", 10000211, 10000211), new SimpleInterval("20", 10000439, 10000439), new SimpleInterval("20", 10000598, 10000598), new SimpleInterval("20", 10000694, 10000694), new SimpleInterval("20", 10000758, 10000758), new SimpleInterval("20", 10001019, 10001019));

        @Override
        public ActivityProfileState isActive(AlignmentContext locusPileup, ReferenceContext referenceContext, FeatureContext featureContext) {
            final SimpleInterval pileupInterval = new SimpleInterval(locusPileup);
            return activeSites.contains(pileupInterval) ? new ActivityProfileState(pileupInterval, 1.0) : new ActivityProfileState(pileupInterval, 0.0);
        }
    };
    final List<Pair<SimpleInterval, Boolean>> expectedResults = Arrays.asList(// GATK 3.4's results).
    Pair.of(new SimpleInterval("20", 9999902, 9999953), false), Pair.of(new SimpleInterval("20", 9999954, 10000039), true), Pair.of(new SimpleInterval("20", 10000040, 10000079), false), Pair.of(new SimpleInterval("20", 10000080, 10000154), true), Pair.of(new SimpleInterval("20", 10000155, 10000173), false), Pair.of(new SimpleInterval("20", 10000174, 10000248), true), Pair.of(new SimpleInterval("20", 10000249, 10000401), false), Pair.of(new SimpleInterval("20", 10000402, 10000476), true), Pair.of(new SimpleInterval("20", 10000477, 10000560), false), Pair.of(new SimpleInterval("20", 10000561, 10000635), true), Pair.of(new SimpleInterval("20", 10000636, 10000656), false), Pair.of(new SimpleInterval("20", 10000657, 10000795), true), Pair.of(new SimpleInterval("20", 10000796, 10000981), false), Pair.of(new SimpleInterval("20", 10000982, 10001056), true), Pair.of(new SimpleInterval("20", 10001057, 10001100), false));
    try (final ReadsDataSource readsSource = new ReadsDataSource(testBam);
        final ReferenceDataSource refSource = new ReferenceFileSource(reference)) {
        // Set the shard's read filter to match the GATK 3/4 HaplotypeCaller settings when the expected output was generated:
        final LocalReadShard shard = new LocalReadShard(shardInterval, paddedShardInterval, readsSource);
        shard.setReadFilter(new CountingReadFilter(new MappingQualityReadFilter(20)).and(new CountingReadFilter(ReadFilterLibrary.MAPPING_QUALITY_AVAILABLE)).and(new CountingReadFilter(ReadFilterLibrary.MAPPED)).and(new CountingReadFilter(ReadFilterLibrary.PRIMARY_ALIGNMENT)).and(new CountingReadFilter(ReadFilterLibrary.NOT_DUPLICATE)).and(new CountingReadFilter(ReadFilterLibrary.PASSES_VENDOR_QUALITY_CHECK)).and(new CountingReadFilter(ReadFilterLibrary.GOOD_CIGAR)).and(new CountingReadFilter(new WellformedReadFilter(readsSource.getHeader()))));
        final Iterable<AssemblyRegion> assemblyRegions = AssemblyRegion.createFromReadShard(shard, readsSource.getHeader(), new ReferenceContext(refSource, paddedShardInterval), new FeatureContext(null, paddedShardInterval), mockEvaluator, minRegionSize, maxRegionSize, regionPadding, activeProbThreshold, maxProbPropagationDistance);
        int regionCount = 0;
        for (final AssemblyRegion region : assemblyRegions) {
            Assert.assertTrue(regionCount < expectedResults.size(), "Too many regions returned from AssemblyRegion.createFromReadShard()");
            Assert.assertEquals(region.getSpan(), expectedResults.get(regionCount).getLeft(), "Wrong interval for region");
            Assert.assertEquals(region.isActive(), expectedResults.get(regionCount).getRight().booleanValue(), "Region incorrectly marked as active/inactive");
            ++regionCount;
        }
        Assert.assertEquals(regionCount, expectedResults.size(), "Too few regions returned from AssemblyRegion.createFromReadShard()");
    }
}
Also used : Path(java.nio.file.Path) WellformedReadFilter(org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) ActivityProfileState(org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState) CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Pair(org.apache.commons.lang3.tuple.Pair) MappingQualityReadFilter(org.broadinstitute.hellbender.engine.filters.MappingQualityReadFilter) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 5 with ActivityProfileState

use of org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState in project gatk by broadinstitute.

the class ActivityProfileStateUnitTest method testActiveProfileResultProvider.

@Test(dataProvider = "ActiveProfileResultProvider")
public void testActiveProfileResultProvider(SimpleInterval loc, final double prob, ActivityProfileState.Type maybeState, final Number maybeNumber) {
    final ActivityProfileState apr = maybeState == null ? new ActivityProfileState(loc, prob) : new ActivityProfileState(loc, prob, maybeState, maybeNumber);
    Assert.assertEquals(apr.getLoc(), loc);
    Assert.assertEquals(apr.getOffset(loc), 0);
    Assert.assertNotNull(apr.toString());
    Assert.assertEquals(apr.isActiveProb(), prob);
    Assert.assertEquals(apr.getResultState(), maybeState == null ? ActivityProfileState.Type.NONE : maybeState);
    Assert.assertEquals(apr.getResultValue(), maybeState == null ? null : maybeNumber);
}
Also used : ActivityProfileState(org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState) Test(org.testng.annotations.Test)

Aggregations

ActivityProfileState (org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState)10 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)9 Test (org.testng.annotations.Test)7 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)4 File (java.io.File)3 CachingIndexedFastaSequenceFile (org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile)3 ReadFilter (org.broadinstitute.hellbender.engine.filters.ReadFilter)2 ReadFilteringIterator (org.broadinstitute.hellbender.utils.iterators.ReadFilteringIterator)2 LocusIteratorByState (org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState)2 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)2 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)1 Path (java.nio.file.Path)1 Pair (org.apache.commons.lang3.tuple.Pair)1 CountingReadFilter (org.broadinstitute.hellbender.engine.filters.CountingReadFilter)1 MappingQualityReadFilter (org.broadinstitute.hellbender.engine.filters.MappingQualityReadFilter)1 WellformedReadFilter (org.broadinstitute.hellbender.engine.filters.WellformedReadFilter)1 ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases)1 DataProvider (org.testng.annotations.DataProvider)1