Search in sources :

Example 1 with CountingReadFilter

use of org.broadinstitute.hellbender.engine.filters.CountingReadFilter in project gatk by broadinstitute.

the class ReadWalker method traverse.

/**
     * Implementation of read-based traversal.
     * Subclasses can override to provide own behavior but default implementation should be suitable for most uses.
     *
     * The default implementation creates filters using {@link #makeReadFilter} and transformers using
     * {@link #makePreReadFilterTransformer()} {@link #makePostReadFilterTransformer()} and then iterates over all reads, applies
     * the pre-filter transformer, the filter, then the post-filter transformer and hands the resulting reads to the {@link #apply}
     * function of the walker (along with additional contextual information, if present, such as reference bases).
     */
@Override
public void traverse() {
    // Process each read in the input stream.
    // Supply reference bases spanning each read, if a reference is available.
    final CountingReadFilter countedFilter = makeReadFilter();
    getTransformedReadStream(countedFilter).forEach(read -> {
        final SimpleInterval readInterval = getReadInterval(read);
        apply(read, new ReferenceContext(reference, readInterval), new FeatureContext(features, readInterval));
        progressMeter.update(readInterval);
    });
    logger.info(countedFilter.getSummaryLine());
}
Also used : CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 2 with CountingReadFilter

use of org.broadinstitute.hellbender.engine.filters.CountingReadFilter in project gatk by broadinstitute.

the class AssemblyRegionUnitTest method testCreateFromReadShard.

@Test
public void testCreateFromReadShard() {
    final Path testBam = IOUtils.getPath(NA12878_20_21_WGS_bam);
    final File reference = new File(b37_reference_20_21);
    final SimpleInterval shardInterval = new SimpleInterval("20", 10000000, 10001000);
    final SimpleInterval paddedShardInterval = new SimpleInterval(shardInterval.getContig(), shardInterval.getStart() - 100, shardInterval.getEnd() + 100);
    // Traversal settings to match the GATK 3/4 HaplotypeCaller settings when the expected output was generated:
    final int minRegionSize = 50;
    final int maxRegionSize = 300;
    final int regionPadding = 100;
    final double activeProbThreshold = 0.002;
    final int maxProbPropagationDistance = 50;
    // This mock evaluator returns exactly the values that the GATK 4 HaplotypeCallerEngine's isActive()
    // method returns for this region. We don't have direct access to the HaplotypeCallerEngine since
    // we're in public, so we need to use a mock.
    final AssemblyRegionEvaluator mockEvaluator = new AssemblyRegionEvaluator() {

        private final List<SimpleInterval> activeSites = Arrays.asList(new SimpleInterval("20", 9999996, 9999996), new SimpleInterval("20", 9999997, 9999997), new SimpleInterval("20", 10000117, 10000117), new SimpleInterval("20", 10000211, 10000211), new SimpleInterval("20", 10000439, 10000439), new SimpleInterval("20", 10000598, 10000598), new SimpleInterval("20", 10000694, 10000694), new SimpleInterval("20", 10000758, 10000758), new SimpleInterval("20", 10001019, 10001019));

        @Override
        public ActivityProfileState isActive(AlignmentContext locusPileup, ReferenceContext referenceContext, FeatureContext featureContext) {
            final SimpleInterval pileupInterval = new SimpleInterval(locusPileup);
            return activeSites.contains(pileupInterval) ? new ActivityProfileState(pileupInterval, 1.0) : new ActivityProfileState(pileupInterval, 0.0);
        }
    };
    final List<Pair<SimpleInterval, Boolean>> expectedResults = Arrays.asList(// GATK 3.4's results).
    Pair.of(new SimpleInterval("20", 9999902, 9999953), false), Pair.of(new SimpleInterval("20", 9999954, 10000039), true), Pair.of(new SimpleInterval("20", 10000040, 10000079), false), Pair.of(new SimpleInterval("20", 10000080, 10000154), true), Pair.of(new SimpleInterval("20", 10000155, 10000173), false), Pair.of(new SimpleInterval("20", 10000174, 10000248), true), Pair.of(new SimpleInterval("20", 10000249, 10000401), false), Pair.of(new SimpleInterval("20", 10000402, 10000476), true), Pair.of(new SimpleInterval("20", 10000477, 10000560), false), Pair.of(new SimpleInterval("20", 10000561, 10000635), true), Pair.of(new SimpleInterval("20", 10000636, 10000656), false), Pair.of(new SimpleInterval("20", 10000657, 10000795), true), Pair.of(new SimpleInterval("20", 10000796, 10000981), false), Pair.of(new SimpleInterval("20", 10000982, 10001056), true), Pair.of(new SimpleInterval("20", 10001057, 10001100), false));
    try (final ReadsDataSource readsSource = new ReadsDataSource(testBam);
        final ReferenceDataSource refSource = new ReferenceFileSource(reference)) {
        // Set the shard's read filter to match the GATK 3/4 HaplotypeCaller settings when the expected output was generated:
        final LocalReadShard shard = new LocalReadShard(shardInterval, paddedShardInterval, readsSource);
        shard.setReadFilter(new CountingReadFilter(new MappingQualityReadFilter(20)).and(new CountingReadFilter(ReadFilterLibrary.MAPPING_QUALITY_AVAILABLE)).and(new CountingReadFilter(ReadFilterLibrary.MAPPED)).and(new CountingReadFilter(ReadFilterLibrary.PRIMARY_ALIGNMENT)).and(new CountingReadFilter(ReadFilterLibrary.NOT_DUPLICATE)).and(new CountingReadFilter(ReadFilterLibrary.PASSES_VENDOR_QUALITY_CHECK)).and(new CountingReadFilter(ReadFilterLibrary.GOOD_CIGAR)).and(new CountingReadFilter(new WellformedReadFilter(readsSource.getHeader()))));
        final Iterable<AssemblyRegion> assemblyRegions = AssemblyRegion.createFromReadShard(shard, readsSource.getHeader(), new ReferenceContext(refSource, paddedShardInterval), new FeatureContext(null, paddedShardInterval), mockEvaluator, minRegionSize, maxRegionSize, regionPadding, activeProbThreshold, maxProbPropagationDistance);
        int regionCount = 0;
        for (final AssemblyRegion region : assemblyRegions) {
            Assert.assertTrue(regionCount < expectedResults.size(), "Too many regions returned from AssemblyRegion.createFromReadShard()");
            Assert.assertEquals(region.getSpan(), expectedResults.get(regionCount).getLeft(), "Wrong interval for region");
            Assert.assertEquals(region.isActive(), expectedResults.get(regionCount).getRight().booleanValue(), "Region incorrectly marked as active/inactive");
            ++regionCount;
        }
        Assert.assertEquals(regionCount, expectedResults.size(), "Too few regions returned from AssemblyRegion.createFromReadShard()");
    }
}
Also used : Path(java.nio.file.Path) WellformedReadFilter(org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) ActivityProfileState(org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState) CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Pair(org.apache.commons.lang3.tuple.Pair) MappingQualityReadFilter(org.broadinstitute.hellbender.engine.filters.MappingQualityReadFilter) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 3 with CountingReadFilter

use of org.broadinstitute.hellbender.engine.filters.CountingReadFilter in project gatk by broadinstitute.

the class LocusWalker method traverse.

/**
     * Implementation of locus-based traversal.
     * Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
     *
     * The default implementation iterates over all positions in the reference covered by reads (filtered and transformed)
     * for all samples in the read groups, using the downsampling method provided by {@link #getDownsamplingInfo()}
     * and including deletions only if {@link #includeDeletions()} returns {@code true}.
     */
@Override
public void traverse() {
    final SAMFileHeader header = getHeaderForReads();
    // get the samples from the read groups
    final Set<String> samples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
    final CountingReadFilter countedFilter = makeReadFilter();
    // get the filter and transformed iterator
    final Iterator<GATKRead> readIterator = getTransformedReadStream(countedFilter).iterator();
    // get the LIBS
    final LocusIteratorByState libs = new LocusIteratorByState(readIterator, getDownsamplingInfo(), keepUniqueReadListInLibs(), samples, header, includeDeletions(), includeNs());
    final Iterator<AlignmentContext> iterator = createAlignmentContextIterator(header, libs);
    // iterate over each alignment, and apply the function
    final Spliterator<AlignmentContext> spliterator = Spliterators.spliteratorUnknownSize(iterator, 0);
    StreamSupport.stream(spliterator, false).forEach(alignmentContext -> {
        final SimpleInterval alignmentInterval = new SimpleInterval(alignmentContext);
        apply(alignmentContext, new ReferenceContext(reference, alignmentInterval), new FeatureContext(features, alignmentInterval));
        progressMeter.update(alignmentInterval);
    });
    logger.info(countedFilter.getSummaryLine());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 4 with CountingReadFilter

use of org.broadinstitute.hellbender.engine.filters.CountingReadFilter in project gatk by broadinstitute.

the class TwoPassReadWalker method traverse.

@Override
public void traverse() {
    // Process each read in the input stream.
    // Supply reference bases spanning each read, if a reference is available.
    final CountingReadFilter countedFilter = makeReadFilter();
    traverseReads(countedFilter, this::firstPassApply);
    logger.info("Finished first pass through the reads");
    afterFirstPass();
    // Need to reinitialize the reads and intervals so they are guaranteed to pass over a file
    initializeReads();
    setReadTraversalBounds();
    logger.info("Starting second pass through the reads");
    traverseReads(countedFilter, this::secondPassApply);
    logger.info(countedFilter.getSummaryLine());
}
Also used : CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter)

Example 5 with CountingReadFilter

use of org.broadinstitute.hellbender.engine.filters.CountingReadFilter in project gatk by broadinstitute.

the class VariantWalkerBase method traverse.

/**
     * Implementation of variant-based traversal.
     * Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
     */
@Override
public void traverse() {
    final VariantFilter variantfilter = makeVariantFilter();
    final CountingReadFilter readFilter = makeReadFilter();
    // Process each variant in the input stream.
    StreamSupport.stream(getSpliteratorForDrivingVariants(), false).filter(variantfilter).forEach(variant -> {
        final SimpleInterval variantInterval = new SimpleInterval(variant);
        apply(variant, new ReadsContext(reads, variantInterval, readFilter), new ReferenceContext(reference, variantInterval), new FeatureContext(features, variantInterval));
        progressMeter.update(variantInterval);
    });
}
Also used : CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) VariantFilter(org.broadinstitute.hellbender.engine.filters.VariantFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Aggregations

CountingReadFilter (org.broadinstitute.hellbender.engine.filters.CountingReadFilter)7 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)5 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)1 File (java.io.File)1 Path (java.nio.file.Path)1 Pair (org.apache.commons.lang3.tuple.Pair)1 MappingQualityReadFilter (org.broadinstitute.hellbender.engine.filters.MappingQualityReadFilter)1 VariantFilter (org.broadinstitute.hellbender.engine.filters.VariantFilter)1 WellformedReadFilter (org.broadinstitute.hellbender.engine.filters.WellformedReadFilter)1 ActivityProfileState (org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState)1 PositionalDownsampler (org.broadinstitute.hellbender.utils.downsampling.PositionalDownsampler)1 CachingIndexedFastaSequenceFile (org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile)1 LocusIteratorByState (org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState)1 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)1 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)1 Test (org.testng.annotations.Test)1