Search in sources :

Example 1 with LocusIteratorByState

use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk by broadinstitute.

the class IntervalAlignmentContextIteratorUnitTest method getAlignmentContexts.

private List<AlignmentContext> getAlignmentContexts(final List<SimpleInterval> locusIntervals, final String bamPath) {
    final List<String> sampleNames = Collections.singletonList("NA12878");
    final ReadsDataSource gatkReads = new ReadsDataSource(IOUtils.getPath(bamPath));
    final SAMFileHeader header = gatkReads.getHeader();
    final Stream<GATKRead> filteredReads = Utils.stream(gatkReads).filter(new WellformedReadFilter(header).and(new ReadFilterLibrary.MappedReadFilter()));
    final SAMSequenceDictionary dictionary = header.getSequenceDictionary();
    final LocusIteratorByState locusIteratorByState = new LocusIteratorByState(filteredReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, sampleNames, header, true);
    List<SimpleInterval> relevantIntervals = locusIntervals;
    if (relevantIntervals == null) {
        relevantIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
    }
    final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(relevantIntervals.iterator());
    final IntervalAlignmentContextIterator intervalAlignmentContextIterator = new IntervalAlignmentContextIterator(locusIteratorByState, intervalLocusIterator, dictionary);
    return StreamSupport.stream(Spliterators.spliteratorUnknownSize(intervalAlignmentContextIterator, Spliterator.ORDERED), false).collect(Collectors.toList());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) WellformedReadFilter(org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 2 with LocusIteratorByState

use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk by broadinstitute.

the class AssemblyRegion method createFromReadShard.

/**
     * Divide a read shard up into one or more AssemblyRegions using the provided AssemblyRegionEvaluator to find
     * the borders between "active" and "inactive" regions within the shard.
     *
     * @param shard Shard to divide into assembly regions
     * @param readsHeader header for the reads
     * @param referenceContext reference data overlapping the shard's extended span (including padding)
     * @param features features overlapping the shard's extended span (including padding)
     * @param evaluator AssemblyRegionEvaluator used to label each locus as either active or inactive
     * @param minRegionSize minimum size for each assembly region
     * @param maxRegionSize maximum size for each assembly region
     * @param assemblyRegionPadding each assembly region will be padded by this amount on each side
     * @param activeProbThreshold minimum probability for a site to be considered active, as reported by the provided evaluator
     * @param maxProbPropagationDistance maximum number of bases probabilities can propagate in each direction when finding region boundaries
     * @return a Iterable over one or more AssemblyRegions, each marked as either "active" or "inactive", spanning
     *         part of the provided Shard, and filled with all reads that overlap the region.
     */
public static Iterable<AssemblyRegion> createFromReadShard(final Shard<GATKRead> shard, final SAMFileHeader readsHeader, final ReferenceContext referenceContext, final FeatureContext features, final AssemblyRegionEvaluator evaluator, final int minRegionSize, final int maxRegionSize, final int assemblyRegionPadding, final double activeProbThreshold, final int maxProbPropagationDistance) {
    Utils.nonNull(shard);
    Utils.nonNull(readsHeader);
    Utils.nonNull(referenceContext);
    Utils.nonNull(features);
    Utils.nonNull(evaluator);
    Utils.validateArg(minRegionSize >= 1, "minRegionSize must be >= 1");
    Utils.validateArg(maxRegionSize >= 1, "maxRegionSize must be >= 1");
    Utils.validateArg(minRegionSize <= maxRegionSize, "minRegionSize must be <= maxRegionSize");
    Utils.validateArg(assemblyRegionPadding >= 0, "assemblyRegionPadding must be >= 0");
    Utils.validateArg(activeProbThreshold >= 0.0, "activeProbThreshold must be >= 0.0");
    Utils.validateArg(maxProbPropagationDistance >= 0, "maxProbPropagationDistance must be >= 0");
    // TODO: refactor this method so that we don't need to load all reads from the shard into memory at once!
    final List<GATKRead> windowReads = new ArrayList<>();
    for (final GATKRead read : shard) {
        windowReads.add(read);
    }
    final LocusIteratorByState locusIterator = new LocusIteratorByState(windowReads.iterator(), DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(readsHeader), readsHeader, false);
    final ActivityProfile activityProfile = new BandPassActivityProfile(null, maxProbPropagationDistance, activeProbThreshold, BandPassActivityProfile.MAX_FILTER_SIZE, BandPassActivityProfile.DEFAULT_SIGMA, readsHeader);
    // First, use our activity profile to determine the bounds of each assembly region:
    List<AssemblyRegion> assemblyRegions = determineAssemblyRegionBounds(shard, locusIterator, activityProfile, readsHeader, referenceContext, features, evaluator, minRegionSize, maxRegionSize, assemblyRegionPadding);
    // Then, fill the assembly regions with overlapping reads from the shard:
    final PeekableIterator<GATKRead> reads = new PeekableIterator<>(windowReads.iterator());
    fillAssemblyRegionsWithReads(assemblyRegions, reads);
    return assemblyRegions;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) ActivityProfile(org.broadinstitute.hellbender.utils.activityprofile.ActivityProfile) BandPassActivityProfile(org.broadinstitute.hellbender.utils.activityprofile.BandPassActivityProfile) PeekableIterator(htsjdk.samtools.util.PeekableIterator) BandPassActivityProfile(org.broadinstitute.hellbender.utils.activityprofile.BandPassActivityProfile)

Example 3 with LocusIteratorByState

use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk by broadinstitute.

the class ReferenceConfidenceModel method getPileupsOverReference.

/**
     * Get a list of pileups that span the entire active region span, in order, one for each position
     */
private List<ReadPileup> getPileupsOverReference(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final SimpleInterval activeRegionSpan, final ReadLikelihoods<Haplotype> readLikelihoods) {
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
    final List<GATKRead> reads = activeRegion.getReads();
    final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, samples.asSetOfSamples(), activeRegion.getHeader(), true);
    final int startPos = activeRegionSpan.getStart();
    final List<ReadPileup> pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos);
    AlignmentContext next = libs.advanceToLocus(startPos, true);
    for (int curPos = startPos; curPos <= activeRegionSpan.getEnd(); curPos++) {
        if (next != null && next.getLocation().getStart() == curPos) {
            pileups.add(next.getBasePileup());
            next = libs.hasNext() ? libs.next() : null;
        } else {
            // no data, so we create empty pileups
            pileups.add(new ReadPileup(new SimpleInterval(activeRegionSpan.getContig(), curPos, curPos)));
        }
    }
    return pileups;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 4 with LocusIteratorByState

use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk by broadinstitute.

the class LocusWalker method traverse.

/**
     * Implementation of locus-based traversal.
     * Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
     *
     * The default implementation iterates over all positions in the reference covered by reads (filtered and transformed)
     * for all samples in the read groups, using the downsampling method provided by {@link #getDownsamplingInfo()}
     * and including deletions only if {@link #includeDeletions()} returns {@code true}.
     */
@Override
public void traverse() {
    final SAMFileHeader header = getHeaderForReads();
    // get the samples from the read groups
    final Set<String> samples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
    final CountingReadFilter countedFilter = makeReadFilter();
    // get the filter and transformed iterator
    final Iterator<GATKRead> readIterator = getTransformedReadStream(countedFilter).iterator();
    // get the LIBS
    final LocusIteratorByState libs = new LocusIteratorByState(readIterator, getDownsamplingInfo(), keepUniqueReadListInLibs(), samples, header, includeDeletions(), includeNs());
    final Iterator<AlignmentContext> iterator = createAlignmentContextIterator(header, libs);
    // iterate over each alignment, and apply the function
    final Spliterator<AlignmentContext> spliterator = Spliterators.spliteratorUnknownSize(iterator, 0);
    StreamSupport.stream(spliterator, false).forEach(alignmentContext -> {
        final SimpleInterval alignmentInterval = new SimpleInterval(alignmentContext);
        apply(alignmentContext, new ReferenceContext(reference, alignmentInterval), new FeatureContext(features, alignmentInterval));
        progressMeter.update(alignmentInterval);
    });
    logger.info(countedFilter.getSummaryLine());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 5 with LocusIteratorByState

use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk-protected by broadinstitute.

the class ReferenceConfidenceModel method getPileupsOverReference.

/**
     * Get a list of pileups that span the entire active region span, in order, one for each position
     */
private List<ReadPileup> getPileupsOverReference(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final SimpleInterval activeRegionSpan, final ReadLikelihoods<Haplotype> readLikelihoods) {
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
    final List<GATKRead> reads = activeRegion.getReads();
    final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, samples.asSetOfSamples(), activeRegion.getHeader(), true);
    final int startPos = activeRegionSpan.getStart();
    final List<ReadPileup> pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos);
    AlignmentContext next = libs.advanceToLocus(startPos, true);
    for (int curPos = startPos; curPos <= activeRegionSpan.getEnd(); curPos++) {
        if (next != null && next.getLocation().getStart() == curPos) {
            pileups.add(next.getBasePileup());
            next = libs.hasNext() ? libs.next() : null;
        } else {
            // no data, so we create empty pileups
            pileups.add(new ReadPileup(new SimpleInterval(activeRegionSpan.getContig(), curPos, curPos)));
        }
    }
    return pileups;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Aggregations

LocusIteratorByState (org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState)8 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)8 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)7 SAMFileHeader (htsjdk.samtools.SAMFileHeader)3 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)2 File (java.io.File)2 AlignmentContext (org.broadinstitute.hellbender.engine.AlignmentContext)2 ReadFilter (org.broadinstitute.hellbender.engine.filters.ReadFilter)2 ActivityProfileState (org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState)2 CachingIndexedFastaSequenceFile (org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile)2 ReadFilteringIterator (org.broadinstitute.hellbender.utils.iterators.ReadFilteringIterator)2 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)2 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)2 Test (org.testng.annotations.Test)2 ImmutableList (com.google.common.collect.ImmutableList)1 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 PeekableIterator (htsjdk.samtools.util.PeekableIterator)1 java.util (java.util)1 Collectors (java.util.stream.Collectors)1 StreamSupport (java.util.stream.StreamSupport)1