use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk by broadinstitute.
the class IntervalAlignmentContextIteratorUnitTest method getAlignmentContexts.
private List<AlignmentContext> getAlignmentContexts(final List<SimpleInterval> locusIntervals, final String bamPath) {
final List<String> sampleNames = Collections.singletonList("NA12878");
final ReadsDataSource gatkReads = new ReadsDataSource(IOUtils.getPath(bamPath));
final SAMFileHeader header = gatkReads.getHeader();
final Stream<GATKRead> filteredReads = Utils.stream(gatkReads).filter(new WellformedReadFilter(header).and(new ReadFilterLibrary.MappedReadFilter()));
final SAMSequenceDictionary dictionary = header.getSequenceDictionary();
final LocusIteratorByState locusIteratorByState = new LocusIteratorByState(filteredReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, sampleNames, header, true);
List<SimpleInterval> relevantIntervals = locusIntervals;
if (relevantIntervals == null) {
relevantIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
}
final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(relevantIntervals.iterator());
final IntervalAlignmentContextIterator intervalAlignmentContextIterator = new IntervalAlignmentContextIterator(locusIteratorByState, intervalLocusIterator, dictionary);
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(intervalAlignmentContextIterator, Spliterator.ORDERED), false).collect(Collectors.toList());
}
use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk by broadinstitute.
the class AssemblyRegion method createFromReadShard.
/**
* Divide a read shard up into one or more AssemblyRegions using the provided AssemblyRegionEvaluator to find
* the borders between "active" and "inactive" regions within the shard.
*
* @param shard Shard to divide into assembly regions
* @param readsHeader header for the reads
* @param referenceContext reference data overlapping the shard's extended span (including padding)
* @param features features overlapping the shard's extended span (including padding)
* @param evaluator AssemblyRegionEvaluator used to label each locus as either active or inactive
* @param minRegionSize minimum size for each assembly region
* @param maxRegionSize maximum size for each assembly region
* @param assemblyRegionPadding each assembly region will be padded by this amount on each side
* @param activeProbThreshold minimum probability for a site to be considered active, as reported by the provided evaluator
* @param maxProbPropagationDistance maximum number of bases probabilities can propagate in each direction when finding region boundaries
* @return a Iterable over one or more AssemblyRegions, each marked as either "active" or "inactive", spanning
* part of the provided Shard, and filled with all reads that overlap the region.
*/
public static Iterable<AssemblyRegion> createFromReadShard(final Shard<GATKRead> shard, final SAMFileHeader readsHeader, final ReferenceContext referenceContext, final FeatureContext features, final AssemblyRegionEvaluator evaluator, final int minRegionSize, final int maxRegionSize, final int assemblyRegionPadding, final double activeProbThreshold, final int maxProbPropagationDistance) {
Utils.nonNull(shard);
Utils.nonNull(readsHeader);
Utils.nonNull(referenceContext);
Utils.nonNull(features);
Utils.nonNull(evaluator);
Utils.validateArg(minRegionSize >= 1, "minRegionSize must be >= 1");
Utils.validateArg(maxRegionSize >= 1, "maxRegionSize must be >= 1");
Utils.validateArg(minRegionSize <= maxRegionSize, "minRegionSize must be <= maxRegionSize");
Utils.validateArg(assemblyRegionPadding >= 0, "assemblyRegionPadding must be >= 0");
Utils.validateArg(activeProbThreshold >= 0.0, "activeProbThreshold must be >= 0.0");
Utils.validateArg(maxProbPropagationDistance >= 0, "maxProbPropagationDistance must be >= 0");
// TODO: refactor this method so that we don't need to load all reads from the shard into memory at once!
final List<GATKRead> windowReads = new ArrayList<>();
for (final GATKRead read : shard) {
windowReads.add(read);
}
final LocusIteratorByState locusIterator = new LocusIteratorByState(windowReads.iterator(), DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(readsHeader), readsHeader, false);
final ActivityProfile activityProfile = new BandPassActivityProfile(null, maxProbPropagationDistance, activeProbThreshold, BandPassActivityProfile.MAX_FILTER_SIZE, BandPassActivityProfile.DEFAULT_SIGMA, readsHeader);
// First, use our activity profile to determine the bounds of each assembly region:
List<AssemblyRegion> assemblyRegions = determineAssemblyRegionBounds(shard, locusIterator, activityProfile, readsHeader, referenceContext, features, evaluator, minRegionSize, maxRegionSize, assemblyRegionPadding);
// Then, fill the assembly regions with overlapping reads from the shard:
final PeekableIterator<GATKRead> reads = new PeekableIterator<>(windowReads.iterator());
fillAssemblyRegionsWithReads(assemblyRegions, reads);
return assemblyRegions;
}
use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk by broadinstitute.
the class ReferenceConfidenceModel method getPileupsOverReference.
/**
* Get a list of pileups that span the entire active region span, in order, one for each position
*/
private List<ReadPileup> getPileupsOverReference(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final SimpleInterval activeRegionSpan, final ReadLikelihoods<Haplotype> readLikelihoods) {
Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
final List<GATKRead> reads = activeRegion.getReads();
final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, samples.asSetOfSamples(), activeRegion.getHeader(), true);
final int startPos = activeRegionSpan.getStart();
final List<ReadPileup> pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos);
AlignmentContext next = libs.advanceToLocus(startPos, true);
for (int curPos = startPos; curPos <= activeRegionSpan.getEnd(); curPos++) {
if (next != null && next.getLocation().getStart() == curPos) {
pileups.add(next.getBasePileup());
next = libs.hasNext() ? libs.next() : null;
} else {
// no data, so we create empty pileups
pileups.add(new ReadPileup(new SimpleInterval(activeRegionSpan.getContig(), curPos, curPos)));
}
}
return pileups;
}
use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk by broadinstitute.
the class LocusWalker method traverse.
/**
* Implementation of locus-based traversal.
* Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
*
* The default implementation iterates over all positions in the reference covered by reads (filtered and transformed)
* for all samples in the read groups, using the downsampling method provided by {@link #getDownsamplingInfo()}
* and including deletions only if {@link #includeDeletions()} returns {@code true}.
*/
@Override
public void traverse() {
final SAMFileHeader header = getHeaderForReads();
// get the samples from the read groups
final Set<String> samples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
final CountingReadFilter countedFilter = makeReadFilter();
// get the filter and transformed iterator
final Iterator<GATKRead> readIterator = getTransformedReadStream(countedFilter).iterator();
// get the LIBS
final LocusIteratorByState libs = new LocusIteratorByState(readIterator, getDownsamplingInfo(), keepUniqueReadListInLibs(), samples, header, includeDeletions(), includeNs());
final Iterator<AlignmentContext> iterator = createAlignmentContextIterator(header, libs);
// iterate over each alignment, and apply the function
final Spliterator<AlignmentContext> spliterator = Spliterators.spliteratorUnknownSize(iterator, 0);
StreamSupport.stream(spliterator, false).forEach(alignmentContext -> {
final SimpleInterval alignmentInterval = new SimpleInterval(alignmentContext);
apply(alignmentContext, new ReferenceContext(reference, alignmentInterval), new FeatureContext(features, alignmentInterval));
progressMeter.update(alignmentInterval);
});
logger.info(countedFilter.getSummaryLine());
}
use of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModel method getPileupsOverReference.
/**
* Get a list of pileups that span the entire active region span, in order, one for each position
*/
private List<ReadPileup> getPileupsOverReference(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final SimpleInterval activeRegionSpan, final ReadLikelihoods<Haplotype> readLikelihoods) {
Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
final List<GATKRead> reads = activeRegion.getReads();
final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, samples.asSetOfSamples(), activeRegion.getHeader(), true);
final int startPos = activeRegionSpan.getStart();
final List<ReadPileup> pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos);
AlignmentContext next = libs.advanceToLocus(startPos, true);
for (int curPos = startPos; curPos <= activeRegionSpan.getEnd(); curPos++) {
if (next != null && next.getLocation().getStart() == curPos) {
pileups.add(next.getBasePileup());
next = libs.hasNext() ? libs.next() : null;
} else {
// no data, so we create empty pileups
pileups.add(new ReadPileup(new SimpleInterval(activeRegionSpan.getContig(), curPos, curPos)));
}
}
return pileups;
}
Aggregations