use of org.broadinstitute.hellbender.utils.activityprofile.BandPassActivityProfile in project gatk by broadinstitute.
the class AssemblyRegion method createFromReadShard.
/**
* Divide a read shard up into one or more AssemblyRegions using the provided AssemblyRegionEvaluator to find
* the borders between "active" and "inactive" regions within the shard.
*
* @param shard Shard to divide into assembly regions
* @param readsHeader header for the reads
* @param referenceContext reference data overlapping the shard's extended span (including padding)
* @param features features overlapping the shard's extended span (including padding)
* @param evaluator AssemblyRegionEvaluator used to label each locus as either active or inactive
* @param minRegionSize minimum size for each assembly region
* @param maxRegionSize maximum size for each assembly region
* @param assemblyRegionPadding each assembly region will be padded by this amount on each side
* @param activeProbThreshold minimum probability for a site to be considered active, as reported by the provided evaluator
* @param maxProbPropagationDistance maximum number of bases probabilities can propagate in each direction when finding region boundaries
* @return a Iterable over one or more AssemblyRegions, each marked as either "active" or "inactive", spanning
* part of the provided Shard, and filled with all reads that overlap the region.
*/
public static Iterable<AssemblyRegion> createFromReadShard(final Shard<GATKRead> shard, final SAMFileHeader readsHeader, final ReferenceContext referenceContext, final FeatureContext features, final AssemblyRegionEvaluator evaluator, final int minRegionSize, final int maxRegionSize, final int assemblyRegionPadding, final double activeProbThreshold, final int maxProbPropagationDistance) {
Utils.nonNull(shard);
Utils.nonNull(readsHeader);
Utils.nonNull(referenceContext);
Utils.nonNull(features);
Utils.nonNull(evaluator);
Utils.validateArg(minRegionSize >= 1, "minRegionSize must be >= 1");
Utils.validateArg(maxRegionSize >= 1, "maxRegionSize must be >= 1");
Utils.validateArg(minRegionSize <= maxRegionSize, "minRegionSize must be <= maxRegionSize");
Utils.validateArg(assemblyRegionPadding >= 0, "assemblyRegionPadding must be >= 0");
Utils.validateArg(activeProbThreshold >= 0.0, "activeProbThreshold must be >= 0.0");
Utils.validateArg(maxProbPropagationDistance >= 0, "maxProbPropagationDistance must be >= 0");
// TODO: refactor this method so that we don't need to load all reads from the shard into memory at once!
final List<GATKRead> windowReads = new ArrayList<>();
for (final GATKRead read : shard) {
windowReads.add(read);
}
final LocusIteratorByState locusIterator = new LocusIteratorByState(windowReads.iterator(), DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(readsHeader), readsHeader, false);
final ActivityProfile activityProfile = new BandPassActivityProfile(null, maxProbPropagationDistance, activeProbThreshold, BandPassActivityProfile.MAX_FILTER_SIZE, BandPassActivityProfile.DEFAULT_SIGMA, readsHeader);
// First, use our activity profile to determine the bounds of each assembly region:
List<AssemblyRegion> assemblyRegions = determineAssemblyRegionBounds(shard, locusIterator, activityProfile, readsHeader, referenceContext, features, evaluator, minRegionSize, maxRegionSize, assemblyRegionPadding);
// Then, fill the assembly regions with overlapping reads from the shard:
final PeekableIterator<GATKRead> reads = new PeekableIterator<>(windowReads.iterator());
fillAssemblyRegionsWithReads(assemblyRegions, reads);
return assemblyRegions;
}
Aggregations