Example usage of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in the project gatk by broadinstitute,
from the class LocusWalkerSpark, method getAlignmentsFunction.
/**
 * Builds a function that converts a {@link Shard} of reads into {@link LocusWalkerContext}
 * objects, pairing each per-locus alignment with its corresponding reference bases and features.
 * @param bReferenceSource the reference source broadcast
 * @param bFeatureManager the feature manager broadcast
 * @param sequenceDictionary the sequence dictionary for the reads
 * @param header the reads header
 * @param downsamplingInfo the downsampling method for the reads
 * @return a function that maps a {@link Shard} of reads into a tuple of alignments and their corresponding reference and features.
 */
private static FlatMapFunction<Shard<GATKRead>, LocusWalkerContext> getAlignmentsFunction(Broadcast<ReferenceMultiSource> bReferenceSource, Broadcast<FeatureManager> bFeatureManager, SAMSequenceDictionary sequenceDictionary, SAMFileHeader header, LIBSDownsamplingInfo downsamplingInfo) {
    return (FlatMapFunction<Shard<GATKRead>, LocusWalkerContext>) shard -> {
        final SimpleInterval shardSpan = shard.getInterval();
        final SimpleInterval paddedSpan = shard.getPaddedInterval();
        final Iterator<GATKRead> shardReads = shard.iterator();
        // Materialize an in-memory reference slice covering the padded span, unless no reference
        // broadcast was supplied (null means "no reference available").
        final ReferenceDataSource refSource;
        if (bReferenceSource == null) {
            refSource = null;
        } else {
            refSource = new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(null, paddedSpan), sequenceDictionary);
        }
        final FeatureManager features = (bFeatureManager == null) ? null : bFeatureManager.getValue();
        // Gather every sample name present in the header's read groups.
        final Set<String> sampleNames = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
        final LocusIteratorByState locusIterator = new LocusIteratorByState(shardReads, downsamplingInfo, false, sampleNames, header, true, false);
        // Restrict emitted loci to the unpadded shard interval so padding-only positions are dropped.
        final IntervalOverlappingIterator<AlignmentContext> restricted = new IntervalOverlappingIterator<>(locusIterator, ImmutableList.of(shardSpan), sequenceDictionary);
        final Spliterator<AlignmentContext> split = Spliterators.spliteratorUnknownSize(restricted, 0);
        return StreamSupport.stream(split, false)
                .map(ctx -> {
                    final SimpleInterval locus = new SimpleInterval(ctx);
                    return new LocusWalkerContext(ctx, new ReferenceContext(refSource, locus), new FeatureContext(features, locus));
                })
                .iterator();
    };
}
Example usage of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in the project gatk by broadinstitute,
from the class HaplotypeCallerEngineUnitTest, method testIsActive.
/**
 * Checks that {@code HaplotypeCallerEngine.isActive()} reports probability 1.0 at exactly
 * the expected set of active sites within a 1kb shard of NA12878 WGS data, and 0.0 elsewhere.
 */
@Test
public void testIsActive() throws IOException {
    final File bamFile = new File(NA12878_20_21_WGS_bam);
    final File referenceFile = new File(b37_reference_20_21);
    final SimpleInterval shard = new SimpleInterval("20", 10000000, 10001000);
    // Pad the shard by 100bp on each side so reads overlapping the edges are included.
    final SimpleInterval paddedShard = new SimpleInterval(shard.getContig(), shard.getStart() - 100, shard.getEnd() + 100);
    final HaplotypeCallerArgumentCollection hcArgs = new HaplotypeCallerArgumentCollection();
    // We expect isActive() to return 1.0 for the sites below, and 0.0 for all other sites
    final List<SimpleInterval> expectedActiveSites = Arrays.asList(new SimpleInterval("20", 9999996, 9999996), new SimpleInterval("20", 9999997, 9999997), new SimpleInterval("20", 10000117, 10000117), new SimpleInterval("20", 10000211, 10000211), new SimpleInterval("20", 10000439, 10000439), new SimpleInterval("20", 10000598, 10000598), new SimpleInterval("20", 10000694, 10000694), new SimpleInterval("20", 10000758, 10000758), new SimpleInterval("20", 10001019, 10001019));
    try (final ReadsDataSource reads = new ReadsDataSource(bamFile.toPath());
         final ReferenceDataSource ref = new ReferenceFileSource(referenceFile);
         final CachingIndexedFastaSequenceFile referenceReader = new CachingIndexedFastaSequenceFile(referenceFile)) {
        final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgs, reads.getHeader(), referenceReader);
        final List<ReadFilter> filters = HaplotypeCallerEngine.makeStandardHCReadFilters();
        filters.forEach(f -> f.setHeader(reads.getHeader()));
        // Fold the standard HaplotypeCaller read filters into one combined filter.
        ReadFilter combinedFilter = filters.get(0);
        for (final ReadFilter next : filters.subList(1, filters.size())) {
            combinedFilter = combinedFilter.and(next);
        }
        final Iterator<GATKRead> filteredReads = new ReadFilteringIterator(reads.query(paddedShard), combinedFilter);
        final LocusIteratorByState libs = new LocusIteratorByState(filteredReads, DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(reads.getHeader()), reads.getHeader(), false);
        libs.forEachRemaining(pileup -> {
            final SimpleInterval site = new SimpleInterval(pileup.getLocation());
            final ReferenceContext refContext = new ReferenceContext(ref, site);
            final ActivityProfileState result = hcEngine.isActive(pileup, refContext, new FeatureContext(null, site));
            // Probability must be exactly 1.0 at expected sites, 0.0 everywhere else.
            final double expected = expectedActiveSites.contains(site) ? 1.0 : 0.0;
            Assert.assertEquals(result.isActiveProb(), expected, "Wrong isActive probability for site " + site);
        });
    }
}
Example usage of org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState in the project gatk-protected by broadinstitute,
from the class HaplotypeCallerEngineUnitTest, method testIsActive.
// Verifies that HaplotypeCallerEngine.isActive() reports probability 1.0 at exactly the
// expected set of active sites within a 1kb shard of NA12878 WGS data, and 0.0 elsewhere.
@Test
public void testIsActive() throws IOException {
final File testBam = new File(NA12878_20_21_WGS_bam);
final File reference = new File(b37_reference_20_21);
final SimpleInterval shardInterval = new SimpleInterval("20", 10000000, 10001000);
// Pad the shard by 100bp on each side so reads overlapping the shard edges are included.
final SimpleInterval paddedShardInterval = new SimpleInterval(shardInterval.getContig(), shardInterval.getStart() - 100, shardInterval.getEnd() + 100);
final HaplotypeCallerArgumentCollection hcArgs = new HaplotypeCallerArgumentCollection();
// We expect isActive() to return 1.0 for the sites below, and 0.0 for all other sites
final List<SimpleInterval> expectedActiveSites = Arrays.asList(new SimpleInterval("20", 9999996, 9999996), new SimpleInterval("20", 9999997, 9999997), new SimpleInterval("20", 10000117, 10000117), new SimpleInterval("20", 10000211, 10000211), new SimpleInterval("20", 10000439, 10000439), new SimpleInterval("20", 10000598, 10000598), new SimpleInterval("20", 10000694, 10000694), new SimpleInterval("20", 10000758, 10000758), new SimpleInterval("20", 10001019, 10001019));
// try-with-resources ensures the reads, reference, and cached FASTA reader are all closed.
try (final ReadsDataSource reads = new ReadsDataSource(testBam.toPath());
final ReferenceDataSource ref = new ReferenceFileSource(reference);
final CachingIndexedFastaSequenceFile referenceReader = new CachingIndexedFastaSequenceFile(reference)) {
final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgs, reads.getHeader(), referenceReader);
// Combine the standard HaplotypeCaller read filters into a single filter via and().
List<ReadFilter> hcFilters = HaplotypeCallerEngine.makeStandardHCReadFilters();
hcFilters.forEach(filter -> filter.setHeader(reads.getHeader()));
ReadFilter hcCombinedFilter = hcFilters.get(0);
for (int i = 1; i < hcFilters.size(); ++i) {
hcCombinedFilter = hcCombinedFilter.and(hcFilters.get(i));
}
final Iterator<GATKRead> readIter = new ReadFilteringIterator(reads.query(paddedShardInterval), hcCombinedFilter);
// Build per-locus pileups over the padded interval; DownsamplingMethod.NONE keeps all reads.
final LocusIteratorByState libs = new LocusIteratorByState(readIter, DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(reads.getHeader()), reads.getHeader(), false);
libs.forEachRemaining(pileup -> {
final SimpleInterval pileupInterval = new SimpleInterval(pileup.getLocation());
final ReferenceContext pileupRefContext = new ReferenceContext(ref, pileupInterval);
final ActivityProfileState isActiveResult = hcEngine.isActive(pileup, pileupRefContext, new FeatureContext(null, pileupInterval));
// Probability should be exactly 1.0 at expected active sites and 0.0 at every other locus.
final double expectedIsActiveValue = expectedActiveSites.contains(pileupInterval) ? 1.0 : 0.0;
Assert.assertEquals(isActiveResult.isActiveProb(), expectedIsActiveValue, "Wrong isActive probability for site " + pileupInterval);
});
}
}
Aggregations