Use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk by broadinstitute.
The class IntervalAlignmentContextIteratorUnitTest, method getAlignmentContexts:
private List<AlignmentContext> getAlignmentContexts(final List<SimpleInterval> locusIntervals, final String bamPath) {
    final List<String> sampleNames = Collections.singletonList("NA12878");
    final ReadsDataSource gatkReads = new ReadsDataSource(IOUtils.getPath(bamPath));
    final SAMFileHeader header = gatkReads.getHeader();
    // Keep only well-formed, mapped reads before building pileups.
    final Stream<GATKRead> filteredReads = Utils.stream(gatkReads)
            .filter(new WellformedReadFilter(header).and(new ReadFilterLibrary.MappedReadFilter()));
    final SAMSequenceDictionary dictionary = header.getSequenceDictionary();
    final LocusIteratorByState locusIteratorByState = new LocusIteratorByState(filteredReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, sampleNames, header, true);

    // A null interval list means "traverse the entire reference".
    List<SimpleInterval> relevantIntervals = locusIntervals;
    if (relevantIntervals == null) {
        relevantIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
    }
    final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(relevantIntervals.iterator());
    final IntervalAlignmentContextIterator intervalAlignmentContextIterator = new IntervalAlignmentContextIterator(locusIteratorByState, intervalLocusIterator, dictionary);

    return StreamSupport.stream(Spliterators.spliteratorUnknownSize(intervalAlignmentContextIterator, Spliterator.ORDERED), false).collect(Collectors.toList());
}
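For context, a minimal usage sketch of this helper. The interval, BAM path, and printed fields below are illustrative assumptions, not part of the test; AlignmentContext exposes its locus via getLocation() and its pileup depth via size(), so the returned list can be inspected directly.

// Hypothetical caller: collect per-locus pileups over one small interval.
final List<SimpleInterval> intervals =
        Collections.singletonList(new SimpleInterval("20", 9_999_900, 10_000_000));
final List<AlignmentContext> contexts = getAlignmentContexts(intervals, "reads.bam");
for (final AlignmentContext context : contexts) {
    // getLocation() is the locus; size() is the pileup depth at that locus.
    System.out.println(context.getLocation() + " depth=" + context.size());
}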
Use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk by broadinstitute.
The class PrintReadsIntegrationTest, method testUnmappedReadInclusion:
@Test(dataProvider = "UnmappedReadInclusionTestData")
public void testUnmappedReadInclusion(final File input, final String reference, final List<String> intervalStrings, final List<String> expectedReadNames) {
    final File outFile = createTempFile("testUnmappedReadInclusion", ".bam");

    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.add("-I");
    args.add(input.getAbsolutePath());
    args.add("-O");
    args.add(outFile.getAbsolutePath());
    for (final String intervalString : intervalStrings) {
        args.add("-L");
        args.add(intervalString);
    }
    if (reference != null) {
        args.add("-R");
        args.add(reference);
    }
    runCommandLine(args);

    // Read the output back and check both the read count and the order of read names.
    try (final ReadsDataSource outputReadsSource = new ReadsDataSource(outFile.toPath())) {
        final List<GATKRead> actualReads = new ArrayList<>();
        for (final GATKRead read : outputReadsSource) {
            actualReads.add(read);
        }
        Assert.assertEquals(actualReads.size(), expectedReadNames.size(), "Wrong number of reads output");
        for (int readNumber = 0; readNumber < actualReads.size(); ++readNumber) {
            Assert.assertEquals(actualReads.get(readNumber).getName(), expectedReadNames.get(readNumber), "Unexpected read name");
        }
    }
}
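If only the unmapped reads in the output were of interest, ReadsDataSource can also query them directly rather than iterating everything. A hedged sketch, assuming the queryUnmapped() method present on this era's ReadsDataSource and reusing the outFile from above:

// Sketch: iterate just the unmapped reads of a coordinate-sorted, indexed BAM.
try (final ReadsDataSource source = new ReadsDataSource(outFile.toPath())) {
    final Iterator<GATKRead> unmappedReads = source.queryUnmapped();
    while (unmappedReads.hasNext()) {
        System.out.println(unmappedReads.next().getName());
    }
}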
Use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk by broadinstitute.
The class ReadsSparkSourceUnitTest, method getSerialReads:
/**
 * Loads reads serially via a SamReaderFactory-backed ReadsDataSource, then parallelizes them with ctx.parallelize.
 * @param ctx the Spark context used to parallelize the loaded reads
 * @param bam file to load
 * @param referencePath path to the reference, or null if none is needed
 * @param validationStringency how strictly to validate records while decoding
 * @return RDD of (SAMRecord-backed) GATKReads from the file.
 */
public JavaRDD<GATKRead> getSerialReads(final JavaSparkContext ctx, final String bam, final String referencePath, final ValidationStringency validationStringency) {
    final SAMFileHeader readsHeader = new ReadsSparkSource(ctx, validationStringency).getHeader(bam, referencePath);

    // CRAM (and some BAM) decoding may need the reference, so attach it to the factory when provided.
    final SamReaderFactory samReaderFactory;
    if (referencePath != null) {
        final File reference = new File(referencePath);
        samReaderFactory = SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(reference);
    } else {
        samReaderFactory = SamReaderFactory.makeDefault().validationStringency(validationStringency);
    }

    final List<GATKRead> records = Lists.newArrayList();
    try (final ReadsDataSource readsSource = new ReadsDataSource(IOUtils.getPath(bam), samReaderFactory)) {
        for (final GATKRead read : readsSource) {
            records.add(read);
        }
    }
    return ctx.parallelize(records);
}
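A hypothetical call site for this helper (the path and stringency are placeholders), useful when comparing the serially loaded RDD against ReadsSparkSource's distributed loading:

// Sketch: load reads serially into an RDD and count them.
final JavaRDD<GATKRead> serialReads =
        getSerialReads(ctx, "reads.bam", null, ValidationStringency.SILENT);
System.out.println("Loaded " + serialReads.count() + " reads");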
Use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk by broadinstitute.
The class SparkUtilsUnitTest, method testConvertHeaderlessHadoopBamShardToBam:
@Test
public void testConvertHeaderlessHadoopBamShardToBam() throws Exception {
    final File bamShard = new File(publicTestDir + "org/broadinstitute/hellbender/utils/spark/reads_data_source_test1.bam.headerless.part-r-00000");
    final File output = createTempFile("testConvertHadoopBamShardToBam", ".bam");
    final File headerSource = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
    final int expectedReadCount = 11;

    // Sanity check: the headerless shard must not be readable as a standalone BAM.
    boolean shardIsNotValidBam = false;
    try (final ReadsDataSource readsSource = new ReadsDataSource(bamShard.toPath())) {
        for (final GATKRead read : readsSource) {
            // Iterate without doing anything; decoding should throw before we get here.
        }
    } catch (SAMFormatException e) {
        shardIsNotValidBam = true;
    }
    Assert.assertTrue(shardIsNotValidBam, "Input shard should not be a valid BAM");

    // Borrow a header from a complete BAM to repair the shard.
    SAMFileHeader header = null;
    try (final SamReader headerReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(headerSource)) {
        header = headerReader.getFileHeader();
    } catch (IOException e) {
        throw new UserException("Error reading header from " + headerSource.getAbsolutePath(), e);
    }
    SparkUtils.convertHeaderlessHadoopBamShardToBam(bamShard, header, output);

    // The repaired file should now be a valid BAM containing all of the shard's reads.
    int actualCount = 0;
    try (final ReadsDataSource readsSource = new ReadsDataSource(output.toPath())) {
        for (final GATKRead read : readsSource) {
            ++actualCount;
        }
    }
    Assert.assertEquals(actualCount, expectedReadCount, "Wrong number of reads in final BAM file");
}
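As a follow-up check, one could also confirm that the donor header was actually written to the repaired file. A hedged sketch using only htsjdk calls already seen above:

// Sketch: re-open the repaired BAM and inspect the header it now carries.
try (final SamReader reader = SamReaderFactory.makeDefault()
        .validationStringency(ValidationStringency.SILENT).open(output)) {
    System.out.println("Sort order: " + reader.getFileHeader().getSortOrder());
} catch (IOException e) {
    throw new UserException("Error reading repaired BAM " + output.getAbsolutePath(), e);
}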
Use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk-protected by broadinstitute.
The class HaplotypeCallerIntegrationTest, method testBamoutProducesReasonablySizedOutput:
@Test
public void testBamoutProducesReasonablySizedOutput() {
    Utils.resetRandomGenerator();

    // We will test that when running with -bamout over the testInterval, we produce
    // a bam with a number of reads that is within 10% of what GATK3.5 produces with
    // -bamout over the same interval. This is just to test that we produce a reasonably-sized
    // bam for the region, not to validate the haplotypes, etc. We don't want
    // this test to fail unless there is a likely problem with -bamout itself (e.g., an empty
    // or truncated bam).
    final String testInterval = "20:10000000-10010000";
    final int gatk3BamoutNumReads = 5170;

    final File vcfOutput = createTempFile("testBamoutProducesReasonablySizedOutput", ".vcf");
    final File bamOutput = createTempFile("testBamoutProducesReasonablySizedOutput", ".bam");

    final String[] args = {
        "-I", NA12878_20_21_WGS_bam,
        "-R", b37_reference_20_21,
        "-L", testInterval,
        "-O", vcfOutput.getAbsolutePath(),
        "-bamout", bamOutput.getAbsolutePath(),
        "-pairHMM", "AVX_LOGLESS_CACHING",
        "-stand_call_conf", "30.0"
    };
    runCommandLine(args);

    try (final ReadsDataSource bamOutReadsSource = new ReadsDataSource(bamOutput.toPath())) {
        int actualBamoutNumReads = 0;
        for (final GATKRead read : bamOutReadsSource) {
            ++actualBamoutNumReads;
        }
        final int readCountDifference = Math.abs(actualBamoutNumReads - gatk3BamoutNumReads);
        Assert.assertTrue(((double) readCountDifference / gatk3BamoutNumReads) < 0.10, "-bamout produced a bam with over 10% fewer/more reads than expected");
    }
}
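A related way to count only the reads overlapping the test interval is an indexed query on the bamout. This sketch assumes ReadsDataSource.query(SimpleInterval) as in this era of the engine, and that the bamout BAM has an accompanying index, which is not guaranteed for a temp file:

// Sketch: count reads overlapping the HaplotypeCaller test interval.
try (final ReadsDataSource source = new ReadsDataSource(bamOutput.toPath())) {
    final Iterator<GATKRead> overlapping =
            source.query(new SimpleInterval("20", 10_000_000, 10_010_000));
    int count = 0;
    while (overlapping.hasNext()) {
        overlapping.next();
        ++count;
    }
    System.out.println("Reads overlapping interval: " + count);
}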