Use of htsjdk.samtools.seekablestream.SeekableStream in the gatk project by broadinstitute.
From the class NioBam, the method getReads.
/**
 * Parses the BAM file into SAMRecords. Will be distributed onto at least 'numPartitions' partitions.
 *
 * <p>The BAM is opened once in this method, together with an in-memory copy of its index, solely to
 * compute the list of query intervals ("chunks") via {@code getAllChunksBalanced}. That reader and its
 * underlying channel are closed before this method returns. Each chunk is later read through its own
 * {@code ReadsIterable}, constructed from the BAM location, the index bytes and the chunk's interval.
 *
 * @param ctx the Spark context used to parallelize the chunks.
 * @param numPartitions the requested number of partitions, passed on to {@code getAllChunksBalanced};
 *                      the resulting RDD has one partition per chunk actually returned.
 * @return an RDD of the reads produced by iterating a {@code ReadsIterable} for every chunk.
 * @throws GATKException if an I/O error occurs while opening, indexing, chunking or closing the BAM.
 */
public JavaRDD<SAMRecord> getReads(JavaSparkContext ctx, int numPartitions) {
    try {
        final Path bamPath = IOUtils.getPath(bam);
        // try-with-resources guarantees the channel is released even if reading the index,
        // opening the reader, or computing the chunks throws.
        try (ChannelAsSeekableStream bamOverNIO =
                     new ChannelAsSeekableStream(Files.newByteChannel(bamPath), bamPath.toString())) {
            final byte[] index = getIndex();
            final List<QueryInterval> chunks;
            // This reader is only needed to work out the chunk boundaries; the returned RDD does not
            // reference it (the lambda below captures only bam, index and qi), so close it right away.
            try (SamReader bam3 = SamReaderFactory.makeDefault()
                    .validationStringency(ValidationStringency.LENIENT)
                    .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES)
                    .open(SamInputResource.of(bamOverNIO).index(new ByteArraySeekableStream(index)))) {
                chunks = getAllChunksBalanced(bam3, numPartitions);
            }
            // Ideally we'd get exactly the number of chunks the user is asking for, but until then...
            logger.debug("We got: " + chunks.size() + " chunks.");
            return ctx.parallelize(chunks, chunks.size()).flatMap(qi -> new ReadsIterable(bam, index, qi).iterator());
        }
    } catch (IOException e) {
        throw new GATKException("I/O error loading reads", e);
    }
}
Aggregations