Use of org.broadinstitute.hellbender.utils.read.BDGAlignmentRecordToGATKReadAdapter in the gatk project by broadinstitute.
Class ReadsSparkSource, method getADAMReads.
/**
 * Loads ADAM reads stored as Parquet.
 *
 * <p>Records are read via {@code AvroParquetInputFormat} using the {@code AlignmentRecord} schema,
 * wrapped in {@code BDGAlignmentRecordToGATKReadAdapter}, filtered with {@code samRecordOverlaps}
 * against {@code intervals}, and finally handed to {@code putPairsInSamePartition}.
 *
 * @param inputPath path to the Parquet data
 * @param intervals intervals passed to {@code samRecordOverlaps} to decide which reads are kept
 * @param header header attached to each read adapter and used when converting reads to SAM records
 *               for the overlap check; it is broadcast as-is, including when it is {@code null}
 *               (NOTE(review): whether the downstream helpers accept a null header is not visible here)
 * @return RDD of (ADAM-backed) GATKReads from the file.
 * @throws IOException propagated from creating the Hadoop {@code Job}
 */
public JavaRDD<GATKRead> getADAMReads(final String inputPath, final List<SimpleInterval> intervals, final SAMFileHeader header) throws IOException {
    final Job job = Job.getInstance(ctx.hadoopConfiguration());
    AvroParquetInputFormat.setAvroReadSchema(job, AlignmentRecord.getClassSchema());

    // A null header is broadcast exactly like a non-null one, so no special-casing is needed.
    final Broadcast<SAMFileHeader> bHeader = ctx.broadcast(header);

    @SuppressWarnings("unchecked")
    final JavaRDD<AlignmentRecord> recordsRdd = ctx.newAPIHadoopFile(
            inputPath, AvroParquetInputFormat.class, Void.class, AlignmentRecord.class, job.getConfiguration())
            .values();

    final JavaRDD<GATKRead> readsRdd =
            recordsRdd.map(record -> new BDGAlignmentRecordToGATKReadAdapter(record, bHeader.getValue()));

    // Read the header through the broadcast inside the closure (as the map step above does) rather
    // than capturing the local `header`, which would serialize it into the task closure.
    final JavaRDD<GATKRead> filteredRdd =
            readsRdd.filter(read -> samRecordOverlaps(read.convertToSAMRecord(bHeader.getValue()), intervals));

    // Runs on the driver, so the plain header reference is used here.
    return putPairsInSamePartition(header, filteredRdd);
}
Aggregations