Use of org.seqdoop.hadoop_bam.VariantContextWritable in project gatk by broadinstitute.
The class VariantsSparkSource, method getParallelVariantContexts.
/**
 * Loads variants in parallel using Hadoop-BAM for VCFs and BCFs.
 * @param vcf file to load variants from.
 * @param intervals intervals of variants to include, or null if all should be included.
 * @return JavaRDD<VariantContext> of variants from the file.
 */
public JavaRDD<VariantContext> getParallelVariantContexts(final String vcf, final List<SimpleInterval> intervals) {
    final Configuration conf = new Configuration();
    // Register codecs so both plain gzip and block-gzipped (BGZF) VCFs can be decompressed.
    conf.setStrings("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName(), BGZFCodec.class.getCanonicalName());
    if (intervals != null && !intervals.isEmpty()) {
        VCFInputFormat.setIntervals(conf, intervals);
    }
    // ctx is the class's JavaSparkContext field; Hadoop-BAM's VCFInputFormat splits the input for parallel reading.
    final JavaPairRDD<LongWritable, VariantContextWritable> rdd2 = ctx.newAPIHadoopFile(vcf, VCFInputFormat.class, LongWritable.class, VariantContextWritable.class, conf);
    // Unwrap each Writable to yield the underlying htsjdk VariantContext.
    return rdd2.map(v1 -> v1._2().get());
}
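A minimal usage sketch for the method above, assuming VariantsSparkSource is constructed from the JavaSparkContext held in its ctx field; the constructor call, application name, and VCF path here are illustrative, not taken from the source:

// Sketch only: local Spark context and an example input path.
SparkConf sparkConf = new SparkConf().setAppName("LoadVariants").setMaster("local[*]");
JavaSparkContext ctx = new JavaSparkContext(sparkConf);
VariantsSparkSource variantsSource = new VariantsSparkSource(ctx);  // assumed constructor
// Pass null for intervals to load every variant in the file.
JavaRDD<VariantContext> variants = variantsSource.getParallelVariantContexts("hdfs:///data/sample.vcf.gz", null);
System.out.println("Loaded " + variants.count() + " variants.");
ctx.stop();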
Use of org.seqdoop.hadoop_bam.VariantContextWritable in project gatk by broadinstitute.
The class VariantsSparkSink, method pairVariantsWithVariantContextWritables.
private static JavaPairRDD<VariantContext, VariantContextWritable> pairVariantsWithVariantContextWritables(JavaRDD<VariantContext> records) {
    return records.mapToPair(variantContext -> {
        // Wrap each VariantContext in a Hadoop Writable so a Hadoop OutputFormat can serialize it.
        final VariantContextWritable variantContextWritable = new VariantContextWritable();
        variantContextWritable.set(variantContext);
        return new Tuple2<>(variantContext, variantContextWritable);
    });
}
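For context, a hedged sketch of where this pairing fits: the resulting pair RDD can be handed to a Hadoop OutputFormat such as Hadoop-BAM's KeyIgnoringVCFOutputFormat, which ignores the key and writes only the wrapped VariantContext values. The output path below is illustrative, and the step where the VCF header is made available to the output format is elided; the real VariantsSparkSink does additional work to propagate the header.

// Sketch only: write the paired RDD as sharded VCF parts via Hadoop-BAM.
// Assumes conf already carries whatever KeyIgnoringVCFOutputFormat needs
// to recover the VCF header (GATK subclasses the format to supply it).
JavaPairRDD<VariantContext, VariantContextWritable> pairs =
        pairVariantsWithVariantContextWritables(variants);
pairs.saveAsNewAPIHadoopFile(
        "hdfs:///output/variants",          // illustrative output directory
        VariantContext.class,               // key class (ignored by the output format)
        VariantContextWritable.class,       // value class actually written
        KeyIgnoringVCFOutputFormat.class,   // org.seqdoop.hadoop_bam output format
        conf);                              // Hadoop Configuration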