Use of org.broadinstitute.hellbender.tools.spark.utils.HopscotchSet in the gatk project by broadinstitute.
Class PathSeqFilterSpark, method doKmerFiltering.
/**
 * Filters {@code reads} with a {@link ContainsKmerReadFilterSpark} built from the kmer library
 * stored at {@code KMER_LIB_PATH}.
 *
 * <p>The library is deserialized with Kryo as a {@link HopscotchSet} of {@link SVKmer}s, then
 * broadcast through the Spark context before being handed to the filter.
 *
 * @param ctx   Spark context used to broadcast the kmer library
 * @param reads reads to be filtered
 * @return the RDD obtained by applying the kmer filter to {@code reads}
 */
@SuppressWarnings("unchecked")
private JavaRDD<GATKRead> doKmerFiltering(final JavaSparkContext ctx, final JavaRDD<GATKRead> reads) {
    // NOTE(review): 'options' is not read below; the call is kept because any side effects of
    // getAuthenticatedGCSOptions() are not visible from here. Confirm whether it can be dropped
    // or should be passed to BucketUtils.openFile.
    final PipelineOptions options = getAuthenticatedGCSOptions();

    final Kryo kryo = new Kryo();
    kryo.setReferences(false);

    final Set<SVKmer> kmerLibSet;
    // try-with-resources so the underlying stream is released even if deserialization fails.
    try (final Input input = new Input(BucketUtils.openFile(KMER_LIB_PATH))) {
        // Unchecked cast: the file is expected to hold a serialized HopscotchSet<SVKmer>.
        kmerLibSet = (HopscotchSet<SVKmer>) kryo.readClassAndObject(input);
    }

    return reads.filter(new ContainsKmerReadFilterSpark(ctx.broadcast(kmerLibSet), KMER_SIZE));
}
Use of org.broadinstitute.hellbender.tools.spark.utils.HopscotchSet in the gatk project by broadinstitute.
Class PathSeqKmerSpark, method runTool.
/**
 * Gets the list of kmers found in the reference by {@code findKmers}, collects them into a
 * {@link HopscotchSet}, and writes that set to {@code OUTPUT_FILE} using Kryo.
 *
 * @param ctx Spark context passed through to {@code findKmers}
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    // The sequence dictionary is optional: it stays null when no reads header is available.
    final SAMFileHeader hdr = getHeaderForReads();
    SAMSequenceDictionary dict = null;
    if (hdr != null) {
        dict = hdr.getSequenceDictionary();
    }

    final PipelineOptions options = getAuthenticatedGCSOptions();
    final ReferenceMultiSource referenceMultiSource = getReference();
    final List<SVKmer> kmerList = findKmers(ctx, KMER_SIZE, referenceMultiSource, options, dict);
    final HopscotchSet<SVKmer> kmerSet = new HopscotchSet<>(kmerList);

    final Kryo kryo = new Kryo();
    kryo.setReferences(false);

    // try-with-resources guarantees the output is closed even if serialization throws.
    try (final Output output = new Output(BucketUtils.createFile(OUTPUT_FILE))) {
        kryo.writeClassAndObject(output, kmerSet);
    }
}
Use of org.broadinstitute.hellbender.tools.spark.utils.HopscotchSet in the gatk project by broadinstitute.
Class PathSeqKmerSparkIntegrationTest, method test.
/**
 * Runs the tool on the {@code hg19mini.fasta} test reference and checks that the kmer set it
 * writes equals the set stored in the expected {@code kmer.hss} file.
 *
 * @throws Exception if running the tool or reading either kmer file fails
 */
@SuppressWarnings("unchecked")
@Test(groups = "spark")
public void test() throws Exception {
    final File expectedFile = getTestFile("kmer.hss");
    final File ref = getTestFile("hg19mini.fasta");
    final File output = createTempFile("test", ".hss");
    // The temp file is removed before the run; presumably the tool must create its own output
    // file (TODO confirm).
    if (!output.delete()) {
        Assert.fail("Could not delete temporary output file " + output);
    }

    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addFileArgument("reference", ref);
    args.addOutput(output);
    this.runCommandLine(args.getArgsArray());

    final Kryo kryo = new Kryo();
    kryo.setReferences(false);

    final Set<SVKmer> expectedKmerLib;
    final Set<SVKmer> testKmerLib;
    // try-with-resources closes both streams even if deserialization fails.
    try (final Input expectedInput = new Input(FileUtils.openInputStream(expectedFile));
         final Input testInput = new Input(FileUtils.openInputStream(output))) {
        expectedKmerLib = (HopscotchSet<SVKmer>) kryo.readClassAndObject(expectedInput);
        testKmerLib = (HopscotchSet<SVKmer>) kryo.readClassAndObject(testInput);
    }

    // TestNG's argument order is (actual, expected).
    Assert.assertEquals(testKmerLib, expectedKmerLib);
}
Aggregations