Use of org.broadinstitute.hellbender.cmdline.argumentcollections.OpticalDuplicatesArgumentCollection in project gatk by broadinstitute.
In the class MarkDuplicatesSparkUnitTest, method markDupesTest.
/**
 * Runs {@code MarkDuplicatesSpark.mark} over the reads loaded from {@code input} and checks
 * the resulting counts.
 *
 * <p>Three assertions are made, in order: the loaded reads number {@code totalExpected};
 * marking leaves the number of reads unchanged; and the reads for which
 * {@code GATKRead.isDuplicate()} is true number {@code dupsExpected}.
 *
 * @param input         location of the reads, passed to {@code ReadsSparkSource}
 * @param totalExpected expected number of reads both before and after marking
 * @param dupsExpected  expected number of reads flagged as duplicates after marking
 * @throws IOException declared by the test signature
 */
@Test(dataProvider = "md", groups = "spark")
public void markDupesTest(final String input, final long totalExpected, final long dupsExpected) throws IOException {
    final JavaSparkContext sparkContext = SparkContextFactory.getTestSparkContext();
    final ReadsSparkSource source = new ReadsSparkSource(sparkContext);

    // NOTE(review): the second argument is passed as null here; presumably "no restriction" -- confirm.
    final JavaRDD<GATKRead> inputReads = source.getParallelReads(input, null);
    final long loadedCount = inputReads.count();
    Assert.assertEquals(loadedCount, totalExpected);

    final SAMFileHeader samHeader = source.getHeader(input, null);

    // Build an optical-duplicate finder from a freshly constructed argument collection;
    // when that collection carries no read-name regex, no finder is used at all.
    final OpticalDuplicatesArgumentCollection opticalArgs = new OpticalDuplicatesArgumentCollection();
    final String readNameRegex = opticalArgs.READ_NAME_REGEX;
    final OpticalDuplicateFinder finder;
    if (readNameRegex == null) {
        finder = null;
    } else {
        finder = new OpticalDuplicateFinder(readNameRegex, opticalArgs.OPTICAL_DUPLICATE_PIXEL_DISTANCE, null);
    }

    // NOTE(review): the trailing 1 is presumably a partition/reducer count -- confirm against MarkDuplicatesSpark.mark.
    final JavaRDD<GATKRead> marked = MarkDuplicatesSpark.mark(
            inputReads, samHeader, MarkDuplicatesScoringStrategy.SUM_OF_BASE_QUALITIES, finder, 1);
    final long markedCount = marked.count();
    Assert.assertEquals(markedCount, totalExpected);

    final long duplicateCount = marked.filter(read -> read.isDuplicate()).count();
    Assert.assertEquals(duplicateCount, dupsExpected);
}
Aggregations