Use of org.broadinstitute.hellbender.metrics.InsertSizeMetricsArgumentCollection in project gatk by broadinstitute.
The test method of the class InsertSizeMetricsCollectorSparkUnitTest:
@Test(dataProvider = "metricsfiles", groups = "spark")
public void test(final String fileName, final String referenceName, final boolean allLevels, final String expectedResultsFile) throws IOException {
    final String inputPath = new File(TEST_DATA_DIR, fileName).getAbsolutePath();
    final String referencePath = referenceName != null ? new File(referenceName).getAbsolutePath() : null;
    final File outfile = BaseTest.createTempFile("test", ".insert_size_metrics");
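    // Load the header and the reads through ReadsSparkSource, using the shared test Spark context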
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    ReadsSparkSource readSource = new ReadsSparkSource(ctx, ValidationStringency.DEFAULT_STRINGENCY);
    SAMFileHeader samHeader = readSource.getHeader(inputPath, referencePath);
    JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputPath, referencePath);
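    // Configure the collector arguments: direct the metrics output to the temp file and, when
    // allLevels is set, request accumulation at every level rather than only the default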
    InsertSizeMetricsArgumentCollection isArgs = new InsertSizeMetricsArgumentCollection();
    isArgs.output = outfile.getAbsolutePath();
    if (allLevels) {
        isArgs.metricAccumulationLevel.accumulationLevels = new HashSet<>();
        isArgs.metricAccumulationLevel.accumulationLevels.add(MetricAccumulationLevel.ALL_READS);
        isArgs.metricAccumulationLevel.accumulationLevels.add(MetricAccumulationLevel.SAMPLE);
        isArgs.metricAccumulationLevel.accumulationLevels.add(MetricAccumulationLevel.LIBRARY);
        isArgs.metricAccumulationLevel.accumulationLevels.add(MetricAccumulationLevel.READ_GROUP);
    }
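    // Create the collector and initialize it with the test arguments and the input header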
    InsertSizeMetricsCollectorSpark isSpark = new InsertSizeMetricsCollectorSpark();
    isSpark.initialize(isArgs, samHeader, null);
    // Since we're bypassing the framework in order to force this test to run on multiple partitions, we
    // need to build the read filter manually, since we don't have the plugin descriptor to do it for us;
    // so remove the (default) FirstOfPairReadFilter so that second-in-pair reads also pass, since that's
    // required for our tests to pass
    final List<ReadFilter> readFilters = new ArrayList<>(isSpark.getDefaultReadFilters());
    readFilters.removeIf(f -> f.getClass().getSimpleName().equals(ReadFilterLibrary.FirstOfPairReadFilter.class.getSimpleName()));
    ReadFilter rf = ReadFilter.fromList(readFilters, samHeader);
    // Force the input RDD to be split into two partitions to ensure that the
    // reduce/combiners run
    rddParallelReads = rddParallelReads.repartition(2);
    isSpark.collectMetrics(rddParallelReads.filter(r -> rf.test(r)), samHeader);
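    // Write out the metrics, then compare against the expected results file, ignoring '#' comment lines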
    isSpark.saveMetrics(fileName, null);
    IntegrationTestSpec.assertEqualTextFiles(outfile, new File(TEST_DATA_DIR, expectedResultsFile), "#");
}
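For reference, this test is driven by a TestNG DataProvider named "metricsfiles" that supplies the input file name, an optional reference, the accumulation-level flag, and the expected-results file. A minimal sketch of what such a provider could look like, using hypothetical resource names rather than the actual gatk test data, and assuming org.testng.annotations.DataProvider is imported:

@DataProvider(name = "metricsfiles")
public Object[][] metricsFiles() {
    // Columns: fileName, referenceName, allLevels, expectedResultsFile.
    // The resource names below are illustrative placeholders only.
    return new Object[][]{
            {"insert_size_metrics_test.sam", null, false, "expectedInsertSizeMetricsL1.txt"},
            {"insert_size_metrics_test.bam", null, true, "expectedInsertSizeMetricsL3.txt"}
    };
}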