
Example 16 with ReadFilter

use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.

the class GATKReadFilterPluginDescriptor method getInstance.

// Instantiate a new ReadFilter derived object and save it in the list
@Override
public Object getInstance(final Class<?> pluggableClass) throws IllegalAccessException, InstantiationException {
    ReadFilter readFilter = null;
    final String simpleName = pluggableClass.getSimpleName();
    if (allDiscoveredReadFilters.containsKey(simpleName)) {
        // plugin names must be unique even across packages
        throw new IllegalArgumentException(String.format(
                "A plugin class name collision was detected (%s/%s). Simple names of plugin classes must be unique across packages.",
                pluggableClass.getName(),
                allDiscoveredReadFilters.get(simpleName).getClass().getName()));
    } else if (toolDefaultReadFilters.containsKey(simpleName)) {
        // an instance of this class was provided by the tool as one of its default filters;
        // use the default instance as the target for command line argument values
        // rather than creating a new one, in case it has state provided by the tool
        readFilter = toolDefaultReadFilters.get(simpleName);
    } else {
        readFilter = (ReadFilter) pluggableClass.newInstance();
    }
    // Add all filters to the allDiscoveredReadFilters list, even if the instance came from the
    // tool defaults list (we want the actual instances to be shared to preserve state)
    allDiscoveredReadFilters.put(simpleName, readFilter);
    return readFilter;
}
Also used : CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter)
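
For context, here is a minimal sketch of the kind of plugin class getInstance instantiates. The filter class and its threshold field are hypothetical; the important detail is the public no-arg constructor, which reflective instantiation via pluggableClass.newInstance() requires.

// Hypothetical plugin filter (not part of GATK): keeps reads at or above
// a minimum mapping quality. The public no-arg constructor is required
// because the descriptor instantiates discovered classes reflectively.
public final class MinMappingQualityReadFilter extends ReadFilter {
    private static final long serialVersionUID = 1L;

    // In a real plugin this would be populated from a command-line @Argument.
    public int minMappingQuality = 20;

    public MinMappingQualityReadFilter() { }

    @Override
    public boolean test(final GATKRead read) {
        return read.getMappingQuality() >= minMappingQuality;
    }
}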

Example 17 with ReadFilter

use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.

the class BQSRPipelineSpark method runTool.

@Override
protected void runTool(final JavaSparkContext ctx) {
    if (joinStrategy == JoinStrategy.BROADCAST && !getReference().isCompatibleWithSparkBroadcast()) {
        throw new UserException.Require2BitReferenceForBroadcast();
    }
    // Should this use getUnfilteredReads()? getReads() merges the default and command-line
    // filters, but the code below applies other filters to parts of the pipeline that do
    // not honor the command line.
    final JavaRDD<GATKRead> initialReads = getReads();
    // The initial reads have already had the WellformedReadFilter applied to them, which
    // is all the filtering that ApplyBQSR wants. BQSR itself wants additional filtering
    // performed, so we do that here.
    // NOTE: this filter doesn't honor filters enabled or disabled on the command line
    final ReadFilter bqsrReadFilter = ReadFilter.fromList(BaseRecalibrator.getBQSRSpecificReadFilterList(), getHeaderForReads());
    final JavaRDD<GATKRead> filteredReadsForBQSR = initialReads.filter(read -> bqsrReadFilter.test(read));
    final VariantsSparkSource variantsSparkSource = new VariantsSparkSource(ctx);
    final JavaRDD<GATKVariant> bqsrKnownVariants = variantsSparkSource.getParallelVariants(baseRecalibrationKnownVariants, getIntervals());
    final JavaPairRDD<GATKRead, ReadContextData> rddReadContext = AddContextDataToReadSpark.add(ctx, filteredReadsForBQSR, getReference(), bqsrKnownVariants, joinStrategy, getReferenceSequenceDictionary(), readShardSize, readShardPadding);
    // Note: we use the sequence dictionary from the reads themselves, not from the reference.
    final RecalibrationReport bqsrReport = BaseRecalibratorSparkFn.apply(rddReadContext, getHeaderForReads(), getHeaderForReads().getSequenceDictionary(), bqsrArgs);
    final Broadcast<RecalibrationReport> reportBroadcast = ctx.broadcast(bqsrReport);
    final JavaRDD<GATKRead> finalReads = ApplyBQSRSparkFn.apply(initialReads, reportBroadcast, getHeaderForReads(), applyBqsrArgs.toApplyBQSRArgumentCollection(bqsrArgs.PRESERVE_QSCORES_LESS_THAN));
    writeReads(ctx, output, finalReads);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadContextData(org.broadinstitute.hellbender.engine.ReadContextData) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) VariantsSparkSource(org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSource) RecalibrationReport(org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport)
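
ReadFilter.fromList collapses the filter list into a single conjunctive filter and gives each filter the header it may need. As a sketch, the equivalent hand-rolled composition, assuming the Predicate-style and() combinator that ReadFilter exposes and ReadFilterLibrary.ALLOW_ALL_READS as the identity filter:

// Sketch: AND the BQSR-specific filters together one by one. The result
// should accept the same reads as the fromList call in the example above.
ReadFilter composite = ReadFilterLibrary.ALLOW_ALL_READS;
for (final ReadFilter f : BaseRecalibrator.getBQSRSpecificReadFilterList()) {
    composite = composite.and(f);
}

In practice fromList is preferable: it handles the header wiring for filters that need one, which the manual loop above does not.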

Example 18 with ReadFilter

use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.

the class LocalReadShardUnitTest method shardIterationTestData.

@DataProvider(name = "ShardIterationTestData")
public Object[][] shardIterationTestData() {
    final ReadsDataSource readsSource = new ReadsDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));
    final ReadFilter keepReadBOnly = new ReadFilter() {

        private static final long serialVersionUID = 1L;

        @Override
        public boolean test(GATKRead read) {
            return read.getName().equals("b");
        }
    };
    final LocalReadShard filteredShard = new LocalReadShard(new SimpleInterval("1", 200, 210), new SimpleInterval("1", 200, 210), readsSource);
    filteredShard.setReadFilter(keepReadBOnly);
    final ReadsDownsampler readsBAndCOnlyDownsampler = new KeepReadsBAndCOnlyDownsampler();
    final LocalReadShard downsampledShard = new LocalReadShard(new SimpleInterval("1", 1, 5000), new SimpleInterval("1", 1, 5000), readsSource);
    downsampledShard.setDownsampler(readsBAndCOnlyDownsampler);
    return new Object[][] {
        { new LocalReadShard(new SimpleInterval("1", 200, 210), new SimpleInterval("1", 200, 210), readsSource), Arrays.asList("a", "b", "c") },
        { new LocalReadShard(new SimpleInterval("1", 200, 209), new SimpleInterval("1", 200, 209), readsSource), Arrays.asList("a", "b") },
        { new LocalReadShard(new SimpleInterval("1", 200, 204), new SimpleInterval("1", 200, 204), readsSource), Arrays.asList("a") },
        { new LocalReadShard(new SimpleInterval("1", 200, 204), new SimpleInterval("1", 190, 210), readsSource), Arrays.asList("a", "b", "c") },
        { new LocalReadShard(new SimpleInterval("1", 200, 204), new SimpleInterval("1", 200, 205), readsSource), Arrays.asList("a", "b") },
        { new LocalReadShard(new SimpleInterval("1", 400, 500), new SimpleInterval("1", 400, 500), readsSource), Collections.<String>emptyList() },
        { filteredShard, Arrays.asList("b") },
        { downsampledShard, Arrays.asList("b", "c") }
    };
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadsDownsampler(org.broadinstitute.hellbender.utils.downsampling.ReadsDownsampler) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) DataProvider(org.testng.annotations.DataProvider)
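
As a sketch, a test consuming this data provider might drive a shard by iterating it and comparing the collected read names against the expected list (this assumes LocalReadShard is iterable over GATKRead, as its pairing with expected name lists here implies):

// Sketch: collect the names of reads surviving the shard's filter.
final List<String> actualNames = new ArrayList<>();
for (final GATKRead read : filteredShard) {
    actualNames.add(read.getName());
}
Assert.assertEquals(actualNames, Arrays.asList("b")); // only read "b" passes keepReadBOnly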

Example 19 with ReadFilter

use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.

the class IntervalWalker method traverse.

@Override
public void traverse() {
    final ReadFilter readFilter = makeReadFilter();
    for (final SimpleInterval interval : intervalsForTraversal) {
        apply(interval, new ReadsContext(reads, interval, readFilter), new ReferenceContext(reference, interval), new FeatureContext(features, interval));
        progressMeter.update(interval);
    }
}
Also used : ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)
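
Concrete walkers supply the apply(...) callback invoked in this loop. A minimal hypothetical override that counts the filtered reads overlapping each interval (assuming ReadsContext is iterable over GATKRead, and using the logger field that GATK tools inherit):

// Hypothetical IntervalWalker subclass body: ReadsContext yields the reads
// overlapping the interval after the read filter has been applied.
@Override
public void apply(final SimpleInterval interval, final ReadsContext readsContext,
                  final ReferenceContext referenceContext, final FeatureContext featureContext) {
    long readCount = 0;
    for (final GATKRead read : readsContext) {
        readCount++;
    }
    logger.info(String.format("%s: %d overlapping reads", interval, readCount));
}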

Example 20 with ReadFilter

use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.

the class InsertSizeMetricsCollectorSparkUnitTest method test.

@Test(dataProvider = "metricsfiles", groups = "spark")
public void test(final String fileName, final String referenceName, final boolean allLevels, final String expectedResultsFile) throws IOException {
    final String inputPath = new File(TEST_DATA_DIR, fileName).getAbsolutePath();
    final String referencePath = referenceName != null ? new File(referenceName).getAbsolutePath() : null;
    final File outfile = BaseTest.createTempFile("test", ".insert_size_metrics");
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    ReadsSparkSource readSource = new ReadsSparkSource(ctx, ValidationStringency.DEFAULT_STRINGENCY);
    SAMFileHeader samHeader = readSource.getHeader(inputPath, referencePath);
    JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputPath, referencePath);
    InsertSizeMetricsArgumentCollection isArgs = new InsertSizeMetricsArgumentCollection();
    isArgs.output = outfile.getAbsolutePath();
    if (allLevels) {
        isArgs.metricAccumulationLevel.accumulationLevels = new HashSet<>();
        isArgs.metricAccumulationLevel.accumulationLevels.add(MetricAccumulationLevel.ALL_READS);
        isArgs.metricAccumulationLevel.accumulationLevels.add(MetricAccumulationLevel.SAMPLE);
        isArgs.metricAccumulationLevel.accumulationLevels.add(MetricAccumulationLevel.LIBRARY);
        isArgs.metricAccumulationLevel.accumulationLevels.add(MetricAccumulationLevel.READ_GROUP);
    }
    InsertSizeMetricsCollectorSpark isSpark = new InsertSizeMetricsCollectorSpark();
    isSpark.initialize(isArgs, samHeader, null);
    // Since we're bypassing the framework in order to force this test to run on multiple
    // partitions, we need to build the read filter manually, since we don't have the plugin
    // descriptor to do it for us: remove the (default) FirstOfPairReadFilter, since that's
    // required for our tests to pass.
    List<ReadFilter> readFilters = isSpark.getDefaultReadFilters();
    // The stream result must be collected and reassigned; a bare filter(...) call is a no-op.
    // (Requires java.util.stream.Collectors.)
    readFilters = readFilters.stream().filter(f -> !f.getClass().getSimpleName().equals(ReadFilterLibrary.FirstOfPairReadFilter.class.getSimpleName())).collect(Collectors.toList());
    ReadFilter rf = ReadFilter.fromList(readFilters, samHeader);
    // Force the input RDD to be split into two partitions to ensure that the
    // reduce/combiners run
    rddParallelReads = rddParallelReads.repartition(2);
    isSpark.collectMetrics(rddParallelReads.filter(r -> rf.test(r)), samHeader);
    isSpark.saveMetrics(fileName, null);
    IntegrationTestSpec.assertEqualTextFiles(outfile, new File(TEST_DATA_DIR, expectedResultsFile), "#");
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Test(org.testng.annotations.Test) IOException(java.io.IOException) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) IntegrationTestSpec(org.broadinstitute.hellbender.utils.test.IntegrationTestSpec) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) File(java.io.File) HashSet(java.util.HashSet) List(java.util.List) ReadsSparkSource(org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSource) InsertSizeMetricsArgumentCollection(org.broadinstitute.hellbender.metrics.InsertSizeMetricsArgumentCollection) MetricAccumulationLevel(org.broadinstitute.hellbender.metrics.MetricAccumulationLevel) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) JavaRDD(org.apache.spark.api.java.JavaRDD) ReadFilterLibrary(org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary)
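
The filter-removal idiom in this test is easy to get wrong (the original discarded the stream result, making the removal a no-op). A small hypothetical helper, not a GATK API, makes the intent explicit:

// Hypothetical helper: return a copy of the filter list without any filter
// of the given class. Requires java.util.stream.Collectors.
static List<ReadFilter> withoutFilter(final List<ReadFilter> filters,
                                      final Class<? extends ReadFilter> unwanted) {
    return filters.stream()
            .filter(f -> !f.getClass().equals(unwanted))
            .collect(Collectors.toList());
}

The test body would then read: readFilters = withoutFilter(isSpark.getDefaultReadFilters(), ReadFilterLibrary.FirstOfPairReadFilter.class);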

Aggregations

ReadFilter (org.broadinstitute.hellbender.engine.filters.ReadFilter) 25
WellformedReadFilter (org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) 12
GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead) 10
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 7
File (java.io.File) 6
ReadFilterLibrary (org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary) 6
CountingReadFilter (org.broadinstitute.hellbender.engine.filters.CountingReadFilter) 5
MappingQualityReadFilter (org.broadinstitute.hellbender.engine.filters.MappingQualityReadFilter) 5
ArrayList (java.util.ArrayList) 4
JavaRDD (org.apache.spark.api.java.JavaRDD) 4
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext) 4
SAMFileHeader (htsjdk.samtools.SAMFileHeader) 3
SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary) 3
java.util (java.util) 3
Argument (org.broadinstitute.barclay.argparser.Argument) 3
CommandLineProgramProperties (org.broadinstitute.barclay.argparser.CommandLineProgramProperties) 3
DocumentedFeature (org.broadinstitute.barclay.help.DocumentedFeature) 3
GATKSparkTool (org.broadinstitute.hellbender.engine.spark.GATKSparkTool) 3
UserException (org.broadinstitute.hellbender.exceptions.UserException) 3
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 3