Search in sources:

Example 1 with RecalibrationReport

use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.

the class ApplyBQSRSpark method runTool.

/**
 * Loads a recalibration report, applies it to this tool's reads, and writes the result.
 *
 * <p>The report is constructed from the stream opened on {@code bqsrRecalFile}, broadcast via
 * the Spark context, and handed to {@code ApplyBQSRSparkFn.apply} together with the reads
 * header and {@code applyBQSRArgs}. The returned reads are passed to {@code writeReads}
 * with {@code output} as the destination.
 *
 * @param ctx Spark context used for the broadcast and forwarded to {@code writeReads}
 */
@Override
protected void runTool(JavaSparkContext ctx) {
    final JavaRDD<GATKRead> inputReads = getReads();

    // null if we have no api key
    // NOTE(review): this value is not referenced again in this method; the call is kept as-is.
    final GCSOptions gcsOptions = getAuthenticatedGCSOptions();

    final RecalibrationReport report = new RecalibrationReport(BucketUtils.openFile(bqsrRecalFile));
    final Broadcast<RecalibrationReport> broadcastReport = ctx.broadcast(report);

    final JavaRDD<GATKRead> recalibratedReads =
            ApplyBQSRSparkFn.apply(inputReads, broadcastReport, getHeaderForReads(), applyBQSRArgs);
    writeReads(ctx, output, recalibratedReads);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) RecalibrationReport(org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport) GCSOptions(com.google.cloud.genomics.dataflow.utils.GCSOptions)

Example 2 with RecalibrationReport

use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.

the class BaseRecalibratorSpark method runTool.

/**
 * Computes a recalibration report from this tool's reads and known variants and writes it
 * to {@code outputTablesPath}.
 *
 * <p>Reads are paired with their context data through {@code AddContextDataToReadSpark.add},
 * the report is produced by {@code BaseRecalibratorSparkFn.apply}, and the report's
 * quantization info, recalibration tables and covariates are emitted with
 * {@code RecalUtils.outputRecalibrationReport}.
 *
 * @param ctx Spark context used to load the known variants and to build the read/context pairs
 * @throws UserException.Require2BitReferenceForBroadcast if the join strategy is
 *         {@code BROADCAST} and the reference reports that it is not compatible with Spark broadcast
 */
@Override
protected void runTool(JavaSparkContext ctx) {
    // Only consult the reference when the broadcast strategy was actually requested.
    final boolean broadcastRequested = joinStrategy == JoinStrategy.BROADCAST;
    if (broadcastRequested && !getReference().isCompatibleWithSparkBroadcast()) {
        throw new UserException.Require2BitReferenceForBroadcast();
    }

    final JavaRDD<GATKRead> inputReads = getReads();
    final JavaRDD<GATKVariant> knownSites =
            new VariantsSparkSource(ctx).getParallelVariants(knownVariants, getIntervals());

    // TODO: Look into broadcasting the reference to all of the workers. This would make AddContextDataToReadSpark
    // TODO: and ApplyBQSRStub simpler (#855).
    final JavaPairRDD<GATKRead, ReadContextData> readsWithContext = AddContextDataToReadSpark.add(
            ctx,
            inputReads,
            getReference(),
            knownSites,
            joinStrategy,
            getReferenceSequenceDictionary(),
            readShardSize,
            readShardPadding);

    // TODO: broadcast the reads header?
    final RecalibrationReport report = BaseRecalibratorSparkFn.apply(
            readsWithContext, getHeaderForReads(), getReferenceSequenceDictionary(), bqsrArgs);

    // The stream is closed automatically once the report has been written.
    try (final PrintStream out = new PrintStream(BucketUtils.createFile(outputTablesPath))) {
        RecalUtils.outputRecalibrationReport(
                out,
                bqsrArgs,
                report.getQuantizationInfo(),
                report.getRecalibrationTables(),
                report.getCovariates());
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadContextData(org.broadinstitute.hellbender.engine.ReadContextData) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) PrintStream(java.io.PrintStream) VariantsSparkSource(org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSource) RecalibrationReport(org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport)

Example 3 with RecalibrationReport

use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.

the class ReadsPipelineSpark method runTool.

/**
 * Runs duplicate marking, recalibration-report construction, and recalibration application
 * over this tool's reads in one pass, then writes the final reads to {@code output}.
 *
 * <p>The report is built only from the reads that pass the BQSR-specific filter list, but it
 * is applied to all duplicate-marked reads.
 *
 * @param ctx Spark context used to load known variants, build read/context pairs,
 *            broadcast the report, and write the reads
 * @throws UserException.Require2BitReferenceForBroadcast if the join strategy is
 *         {@code BROADCAST} and the reference reports that it is not compatible with Spark broadcast
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    // Only consult the reference when the broadcast strategy was actually requested.
    final boolean broadcastRequested = joinStrategy == JoinStrategy.BROADCAST;
    if (broadcastRequested && !getReference().isCompatibleWithSparkBroadcast()) {
        throw new UserException.Require2BitReferenceForBroadcast();
    }

    //TODO: should this use getUnfilteredReads? getReads will apply default and command line filters
    final JavaRDD<GATKRead> inputReads = getReads();

    // Mark duplicates, then strip the temporary attributes that marking left on the reads.
    final JavaRDD<GATKRead> dedupedReads = MarkDuplicatesSpark.cleanupTemporaryAttributes(
            MarkDuplicatesSpark.mark(
                    inputReads,
                    getHeaderForReads(),
                    duplicatesScoringStrategy,
                    new OpticalDuplicateFinder(),
                    getRecommendedNumReducers()));

    // The deduped reads have already had the WellformedReadFilter applied to them, which
    // is all the filtering that MarkDupes and ApplyBQSR want. BQSR itself wants additional
    // filtering performed, so we do that here.
    //NOTE: this doesn't honor enabled/disabled commandline filters
    final ReadFilter bqsrOnlyFilter =
            ReadFilter.fromList(BaseRecalibrator.getBQSRSpecificReadFilterList(), getHeaderForReads());
    final JavaRDD<GATKRead> readsForBQSR = dedupedReads.filter(read -> bqsrOnlyFilter.test(read));

    final JavaRDD<GATKVariant> knownSites =
            new VariantsSparkSource(ctx).getParallelVariants(baseRecalibrationKnownVariants, getIntervals());
    final JavaPairRDD<GATKRead, ReadContextData> readsWithContext = AddContextDataToReadSpark.add(
            ctx,
            readsForBQSR,
            getReference(),
            knownSites,
            joinStrategy,
            getReferenceSequenceDictionary(),
            readShardSize,
            readShardPadding);

    final RecalibrationReport report = BaseRecalibratorSparkFn.apply(
            readsWithContext, getHeaderForReads(), getReferenceSequenceDictionary(), bqsrArgs);
    final Broadcast<RecalibrationReport> broadcastReport = ctx.broadcast(report);

    // Recalibration is applied to the full deduped set, not just the BQSR-filtered subset.
    final JavaRDD<GATKRead> recalibratedReads = ApplyBQSRSparkFn.apply(
            dedupedReads,
            broadcastReport,
            getHeaderForReads(),
            applyBqsrArgs.toApplyBQSRArgumentCollection(bqsrArgs.PRESERVE_QSCORES_LESS_THAN));
    writeReads(ctx, output, recalibratedReads);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadContextData(org.broadinstitute.hellbender.engine.ReadContextData) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) OpticalDuplicateFinder(org.broadinstitute.hellbender.utils.read.markduplicates.OpticalDuplicateFinder) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) VariantsSparkSource(org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSource) RecalibrationReport(org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport)

Example 4 with RecalibrationReport

use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.

the class AnalyzeCovariates method doWork.

/**
 * Drives the tool: validates arguments, loads the recalibration reports, checks them for
 * consistency, then produces the CSV file and the plots.
 *
 * @return always {@code Optional.empty()}
 */
@Override
public Object doWork() {
    checkArgumentsValues();

    // Resolve the report files first, then build the reports from that same map.
    final Map<String, File> inputFiles = buildReportFileMap();
    final Map<String, RecalibrationReport> loadedReports = buildReportMap(inputFiles);
    checkReportConsistency(loadedReports);

    // The CSV is generated before plotting; the same file is then handed to generatePlots.
    final File csv = resolveCsvFile();
    generateCsvFile(csv, loadedReports);
    generatePlots(csv, inputFiles, resolvePlotFile());

    return Optional.empty();
}
Also used : File(java.io.File) RecalibrationReport(org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport)

Example 5 with RecalibrationReport

use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.

the class ApplyBQSRSparkFn method apply.

/**
 * Applies a {@code BQSRReadTransformer} to every read of {@code reads}, one partition at a time.
 *
 * <p>For each partition a single transformer is built from the broadcast report, the reads
 * header and {@code args}, and each read of the partition is passed through it. The mapped
 * reads are returned as the stream's own iterator instead of being collected into a list
 * first, so a partition is not copied into memory before being handed back to Spark.
 *
 * @param reads           reads to transform
 * @param reportBroadcast broadcast holding the recalibration report; its value is read once per partition
 * @param readsHeader     header passed to the transformer's constructor
 * @param args            argument collection passed to the transformer's constructor
 * @return an RDD containing the transformer's output for every input read
 */
public static JavaRDD<GATKRead> apply(JavaRDD<GATKRead> reads, final Broadcast<RecalibrationReport> reportBroadcast, final SAMFileHeader readsHeader, ApplyBQSRArgumentCollection args) {
    return reads.mapPartitions(readsIterator -> {
        final RecalibrationReport report = reportBroadcast.getValue();
        final BQSRReadTransformer transformer = new BQSRReadTransformer(readsHeader, report, args);
        // Single-use view: every call to iterator() hands back the same partition iterator.
        final Iterable<GATKRead> readsIterable = () -> readsIterator;
        // Hand back the mapped stream's iterator directly rather than materializing a List.
        // NOTE(review): assumes Utils.stream returns a sequential, lazily-evaluated stream - confirm.
        return Utils.stream(readsIterable).map(read -> transformer.apply(read)).iterator();
    });
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) BQSRReadTransformer(org.broadinstitute.hellbender.transformers.BQSRReadTransformer) RecalibrationReport(org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport)

Aggregations

RecalibrationReport (org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport): 6, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 5, ReadContextData (org.broadinstitute.hellbender.engine.ReadContextData): 3, VariantsSparkSource (org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSource): 3, GATKVariant (org.broadinstitute.hellbender.utils.variant.GATKVariant): 3, ReadFilter (org.broadinstitute.hellbender.engine.filters.ReadFilter): 2, GCSOptions (com.google.cloud.genomics.dataflow.utils.GCSOptions): 1, File (java.io.File): 1, PrintStream (java.io.PrintStream): 1, BQSRReadTransformer (org.broadinstitute.hellbender.transformers.BQSRReadTransformer): 1, OpticalDuplicateFinder (org.broadinstitute.hellbender.utils.read.markduplicates.OpticalDuplicateFinder): 1