use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.
the class ApplyBQSRSpark method runTool.
/**
 * Applies a previously computed recalibration report to the input reads and writes the result.
 *
 * The report is read from {@code bqsrRecalFile}, broadcast through {@code ctx}, handed to
 * {@code ApplyBQSRSparkFn.apply} together with the reads and their header, and the resulting
 * reads are written to {@code output}.
 *
 * @param ctx the Spark context used to broadcast the report and to write the reads
 * @throws java.io.UncheckedIOException if an I/O error occurs while reading or closing the report stream
 */
@Override
protected void runTool(JavaSparkContext ctx) {
    JavaRDD<GATKRead> initialReads = getReads();
    // null if we have no api key
    // NOTE(review): this value is not used below; the call is kept in case it has side effects - confirm.
    final GCSOptions gcsOptions = getAuthenticatedGCSOptions();

    // Open the report inside try-with-resources so the underlying stream is always released;
    // previously the stream handed to the RecalibrationReport constructor was never closed.
    final RecalibrationReport recalibrationReport;
    try (final java.io.InputStream reportStream = BucketUtils.openFile(bqsrRecalFile)) {
        recalibrationReport = new RecalibrationReport(reportStream);
    } catch (final java.io.IOException e) {
        throw new java.io.UncheckedIOException("I/O error while reading recalibration report: " + bqsrRecalFile, e);
    }

    Broadcast<RecalibrationReport> recalibrationReportBroadCast = ctx.broadcast(recalibrationReport);
    final JavaRDD<GATKRead> recalibratedReads = ApplyBQSRSparkFn.apply(initialReads, recalibrationReportBroadCast, getHeaderForReads(), applyBQSRArgs);
    writeReads(ctx, output, recalibratedReads);
}
use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.
the class BaseRecalibratorSpark method runTool.
/**
 * Computes a recalibration report for the input reads and writes it to {@code outputTablesPath}.
 *
 * The reads are paired with context data (reference and known variants) via
 * {@code AddContextDataToReadSpark.add}, passed to {@code BaseRecalibratorSparkFn.apply}, and the
 * resulting report is printed with {@code RecalUtils.outputRecalibrationReport}.
 *
 * @param ctx the Spark context used to load the known variants and to build the read context
 * @throws UserException.Require2BitReferenceForBroadcast if the BROADCAST join strategy is selected
 *         and the reference reports that it is not compatible with Spark broadcast
 */
@Override
protected void runTool(JavaSparkContext ctx) {
    // Reject the BROADCAST join strategy up front when the reference cannot be broadcast.
    if (joinStrategy == JoinStrategy.BROADCAST) {
        if (!getReference().isCompatibleWithSparkBroadcast()) {
            throw new UserException.Require2BitReferenceForBroadcast();
        }
    }

    final JavaRDD<GATKRead> reads = getReads();
    final JavaRDD<GATKVariant> knownSites =
            new VariantsSparkSource(ctx).getParallelVariants(knownVariants, getIntervals());

    // TODO: Look into broadcasting the reference to all of the workers. This would make AddContextDataToReadSpark
    // TODO: and ApplyBQSRStub simpler (#855).
    final JavaPairRDD<GATKRead, ReadContextData> readsWithContext = AddContextDataToReadSpark.add(
            ctx,
            reads,
            getReference(),
            knownSites,
            joinStrategy,
            getReferenceSequenceDictionary(),
            readShardSize,
            readShardPadding);

    // TODO: broadcast the reads header?
    final RecalibrationReport report = BaseRecalibratorSparkFn.apply(
            readsWithContext, getHeaderForReads(), getReferenceSequenceDictionary(), bqsrArgs);

    // The stream is closed automatically once the report has been printed.
    try (final PrintStream out = new PrintStream(BucketUtils.createFile(outputTablesPath))) {
        RecalUtils.outputRecalibrationReport(
                out,
                bqsrArgs,
                report.getQuantizationInfo(),
                report.getRecalibrationTables(),
                report.getCovariates());
    }
}
use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.
the class ReadsPipelineSpark method runTool.
/**
 * Runs the combined pipeline: mark duplicates, compute a recalibration report, apply it, write reads.
 *
 * Duplicate-marked reads are filtered with the BQSR-specific read filters only for the purpose of
 * building the recalibration report; the report is then broadcast and applied to the unfiltered
 * duplicate-marked reads, which are written to {@code output}.
 *
 * @param ctx the Spark context used to load known variants, broadcast the report and write the reads
 * @throws UserException.Require2BitReferenceForBroadcast if the BROADCAST join strategy is selected
 *         and the reference reports that it is not compatible with Spark broadcast
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    // Reject the BROADCAST join strategy up front when the reference cannot be broadcast.
    if (joinStrategy == JoinStrategy.BROADCAST) {
        if (!getReference().isCompatibleWithSparkBroadcast()) {
            throw new UserException.Require2BitReferenceForBroadcast();
        }
    }

    //TODO: should this use getUnfilteredReads? getReads will apply default and command line filters
    final JavaRDD<GATKRead> inputReads = getReads();

    // Mark duplicates, then strip the temporary attributes that marking left on the reads.
    final JavaRDD<GATKRead> markedReads = MarkDuplicatesSpark.cleanupTemporaryAttributes(
            MarkDuplicatesSpark.mark(
                    inputReads,
                    getHeaderForReads(),
                    duplicatesScoringStrategy,
                    new OpticalDuplicateFinder(),
                    getRecommendedNumReducers()));

    // The markedReads have already had the WellformedReadFilter applied to them, which
    // is all the filtering that MarkDupes and ApplyBQSR want. BQSR itself wants additional
    // filtering performed, so we do that here.
    //NOTE: this doesn't honor enabled/disabled commandline filters
    final ReadFilter bqsrOnlyFilter =
            ReadFilter.fromList(BaseRecalibrator.getBQSRSpecificReadFilterList(), getHeaderForReads());
    final JavaRDD<GATKRead> readsForBQSR = markedReads.filter(bqsrOnlyFilter::test);

    final JavaRDD<GATKVariant> knownSites =
            new VariantsSparkSource(ctx).getParallelVariants(baseRecalibrationKnownVariants, getIntervals());

    final JavaPairRDD<GATKRead, ReadContextData> readsWithContext = AddContextDataToReadSpark.add(
            ctx,
            readsForBQSR,
            getReference(),
            knownSites,
            joinStrategy,
            getReferenceSequenceDictionary(),
            readShardSize,
            readShardPadding);

    final RecalibrationReport report = BaseRecalibratorSparkFn.apply(
            readsWithContext, getHeaderForReads(), getReferenceSequenceDictionary(), bqsrArgs);

    // The report is applied to all duplicate-marked reads, not just the BQSR-filtered subset.
    final Broadcast<RecalibrationReport> broadcastReport = ctx.broadcast(report);
    final JavaRDD<GATKRead> recalibratedReads = ApplyBQSRSparkFn.apply(
            markedReads,
            broadcastReport,
            getHeaderForReads(),
            applyBqsrArgs.toApplyBQSRArgumentCollection(bqsrArgs.PRESERVE_QSCORES_LESS_THAN));

    writeReads(ctx, output, recalibratedReads);
}
use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.
the class AnalyzeCovariates method doWork.
/**
 * Runs the tool: validates arguments, loads the reports, then produces the CSV file and the plots.
 *
 * The steps run strictly in order: argument validation, report loading, consistency check,
 * CSV generation and finally plot generation from that CSV file.
 *
 * @return always {@code Optional.empty()}
 */
@Override
public Object doWork() {
    checkArgumentsValues();

    // Load every report and make sure they can be used together before producing any output.
    final Map<String, File> filesByName = buildReportFileMap();
    final Map<String, RecalibrationReport> reportsByName = buildReportMap(filesByName);
    checkReportConsistency(reportsByName);

    // The CSV file is written first because it is also an input to the plotting step.
    final File csv = resolveCsvFile();
    generateCsvFile(csv, reportsByName);
    generatePlots(csv, filesByName, resolvePlotFile());

    return Optional.empty();
}
use of org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport in project gatk by broadinstitute.
the class ApplyBQSRSparkFn method apply.
/**
 * Applies a recalibration report to every read of an RDD.
 *
 * For each partition a single {@code BQSRReadTransformer} is built from the broadcast report,
 * the reads header and the arguments, and each read of the partition is passed through it.
 *
 * @param reads the reads to transform
 * @param reportBroadcast the broadcast recalibration report
 * @param readsHeader the header passed to the transformer
 * @param args the arguments passed to the transformer
 * @return an RDD holding the result of the transformer for each input read
 */
public static JavaRDD<GATKRead> apply(JavaRDD<GATKRead> reads, final Broadcast<RecalibrationReport> reportBroadcast, final SAMFileHeader readsHeader, ApplyBQSRArgumentCollection args) {
    return reads.mapPartitions(readsIterator -> {
        final RecalibrationReport report = reportBroadcast.getValue();
        final BQSRReadTransformer transformer = new BQSRReadTransformer(readsHeader, report, args);
        final Iterable<GATKRead> readsIterable = () -> readsIterator;
        // Hand back the stream's own iterator so reads are transformed one at a time as they are
        // consumed, instead of first collecting the whole transformed partition into a list.
        return Utils.stream(readsIterable).map(read -> transformer.apply(read)).iterator();
    });
}
Aggregations