use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class CollectMultipleMetricsSpark method runTool.
/**
 * Runs every requested metrics collector over the unfiltered reads.
 *
 * <p>For each provider returned by {@code getCollectorsToRun()}, a collector is created and
 * validated, given the reads that pass that collector's own default read filters, and then
 * asked to save its metrics.
 *
 * @param ctx the Spark context handed in by the framework; not referenced directly in this method
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final JavaRDD<GATKRead> unFilteredReads = getUnfilteredReads();
    final List<SparkCollectorProvider> collectorsToRun = getCollectorsToRun();
    if (collectorsToRun.size() > 1) {
        // if there is more than one collector to run, cache the
        // unfiltered RDD so we don't recompute it
        unFilteredReads.cache();
    }
    for (final SparkCollectorProvider provider : collectorsToRun) {
        final MetricsCollectorSpark<? extends MetricsArgumentCollection> metricsCollector = provider.createCollector(outputBaseName, metricAccumulationLevel.accumulationLevels, getDefaultHeaders(), getHeaderForReads());
        // The loop variable already is the list element, so use it directly instead of
        // searching the list for it again on every iteration.
        validateCollector(metricsCollector, provider.getClass().getName());
        // Execute the collector's lifecycle
        //Bypass the framework merging of command line filters and just apply the default
        //ones specified by the collector
        final ReadFilter readFilter = ReadFilter.fromList(metricsCollector.getDefaultReadFilters(), getHeaderForReads());
        metricsCollector.collectMetrics(unFilteredReads.filter(r -> readFilter.test(r)), getHeaderForReads());
        metricsCollector.saveMetrics(getReadSourceName(), getAuthHolder());
    }
}
use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class CountReadsSpark method runTool.
/**
 * Counts the reads returned by {@code getReads()}, prints the count to standard output and,
 * when {@code out} is set, also writes it to that location.
 *
 * @param ctx the Spark context handed in by the framework; not referenced directly in this method
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final long totalReads = getReads().count();
    System.out.println(totalReads);

    // Nothing more to do when no output location was requested.
    if (out == null) {
        return;
    }

    try (final PrintStream writer = new PrintStream(BucketUtils.createFile(out))) {
        writer.print(totalReads);
    }
}
use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class FlagStatSpark method runTool.
/**
 * Aggregates the reads returned by {@code getReads()} into a single {@code FlagStatus},
 * prints it to standard output and, when {@code out} is set, also writes it to that location.
 *
 * @param ctx the Spark context handed in by the framework; not referenced directly in this method
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final JavaRDD<GATKRead> inputReads = getReads();
    // Fold reads into a FlagStatus per partition (add), then combine the partial results (merge).
    final FlagStatus summary = inputReads.aggregate(new FlagStatus(), FlagStatus::add, FlagStatus::merge);
    System.out.println(summary);

    // Nothing more to do when no output location was requested.
    if (out == null) {
        return;
    }

    try (final PrintStream writer = new PrintStream(BucketUtils.createFile(out))) {
        writer.print(summary);
    }
}
use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class ReadsPipelineSpark method runTool.
/**
 * Runs the reads pipeline end to end: marks duplicates, builds a recalibration report from
 * the BQSR-filtered reads, applies that report to the duplicate-marked reads, and writes the
 * result to {@code output}.
 *
 * @param ctx Spark context used to load the known variants, add read context data and
 *            broadcast the recalibration report
 * @throws UserException.Require2BitReferenceForBroadcast if the BROADCAST join strategy is
 *         selected but the reference is not compatible with Spark broadcast
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final boolean usingBroadcastJoin = joinStrategy == JoinStrategy.BROADCAST;
    if (usingBroadcastJoin && !getReference().isCompatibleWithSparkBroadcast()) {
        throw new UserException.Require2BitReferenceForBroadcast();
    }

    //TODO: should this use getUnfilteredReads? getReads will apply default and command line filters
    final JavaRDD<GATKRead> inputReads = getReads();

    // Mark duplicates, then pass the result through cleanupTemporaryAttributes.
    final JavaRDD<GATKRead> dedupedReads = MarkDuplicatesSpark.cleanupTemporaryAttributes(
            MarkDuplicatesSpark.mark(inputReads, getHeaderForReads(), duplicatesScoringStrategy, new OpticalDuplicateFinder(), getRecommendedNumReducers()));

    // The deduped reads have already had the WellformedReadFilter applied to them, which
    // is all the filtering that MarkDupes and ApplyBQSR want. BQSR itself wants additional
    // filtering performed, so we do that here.
    //NOTE: this doesn't honor enabled/disabled commandline filters
    final ReadFilter bqsrOnlyFilter = ReadFilter.fromList(BaseRecalibrator.getBQSRSpecificReadFilterList(), getHeaderForReads());
    final JavaRDD<GATKRead> readsForRecalibration = dedupedReads.filter(candidate -> bqsrOnlyFilter.test(candidate));

    final JavaRDD<GATKVariant> knownSites = new VariantsSparkSource(ctx).getParallelVariants(baseRecalibrationKnownVariants, getIntervals());
    final JavaPairRDD<GATKRead, ReadContextData> readsWithContext = AddContextDataToReadSpark.add(ctx, readsForRecalibration, getReference(), knownSites, joinStrategy, getReferenceSequenceDictionary(), readShardSize, readShardPadding);

    // Build the recalibration report and broadcast it for the apply step.
    final RecalibrationReport recalReport = BaseRecalibratorSparkFn.apply(readsWithContext, getHeaderForReads(), getReferenceSequenceDictionary(), bqsrArgs);
    final Broadcast<RecalibrationReport> sharedReport = ctx.broadcast(recalReport);

    // Recalibration is applied to all duplicate-marked reads, not just the BQSR-filtered subset.
    final JavaRDD<GATKRead> recalibratedReads = ApplyBQSRSparkFn.apply(dedupedReads, sharedReport, getHeaderForReads(), applyBqsrArgs.toApplyBQSRArgumentCollection(bqsrArgs.PRESERVE_QSCORES_LESS_THAN));
    writeReads(ctx, output, recalibratedReads);
}
use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class ReadsDownsamplingIterator method next.
/**
 * Returns the pending read and advances the iterator to the one after it.
 *
 * @return the read that was pending when this method was called
 * @throws NoSuchElementException if there is no pending read
 */
@Override
public GATKRead next() {
    // Snapshot the pending read before advancing replaces it.
    final GATKRead pending = nextRead;
    if (pending == null) {
        throw new NoSuchElementException("next() called when there are no more items");
    }
    advanceToNextRead();
    return pending;
}
Aggregations