use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk by broadinstitute.
the class CollectRrbsMetrics method doWork.
@Override
protected Object doWork() {
if (!METRICS_FILE_PREFIX.endsWith(".")) {
METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
}
final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);
final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new UserException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
}
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
final ProgressLogger progressLogger = new ProgressLogger(logger);
final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);
for (final SAMRecord samRecord : samReader) {
progressLogger.record(samRecord);
if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
metricsCollector.acceptRecord(samRecord, referenceSequence);
}
}
metricsCollector.finish();
final MetricsFile<RrbsMetrics, Long> rrbsMetrics = getMetricsFile();
metricsCollector.addAllLevelsToFile(rrbsMetrics);
// Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
// we get it out split it apart to the two separate MetricsFiles and write them to file
final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
detailsFile.addMetric(detailMetric);
}
}
summaryFile.write(SUMMARY_OUT);
detailsFile.write(DETAILS_OUT);
if (PRODUCE_PLOT) {
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(R_SCRIPT, CollectRrbsMetrics.class));
executor.addArgs(DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
executor.exec();
}
CloserUtil.close(samReader);
return null;
}
use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk by broadinstitute.
the class MeanQualityByCycle method finish.
@Override
protected void finish() {
// Generate a "Histogram" of mean quality and write it to the file
final MetricsFile<?, Integer> metrics = getMetricsFile();
metrics.addHistogram(q.getMeanQualityHistogram());
if (!oq.isEmpty())
metrics.addHistogram(oq.getMeanQualityHistogram());
metrics.write(OUTPUT);
if (q.isEmpty() && oq.isEmpty()) {
logger.warn("No valid bases found in input file. No plot will be produced.");
} else if (PRODUCE_PLOT) {
// Now run R to generate a chart
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(R_SCRIPT, MeanQualityByCycle.class));
executor.addArgs(OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle);
executor.exec();
}
}
use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk by broadinstitute.
the class QualityScoreDistribution method finish.
@Override
protected void finish() {
// Built the Histograms out of the long[]s
final Histogram<Byte> qHisto = new Histogram<>("QUALITY", "COUNT_OF_Q");
final Histogram<Byte> oqHisto = new Histogram<>("QUALITY", "COUNT_OF_OQ");
for (int i = 0; i < qCounts.length; ++i) {
if (qCounts[i] > 0)
qHisto.increment((byte) i, (double) qCounts[i]);
if (oqCounts[i] > 0)
oqHisto.increment((byte) i, (double) oqCounts[i]);
}
final MetricsFile<?, Byte> metrics = getMetricsFile();
metrics.addHistogram(qHisto);
if (!oqHisto.isEmpty())
metrics.addHistogram(oqHisto);
metrics.write(OUTPUT);
if (qHisto.isEmpty() && oqHisto.isEmpty()) {
log.warn("No valid bases found in input file. No plot will be produced.");
} else if (PRODUCE_PLOT) {
// Now run R to generate a chart
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(R_SCRIPT, QualityScoreDistribution.class));
executor.addArgs(OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle);
executor.exec();
}
}
use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk-protected by broadinstitute.
the class PlotACNVResults method writeSegmentedAlleleFractionPlot.
/**
* @param sampleName Sample name derived from input files
* @param contigNames List containing contig names
* @param contigLengths List containing contig lengths (same order as contigNames)
*/
private void writeSegmentedAlleleFractionPlot(final String sampleName, final List<String> contigNames, final List<Integer> contigLengths) {
//names separated by delimiter
final String contigNamesArg = contigNames.stream().collect(Collectors.joining(CONTIG_DELIMITER));
//lengths separated by delimiter
final String contigLengthsArg = contigLengths.stream().map(Object::toString).collect(Collectors.joining(CONTIG_DELIMITER));
final String outputDirArg = addTrailingSlashIfNecessary(outputDir);
final RScriptExecutor executor = new RScriptExecutor();
//this runs the R statement "source("CNVPlottingLibrary.R")" before the main script runs
executor.addScript(new Resource(CNV_PLOTTING_R_LIBRARY, PlotSegmentedCopyRatio.class));
executor.addScript(new Resource(ACNV_PLOTTING_R_SCRIPT, PlotACNVResults.class));
//--args is needed for Rscript to recognize other arguments properly
executor.addArgs("--args", "--sample_name=" + sampleName, "--snp_counts_file=" + snpCountsFile, "--tangent_file=" + tangentFile, "--segments_file=" + segmentsFile, "--contig_names=" + contigNamesArg, "--contig_lengths=" + contigLengthsArg, "--output_dir=" + outputDirArg, "--output_prefix=" + outputPrefix);
executor.exec();
}
use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk-protected by broadinstitute.
the class PlotSegmentedCopyRatio method writeSegmentedCopyRatioPlots.
/**
* @param sampleName Sample name derived from input files
* @param contigNames List containing contig names
* @param contigLengths List containing contig lengths (same order as contigNames)
*/
private void writeSegmentedCopyRatioPlots(final String sampleName, final List<String> contigNames, final List<Integer> contigLengths) {
//names separated by delimiter
final String contigNamesArg = contigNames.stream().collect(Collectors.joining(CONTIG_DELIMITER));
//names separated by delimiter
final String contigLengthsArg = contigLengths.stream().map(Object::toString).collect(Collectors.joining(CONTIG_DELIMITER));
final String outputDirArg = addTrailingSlashIfNecessary(outputDir);
final String isLog2InputArg = isLog2Input ? "TRUE" : "FALSE";
final RScriptExecutor executor = new RScriptExecutor();
//this runs the R statement "source("CNVPlottingLibrary.R")" before the main script runs
executor.addScript(new Resource(CNV_PLOTTING_R_LIBRARY, PlotSegmentedCopyRatio.class));
executor.addScript(new Resource(COPY_RATIO_PLOTTING_R_SCRIPT, PlotSegmentedCopyRatio.class));
//--args is needed for Rscript to recognize other arguments properly
executor.addArgs("--args", "--sample_name=" + sampleName, "--tangent_file=" + tangentFile, "--pre_tangent_file=" + preTangentFile, "--segments_file=" + segmentsFile, "--contig_names=" + contigNamesArg, "--contig_lengths=" + contigLengthsArg, "--output_dir=" + outputDirArg, "--output_prefix=" + outputPrefix, "--is_log2_input=" + isLog2InputArg);
executor.exec();
}
Aggregations