use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.
the class VariantRecalibrator method onTraversalSuccess.
//---------------------------------------------------------------------------------------------------------------
//
// on traversal success
//
//---------------------------------------------------------------------------------------------------------------
@Override
public Object onTraversalSuccess() {
// finish processing any queued variants
consumeQueuedVariants();
for (int i = 1; i <= max_attempts; i++) {
try {
dataManager.setData(reduceSum);
// Each data point is now (x - mean) / standard deviation
dataManager.normalizeData();
// Generate the positive model using the training data and evaluate each variant
final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData();
final GaussianMixtureModel goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
engine.evaluateData(dataManager.getData(), goodModel, false);
// Generate the negative model using the worst performing data and evaluate each variant contrastively
final List<VariantDatum> negativeTrainingData = dataManager.selectWorstVariants();
final GaussianMixtureModel badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
// Don't need the aggregate data anymore so let's free up the memory
dataManager.dropAggregateData();
engine.evaluateData(dataManager.getData(), badModel, true);
if (badModel.failedToConverge || goodModel.failedToConverge) {
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
}
if (outputModel) {
GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
try (final PrintStream modelReportStream = new PrintStream(modelReport)) {
report.print(modelReportStream);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile("File: (" + modelReport + ")", e);
}
}
engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
final int nCallsAtTruth = TrancheManager.countCallsAtTruth(dataManager.getData(), Double.NEGATIVE_INFINITY);
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric(nCallsAtTruth);
final List<Tranche> tranches = TrancheManager.findTranches(dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE);
tranchesStream.print(Tranche.tranchesString(tranches));
logger.info("Writing out recalibration table...");
dataManager.writeOutRecalibrationTable(recalWriter, getBestAvailableSequenceDictionary());
if (RSCRIPT_FILE != null) {
logger.info("Writing out visualization Rscript file...");
createVisualizationScript(dataManager.getRandomDataForPlotting(1000, positiveTrainingData, negativeTrainingData, dataManager.getEvaluationData()), goodModel, badModel, 0.0, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]));
}
if (VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
// Print out an info message to make it clear why the tranches plot is not generated
logger.info("Tranches plot will not be generated since we are running in INDEL mode");
} else {
// Execute the RScript command to plot the table of truth values
RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(PLOT_TRANCHES_RSCRIPT, VariantRecalibrator.class));
executor.addArgs(new File(TRANCHES_FILE).getAbsoluteFile(), TARGET_TITV);
// Print out the command line to make it clear to the user what is being executed and how one might modify it
logger.info("Executing: " + executor.getApproximateCommandLine());
executor.exec();
}
return true;
} catch (Exception e) {
// IllegalArgumentException.
if (i == max_attempts) {
throw e;
} else {
logger.info(String.format("Exception occurred on attempt %d of %d. Trying again. Message was: '%s'", i, max_attempts, e.getMessage()));
}
}
}
return false;
}
use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.
the class RScriptExecutor method exec.
public boolean exec() {
if (!RSCRIPT_EXISTS) {
if (!ignoreExceptions) {
throw new UserException.CannotExecuteRScript(RSCRIPT_MISSING_MESSAGE);
} else {
logger.warn("Skipping: " + getApproximateCommandLine());
return false;
}
}
List<File> tempFiles = new ArrayList<>();
try {
File tempLibSourceDir = IOUtils.tempDir("RlibSources.", "");
File tempLibInstallationDir = IOUtils.tempDir("Rlib.", "");
tempFiles.add(tempLibSourceDir);
tempFiles.add(tempLibInstallationDir);
StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibInstallationDir).append("';");
if (!this.libraries.isEmpty()) {
List<String> tempLibraryPaths = new ArrayList<>();
for (RScriptLibrary library : this.libraries) {
File tempLibrary = library.writeLibrary(tempLibSourceDir);
tempFiles.add(tempLibrary);
tempLibraryPaths.add(tempLibrary.getAbsolutePath());
}
expression.append("install.packages(");
expression.append("pkgs=c('").append(StringUtils.join(tempLibraryPaths, "', '")).append("'), lib=tempLibDir, repos=NULL, type='source', ");
// Install faster by eliminating cruft.
expression.append("INSTALL_opts=c('--no-libs', '--no-data', '--no-help', '--no-demo', '--no-exec')");
expression.append(");");
for (RScriptLibrary library : this.libraries) {
expression.append("library('").append(library.getLibraryName()).append("', lib.loc=tempLibDir);");
}
}
for (Resource script : this.scriptResources) {
File tempScript = IOUtils.writeTempResource(script);
tempFiles.add(tempScript);
expression.append("source('").append(tempScript.getAbsolutePath()).append("');");
}
for (File script : this.scriptFiles) {
expression.append("source('").append(script.getAbsolutePath()).append("');");
}
String[] cmd = new String[this.args.size() + 3];
int i = 0;
cmd[i++] = RSCRIPT_BINARY;
cmd[i++] = "-e";
cmd[i++] = expression.toString();
for (String arg : this.args) cmd[i++] = arg;
ProcessSettings processSettings = new ProcessSettings(cmd);
//if debug is enabled, output the stdout and stdder, otherwise capture it to a buffer
if (logger.isDebugEnabled()) {
processSettings.getStdoutSettings().printStandard(true);
processSettings.getStderrSettings().printStandard(true);
} else {
processSettings.getStdoutSettings().setBufferSize(8192);
processSettings.getStderrSettings().setBufferSize(8192);
}
ProcessController controller = ProcessController.getThreadLocal();
if (logger.isDebugEnabled()) {
logger.debug("Executing:");
for (String arg : cmd) logger.debug(" " + arg);
}
ProcessOutput po = controller.exec(processSettings);
int exitValue = po.getExitValue();
logger.debug("Result: " + exitValue);
if (exitValue != 0) {
StringBuilder message = new StringBuilder();
message.append(String.format("\nRscript exited with %d\nCommand Line: %s", exitValue, String.join(" ", cmd)));
//if debug was enabled the stdout/error were already output somewhere
if (!logger.isDebugEnabled()) {
message.append(String.format("\nStdout: %s\nStderr: %s", po.getStdout().getBufferString(), po.getStderr().getBufferString()));
}
throw new RScriptExecutorException(message.toString());
}
return true;
} catch (GATKException e) {
if (!ignoreExceptions) {
throw e;
} else {
logger.warn(e.getMessage());
return false;
}
} finally {
for (File temp : tempFiles) FileUtils.deleteQuietly(temp);
}
}
use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.
the class CollectRnaSeqMetrics method finish.
@Override
protected void finish() {
collector.finish();
final MetricsFile<RnaSeqMetrics, Integer> file = getMetricsFile();
collector.addAllLevelsToFile(file);
file.write(OUTPUT);
boolean atLeastOneHistogram = false;
for (Histogram<Integer> histo : file.getAllHistograms()) {
atLeastOneHistogram = atLeastOneHistogram || !histo.isEmpty();
}
// Generate the coverage by position plot
if (CHART_OUTPUT != null && atLeastOneHistogram) {
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(R_SCRIPT, CollectRnaSeqMetrics.class));
executor.addArgs(OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle);
executor.exec();
}
}
use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.
the class CollectRrbsMetrics method doWork.
@Override
protected Object doWork() {
if (!METRICS_FILE_PREFIX.endsWith(".")) {
METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
}
final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);
final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new UserException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
}
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
final ProgressLogger progressLogger = new ProgressLogger(logger);
final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);
for (final SAMRecord samRecord : samReader) {
progressLogger.record(samRecord);
if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
metricsCollector.acceptRecord(samRecord, referenceSequence);
}
}
metricsCollector.finish();
final MetricsFile<RrbsMetrics, Long> rrbsMetrics = getMetricsFile();
metricsCollector.addAllLevelsToFile(rrbsMetrics);
// Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
// we get it out split it apart to the two separate MetricsFiles and write them to file
final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
detailsFile.addMetric(detailMetric);
}
}
summaryFile.write(SUMMARY_OUT);
detailsFile.write(DETAILS_OUT);
if (PRODUCE_PLOT) {
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(R_SCRIPT, CollectRrbsMetrics.class));
executor.addArgs(DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
executor.exec();
}
CloserUtil.close(samReader);
return null;
}
use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.
the class MeanQualityByCycle method finish.
@Override
protected void finish() {
// Generate a "Histogram" of mean quality and write it to the file
final MetricsFile<?, Integer> metrics = getMetricsFile();
metrics.addHistogram(q.getMeanQualityHistogram());
if (!oq.isEmpty())
metrics.addHistogram(oq.getMeanQualityHistogram());
metrics.write(OUTPUT);
if (q.isEmpty() && oq.isEmpty()) {
logger.warn("No valid bases found in input file. No plot will be produced.");
} else if (PRODUCE_PLOT) {
// Now run R to generate a chart
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(R_SCRIPT, MeanQualityByCycle.class));
executor.addArgs(OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle);
executor.exec();
}
}
Aggregations