Search in sources :

Example 6 with Resource

use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.

the class VariantRecalibrator method onTraversalSuccess.

//---------------------------------------------------------------------------------------------------------------
//
// on traversal success
//
//---------------------------------------------------------------------------------------------------------------
@Override
public Object onTraversalSuccess() {
    // finish processing any queued variants
    consumeQueuedVariants();
    for (int i = 1; i <= max_attempts; i++) {
        try {
            dataManager.setData(reduceSum);
            // Each data point is now (x - mean) / standard deviation
            dataManager.normalizeData();
            // Generate the positive model using the training data and evaluate each variant
            final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData();
            final GaussianMixtureModel goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
            engine.evaluateData(dataManager.getData(), goodModel, false);
            // Generate the negative model using the worst performing data and evaluate each variant contrastively
            final List<VariantDatum> negativeTrainingData = dataManager.selectWorstVariants();
            final GaussianMixtureModel badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
            // Don't need the aggregate data anymore so let's free up the memory
            dataManager.dropAggregateData();
            engine.evaluateData(dataManager.getData(), badModel, true);
            if (badModel.failedToConverge || goodModel.failedToConverge) {
                throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
            }
            if (outputModel) {
                GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
                try (final PrintStream modelReportStream = new PrintStream(modelReport)) {
                    report.print(modelReportStream);
                } catch (FileNotFoundException e) {
                    throw new UserException.CouldNotCreateOutputFile("File: (" + modelReport + ")", e);
                }
            }
            engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
            // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
            final int nCallsAtTruth = TrancheManager.countCallsAtTruth(dataManager.getData(), Double.NEGATIVE_INFINITY);
            final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric(nCallsAtTruth);
            final List<Tranche> tranches = TrancheManager.findTranches(dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE);
            tranchesStream.print(Tranche.tranchesString(tranches));
            logger.info("Writing out recalibration table...");
            dataManager.writeOutRecalibrationTable(recalWriter, getBestAvailableSequenceDictionary());
            if (RSCRIPT_FILE != null) {
                logger.info("Writing out visualization Rscript file...");
                createVisualizationScript(dataManager.getRandomDataForPlotting(1000, positiveTrainingData, negativeTrainingData, dataManager.getEvaluationData()), goodModel, badModel, 0.0, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]));
            }
            if (VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
                // Print out an info message to make it clear why the tranches plot is not generated
                logger.info("Tranches plot will not be generated since we are running in INDEL mode");
            } else {
                // Execute the RScript command to plot the table of truth values
                RScriptExecutor executor = new RScriptExecutor();
                executor.addScript(new Resource(PLOT_TRANCHES_RSCRIPT, VariantRecalibrator.class));
                executor.addArgs(new File(TRANCHES_FILE).getAbsoluteFile(), TARGET_TITV);
                // Print out the command line to make it clear to the user what is being executed and how one might modify it
                logger.info("Executing: " + executor.getApproximateCommandLine());
                executor.exec();
            }
            return true;
        } catch (Exception e) {
            // IllegalArgumentException.
            if (i == max_attempts) {
                throw e;
            } else {
                logger.info(String.format("Exception occurred on attempt %d of %d. Trying again. Message was: '%s'", i, max_attempts, e.getMessage()));
            }
        }
    }
    return false;
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) PrintStream(java.io.PrintStream) FileNotFoundException(java.io.FileNotFoundException) Resource(org.broadinstitute.hellbender.utils.io.Resource) FileNotFoundException(java.io.FileNotFoundException) UserException(org.broadinstitute.hellbender.exceptions.UserException) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File)

Example 7 with Resource

use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.

the class RScriptExecutor method exec.

public boolean exec() {
    if (!RSCRIPT_EXISTS) {
        if (!ignoreExceptions) {
            throw new UserException.CannotExecuteRScript(RSCRIPT_MISSING_MESSAGE);
        } else {
            logger.warn("Skipping: " + getApproximateCommandLine());
            return false;
        }
    }
    List<File> tempFiles = new ArrayList<>();
    try {
        File tempLibSourceDir = IOUtils.tempDir("RlibSources.", "");
        File tempLibInstallationDir = IOUtils.tempDir("Rlib.", "");
        tempFiles.add(tempLibSourceDir);
        tempFiles.add(tempLibInstallationDir);
        StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibInstallationDir).append("';");
        if (!this.libraries.isEmpty()) {
            List<String> tempLibraryPaths = new ArrayList<>();
            for (RScriptLibrary library : this.libraries) {
                File tempLibrary = library.writeLibrary(tempLibSourceDir);
                tempFiles.add(tempLibrary);
                tempLibraryPaths.add(tempLibrary.getAbsolutePath());
            }
            expression.append("install.packages(");
            expression.append("pkgs=c('").append(StringUtils.join(tempLibraryPaths, "', '")).append("'), lib=tempLibDir, repos=NULL, type='source', ");
            // Install faster by eliminating cruft.
            expression.append("INSTALL_opts=c('--no-libs', '--no-data', '--no-help', '--no-demo', '--no-exec')");
            expression.append(");");
            for (RScriptLibrary library : this.libraries) {
                expression.append("library('").append(library.getLibraryName()).append("', lib.loc=tempLibDir);");
            }
        }
        for (Resource script : this.scriptResources) {
            File tempScript = IOUtils.writeTempResource(script);
            tempFiles.add(tempScript);
            expression.append("source('").append(tempScript.getAbsolutePath()).append("');");
        }
        for (File script : this.scriptFiles) {
            expression.append("source('").append(script.getAbsolutePath()).append("');");
        }
        String[] cmd = new String[this.args.size() + 3];
        int i = 0;
        cmd[i++] = RSCRIPT_BINARY;
        cmd[i++] = "-e";
        cmd[i++] = expression.toString();
        for (String arg : this.args) cmd[i++] = arg;
        ProcessSettings processSettings = new ProcessSettings(cmd);
        //if debug is enabled, output the stdout and stdder, otherwise capture it to a buffer
        if (logger.isDebugEnabled()) {
            processSettings.getStdoutSettings().printStandard(true);
            processSettings.getStderrSettings().printStandard(true);
        } else {
            processSettings.getStdoutSettings().setBufferSize(8192);
            processSettings.getStderrSettings().setBufferSize(8192);
        }
        ProcessController controller = ProcessController.getThreadLocal();
        if (logger.isDebugEnabled()) {
            logger.debug("Executing:");
            for (String arg : cmd) logger.debug("  " + arg);
        }
        ProcessOutput po = controller.exec(processSettings);
        int exitValue = po.getExitValue();
        logger.debug("Result: " + exitValue);
        if (exitValue != 0) {
            StringBuilder message = new StringBuilder();
            message.append(String.format("\nRscript exited with %d\nCommand Line: %s", exitValue, String.join(" ", cmd)));
            //if debug was enabled the stdout/error were already output somewhere
            if (!logger.isDebugEnabled()) {
                message.append(String.format("\nStdout: %s\nStderr: %s", po.getStdout().getBufferString(), po.getStderr().getBufferString()));
            }
            throw new RScriptExecutorException(message.toString());
        }
        return true;
    } catch (GATKException e) {
        if (!ignoreExceptions) {
            throw e;
        } else {
            logger.warn(e.getMessage());
            return false;
        }
    } finally {
        for (File temp : tempFiles) FileUtils.deleteQuietly(temp);
    }
}
Also used : ArrayList(java.util.ArrayList) Resource(org.broadinstitute.hellbender.utils.io.Resource) ProcessController(org.broadinstitute.hellbender.utils.runtime.ProcessController) ProcessOutput(org.broadinstitute.hellbender.utils.runtime.ProcessOutput) ProcessSettings(org.broadinstitute.hellbender.utils.runtime.ProcessSettings) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) File(java.io.File)

Example 8 with Resource

use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.

the class CollectRnaSeqMetrics method finish.

@Override
protected void finish() {
    collector.finish();
    final MetricsFile<RnaSeqMetrics, Integer> file = getMetricsFile();
    collector.addAllLevelsToFile(file);
    file.write(OUTPUT);
    boolean atLeastOneHistogram = false;
    for (Histogram<Integer> histo : file.getAllHistograms()) {
        atLeastOneHistogram = atLeastOneHistogram || !histo.isEmpty();
    }
    // Generate the coverage by position plot
    if (CHART_OUTPUT != null && atLeastOneHistogram) {
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(new Resource(R_SCRIPT, CollectRnaSeqMetrics.class));
        executor.addArgs(OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle);
        executor.exec();
    }
}
Also used : Resource(org.broadinstitute.hellbender.utils.io.Resource) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor)

Example 9 with Resource

use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.

the class CollectRrbsMetrics method doWork.

@Override
protected Object doWork() {
    if (!METRICS_FILE_PREFIX.endsWith(".")) {
        METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
    }
    final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
    final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
    final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
    assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);
    final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
    if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
    }
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final ProgressLogger progressLogger = new ProgressLogger(logger);
    final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);
    for (final SAMRecord samRecord : samReader) {
        progressLogger.record(samRecord);
        if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
            final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
            metricsCollector.acceptRecord(samRecord, referenceSequence);
        }
    }
    metricsCollector.finish();
    final MetricsFile<RrbsMetrics, Long> rrbsMetrics = getMetricsFile();
    metricsCollector.addAllLevelsToFile(rrbsMetrics);
    // Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
    // we get it out split it apart to the two separate MetricsFiles and write them to file
    final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
    final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
    for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
        summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
        for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
            detailsFile.addMetric(detailMetric);
        }
    }
    summaryFile.write(SUMMARY_OUT);
    detailsFile.write(DETAILS_OUT);
    if (PRODUCE_PLOT) {
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(new Resource(R_SCRIPT, CollectRrbsMetrics.class));
        executor.addArgs(DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
        executor.exec();
    }
    CloserUtil.close(samReader);
    return null;
}
Also used : Resource(org.broadinstitute.hellbender.utils.io.Resource) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 10 with Resource

use of org.broadinstitute.hellbender.utils.io.Resource in project gatk by broadinstitute.

the class MeanQualityByCycle method finish.

@Override
protected void finish() {
    // Generate a "Histogram" of mean quality and write it to the file
    final MetricsFile<?, Integer> metrics = getMetricsFile();
    metrics.addHistogram(q.getMeanQualityHistogram());
    if (!oq.isEmpty())
        metrics.addHistogram(oq.getMeanQualityHistogram());
    metrics.write(OUTPUT);
    if (q.isEmpty() && oq.isEmpty()) {
        logger.warn("No valid bases found in input file. No plot will be produced.");
    } else if (PRODUCE_PLOT) {
        // Now run R to generate a chart
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(new Resource(R_SCRIPT, MeanQualityByCycle.class));
        executor.addArgs(OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle);
        executor.exec();
    }
}
Also used : Resource(org.broadinstitute.hellbender.utils.io.Resource) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor)

Aggregations

Resource (org.broadinstitute.hellbender.utils.io.Resource)16 RScriptExecutor (org.broadinstitute.hellbender.utils.R.RScriptExecutor)14 File (java.io.File)7 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)3 UserException (org.broadinstitute.hellbender.exceptions.UserException)3 MetricsFile (htsjdk.samtools.metrics.MetricsFile)2 DoubleStream (java.util.stream.DoubleStream)2 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)2 SAMRecord (htsjdk.samtools.SAMRecord)1 SamReader (htsjdk.samtools.SamReader)1 ReferenceSequence (htsjdk.samtools.reference.ReferenceSequence)1 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)1 Histogram (htsjdk.samtools.util.Histogram)1 FileNotFoundException (java.io.FileNotFoundException)1 PrintStream (java.io.PrintStream)1 NumberFormat (java.text.NumberFormat)1 ArrayList (java.util.ArrayList)1 CommandLineException (org.broadinstitute.barclay.argparser.CommandLineException)1 GATKReport (org.broadinstitute.hellbender.utils.report.GATKReport)1 ProcessController (org.broadinstitute.hellbender.utils.runtime.ProcessController)1