Search in sources :

Example 1 with RScriptExecutor

use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk by broadinstitute.

the class RecalUtils method generatePlots.

/**                                                s
     * Write recalibration plots into a file
     *
     * @param csvFile location of the intermediary file
     * @param maybeGzipedExampleReportFile where the report arguments are collected from.
     * @param output result plot file name.
     */
public static void generatePlots(final File csvFile, final File maybeGzipedExampleReportFile, final File output) {
    final File exampleReportFile = IOUtils.gunzipToTempIfNeeded(maybeGzipedExampleReportFile);
    final RScriptExecutor executor = new RScriptExecutor();
    executor.addScript(loadBQSRScriptResource());
    executor.addArgs(csvFile.getAbsolutePath());
    executor.addArgs(exampleReportFile.getAbsolutePath());
    executor.addArgs(output.getAbsolutePath());
    LogManager.getLogger(RecalUtils.class).debug("R command line: " + executor.getApproximateCommandLine());
    executor.exec();
}
Also used : RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor)

Example 2 with RScriptExecutor

use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk by broadinstitute.

the class InsertSizeMetricsCollector method writeHistogramPDF.

/**
     * Calls R script to plot histogram(s) in PDF.
     */
private void writeHistogramPDF(final String inputName) throws RScriptExecutorException {
    // path to Picard R script for producing histograms in PDF files.
    final String R_SCRIPT = "insertSizeHistogram.R";
    File histFile = new File(inputArgs.histogramPlotFile);
    IOUtil.assertFileIsWritable(histFile);
    final RScriptExecutor executor = new RScriptExecutor();
    executor.addScript(new Resource(R_SCRIPT, InsertSizeMetricsCollector.class));
    executor.addArgs(// text-based metrics file
    inputArgs.output, // PDF graphics file
    histFile.getAbsolutePath(), // input bam file
    inputName);
    if (inputArgs.histogramWidth != null) {
        executor.addArgs(String.valueOf(inputArgs.histogramWidth));
    }
    executor.exec();
}
Also used : Resource(org.broadinstitute.hellbender.utils.io.Resource) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 3 with RScriptExecutor

use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk by broadinstitute.

the class VariantRecalibrator method createVisualizationScript.

//TODO: does this R code have to be embedded here?
private void createVisualizationScript(final List<VariantDatum> randomData, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, final double lodCutoff, final String[] annotationKeys) {
    PrintStream stream;
    try {
        stream = new PrintStream(RSCRIPT_FILE);
    } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(RSCRIPT_FILE, e);
    }
    // We make extensive use of the ggplot2 R library: http://had.co.nz/ggplot2/
    stream.println("library(ggplot2)");
    // For compactPDF in R 2.13+
    stream.println("library(tools)");
    // For graphical functions R 2.14.2+
    stream.println("library(grid)");
    createArrangeFunction(stream);
    stream.println("outputPDF <- \"" + RSCRIPT_FILE + ".pdf\"");
    // Unfortunately this is a huge pdf file, BUGBUG: need to work on reducing the file size
    stream.println("pdf(outputPDF)");
    for (int iii = 0; iii < annotationKeys.length; iii++) {
        for (int jjj = iii + 1; jjj < annotationKeys.length; jjj++) {
            logger.info("Building " + annotationKeys[iii] + " x " + annotationKeys[jjj] + " plot...");
            final List<VariantDatum> fakeData = new ExpandingArrayList<>();
            double minAnn1 = 100.0, maxAnn1 = -100.0, minAnn2 = 100.0, maxAnn2 = -100.0;
            for (final VariantDatum datum : randomData) {
                minAnn1 = Math.min(minAnn1, datum.annotations[iii]);
                maxAnn1 = Math.max(maxAnn1, datum.annotations[iii]);
                minAnn2 = Math.min(minAnn2, datum.annotations[jjj]);
                maxAnn2 = Math.max(maxAnn2, datum.annotations[jjj]);
            }
            // Create a fake set of data which spans the full extent of these two annotation dimensions in order
            // to calculate the model PDF projected to 2D
            final double NUM_STEPS = 60.0;
            for (double ann1 = minAnn1; ann1 <= maxAnn1; ann1 += (maxAnn1 - minAnn1) / NUM_STEPS) {
                for (double ann2 = minAnn2; ann2 <= maxAnn2; ann2 += (maxAnn2 - minAnn2) / NUM_STEPS) {
                    final VariantDatum datum = new VariantDatum();
                    datum.prior = 0.0;
                    datum.annotations = new double[randomData.get(0).annotations.length];
                    datum.isNull = new boolean[randomData.get(0).annotations.length];
                    for (int ann = 0; ann < datum.annotations.length; ann++) {
                        datum.annotations[ann] = 0.0;
                        datum.isNull[ann] = true;
                    }
                    datum.annotations[iii] = ann1;
                    datum.annotations[jjj] = ann2;
                    datum.isNull[iii] = false;
                    datum.isNull[jjj] = false;
                    fakeData.add(datum);
                }
            }
            engine.evaluateData(fakeData, goodModel, false);
            engine.evaluateData(fakeData, badModel, true);
            stream.print("surface <- c(");
            for (final VariantDatum datum : fakeData) {
                stream.print(String.format("%.4f, %.4f, %.4f, ", dataManager.denormalizeDatum(datum.annotations[iii], iii), dataManager.denormalizeDatum(datum.annotations[jjj], jjj), Math.min(4.0, Math.max(-4.0, datum.lod))));
            }
            stream.println("NA,NA,NA)");
            stream.println("s <- matrix(surface,ncol=3,byrow=T)");
            stream.print("data <- c(");
            for (final VariantDatum datum : randomData) {
                stream.print(String.format("%.4f, %.4f, %.4f, %d, %d,", dataManager.denormalizeDatum(datum.annotations[iii], iii), dataManager.denormalizeDatum(datum.annotations[jjj], jjj), (datum.lod < lodCutoff ? -1.0 : 1.0), (datum.atAntiTrainingSite ? -1 : (datum.atTrainingSite ? 1 : 0)), (datum.isKnown ? 1 : -1)));
            }
            stream.println("NA,NA,NA,NA,1)");
            stream.println("d <- matrix(data,ncol=5,byrow=T)");
            final String surfaceFrame = "sf." + annotationKeys[iii] + "." + annotationKeys[jjj];
            final String dataFrame = "df." + annotationKeys[iii] + "." + annotationKeys[jjj];
            stream.println(surfaceFrame + " <- data.frame(x=s[,1], y=s[,2], lod=s[,3])");
            stream.println(dataFrame + " <- data.frame(x=d[,1], y=d[,2], retained=d[,3], training=d[,4], novelty=d[,5])");
            stream.println("dummyData <- " + dataFrame + "[1,]");
            stream.println("dummyData$x <- NaN");
            stream.println("dummyData$y <- NaN");
            stream.println("p <- ggplot(data=" + surfaceFrame + ", aes(x=x, y=y)) +theme(panel.background = element_rect(fill = \"white\"), panel.grid.minor = element_line(colour = \"white\"), panel.grid.major = element_line(colour = \"white\"))");
            stream.println("p1 = p +ggtitle(\"model PDF\") + labs(x=\"" + annotationKeys[iii] + "\", y=\"" + annotationKeys[jjj] + "\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\", space=\"rgb\")");
            stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=retained, alpha=I(1/7),legend=FALSE) +theme(panel.background = element_rect(fill = \"white\"), panel.grid.minor = element_line(colour = \"white\"), panel.grid.major = element_line(colour = \"white\"))");
            stream.println("q <- geom_point(aes(x=x,y=y,color=retained),data=dummyData, alpha=1.0, na.rm=TRUE)");
            stream.println("p2 = p + q + labs(x=\"" + annotationKeys[iii] + "\", y=\"" + annotationKeys[jjj] + "\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),guide=\"legend\",labels=c(\"filtered\",\"retained\"))");
            stream.println("p <- qplot(x,y,data=" + dataFrame + "[" + dataFrame + "$training != 0,], color=training, alpha=I(1/7)) +theme(panel.background = element_rect(fill = \"white\"), panel.grid.minor = element_line(colour = \"white\"), panel.grid.major = element_line(colour = \"white\"))");
            stream.println("q <- geom_point(aes(x=x,y=y,color=training),data=dummyData, alpha=1.0, na.rm=TRUE)");
            stream.println("p3 = p + q + labs(x=\"" + annotationKeys[iii] + "\", y=\"" + annotationKeys[jjj] + "\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1),guide=\"legend\", labels=c(\"neg\", \"pos\"))");
            stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=novelty, alpha=I(1/7)) +theme(panel.background = element_rect(fill = \"white\"), panel.grid.minor = element_line(colour = \"white\"), panel.grid.major = element_line(colour = \"white\"))");
            stream.println("q <- geom_point(aes(x=x,y=y,color=novelty),data=dummyData, alpha=1.0, na.rm=TRUE)");
            stream.println("p4 = p + q + labs(x=\"" + annotationKeys[iii] + "\", y=\"" + annotationKeys[jjj] + "\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1),guide=\"legend\", labels=c(\"novel\",\"known\"))");
            stream.println("arrange(p1, p2, p3, p4, ncol=2)");
        }
    }
    stream.println("dev.off()");
    stream.println("if (exists(\"compactPDF\")) {");
    stream.println("compactPDF(outputPDF)");
    stream.println("}");
    stream.close();
    // Execute Rscript command to generate the clustering plots
    RScriptExecutor executor = new RScriptExecutor();
    executor.addScript(new File(RSCRIPT_FILE));
    logger.info("Executing: " + executor.getApproximateCommandLine());
    executor.exec();
}
Also used : ExpandingArrayList(org.broadinstitute.hellbender.utils.collections.ExpandingArrayList) PrintStream(java.io.PrintStream) FileNotFoundException(java.io.FileNotFoundException) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File)

Example 4 with RScriptExecutor

use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk by broadinstitute.

the class RCBSSegmenter method writeSegmentFile.

/**
     * Create a segmentation file using CBS in R.
     *
     * <p>https://www.bioconductor.org/packages/release/bioc/manuals/DNAcopy/man/DNAcopy.pdf</p>
     *
     * <p>Please see the above documentation for a more detailed description of the parameters.</p>
     *
     * <p>IMPORTANT:  There is no check that the weights have the same number of entries as the tnFile.</p>
     *
     * Wraps the call:
     * segment(x, weights = NULL, alpha = 0.01, nperm = 10000, p.method =
     *  c("hybrid", "perm"), min.width=2, kmax=25, nmin=200,
     *  eta=0.05, sbdry=NULL, trim = 0.025, undo.splits =
     *  c("none", "prune", "sdundo"), undo.prune=0.05,
     *  undo.SD=3, verbose=1)
     *
     * <p> Note that sbdry and verbose are unavailable through this interface. </p>
     *
     * @param sampleName Name of the sample being run through the segmenter.  Never {@code null}
     * @param tnFile Tangent-normalized targets file.  Never {@code null}
     * @param outputFile Full path to the outputted segment file.  Never {@code null}
     * @param log whether the tnFile input has already been put into log2CR.  Never {@code null}
     * @param minWidth minimum length for a segment
     * @param weightFile File containing weights for each target (doubles; one per line).  Must be the same length as what is in the tnFile (note that this is not
     *                enforced here).  Typically, 1/var(target) is what is used.  All values must be greater than 0.
     *                Use {@code null} if weighting is not desired.  These values should not be log space.
     */
public static void writeSegmentFile(final String sampleName, final String tnFile, final String outputFile, final Boolean log, final File weightFile, final double alpha, final int nperm, final PMethod pmethod, final int minWidth, final int kmax, final int nmin, final double eta, final double trim, final UndoSplits undoSplits, final double undoPrune, final int undoSD) {
    String logArg = log ? "TRUE" : "FALSE";
    final RScriptExecutor executor = new RScriptExecutor();
    executor.addScript(new Resource(R_SCRIPT, RCBSSegmenter.class));
    /*--args is needed for Rscript to recognize other arguments properly*/
    executor.addArgs("--args", "--sample_name=" + sampleName, "--targets_file=" + tnFile, "--output_file=" + outputFile, "--log2_input=" + logArg, "--min_width=" + String.valueOf(minWidth), "--alpha=" + String.valueOf(alpha), "--nperm=" + String.valueOf(nperm), "--pmethod=" + pmethod.toString(), "--kmax=" + String.valueOf(kmax), "--nmin=" + String.valueOf(nmin), "--eta=" + String.valueOf(eta), "--trim=" + String.valueOf(trim), "--undosplits=" + undoSplits.toString(), "--undoprune=" + String.valueOf(undoPrune), "--undoSD=" + String.valueOf(undoSD));
    if (weightFile != null) {
        final double[] weights = ParamUtils.readValuesFromFile(weightFile);
        // Check to make sure that no weights are zero.
        if (!DoubleStream.of(weights).allMatch(d -> d > 0 && !Double.isNaN(d) && Double.isFinite(d))) {
            throw new GATKException("A weight for a target was zero or less, which is not allowed.  If you truly want zero, you must remove the target from consideration.");
        }
        // Add the argument to specify weights
        executor.addArgs("--weights_file=" + weightFile.getAbsolutePath());
    }
    executor.exec();
}
Also used : Resource(org.broadinstitute.hellbender.utils.io.Resource) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) DoubleStream(java.util.stream.DoubleStream) Resource(org.broadinstitute.hellbender.utils.io.Resource) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 5 with RScriptExecutor

use of org.broadinstitute.hellbender.utils.R.RScriptExecutor in project gatk-protected by broadinstitute.

the class HMMUnitTest method setUp.

/**
     * This method runs the Test models and sequences against R's HMM package to
     * obtain independent estimates for the expected values for the Viterbi and Forward-Backward
     * algorithm.
     * <p>
     *     Typically this would be annotated as a <code>@BeforeClass</> method but unfortunately the data-provider
     *     code may be run before it and so we need to implement lazy initialization.
     * </p>
     * <p>
     *     Therefore the data-providers will call it once and only once before any of their data consumer test are run.
     * </p>
     *
     * @throws IOException
     */
private void setUp() throws IOException {
    final List<Integer> positions = IntStream.range(0, TEST_PATH_LENGTH).boxed().collect(Collectors.toList());
    TEST_SEQUENCES = TEST_MODELS.stream().map(m -> m.generate(positions, RANDOM)).collect(Collectors.toList());
    TEST_SEQUENCE_FILE = createTempFile("sequences", ".seq");
    final PrintWriter writer = new PrintWriter(new FileWriter(TEST_SEQUENCE_FILE));
    TEST_SEQUENCES.forEach(s -> {
        final StringBuilder builder = new StringBuilder(TEST_PATH_LENGTH);
        s.getSecond().forEach(c -> {
            builder.append(c.toString());
            builder.append(',');
        });
        builder.setLength(builder.length() - 1);
        writer.println(builder.toString());
    });
    writer.close();
    TEST_R_RESULTS_FILE = createTempFile("r-output", ".tab");
    RScriptExecutor rExecutor = new RScriptExecutor();
    final File script = composeRScriptFile();
    rExecutor.addScript(script);
    if (!rExecutor.exec()) {
        Assert.fail("could not obtain expected results from R");
    }
    @SuppressWarnings({ "rawtypes", "unchecked" }) final List<RResultRecord>[][] recordsByModelAndSequence = (List<RResultRecord>[][]) new List[TEST_MODELS.size()][TEST_SEQUENCES.size()];
    try (final RResultReader reader = new RResultReader(TEST_R_RESULTS_FILE)) {
        reader.stream().forEach(r -> {
            if (recordsByModelAndSequence[r.modelIndex][r.sequenceIndex] == null) {
                recordsByModelAndSequence[r.modelIndex][r.sequenceIndex] = new ArrayList<>(TEST_PATH_LENGTH);
            }
            recordsByModelAndSequence[r.modelIndex][r.sequenceIndex].add(r);
        });
    }
    TEST_EXPECTED_RESULTS = new ArrayList<>(TEST_MODELS.size() * TEST_SEQUENCES.size());
    for (int i = 0; i < recordsByModelAndSequence.length; i++) {
        for (int j = 0; j < recordsByModelAndSequence[i].length; j++) {
            TEST_EXPECTED_RESULTS.add(composeExpectedResult(recordsByModelAndSequence[i][j], i, j));
        }
    }
}
Also used : FileWriter(java.io.FileWriter) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) File(java.io.File) PrintWriter(java.io.PrintWriter)

Aggregations

RScriptExecutor (org.broadinstitute.hellbender.utils.R.RScriptExecutor)18 Resource (org.broadinstitute.hellbender.utils.io.Resource)14 File (java.io.File)8 UserException (org.broadinstitute.hellbender.exceptions.UserException)3 MetricsFile (htsjdk.samtools.metrics.MetricsFile)2 FileNotFoundException (java.io.FileNotFoundException)2 FileWriter (java.io.FileWriter)2 PrintStream (java.io.PrintStream)2 PrintWriter (java.io.PrintWriter)2 DoubleStream (java.util.stream.DoubleStream)2 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)2 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)2 SAMRecord (htsjdk.samtools.SAMRecord)1 SamReader (htsjdk.samtools.SamReader)1 ReferenceSequence (htsjdk.samtools.reference.ReferenceSequence)1 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)1 Histogram (htsjdk.samtools.util.Histogram)1 NumberFormat (java.text.NumberFormat)1 CommandLineException (org.broadinstitute.barclay.argparser.CommandLineException)1 ExpandingArrayList (org.broadinstitute.hellbender.utils.collections.ExpandingArrayList)1