Search in sources :

Example 6 with GATKReport

use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.

the class VariantRecalibrator method writeModelReport.

protected GATKReport writeModelReport(final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, List<String> annotationList) {
    final String formatString = "%.3f";
    final GATKReport report = new GATKReport();
    if (dataManager != null) {
        //for unit test
        final double[] meanVector = dataManager.getMeanVector();
        GATKReportTable annotationMeans = makeVectorTable("AnnotationMeans", "Mean for each annotation, used to normalize data", dataManager.annotationKeys, meanVector, "Mean", formatString);
        report.addTable(annotationMeans);
        //"varianceVector" is actually stdev
        final double[] varianceVector = dataManager.getVarianceVector();
        GATKReportTable annotationVariances = makeVectorTable("AnnotationStdevs", "Standard deviation for each annotation, used to normalize data", dataManager.annotationKeys, varianceVector, "Standard deviation", formatString);
        report.addTable(annotationVariances);
    }
    //The model and Gaussians don't know what the annotations are, so get them from this class
    //VariantDataManager keeps the annotation in the same order as the argument list
    GATKReportTable positiveMeans = makeMeansTable("PositiveModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the positive model", annotationList, goodModel, formatString);
    report.addTable(positiveMeans);
    GATKReportTable positiveCovariance = makeCovariancesTable("PositiveModelCovariances", "Matrix to describe the (normalized) covariance for each Gaussian in the positive model; covariance matrices are joined by row", annotationList, goodModel, formatString);
    report.addTable(positiveCovariance);
    //do the same for the negative model means
    GATKReportTable negativeMeans = makeMeansTable("NegativeModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the negative model", annotationList, badModel, formatString);
    report.addTable(negativeMeans);
    GATKReportTable negativeCovariance = makeCovariancesTable("NegativeModelCovariances", "Matrix to describe the (normalized) covariance for each Gaussian in the negative model; covariance matrices are joined by row", annotationList, badModel, formatString);
    report.addTable(negativeCovariance);
    return report;
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) GATKReportTable(org.broadinstitute.hellbender.utils.report.GATKReportTable)

Example 7 with GATKReport

use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.

the class RecalibrationReportUnitTest method testGatherMissingReadGroup.

@Test
public void testGatherMissingReadGroup() {
    // Hand modified subset of problematic gather inputs submitted by George Grant
    final File input1 = new File(testDir + "NA12878.rg_subset.chr1.recal_data.table");
    final File input2 = new File(testDir + "NA12878.rg_subset.chrY_Plus.recal_data.table");
    final GATKReport report12 = RecalibrationReport.gatherReports(Arrays.asList(input1, input2));
    final GATKReport report21 = RecalibrationReport.gatherReports(Arrays.asList(input2, input1));
    Assert.assertTrue(report12.equals(report21), "GATK reports are different when gathered in a different order.");
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 8 with GATKReport

use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.

the class VariantRecalibrator method onTraversalSuccess.

//---------------------------------------------------------------------------------------------------------------
//
// on traversal success
//
//---------------------------------------------------------------------------------------------------------------
@Override
public Object onTraversalSuccess() {
    // finish processing any queued variants
    consumeQueuedVariants();
    for (int i = 1; i <= max_attempts; i++) {
        try {
            dataManager.setData(reduceSum);
            // Each data point is now (x - mean) / standard deviation
            dataManager.normalizeData();
            // Generate the positive model using the training data and evaluate each variant
            final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData();
            final GaussianMixtureModel goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
            engine.evaluateData(dataManager.getData(), goodModel, false);
            // Generate the negative model using the worst performing data and evaluate each variant contrastively
            final List<VariantDatum> negativeTrainingData = dataManager.selectWorstVariants();
            final GaussianMixtureModel badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
            // Don't need the aggregate data anymore so let's free up the memory
            dataManager.dropAggregateData();
            engine.evaluateData(dataManager.getData(), badModel, true);
            if (badModel.failedToConverge || goodModel.failedToConverge) {
                throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
            }
            if (outputModel) {
                GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
                try (final PrintStream modelReportStream = new PrintStream(modelReport)) {
                    report.print(modelReportStream);
                } catch (FileNotFoundException e) {
                    throw new UserException.CouldNotCreateOutputFile("File: (" + modelReport + ")", e);
                }
            }
            engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
            // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
            final int nCallsAtTruth = TrancheManager.countCallsAtTruth(dataManager.getData(), Double.NEGATIVE_INFINITY);
            final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric(nCallsAtTruth);
            final List<Tranche> tranches = TrancheManager.findTranches(dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE);
            tranchesStream.print(Tranche.tranchesString(tranches));
            logger.info("Writing out recalibration table...");
            dataManager.writeOutRecalibrationTable(recalWriter, getBestAvailableSequenceDictionary());
            if (RSCRIPT_FILE != null) {
                logger.info("Writing out visualization Rscript file...");
                createVisualizationScript(dataManager.getRandomDataForPlotting(1000, positiveTrainingData, negativeTrainingData, dataManager.getEvaluationData()), goodModel, badModel, 0.0, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]));
            }
            if (VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
                // Print out an info message to make it clear why the tranches plot is not generated
                logger.info("Tranches plot will not be generated since we are running in INDEL mode");
            } else {
                // Execute the RScript command to plot the table of truth values
                RScriptExecutor executor = new RScriptExecutor();
                executor.addScript(new Resource(PLOT_TRANCHES_RSCRIPT, VariantRecalibrator.class));
                executor.addArgs(new File(TRANCHES_FILE).getAbsoluteFile(), TARGET_TITV);
                // Print out the command line to make it clear to the user what is being executed and how one might modify it
                logger.info("Executing: " + executor.getApproximateCommandLine());
                executor.exec();
            }
            return true;
        } catch (Exception e) {
            // IllegalArgumentException.
            if (i == max_attempts) {
                throw e;
            } else {
                logger.info(String.format("Exception occurred on attempt %d of %d. Trying again. Message was: '%s'", i, max_attempts, e.getMessage()));
            }
        }
    }
    return false;
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) PrintStream(java.io.PrintStream) FileNotFoundException(java.io.FileNotFoundException) Resource(org.broadinstitute.hellbender.utils.io.Resource) FileNotFoundException(java.io.FileNotFoundException) UserException(org.broadinstitute.hellbender.exceptions.UserException) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File)

Example 9 with GATKReport

use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.

the class RecalibrationReport method gatherReportsIntoOneFile.

/**
     * Gather multiple {@link RecalibrationReport}s into a single file
     * @param inputs a list of {@link RecalibrationReport} files to gather
     * @param output a file to write the recalibration reports to
     */
public static void gatherReportsIntoOneFile(final List<File> inputs, final File output) {
    Utils.nonNull(inputs, "inputs");
    Utils.nonNull(output, "output");
    try (final PrintStream outputFile = new PrintStream(output)) {
        final GATKReport report = gatherReports(inputs);
        report.print(outputFile);
    } catch (final FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(output, e);
    }
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) PrintStream(java.io.PrintStream) FileNotFoundException(java.io.FileNotFoundException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 10 with GATKReport

use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.

the class RecalibrationReport method gatherReports.

/**
     * Gathers a set of files containing {@link RecalibrationReport}s into a single {@link GATKReport}.
     *
     * @param inputs a list of files containing {@link RecalibrationReport}s
     * @return gathered recalibration GATK report
     */
public static GATKReport gatherReports(final List<File> inputs) {
    Utils.nonNull(inputs);
    Utils.nonEmpty(inputs, "Cannot gather an empty list of inputs");
    final SortedSet<String> allReadGroups = new TreeSet<>();
    final Map<File, Set<String>> inputReadGroups = new LinkedHashMap<>();
    // Get the read groups from each input report
    for (final File input : inputs) {
        final GATKReport report = new GATKReport(input);
        final Set<String> readGroups = report.getReadGroups();
        inputReadGroups.put(input, readGroups);
        allReadGroups.addAll(readGroups);
    }
    logTablesWithMissingReadGroups(allReadGroups, inputReadGroups);
    final RecalibrationReport result = inputs.stream().map(i -> new RecalibrationReport(new GATKReport(i), allReadGroups)).reduce(RecalibrationReport::combine).filter(r -> !r.isEmpty()).orElseThrow(() -> new GATKException("there is no usable data in any input file"));
    result.quantizationInfo = new QuantizationInfo(result.recalibrationTables, result.RAC.QUANTIZING_LEVELS);
    return result.createGATKReport();
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) Arrays(java.util.Arrays) SortedSet(java.util.SortedSet) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Logger(org.apache.log4j.Logger) CollectionUtils(org.apache.commons.collections.CollectionUtils) Map(java.util.Map) GATKReportTable(org.broadinstitute.hellbender.utils.report.GATKReportTable) PrintStream(java.io.PrintStream) GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) Set(java.util.Set) QualityUtils(org.broadinstitute.hellbender.utils.QualityUtils) Covariate(org.broadinstitute.hellbender.utils.recalibration.covariates.Covariate) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList) NestedIntegerArray(org.broadinstitute.hellbender.utils.collections.NestedIntegerArray) List(java.util.List) UserException(org.broadinstitute.hellbender.exceptions.UserException) Utils(org.broadinstitute.hellbender.utils.Utils) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) InputStream(java.io.InputStream) SortedSet(java.util.SortedSet) TreeSet(java.util.TreeSet) Set(java.util.Set) LinkedHashMap(java.util.LinkedHashMap) TreeSet(java.util.TreeSet) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) File(java.io.File)

Aggregations

GATKReport (org.broadinstitute.hellbender.utils.report.GATKReport)10 File (java.io.File)4 GATKReportTable (org.broadinstitute.hellbender.utils.report.GATKReportTable)4 FileNotFoundException (java.io.FileNotFoundException)3 PrintStream (java.io.PrintStream)3 ArrayList (java.util.ArrayList)3 UserException (org.broadinstitute.hellbender.exceptions.UserException)3 Test (org.testng.annotations.Test)3 InputStream (java.io.InputStream)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Map (java.util.Map)1 Random (java.util.Random)1 Set (java.util.Set)1 SortedSet (java.util.SortedSet)1 TreeSet (java.util.TreeSet)1 CollectionUtils (org.apache.commons.collections.CollectionUtils)1 LogManager (org.apache.log4j.LogManager)1