use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.
the class VariantRecalibrator method writeModelReport.
protected GATKReport writeModelReport(final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, List<String> annotationList) {
final String formatString = "%.3f";
final GATKReport report = new GATKReport();
if (dataManager != null) {
//for unit test
final double[] meanVector = dataManager.getMeanVector();
GATKReportTable annotationMeans = makeVectorTable("AnnotationMeans", "Mean for each annotation, used to normalize data", dataManager.annotationKeys, meanVector, "Mean", formatString);
report.addTable(annotationMeans);
//"varianceVector" is actually stdev
final double[] varianceVector = dataManager.getVarianceVector();
GATKReportTable annotationVariances = makeVectorTable("AnnotationStdevs", "Standard deviation for each annotation, used to normalize data", dataManager.annotationKeys, varianceVector, "Standard deviation", formatString);
report.addTable(annotationVariances);
}
//The model and Gaussians don't know what the annotations are, so get them from this class
//VariantDataManager keeps the annotation in the same order as the argument list
GATKReportTable positiveMeans = makeMeansTable("PositiveModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the positive model", annotationList, goodModel, formatString);
report.addTable(positiveMeans);
GATKReportTable positiveCovariance = makeCovariancesTable("PositiveModelCovariances", "Matrix to describe the (normalized) covariance for each Gaussian in the positive model; covariance matrices are joined by row", annotationList, goodModel, formatString);
report.addTable(positiveCovariance);
//do the same for the negative model means
GATKReportTable negativeMeans = makeMeansTable("NegativeModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the negative model", annotationList, badModel, formatString);
report.addTable(negativeMeans);
GATKReportTable negativeCovariance = makeCovariancesTable("NegativeModelCovariances", "Matrix to describe the (normalized) covariance for each Gaussian in the negative model; covariance matrices are joined by row", annotationList, badModel, formatString);
report.addTable(negativeCovariance);
return report;
}
use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.
the class RecalibrationReportUnitTest method testGatherMissingReadGroup.
@Test
public void testGatherMissingReadGroup() {
// Hand modified subset of problematic gather inputs submitted by George Grant
final File input1 = new File(testDir + "NA12878.rg_subset.chr1.recal_data.table");
final File input2 = new File(testDir + "NA12878.rg_subset.chrY_Plus.recal_data.table");
final GATKReport report12 = RecalibrationReport.gatherReports(Arrays.asList(input1, input2));
final GATKReport report21 = RecalibrationReport.gatherReports(Arrays.asList(input2, input1));
Assert.assertTrue(report12.equals(report21), "GATK reports are different when gathered in a different order.");
}
use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.
the class VariantRecalibrator method onTraversalSuccess.
//---------------------------------------------------------------------------------------------------------------
//
// on traversal success
//
//---------------------------------------------------------------------------------------------------------------
@Override
public Object onTraversalSuccess() {
// finish processing any queued variants
consumeQueuedVariants();
for (int i = 1; i <= max_attempts; i++) {
try {
dataManager.setData(reduceSum);
// Each data point is now (x - mean) / standard deviation
dataManager.normalizeData();
// Generate the positive model using the training data and evaluate each variant
final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData();
final GaussianMixtureModel goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
engine.evaluateData(dataManager.getData(), goodModel, false);
// Generate the negative model using the worst performing data and evaluate each variant contrastively
final List<VariantDatum> negativeTrainingData = dataManager.selectWorstVariants();
final GaussianMixtureModel badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
// Don't need the aggregate data anymore so let's free up the memory
dataManager.dropAggregateData();
engine.evaluateData(dataManager.getData(), badModel, true);
if (badModel.failedToConverge || goodModel.failedToConverge) {
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
}
if (outputModel) {
GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
try (final PrintStream modelReportStream = new PrintStream(modelReport)) {
report.print(modelReportStream);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile("File: (" + modelReport + ")", e);
}
}
engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
final int nCallsAtTruth = TrancheManager.countCallsAtTruth(dataManager.getData(), Double.NEGATIVE_INFINITY);
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric(nCallsAtTruth);
final List<Tranche> tranches = TrancheManager.findTranches(dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE);
tranchesStream.print(Tranche.tranchesString(tranches));
logger.info("Writing out recalibration table...");
dataManager.writeOutRecalibrationTable(recalWriter, getBestAvailableSequenceDictionary());
if (RSCRIPT_FILE != null) {
logger.info("Writing out visualization Rscript file...");
createVisualizationScript(dataManager.getRandomDataForPlotting(1000, positiveTrainingData, negativeTrainingData, dataManager.getEvaluationData()), goodModel, badModel, 0.0, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]));
}
if (VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
// Print out an info message to make it clear why the tranches plot is not generated
logger.info("Tranches plot will not be generated since we are running in INDEL mode");
} else {
// Execute the RScript command to plot the table of truth values
RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(PLOT_TRANCHES_RSCRIPT, VariantRecalibrator.class));
executor.addArgs(new File(TRANCHES_FILE).getAbsoluteFile(), TARGET_TITV);
// Print out the command line to make it clear to the user what is being executed and how one might modify it
logger.info("Executing: " + executor.getApproximateCommandLine());
executor.exec();
}
return true;
} catch (Exception e) {
// IllegalArgumentException.
if (i == max_attempts) {
throw e;
} else {
logger.info(String.format("Exception occurred on attempt %d of %d. Trying again. Message was: '%s'", i, max_attempts, e.getMessage()));
}
}
}
return false;
}
use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.
the class RecalibrationReport method gatherReportsIntoOneFile.
/**
* Gather multiple {@link RecalibrationReport}s into a single file
* @param inputs a list of {@link RecalibrationReport} files to gather
* @param output a file to write the recalibration reports to
*/
public static void gatherReportsIntoOneFile(final List<File> inputs, final File output) {
Utils.nonNull(inputs, "inputs");
Utils.nonNull(output, "output");
try (final PrintStream outputFile = new PrintStream(output)) {
final GATKReport report = gatherReports(inputs);
report.print(outputFile);
} catch (final FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(output, e);
}
}
use of org.broadinstitute.hellbender.utils.report.GATKReport in project gatk by broadinstitute.
the class RecalibrationReport method gatherReports.
/**
* Gathers a set of files containing {@link RecalibrationReport}s into a single {@link GATKReport}.
*
* @param inputs a list of files containing {@link RecalibrationReport}s
* @return gathered recalibration GATK report
*/
public static GATKReport gatherReports(final List<File> inputs) {
Utils.nonNull(inputs);
Utils.nonEmpty(inputs, "Cannot gather an empty list of inputs");
final SortedSet<String> allReadGroups = new TreeSet<>();
final Map<File, Set<String>> inputReadGroups = new LinkedHashMap<>();
// Get the read groups from each input report
for (final File input : inputs) {
final GATKReport report = new GATKReport(input);
final Set<String> readGroups = report.getReadGroups();
inputReadGroups.put(input, readGroups);
allReadGroups.addAll(readGroups);
}
logTablesWithMissingReadGroups(allReadGroups, inputReadGroups);
final RecalibrationReport result = inputs.stream().map(i -> new RecalibrationReport(new GATKReport(i), allReadGroups)).reduce(RecalibrationReport::combine).filter(r -> !r.isEmpty()).orElseThrow(() -> new GATKException("there is no usable data in any input file"));
result.quantizationInfo = new QuantizationInfo(result.recalibrationTables, result.RAC.QUANTIZING_LEVELS);
return result.createGATKReport();
}
Aggregations