use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.
the class RecalUtils method generateReportTables.
/**
 * Converts the given recalibration tables into report tables.
 *
 * <p>The tables are visited in the iteration order of {@code recalibrationTables}. Each table for
 * which {@code isAdditionalCovariateTable} is false gets a report table of its own. All tables for
 * which it is true share one report table: it is created when the first such table is seen and
 * later ones append their rows to it, so the shared table appears only once in the result.
 *
 * <p>Column layout per row, in order: read group; quality score (omitted for the read group
 * table); covariate value and covariate name (additional covariate tables only); event type;
 * empirical quality; estimated reported quality (read group table only); number of observations;
 * number of errors.
 *
 * @param recalibrationTables the recalibration tables to convert
 * @param covariates          provides the read group and quality score covariates used to name
 *                            columns and to format key values
 * @return the report tables, in the order they were created
 */
public static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final StandardCovariateList covariates) {
    // The result is only appended to and then returned.
    final List<GATKReportTable> result = new ArrayList<>();
    int rowIndex = 0;
    GATKReportTable allCovsReportTable = null;

    for (final NestedIntegerArray<RecalDatum> table : recalibrationTables) {
        // The kind of a table does not change while its rows are written, so query it once per
        // table instead of once per row.
        final boolean readGroupTable = recalibrationTables.isReadGroupTable(table);
        final boolean additionalCovariateTable = recalibrationTables.isAdditionalCovariateTable(table);
        final boolean hasCovariateColumns = !readGroupTable && additionalCovariateTable;

        // Column (name, format) pairs; the order here is the order the row loop below relies on.
        final ArrayList<Pair<String, String>> columnNames = new ArrayList<>();
        columnNames.add(new MutablePair<>(covariates.getReadGroupCovariate().parseNameForReport(), "%s"));
        if (!readGroupTable) {
            columnNames.add(new MutablePair<>(covariates.getQualityScoreCovariate().parseNameForReport(), "%d"));
            if (additionalCovariateTable) {
                columnNames.add(covariateValue);
                columnNames.add(covariateName);
            }
        }
        columnNames.add(eventType);
        columnNames.add(empiricalQuality);
        if (readGroupTable) {
            // only the read group table needs the estimated Q reported
            columnNames.add(estimatedQReported);
        }
        columnNames.add(nObservations);
        columnNames.add(nErrors);

        final String reportTableName = getReportTableName(recalibrationTables, table);
        final GATKReportTable.Sorting sort = GATKReportTable.Sorting.SORT_BY_COLUMN;

        final GATKReportTable reportTable;
        final boolean addToList;
        // XXX this "if" implicitly uses the knowledge about the ordering of tables.
        if (!additionalCovariateTable) {
            reportTable = makeNewTableWithColumns(columnNames, reportTableName, sort);
            // reset the row index since we're starting with a new table
            rowIndex = 0;
            addToList = true;
        } else if (allCovsReportTable == null) {
            // First additional covariate table: create the shared report table.
            reportTable = makeNewTableWithColumns(columnNames, reportTableName, sort);
            rowIndex = 0;
            allCovsReportTable = reportTable;
            addToList = true;
        } else {
            // Later additional covariate tables keep appending rows to the shared table,
            // so rowIndex is deliberately NOT reset here.
            reportTable = allCovsReportTable;
            addToList = false;
        }

        // Only tables with covariate columns have an associated covariate to look up.
        final Covariate covariate = hasCovariateColumns ? recalibrationTables.getCovariateForTable(table) : null;

        for (final NestedIntegerArray.Leaf<RecalDatum> row : table.getAllLeaves()) {
            final RecalDatum datum = row.value;
            final int[] keys = row.keys;
            // columnIndex walks columnNames, keyIndex walks the leaf's keys; they advance
            // independently because not every column is backed by a key.
            int columnIndex = 0;
            int keyIndex = 0;

            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariates.getReadGroupCovariate().formatKey(keys[keyIndex++]));
            if (!readGroupTable) {
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariates.getQualityScoreCovariate().formatKey(keys[keyIndex++]));
                if (additionalCovariateTable) {
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariate.formatKey(keys[keyIndex++]));
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariate.parseNameForReport());
                }
            }

            // The key following the covariate keys identifies the event type.
            final EventType event = EventType.eventFrom(keys[keyIndex]);
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), event.toString());
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getEmpiricalQuality());
            if (readGroupTable) {
                // we only add the estimated Q reported in the RG table
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getEstimatedQReported());
            }
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getNumObservations());
            reportTable.set(rowIndex, columnNames.get(columnIndex).getLeft(), datum.getNumMismatches());
            rowIndex++;
        }

        if (addToList) {
            // XXX using a set would be slow because the equals method on GATKReportTable is expensive.
            result.add(reportTable);
        }
    }
    return result;
}
use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.
the class QuantizationInfo method generateReportTable.
/**
 * Builds the quantization report table.
 *
 * <p>The table has three {@code %d} columns (quality score, count, quantized value) and one row
 * for every quality score from 0 up to and including {@code QualityUtils.MAX_SAM_QUAL_SCORE}.
 * The row id is the quality score itself; the count and quantized value are read from
 * {@code empiricalQualCounts} and {@code quantizedQuals} at that index.
 *
 * @return the populated report table
 */
public GATKReportTable generateReportTable() {
    final GATKReportTable table = new GATKReportTable(
            RecalUtils.QUANTIZED_REPORT_TABLE_TITLE,
            "Quality quantization map",
            3,
            GATKReportTable.Sorting.SORT_BY_COLUMN);

    table.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME, "%d");
    table.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, "%d");
    table.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, "%d");

    // One row per quality score; the score doubles as the row id.
    for (int score = 0; score <= QualityUtils.MAX_SAM_QUAL_SCORE; ++score) {
        table.set(score, RecalUtils.QUALITY_SCORE_COLUMN_NAME, score);
        table.set(score, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(score));
        table.set(score, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(score));
    }
    return table;
}
use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.
the class VariantRecalibratorModelOutputUnitTest method testVQSRModelOutput.
/**
 * Builds a positive model with two Gaussians and a negative model with one Gaussian, writes the
 * model report through {@code VariantRecalibrator.writeModelReport}, and checks that the means
 * and covariances read back from the report tables match the Gaussians to within {@code epsilon}.
 */
@Test
public void testVQSRModelOutput() {
    final int numAnnotations = 6;
    final double shrinkage = 1.0;
    final double dirichlet = 0.001;
    final double priorCounts = 20.0;
    final double epsilon = 1e-6;

    // Fixed seed; the same generator is handed to every initializeRandom* call below, so the
    // order of those calls is kept exactly as is.
    final Random rng = new Random(12878);

    final MultivariateGaussian positiveA = new MultivariateGaussian(1, numAnnotations);
    positiveA.initializeRandomMu(rng);
    positiveA.initializeRandomSigma(rng);

    final MultivariateGaussian positiveB = new MultivariateGaussian(1, numAnnotations);
    positiveB.initializeRandomMu(rng);
    positiveB.initializeRandomSigma(rng);

    final MultivariateGaussian negativeA = new MultivariateGaussian(1, numAnnotations);
    negativeA.initializeRandomMu(rng);
    negativeA.initializeRandomSigma(rng);

    final List<MultivariateGaussian> positiveGaussians = new ArrayList<>();
    positiveGaussians.add(positiveA);
    positiveGaussians.add(positiveB);

    final List<MultivariateGaussian> negativeGaussians = new ArrayList<>();
    negativeGaussians.add(negativeA);

    final GaussianMixtureModel goodModel = new GaussianMixtureModel(positiveGaussians, shrinkage, dirichlet, priorCounts);
    final GaussianMixtureModel badModel = new GaussianMixtureModel(negativeGaussians, shrinkage, dirichlet, priorCounts);

    if (printTables) {
        logger.info("Good model mean matrix:");
        logger.info(vectorToString(positiveA.mu));
        logger.info(vectorToString(positiveB.mu));
        logger.info("\n\n");
        logger.info("Good model covariance matrices:");
        positiveA.sigma.print(10, 3);
        positiveB.sigma.print(10, 3);
        logger.info("\n\n");
        logger.info("Bad model mean matrix:\n");
        logger.info(vectorToString(negativeA.mu));
        logger.info("\n\n");
        logger.info("Bad model covariance matrix:");
        negativeA.sigma.print(10, 3);
    }

    final VariantRecalibrator vqsr = new VariantRecalibrator();

    // Annotation names double as the column names of the report tables checked below.
    final List<String> annotationList = new ArrayList<>();
    for (final String annotation : new String[] { "QD", "MQ", "FS", "SOR", "ReadPosRankSum", "MQRankSum" }) {
        annotationList.add(annotation);
    }

    final GATKReport report = vqsr.writeModelReport(goodModel, badModel, annotationList);
    if (printTables) {
        report.print(System.out);
    }

    // Gaussian means: one report row per Gaussian.
    final GATKReportTable positiveMeans = report.getTable("PositiveModelMeans");
    for (int col = 0; col < annotationList.size(); col++) {
        Assert.assertEquals(positiveA.mu[col], (Double) positiveMeans.get(0, annotationList.get(col)), epsilon);
    }
    for (int col = 0; col < annotationList.size(); col++) {
        Assert.assertEquals(positiveB.mu[col], (Double) positiveMeans.get(1, annotationList.get(col)), epsilon);
    }

    final GATKReportTable negativeMeans = report.getTable("NegativeModelMeans");
    for (int col = 0; col < annotationList.size(); col++) {
        Assert.assertEquals(negativeA.mu[col], (Double) negativeMeans.get(0, annotationList.get(col)), epsilon);
    }

    // Gaussian covariances.
    final GATKReportTable positiveCovariances = report.getTable("PositiveModelCovariances");
    for (int row = 0; row < annotationList.size(); row++) {
        for (int col = 0; col < annotationList.size(); col++) {
            Assert.assertEquals(positiveA.sigma.get(row, col), (Double) positiveCovariances.get(row, annotationList.get(col)), epsilon);
        }
    }
    // The second Gaussian's rows start at annotationList.size() because the matrices are
    // concatenated by row in the report.
    for (int row = 0; row < annotationList.size(); row++) {
        for (int col = 0; col < annotationList.size(); col++) {
            Assert.assertEquals(positiveB.sigma.get(row, col), (Double) positiveCovariances.get(annotationList.size() + row, annotationList.get(col)), epsilon);
        }
    }

    final GATKReportTable negativeCovariances = report.getTable("NegativeModelCovariances");
    for (int row = 0; row < annotationList.size(); row++) {
        for (int col = 0; col < annotationList.size(); col++) {
            Assert.assertEquals(negativeA.sigma.get(row, col), (Double) negativeCovariances.get(row, annotationList.get(col)), epsilon);
        }
    }
}
use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.
the class VariantRecalibratorModelOutputUnitTest method testAnnotationNormalizationOutput.
/**
 * Checks that {@code makeVectorTable} stores each entry of the mean vector in the named column,
 * one row per annotation.
 *
 * <p>This is tested separately to avoid setting up a VariantDataManager and populating it with
 * fake data.
 */
@Test
public void testAnnotationNormalizationOutput() {
    final double epsilon = 1e-6;
    final String columnName = "Mean";
    final String formatString = "%.3f";
    final double[] meanVector = { 16.13, 2.45, 0.37, 59.08, 0.14, 0.91 };

    final VariantRecalibrator vqsr = new VariantRecalibrator();

    // One annotation per entry of meanVector, in the same order.
    final List<String> annotationList = new ArrayList<>();
    for (final String annotation : new String[] { "QD", "FS", "ReadPosRankSum", "MQ", "MQRankSum", "SOR" }) {
        annotationList.add(annotation);
    }

    final GATKReportTable vectorTable = vqsr.makeVectorTable(
            "AnnotationMeans",
            "Mean for each annotation, used to normalize data",
            annotationList,
            meanVector,
            columnName,
            formatString);

    for (int row = 0; row < annotationList.size(); row++) {
        Assert.assertEquals(meanVector[row], (Double) vectorTable.get(row, columnName), epsilon);
    }

    if (printTables) {
        final GATKReport report = new GATKReport();
        report.addTable(vectorTable);
        report.print(System.out);
    }
}
use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.
the class VariantRecalibrator method writeModelReport.
/**
 * Assembles a report describing the positive and negative Gaussian mixture models.
 *
 * <p>Tables are added in this order: when {@code dataManager} is non-null, "AnnotationMeans" and
 * "AnnotationStdevs"; then always "PositiveModelMeans", "PositiveModelCovariances",
 * "NegativeModelMeans" and "NegativeModelCovariances".
 *
 * @param goodModel      the positive model
 * @param badModel       the negative model
 * @param annotationList annotation names passed to the means and covariances table builders
 * @return the assembled report
 */
protected GATKReport writeModelReport(final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, List<String> annotationList) {
    final String formatString = "%.3f";
    final GATKReport report = new GATKReport();

    // The null check exists for unit tests, which call this without a data manager.
    if (dataManager != null) {
        final double[] means = dataManager.getMeanVector();
        report.addTable(makeVectorTable("AnnotationMeans", "Mean for each annotation, used to normalize data", dataManager.annotationKeys, means, "Mean", formatString));

        // Despite the getter's name, the "variance vector" actually holds standard deviations.
        final double[] stdevs = dataManager.getVarianceVector();
        report.addTable(makeVectorTable("AnnotationStdevs", "Standard deviation for each annotation, used to normalize data", dataManager.annotationKeys, stdevs, "Standard deviation", formatString));
    }

    // The model and Gaussians don't know what the annotations are, so the names come from the
    // caller; VariantDataManager keeps the annotations in the same order as the argument list.
    report.addTable(makeMeansTable("PositiveModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the positive model", annotationList, goodModel, formatString));
    report.addTable(makeCovariancesTable("PositiveModelCovariances", "Matrix to describe the (normalized) covariance for each Gaussian in the positive model; covariance matrices are joined by row", annotationList, goodModel, formatString));

    // Same pair of tables for the negative model.
    report.addTable(makeMeansTable("NegativeModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the negative model", annotationList, badModel, formatString));
    report.addTable(makeCovariancesTable("NegativeModelCovariances", "Matrix to describe the (normalized) covariance for each Gaussian in the negative model; covariance matrices are joined by row", annotationList, badModel, formatString));

    return report;
}
Aggregations