Search in sources :

Example 1 with GATKReportTable

use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.

In class RecalUtils, method generateReportTables.

/**
 * Builds the report tables for a set of recalibration tables.
 *
 * <p>Every table that is not an additional-covariate table gets a report table of its own.
 * All additional-covariate tables share a single report table: it is created when the first
 * such table is encountered, and rows of the following covariate tables are appended to it
 * (the row index is not reset for them).
 *
 * @param recalibrationTables the recalibration tables to report, visited in iteration order
 * @param covariates supplies the read group and quality score covariates used to name and
 *                   format the key columns
 * @return the created report tables in creation order; the shared covariate table is listed once
 */
public static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final StandardCovariateList covariates) {
    final List<GATKReportTable> result = new ArrayList<>();
    int rowIndex = 0;
    GATKReportTable allCovsReportTable = null;
    for (NestedIntegerArray<RecalDatum> table : recalibrationTables) {
        // The table kind does not change while we process it, so classify it once
        // instead of re-querying for every row.
        final boolean forReadGroup = recalibrationTables.isReadGroupTable(table);
        final boolean forAdditionalCovariate = recalibrationTables.isAdditionalCovariateTable(table);

        // initialize the array to hold the column names
        final ArrayList<Pair<String, String>> columnNames = new ArrayList<>();
        // save the required covariate name so we can reference it in the future
        columnNames.add(new MutablePair<>(covariates.getReadGroupCovariate().parseNameForReport(), "%s"));
        if (!forReadGroup) {
            // save the required covariate name so we can reference it in the future
            columnNames.add(new MutablePair<>(covariates.getQualityScoreCovariate().parseNameForReport(), "%d"));
            if (forAdditionalCovariate) {
                columnNames.add(covariateValue);
                columnNames.add(covariateName);
            }
        }
        // the order of these column names is important here:
        // the row-filling loop below walks the columns in exactly this order
        columnNames.add(eventType);
        columnNames.add(empiricalQuality);
        if (forReadGroup) {
            // only the read group table needs the estimated Q reported
            columnNames.add(estimatedQReported);
        }
        columnNames.add(nObservations);
        columnNames.add(nErrors);
        final String reportTableName = getReportTableName(recalibrationTables, table);
        final GATKReportTable.Sorting sort = GATKReportTable.Sorting.SORT_BY_COLUMN;
        final GATKReportTable reportTable;
        final boolean addToList;
        //XXX this "if" implicitly uses the knowledge about the ordering of tables.
        if (!forAdditionalCovariate) {
            reportTable = makeNewTableWithColumns(columnNames, reportTableName, sort);
            // reset the row index since we're starting with a new table
            rowIndex = 0;
            addToList = true;
        } else if (allCovsReportTable == null) {
            // first additional-covariate table: create the shared report table
            reportTable = makeNewTableWithColumns(columnNames, reportTableName, sort);
            // reset the row index since we're starting with a new table
            rowIndex = 0;
            allCovsReportTable = reportTable;
            addToList = true;
        } else {
            // later additional-covariate tables keep appending to the shared table
            reportTable = allCovsReportTable;
            addToList = false;
        }
        for (final NestedIntegerArray.Leaf<RecalDatum> row : table.getAllLeaves()) {
            final RecalDatum datum = row.value;
            final int[] keys = row.keys;
            int columnIndex = 0;
            int keyIndex = 0;
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariates.getReadGroupCovariate().formatKey(keys[keyIndex++]));
            if (!forReadGroup) {
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariates.getQualityScoreCovariate().formatKey(keys[keyIndex++]));
                if (forAdditionalCovariate) {
                    final Covariate covariate = recalibrationTables.getCovariateForTable(table);
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariate.formatKey(keys[keyIndex++]));
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariate.parseNameForReport());
                }
            }
            // the key following the covariate keys identifies the event type
            final EventType event = EventType.eventFrom(keys[keyIndex]);
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), event.toString());
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getEmpiricalQuality());
            if (forReadGroup) {
                // we only add the estimated Q reported in the RG table
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getEstimatedQReported());
            }
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getNumObservations());
            reportTable.set(rowIndex, columnNames.get(columnIndex).getLeft(), datum.getNumMismatches());
            rowIndex++;
        }
        if (addToList) {
            //XXX using a set would be slow because the equals method on GATKReportTable is expensive.
            result.add(reportTable);
        }
    }
    return result;
}
Also used : Covariate(org.broadinstitute.hellbender.utils.recalibration.covariates.Covariate) GATKReportTable(org.broadinstitute.hellbender.utils.report.GATKReportTable) MutablePair(org.apache.commons.lang3.tuple.MutablePair) Pair(org.apache.commons.lang3.tuple.Pair) NestedIntegerArray(org.broadinstitute.hellbender.utils.collections.NestedIntegerArray)

Example 2 with GATKReportTable

use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.

In class QuantizationInfo, method generateReportTable.

/**
 * Builds the "Quality quantization map" report table.
 *
 * <p>The table has three integer-formatted columns (quality score, count, quantized value) and
 * one row for each quality score from 0 through {@code QualityUtils.MAX_SAM_QUAL_SCORE}
 * inclusive. Count and quantized value are read from {@code empiricalQualCounts} and
 * {@code quantizedQuals} at the index of the quality score.
 *
 * @return the populated report table
 */
public GATKReportTable generateReportTable() {
    final GATKReportTable table = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.Sorting.SORT_BY_COLUMN);

    table.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME, "%d");
    table.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, "%d");
    table.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, "%d");

    // The quality score doubles as the row index.
    int score = 0;
    while (score <= QualityUtils.MAX_SAM_QUAL_SCORE) {
        table.set(score, RecalUtils.QUALITY_SCORE_COLUMN_NAME, score);
        table.set(score, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(score));
        table.set(score, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(score));
        score++;
    }
    return table;
}
Also used : GATKReportTable(org.broadinstitute.hellbender.utils.report.GATKReportTable)

Example 3 with GATKReportTable

use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.

In class VariantRecalibratorModelOutputUnitTest, method testVQSRModelOutput.

/**
 * Writes a model report for a two-Gaussian positive model and a one-Gaussian negative model,
 * then checks that the means and covariances stored in the report tables match the Gaussians
 * they were generated from (within {@code epsilon}).
 */
@Test
public void testVQSRModelOutput() {
    final int numAnnotations = 6;
    final double shrinkage = 1.0;
    final double dirichlet = 0.001;
    final double priorCounts = 20.0;
    final double epsilon = 1e-6;

    // Fixed seed; the three Gaussians draw from this generator one after another.
    final Random rng = new Random(12878);

    final MultivariateGaussian positiveFirst = new MultivariateGaussian(1, numAnnotations);
    positiveFirst.initializeRandomMu(rng);
    positiveFirst.initializeRandomSigma(rng);

    final MultivariateGaussian positiveSecond = new MultivariateGaussian(1, numAnnotations);
    positiveSecond.initializeRandomMu(rng);
    positiveSecond.initializeRandomSigma(rng);

    final MultivariateGaussian negativeOnly = new MultivariateGaussian(1, numAnnotations);
    negativeOnly.initializeRandomMu(rng);
    negativeOnly.initializeRandomSigma(rng);

    final List<MultivariateGaussian> positiveGaussians = new ArrayList<>();
    positiveGaussians.add(positiveFirst);
    positiveGaussians.add(positiveSecond);

    final List<MultivariateGaussian> negativeGaussians = new ArrayList<>();
    negativeGaussians.add(negativeOnly);

    final GaussianMixtureModel positiveModel = new GaussianMixtureModel(positiveGaussians, shrinkage, dirichlet, priorCounts);
    final GaussianMixtureModel negativeModel = new GaussianMixtureModel(negativeGaussians, shrinkage, dirichlet, priorCounts);

    // Optional dump of the inputs, to compare by eye with the printed report.
    if (printTables) {
        logger.info("Good model mean matrix:");
        logger.info(vectorToString(positiveFirst.mu));
        logger.info(vectorToString(positiveSecond.mu));
        logger.info("\n\n");
        logger.info("Good model covariance matrices:");
        positiveFirst.sigma.print(10, 3);
        positiveSecond.sigma.print(10, 3);
        logger.info("\n\n");
        logger.info("Bad model mean matrix:\n");
        logger.info(vectorToString(negativeOnly.mu));
        logger.info("\n\n");
        logger.info("Bad model covariance matrix:");
        negativeOnly.sigma.print(10, 3);
    }

    final VariantRecalibrator recalibrator = new VariantRecalibrator();
    final List<String> annotationList = new ArrayList<>();
    annotationList.add("QD");
    annotationList.add("MQ");
    annotationList.add("FS");
    annotationList.add("SOR");
    annotationList.add("ReadPosRankSum");
    annotationList.add("MQRankSum");
    final int annotationCount = annotationList.size();

    final GATKReport report = recalibrator.writeModelReport(positiveModel, negativeModel, annotationList);
    if (printTables) {
        report.print(System.out);
    }

    // Means: one report row per Gaussian, one column per annotation.
    final GATKReportTable positiveMeans = report.getTable("PositiveModelMeans");
    for (int col = 0; col < annotationCount; col++) {
        Assert.assertEquals(positiveFirst.mu[col], (Double) positiveMeans.get(0, annotationList.get(col)), epsilon);
    }
    for (int col = 0; col < annotationCount; col++) {
        Assert.assertEquals(positiveSecond.mu[col], (Double) positiveMeans.get(1, annotationList.get(col)), epsilon);
    }

    final GATKReportTable negativeMeans = report.getTable("NegativeModelMeans");
    for (int col = 0; col < annotationCount; col++) {
        Assert.assertEquals(negativeOnly.mu[col], (Double) negativeMeans.get(0, annotationList.get(col)), epsilon);
    }

    // Covariances: matrices of the Gaussians are stacked by row in the report.
    final GATKReportTable positiveCovariances = report.getTable("PositiveModelCovariances");
    for (int row = 0; row < annotationCount; row++) {
        for (int col = 0; col < annotationCount; col++) {
            Assert.assertEquals(positiveFirst.sigma.get(row, col), (Double) positiveCovariances.get(row, annotationList.get(col)), epsilon);
        }
    }
    // The second Gaussian's matrix starts annotationCount rows further down.
    for (int row = 0; row < annotationCount; row++) {
        for (int col = 0; col < annotationCount; col++) {
            Assert.assertEquals(positiveSecond.sigma.get(row, col), (Double) positiveCovariances.get(annotationCount + row, annotationList.get(col)), epsilon);
        }
    }

    final GATKReportTable negativeCovariances = report.getTable("NegativeModelCovariances");
    for (int row = 0; row < annotationCount; row++) {
        for (int col = 0; col < annotationCount; col++) {
            Assert.assertEquals(negativeOnly.sigma.get(row, col), (Double) negativeCovariances.get(row, annotationList.get(col)), epsilon);
        }
    }
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) Random(java.util.Random) ArrayList(java.util.ArrayList) GATKReportTable(org.broadinstitute.hellbender.utils.report.GATKReportTable) Test(org.testng.annotations.Test)

Example 4 with GATKReportTable

use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.

In class VariantRecalibratorModelOutputUnitTest, method testAnnotationNormalizationOutput.

/**
 * Checks that {@code makeVectorTable} stores each annotation's mean in the row matching the
 * annotation's position in the list.
 *
 * <p>This is tested separately to avoid setting up a VariantDataManager and populating it with
 * fake data.
 */
@Test
public void testAnnotationNormalizationOutput() {
    final double epsilon = 1e-6;
    final String columnName = "Mean";
    final String formatString = "%.3f";

    final List<String> annotationList = new ArrayList<>();
    annotationList.add("QD");
    annotationList.add("FS");
    annotationList.add("ReadPosRankSum");
    annotationList.add("MQ");
    annotationList.add("MQRankSum");
    annotationList.add("SOR");

    // One mean per annotation, in the same order as annotationList.
    final double[] meanVector = { 16.13, 2.45, 0.37, 59.08, 0.14, 0.91 };

    final VariantRecalibrator recalibrator = new VariantRecalibrator();
    final GATKReportTable meansTable = recalibrator.makeVectorTable("AnnotationMeans", "Mean for each annotation, used to normalize data", annotationList, meanVector, columnName, formatString);

    for (int row = 0; row < annotationList.size(); row++) {
        Assert.assertEquals(meanVector[row], (Double) meansTable.get(row, columnName), epsilon);
    }

    if (printTables) {
        final GATKReport printable = new GATKReport();
        printable.addTable(meansTable);
        printable.print(System.out);
    }
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) ArrayList(java.util.ArrayList) GATKReportTable(org.broadinstitute.hellbender.utils.report.GATKReportTable) Test(org.testng.annotations.Test)

Example 5 with GATKReportTable

use of org.broadinstitute.hellbender.utils.report.GATKReportTable in project gatk by broadinstitute.

In class VariantRecalibrator, method writeModelReport.

/**
 * Assembles a report describing the positive and negative Gaussian mixture models.
 *
 * <p>When {@code dataManager} is set, the report starts with the per-annotation mean and
 * standard deviation tables. The means and covariances tables of the positive model follow,
 * then those of the negative model.
 *
 * @param goodModel the positive model
 * @param badModel the negative model
 * @param annotationList annotation names passed on to the means and covariances tables
 * @return the report with the tables added in the order described above
 */
protected GATKReport writeModelReport(final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, List<String> annotationList) {
    final String formatString = "%.3f";
    final GATKReport report = new GATKReport();

    //null check is here for unit test (per the original note)
    if (dataManager != null) {
        final double[] means = dataManager.getMeanVector();
        report.addTable(makeVectorTable("AnnotationMeans", "Mean for each annotation, used to normalize data", dataManager.annotationKeys, means, "Mean", formatString));

        //despite its name, getVarianceVector() yields the standard deviations
        final double[] stdevs = dataManager.getVarianceVector();
        report.addTable(makeVectorTable("AnnotationStdevs", "Standard deviation for each annotation, used to normalize data", dataManager.annotationKeys, stdevs, "Standard deviation", formatString));
    }

    //The model and Gaussians don't know what the annotations are, so get them from this class
    //VariantDataManager keeps the annotation in the same order as the argument list
    report.addTable(makeMeansTable("PositiveModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the positive model", annotationList, goodModel, formatString));
    report.addTable(makeCovariancesTable("PositiveModelCovariances", "Matrix to describe the (normalized) covariance for each Gaussian in the positive model; covariance matrices are joined by row", annotationList, goodModel, formatString));

    //same pair of tables for the negative model
    report.addTable(makeMeansTable("NegativeModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the negative model", annotationList, badModel, formatString));
    report.addTable(makeCovariancesTable("NegativeModelCovariances", "Matrix to describe the (normalized) covariance for each Gaussian in the negative model; covariance matrices are joined by row", annotationList, badModel, formatString));

    return report;
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) GATKReportTable(org.broadinstitute.hellbender.utils.report.GATKReportTable)

Aggregations

GATKReportTable (org.broadinstitute.hellbender.utils.report.GATKReportTable)10 GATKReport (org.broadinstitute.hellbender.utils.report.GATKReport)3 ArrayList (java.util.ArrayList)2 Test (org.testng.annotations.Test)2 Matrix (Jama.Matrix)1 Random (java.util.Random)1 MutablePair (org.apache.commons.lang3.tuple.MutablePair)1 Pair (org.apache.commons.lang3.tuple.Pair)1 NestedIntegerArray (org.broadinstitute.hellbender.utils.collections.NestedIntegerArray)1 Covariate (org.broadinstitute.hellbender.utils.recalibration.covariates.Covariate)1