Use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
The class IntegerCopyNumberTransitionMatrixUnitTest, method testPadding.
@Test
public void testPadding() {
    // The 3x3 input is column-normalized, then padded with 2 extra states
    // that transition only to themselves (an identity block).
    final IntegerCopyNumberTransitionMatrix data = new IntegerCopyNumberTransitionMatrix(
            new Array2DRowRealMatrix(new double[][] {
                    {1, 2, 3},
                    {4, 5, 6},
                    {7, 8, 9}}), 2);
    final RealMatrix expected = new Array2DRowRealMatrix(new double[][] {
            {1.0 / 12, 2.0 / 15, 3.0 / 18, 0, 0},
            {4.0 / 12, 5.0 / 15, 6.0 / 18, 0, 0},
            {7.0 / 12, 8.0 / 15, 9.0 / 18, 0, 0},
            {0, 0, 0, 1, 0},
            {0, 0, 0, 0, 1}});
    Assert.assertEquals(data.getTransitionMatrix().subtract(expected).getNorm(), 0, 1e-12);
}
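The behavior the test asserts can be reproduced with commons-math alone. The following is a minimal sketch, not the GATK implementation, and the helper name normalizeAndPad is hypothetical: each column of the raw matrix is divided by its column sum, and the padded states keep the identity entries.

import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;

// Hypothetical helper mirroring what the test expects: column-normalize
// the raw transition counts, then pad with identity (self-transition) states.
static RealMatrix normalizeAndPad(final RealMatrix raw, final int padding) {
    final int n = raw.getColumnDimension();
    // Start from an identity matrix so the padded states transition to themselves.
    final RealMatrix padded = MatrixUtils.createRealIdentityMatrix(n + padding);
    for (int col = 0; col < n; col++) {
        double columnSum = 0;
        for (int row = 0; row < n; row++) {
            columnSum += raw.getEntry(row, col);
        }
        for (int row = 0; row < n; row++) {
            padded.setEntry(row, col, raw.getEntry(row, col) / columnSum);
        }
    }
    return padded;
}

Applied to the 3x3 input above with a padding of 2 (column sums 12, 15, and 18), this yields exactly the 5x5 expected matrix.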
Use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.
The class HDF5PCACoveragePoNCreationUtils, method calculateTargetVariances.
/**
* Determine the variance for each target in the PoN (panel targets).
*
* @return array of doubles where each double corresponds to a target in the PoN (panel targets)
*/
private static double[] calculateTargetVariances(final ReadCountCollection normalizedCounts,
                                                 final List<String> panelTargetNames,
                                                 final ReductionResult reduction,
                                                 final JavaSparkContext ctx) {
    Utils.nonNull(panelTargetNames);
    Utils.nonNull(normalizedCounts);
    Utils.nonNull(reduction);
    final PCATangentNormalizationResult allNormals = PCATangentNormalizationUtils.tangentNormalizeNormalsInPoN(
            normalizedCounts, panelTargetNames, reduction.getReducedCounts(), reduction.getReducedPseudoInverse(), ctx);
    final RealMatrix allSampleProjectedTargets = allNormals.getTangentNormalized().counts();
    return MatrixSummaryUtils.getRowVariances(allSampleProjectedTargets);
}
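MatrixSummaryUtils.getRowVariances is a GATK utility; a plausible stand-in built on commons-math's Variance statistic (a sketch under that assumption, not the GATK source) looks like:

import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.stat.descriptive.moment.Variance;

// Hypothetical stand-in for MatrixSummaryUtils.getRowVariances: the
// bias-corrected sample variance of each row (i.e., each target) of the matrix.
static double[] rowVariances(final RealMatrix m) {
    final Variance variance = new Variance(); // bias-corrected by default
    final double[] result = new double[m.getRowDimension()];
    for (int row = 0; row < m.getRowDimension(); row++) {
        result[row] = variance.evaluate(m.getRow(row));
    }
    return result;
}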
Use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.
The class HDF5PCACoveragePoNCreationUtils, method calculateReducedPanelAndPInverses.
/**
 * SVD and pseudo-inverse calculation.
 *
 * @param logNormalized the input counts for the SVD and reduction steps, fully normalized and already logged.
 * @param requestedNumberOfEigensamples user-requested number of eigensamples for the reduced panel.
 * @param logger logger used to report progress.
 * @param ctx the Spark context; may be {@code null}, in which case Spark is not used.
 * @return never {@code null}.
 */
@VisibleForTesting
static ReductionResult calculateReducedPanelAndPInverses(final ReadCountCollection logNormalized,
                                                         final OptionalInt requestedNumberOfEigensamples,
                                                         final Logger logger,
                                                         final JavaSparkContext ctx) {
    if (ctx == null) {
        logger.warn("No Spark context provided, not going to use Spark...");
    }
    final RealMatrix logNormalizedCounts = logNormalized.counts();
    final int numberOfCountColumns = logNormalizedCounts.getColumnDimension();
    logger.info("Starting the SVD decomposition of the log-normalized counts...");
    final long svdStartTime = System.currentTimeMillis();
    final SVD logNormalizedSVD = SVDFactory.createSVD(logNormalizedCounts, ctx);
    final long svdEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished the SVD decomposition of the log-normalized counts. Elapsed time: %d seconds", (svdEndTime - svdStartTime) / 1000));
    final int numberOfEigensamples = determineNumberOfEigensamples(requestedNumberOfEigensamples, numberOfCountColumns, logNormalizedSVD, logger);
    logger.info(String.format("Including %d eigensamples in the reduced PoN", numberOfEigensamples));
    final double[] singularValues = logNormalizedSVD.getSingularValues();
    // Keep the first numberOfEigensamples columns of U and scale each column by its singular value.
    final RealMatrix reducedCounts = logNormalizedSVD.getU()
            .getSubMatrix(0, logNormalizedCounts.getRowDimension() - 1, 0, numberOfEigensamples - 1)
            .copy();
    reducedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return singularValues[column] * value;
        }
    });
    // The pseudo-inverse comes nearly for free from having run the SVD decomposition.
    final RealMatrix logNormalizedPseudoInverse = logNormalizedSVD.getPinv();
    logger.info("Calculating the reduced PoN inverse matrix...");
    final long riStartTime = System.currentTimeMillis();
    final RealMatrix reducedCountsPseudoInverse = SVDFactory.createSVD(reducedCounts, ctx).getPinv();
    final long riEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished calculating the reduced PoN inverse matrix. Elapsed time: %d seconds", (riEndTime - riStartTime) / 1000));
    return new ReductionResult(logNormalizedPseudoInverse, reducedCounts, reducedCountsPseudoInverse, logNormalizedSVD.getSingularValues());
}
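Without Spark, the same reduction can be sketched directly with commons-math's SingularValueDecomposition. This illustrates only the linear algebra (reduced = U_k * diag(sigma_k), with the pseudo-inverse taken from the same decomposition), not the SVDFactory code path; the helper name reducePanel and its return convention are hypothetical:

import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.SingularValueDecomposition;

// Sketch of the reduction step with plain commons-math, assuming k eigensamples are kept.
static RealMatrix[] reducePanel(final RealMatrix logNormalizedCounts, final int k) {
    final SingularValueDecomposition svd = new SingularValueDecomposition(logNormalizedCounts);
    final double[] sigma = svd.getSingularValues();
    // Truncate U to its first k columns.
    final RealMatrix reduced = svd.getU()
            .getSubMatrix(0, logNormalizedCounts.getRowDimension() - 1, 0, k - 1)
            .copy();
    // Scale column j of U_k by the j-th singular value, as the visitor above does.
    for (int col = 0; col < k; col++) {
        for (int row = 0; row < reduced.getRowDimension(); row++) {
            reduced.multiplyEntry(row, col, sigma[col]);
        }
    }
    // The SVD solver's inverse is the Moore-Penrose pseudo-inverse of the input.
    final RealMatrix pinv = svd.getSolver().getInverse();
    return new RealMatrix[] { reduced, pinv };
}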
Use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.
The class HDF5PCACoveragePoN, method readMatrixAndCheckDimensions.
/**
 * Reads a matrix from the underlying PoN file and checks its dimensions.
 * @param fullPath the target data-set full path within the HDF5 file.
 * @param expectedRowCount a predicate that returns true iff its argument is an expected number of rows.
 * @param expectedColumnCount a predicate that returns true iff its argument is an expected number of columns.
 * @return the read matrix, transposed if only the transposed orientation satisfies the dimension predicates.
 * @throws GATKException if the matrix dimensions do not match the expectations in either orientation, or
 * for any other cause as described in {@link #readMatrix(String)}.
 */
private RealMatrix readMatrixAndCheckDimensions(final String fullPath,
                                                final IntPredicate expectedRowCount,
                                                final IntPredicate expectedColumnCount) {
    final RealMatrix result = readMatrix(fullPath);
    if (expectedRowCount.test(result.getRowDimension()) && expectedColumnCount.test(result.getColumnDimension())) {
        return result;
    }
    // The matrix may have been stored transposed; accept it if the transpose passes both checks.
    final RealMatrix transpose = result.transpose();
    if (!expectedRowCount.test(transpose.getRowDimension())) {
        throw new GATKException(String.format("wrong number of rows in '%s' matrix from file '%s': %d", fullPath, file.getFile(), result.getRowDimension()));
    }
    if (!expectedColumnCount.test(transpose.getColumnDimension())) {
        throw new GATKException(String.format("wrong number of columns in '%s' from file '%s': %d", fullPath, file.getFile(), result.getColumnDimension()));
    }
    return transpose;
}
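Because the expected dimensions are passed as IntPredicates, a call site can express exact or open-ended constraints. A hypothetical call site (the data-set path and the targetCount variable are made up for illustration):

// Hypothetical usage: accept a matrix with exactly targetCount rows
// (in either orientation) and any positive number of columns.
final RealMatrix counts = readMatrixAndCheckDimensions(
        "/path/to/dataset",          // hypothetical HDF5 data-set path
        rows -> rows == targetCount,
        cols -> cols > 0);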
Use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.
The class PCATangentNormalizationUtils, method tangentNormalizeNonSpark.
/**
 * Tangent-normalizes the input counts against the raw PoN data without using Spark.
*/
private static PCATangentNormalizationResult tangentNormalizeNonSpark(final ReadCountCollection targetFactorNormalizedCounts,
                                                                      final RealMatrix reducedPanelCounts,
                                                                      final RealMatrix reducedPanelPInvCounts,
                                                                      final CaseToPoNTargetMapper targetMapper,
                                                                      final RealMatrix tangentNormalizationInputCounts) {
    // Calculate the beta-hats for the input read count columns (samples).
    logger.info("Calculating beta hats...");
    final RealMatrix tangentBetaHats = calculateBetaHats(reducedPanelPInvCounts, tangentNormalizationInputCounts, EPSILON);
    // Actual tangent normalization step.
    logger.info("Performing actual tangent normalization (" + tangentNormalizationInputCounts.getColumnDimension() + " columns)...");
    final RealMatrix tangentNormalizedCounts = tangentNormalize(reducedPanelCounts, tangentNormalizationInputCounts, tangentBetaHats);
    // Output the tangent-normalized counts.
    logger.info("Post-processing tangent normalization results...");
    final ReadCountCollection tangentNormalized = targetMapper.fromPoNtoCaseCountCollection(tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
    final ReadCountCollection preTangentNormalized = targetMapper.fromPoNtoCaseCountCollection(tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());
    return new PCATangentNormalizationResult(tangentNormalized, preTangentNormalized, tangentBetaHats, targetFactorNormalizedCounts);
}
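The helpers calculateBetaHats and tangentNormalize amount to a least-squares projection onto the reduced panel. Ignoring the EPSILON thresholding, the underlying linear algebra reduces to the following sketch (the method name tangentNormalizeSketch is hypothetical, and the omission of the thresholding is an assumption):

import org.apache.commons.math3.linear.RealMatrix;

// Sketch of the projection: beta = P^+ * C regresses each case column onto
// the reduced panel, and the tangent-normalized counts are the residual C - P * beta.
static RealMatrix tangentNormalizeSketch(final RealMatrix reducedPanel,      // P: targets x eigensamples
                                         final RealMatrix reducedPanelPInv,  // P^+: eigensamples x targets
                                         final RealMatrix caseCounts) {      // C: targets x samples
    final RealMatrix betaHats = reducedPanelPInv.multiply(caseCounts);
    return caseCounts.subtract(reducedPanel.multiply(betaHats));
}

Each case column is fitted as a linear combination of the panel's eigensamples, and the fitted component is subtracted, leaving the case-specific residual that downstream segmentation operates on.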