Use of org.broadinstitute.hellbender.utils.svd.SVD in projects gatk and gatk-protected by broadinstitute.
The class HDF5PCACoveragePoNCreationUtils, method calculateReducedPanelAndPInverses.
/**
 * SVD and pseudoinverse calculation.
 *
 * @param logNormalized the input counts for the SVD and reduction steps, fully normalized and already logged.
 * @param requestedNumberOfEigensamples user-requested number of eigensamples for the reduced panel.
 * @param logger logger for progress messages.
 * @param ctx Spark context; when {@code null} the decomposition falls back to a non-Spark implementation.
 * @return never {@code null}.
 */
@VisibleForTesting
static ReductionResult calculateReducedPanelAndPInverses(final ReadCountCollection logNormalized, final OptionalInt requestedNumberOfEigensamples, final Logger logger, final JavaSparkContext ctx) {
    if (ctx == null) {
        logger.warn("No Spark context provided, not going to use Spark...");
    }
    final RealMatrix logNormalizedCounts = logNormalized.counts();
    final int numberOfCountColumns = logNormalizedCounts.getColumnDimension();

    logger.info("Starting the SVD decomposition of the log-normalized counts ...");
    final long svdStartTime = System.currentTimeMillis();
    final SVD logNormalizedSVD = SVDFactory.createSVD(logNormalizedCounts, ctx);
    final long svdEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished the SVD decomposition of the log-normalized counts. Elapsed time: %d seconds", (svdEndTime - svdStartTime) / 1000));

    final int numberOfEigensamples = determineNumberOfEigensamples(requestedNumberOfEigensamples, numberOfCountColumns, logNormalizedSVD, logger);
    logger.info(String.format("Including %d eigensamples in the reduced PoN", numberOfEigensamples));

    // The reduced panel is the first numberOfEigensamples left-singular vectors, each column scaled by its singular value.
    final double[] singularValues = logNormalizedSVD.getSingularValues();
    final RealMatrix reducedCounts = logNormalizedSVD.getU().getSubMatrix(0, logNormalizedCounts.getRowDimension() - 1, 0, numberOfEigensamples - 1).copy();
    reducedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return singularValues[column] * value;
        }
    });

    // The pseudoinverse comes nearly for free from having run the SVD decomposition.
    final RealMatrix logNormalizedPseudoInverse = logNormalizedSVD.getPinv();

    logger.info("Calculating the reduced PoN inverse matrix...");
    final long riStartTime = System.currentTimeMillis();
    final RealMatrix reducedCountsPseudoInverse = SVDFactory.createSVD(reducedCounts, ctx).getPinv();
    final long riEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished calculating the reduced PoN inverse matrix. Elapsed time: %d seconds", (riEndTime - riStartTime) / 1000));

    return new ReductionResult(logNormalizedPseudoInverse, reducedCounts, reducedCountsPseudoInverse, logNormalizedSVD.getSingularValues());
}
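The same reduction pattern can be reproduced with Apache Commons Math alone, without GATK's SVD wrapper or Spark. The following is a minimal sketch, assuming the standard org.apache.commons.math3.linear API: the toy matrix and the choice of k = 2 are invented for the example, and getSolver().getInverse() plays the role that getPinv() plays in the GATK interface.

import org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor;
import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.SingularValueDecomposition;

public class SvdReductionSketch {
    public static void main(String[] args) {
        // Toy 4x3 stand-in for the log-normalized counts matrix.
        final RealMatrix counts = MatrixUtils.createRealMatrix(new double[][] {
            {1.0, 2.0, 0.5},
            {0.5, 1.5, 1.0},
            {2.0, 0.5, 1.5},
            {1.5, 1.0, 2.0}
        });
        final SingularValueDecomposition svd = new SingularValueDecomposition(counts);
        final double[] singularValues = svd.getSingularValues();

        // Keep the first k eigensamples: a sub-matrix of U with each column scaled by its singular value.
        final int k = 2;
        final RealMatrix reduced = svd.getU()
                .getSubMatrix(0, counts.getRowDimension() - 1, 0, k - 1)
                .copy();
        reduced.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
            @Override
            public double visit(final int row, final int column, final double value) {
                return singularValues[column] * value;
            }
        });

        // The Moore-Penrose pseudoinverse falls out of the same decomposition.
        final RealMatrix pinv = svd.getSolver().getInverse();
        System.out.println("Reduced panel: " + reduced);
        System.out.println("Pseudoinverse: " + pinv);
    }
}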
Use of org.broadinstitute.hellbender.utils.svd.SVD in project gatk by broadinstitute.
The class DecomposeSingularValuesIntegrationTest, method assertSVDValues.
private void assertSVDValues(final File outputFileV, final File outputFileS, final File outputFileU) {
    try {
        final ReadCountCollection rcc = ReadCountCollectionUtils.parse(CONTROL_PCOV_FULL_FILE);
        final SVD svd = SVDFactory.createSVD(rcc.counts());
        final RealMatrix sDiag = new DiagonalMatrix(svd.getSingularValues());
        // The written matrices must match the decomposition recomputed from the input counts.
        assertOutputFileValues(outputFileU, svd.getU());
        assertOutputFileValues(outputFileS, sDiag);
        assertOutputFileValues(outputFileV, svd.getV());
        // U and V must have orthonormal columns; S is diagonal and therefore symmetric.
        assertUnitaryMatrix(svd.getV());
        assertUnitaryMatrix(svd.getU());
        Assert.assertTrue(MatrixUtils.isSymmetric(sDiag, 1e-32));
    } catch (final IOException ioe) {
        Assert.fail("Could not open test file: " + CONTROL_PCOV_FULL_FILE, ioe);
    }
}
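assertUnitaryMatrix is a GATK-internal test helper; the same check can be written directly against Commons Math by verifying that the Gram matrix of the columns is the identity. A minimal sketch, assuming only org.apache.commons.math3.linear (the method name hasOrthonormalColumns and the tolerance are my own):

import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.SingularValueDecomposition;

public class UnitaryCheckSketch {
    /** Returns true when m's columns are orthonormal, i.e. m^T * m is (numerically) the identity. */
    static boolean hasOrthonormalColumns(final RealMatrix m, final double tol) {
        final RealMatrix gram = m.transpose().multiply(m);
        final RealMatrix identity = MatrixUtils.createRealIdentityMatrix(m.getColumnDimension());
        return gram.subtract(identity).getNorm() < tol;
    }

    public static void main(String[] args) {
        final RealMatrix counts = MatrixUtils.createRealMatrix(new double[][] {
            {1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}
        });
        final SingularValueDecomposition svd = new SingularValueDecomposition(counts);
        System.out.println(hasOrthonormalColumns(svd.getU(), 1e-10)); // true
        System.out.println(hasOrthonormalColumns(svd.getV(), 1e-10)); // true
    }
}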
Use of org.broadinstitute.hellbender.utils.svd.SVD in project gatk by broadinstitute.
The class DecomposeSingularValues, method runPipeline.
@Override
protected void runPipeline(final JavaSparkContext ctx) {
    try {
        final ReadCountCollection rcc = ReadCountCollectionUtils.parse(inputFile);
        final SVD svd = SVDFactory.createSVD(rcc.counts(), ctx);
        // Write the three factors of the decomposition: counts = U * S * V^T.
        writeMatrix(svd.getV(), outputFileV);
        writeMatrix(svd.getU(), outputFileU);
        writeMatrix(new DiagonalMatrix(svd.getSingularValues()), outputFileS);
    } catch (final IOException ioe) {
        throw new UserException.CouldNotReadInputFile(inputFile, ioe.getMessage());
    }
}
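writeMatrix here belongs to the tool, not to the SVD API, and its format is not shown. Purely as a hypothetical stand-in (this is not the tool's actual output format), a stand-alone equivalent could dump the matrix as tab-separated text, one row per line:

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.StringJoiner;
import org.apache.commons.math3.linear.RealMatrix;

final class MatrixIoSketch {
    /** Hypothetical writeMatrix stand-in: one tab-separated matrix row per output line. */
    static void writeMatrix(final RealMatrix m, final File out) throws IOException {
        try (PrintWriter writer = new PrintWriter(out)) {
            for (int row = 0; row < m.getRowDimension(); row++) {
                final StringJoiner joiner = new StringJoiner("\t");
                for (int col = 0; col < m.getColumnDimension(); col++) {
                    joiner.add(Double.toString(m.getEntry(row, col)));
                }
                writer.println(joiner.toString());
            }
        }
    }
}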
Use of org.broadinstitute.hellbender.utils.svd.SVD in project gatk by broadinstitute.
The class HDF5PCACoveragePoNCreationUtilsUnitTest, method testDetermineNumberOfEigensamplesSpark.
@Test(dataProvider = "singleEigensample")
public void testDetermineNumberOfEigensamplesSpark(final ReadCountCollection logNormals) {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final SVD logNormalsSVD = SVDFactory.createSVD(logNormals.counts(), ctx);
    // With no user-requested count (OptionalInt.empty()), the heuristic should pick a single eigensample.
    final int actualNumber = HDF5PCACoveragePoNCreationUtils.determineNumberOfEigensamples(OptionalInt.empty(), logNormals.columnNames().size(), logNormalsSVD, NULL_LOGGER);
    Assert.assertEquals(actualNumber, 1);
}
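The test pins the expected answer at 1 but does not show how determineNumberOfEigensamples decides. As an illustration only — this is a generic explained-variance cutoff, not GATK's actual heuristic, and chooseEigensamples is an invented name — one could pick the smallest k whose singular values capture a given fraction of the squared spectrum:

import java.util.Arrays;

final class EigensampleCountSketch {
    /**
     * Illustrative heuristic only, NOT the logic inside
     * HDF5PCACoveragePoNCreationUtils.determineNumberOfEigensamples:
     * keep the smallest k whose singular values account for the given
     * fraction of the total squared spectrum (explained variance).
     */
    static int chooseEigensamples(final double[] singularValues, final double fraction) {
        final double total = Arrays.stream(singularValues).map(s -> s * s).sum();
        double running = 0;
        for (int k = 0; k < singularValues.length; k++) {
            running += singularValues[k] * singularValues[k];
            if (running / total >= fraction) {
                return k + 1;
            }
        }
        return singularValues.length;
    }

    public static void main(String[] args) {
        // A spectrum dominated by one component, as in the "singleEigensample" data provider.
        final double[] sigma = {10.0, 0.01, 0.005};
        System.out.println(chooseEigensamples(sigma, 0.95)); // prints 1
    }
}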