Search in sources :

Example 1 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class XHMMSegmentCallerBase method standardizeBySample.

/**
     * Standardize read counts (per-sample).
     * Note: modification is done in-place.
     *
     * @param counts original read counts
     */
private void standardizeBySample(final RealMatrix counts) {
    final double[] columnMeans = GATKProtectedMathUtils.columnMeans(counts);
    final double[] columnStdDev = GATKProtectedMathUtils.columnStdDevs(counts);
    counts.walkInColumnOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(int row, int column, double value) {
            return (value - columnMeans[column]) / columnStdDev[column];
        }
    });
}
Also used : DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Example 2 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method normalizeAndLogReadCounts.

/**
     * Final pre-panel normalization that consists of dividing all counts by the median of
     * its column and log it with base 2.
     * <p>
     *     The normalization occurs in-place.
     * </p>
     *
     * @param readCounts the input counts to normalize.
     */
@VisibleForTesting
static void normalizeAndLogReadCounts(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Median medianCalculator = new Median();
    final double[] medians = IntStream.range(0, counts.getColumnDimension()).mapToDouble(col -> medianCalculator.evaluate(counts.getColumn(col))).toArray();
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return Math.log(Math.max(EPSILON, value / medians[column])) * INV_LN_2;
        }
    });
    logger.info("Counts normalized by the column median and log2'd.");
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) SVD(org.broadinstitute.hellbender.utils.svd.SVD) java.util(java.util) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Pair(org.apache.commons.lang3.tuple.Pair) Median(org.apache.commons.math3.stat.descriptive.rank.Median) HDF5File(org.broadinstitute.hdf5.HDF5File) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) UserException(org.broadinstitute.hellbender.exceptions.UserException) SVDFactory(org.broadinstitute.hellbender.utils.svd.SVDFactory) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method subtractMedianOfMedians.

/**
     * Calculates the median of column medians and subtract it from all counts.
     * @param readCounts the input counts to center.
     * @return the median of medians that has been subtracted from all counts.
     */
@VisibleForTesting
static double subtractMedianOfMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Median medianCalculator = new Median();
    final double[] columnMedians = MatrixSummaryUtils.getColumnMedians(counts);
    final double medianOfMedians = medianCalculator.evaluate(columnMedians);
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return value - medianOfMedians;
        }
    });
    logger.info(String.format("Counts centered around the median of medians %.2f", medianOfMedians));
    return medianOfMedians;
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method calculateReducedPanelAndPInverses.

/**
     * SVD and Pseudo inverse calculation.
     *
     * @param logNormalized the input counts for the SVD and reduction steps, fully normalized and already logged.
     * @param requestedNumberOfEigensamples user requested number of eigensamples for the reduced panel.
     * @return never {@code null}.
     */
@VisibleForTesting
static ReductionResult calculateReducedPanelAndPInverses(final ReadCountCollection logNormalized, final OptionalInt requestedNumberOfEigensamples, final Logger logger, final JavaSparkContext ctx) {
    if (ctx == null) {
        logger.warn("No Spark context provided, not going to use Spark...");
    }
    final RealMatrix logNormalizedCounts = logNormalized.counts();
    final int numberOfCountColumns = logNormalizedCounts.getColumnDimension();
    logger.info("Starting the SVD decomposition of the log-normalized counts ...");
    final long svdStartTime = System.currentTimeMillis();
    final SVD logNormalizedSVD = SVDFactory.createSVD(logNormalized.counts(), ctx);
    final long svdEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished the SVD decomposition of the log-normal counts. Elapse of %d seconds", (svdEndTime - svdStartTime) / 1000));
    final int numberOfEigensamples = determineNumberOfEigensamples(requestedNumberOfEigensamples, numberOfCountColumns, logNormalizedSVD, logger);
    logger.info(String.format("Including %d eigensamples in the reduced PoN", numberOfEigensamples));
    final double[] singularValues = logNormalizedSVD.getSingularValues();
    final RealMatrix reducedCounts = logNormalizedSVD.getU().getSubMatrix(0, logNormalizedCounts.getRowDimension() - 1, 0, numberOfEigensamples - 1).copy();
    reducedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return singularValues[column] * value;
        }
    });
    // The Pseudo inverse comes nearly for free from having run the SVD decomposition.
    final RealMatrix logNormalizedPseudoInverse = logNormalizedSVD.getPinv();
    logger.info("Calculating the reduced PoN inverse matrix...");
    final long riStartTime = System.currentTimeMillis();
    final RealMatrix reducedCountsPseudoInverse = SVDFactory.createSVD(reducedCounts, ctx).getPinv();
    final long riEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished calculating the reduced PoN inverse matrix. Elapse of %d seconds", (riEndTime - riStartTime) / 1000));
    return new ReductionResult(logNormalizedPseudoInverse, reducedCounts, reducedCountsPseudoInverse, logNormalizedSVD.getSingularValues());
}
Also used : SVD(org.broadinstitute.hellbender.utils.svd.SVD) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 5 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class PCATangentNormalizationUtils method factorNormalize.

/**
     * Target-factor normalizes a {@link RealMatrix} in-place given target factors..
     */
static void factorNormalize(final RealMatrix input, final double[] targetFactors) {
    Utils.nonNull(input, "Input matrix cannot be null.");
    Utils.nonNull(targetFactors, "Target factors cannot be null.");
    Utils.validateArg(targetFactors.length == input.getRowDimension(), "Number of target factors does not correspond to the number of rows.");
    // Divide all counts by the target factor for the row.
    input.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return value / targetFactors[row];
        }
    });
}
Also used : DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Aggregations

DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)28 RealMatrix (org.apache.commons.math3.linear.RealMatrix)22 VisibleForTesting (com.google.common.annotations.VisibleForTesting)12 IntStream (java.util.stream.IntStream)10 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)10 Median (org.apache.commons.math3.stat.descriptive.rank.Median)10 Utils (org.broadinstitute.hellbender.utils.Utils)8 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 Logger (org.apache.logging.log4j.Logger)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 Doubles (com.google.common.primitives.Doubles)4 File (java.io.File)4 IOException (java.io.IOException)4 java.util (java.util)4 DoubleStream (java.util.stream.DoubleStream)4 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)4 Pair (org.apache.commons.lang3.tuple.Pair)4 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)4 LogManager (org.apache.logging.log4j.LogManager)4