Examples with DefaultRealMatrixChangingVisitor - org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor

Example 21 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.

the class PCATangentNormalizationUtils method calculateBetaHats.

/**
 * Computes the beta-hats that best fit the case read counts given the panel of normals.
 * <p>
 * The {@code input} matrix is never modified: a copy is taken, every entry of the copy that
 * is not strictly greater than {@code log2(epsilon) + 1} is replaced with zero, and the
 * pseudoinverse is then multiplied by that masked copy.
 * </p>
 *
 * @param normalsPseudoinverse the log-normalized or reduced-panel pseudoinverse from a panel of normals;
 *                             checked with {@code Utils.nonNull}.
 * @param input a {@code TxS} matrix where {@code T} is the number of targets and {@code S} the number
 *              of count groups (e.g. case samples); checked with {@code Utils.nonNull}.
 * @param epsilon value from which the masking threshold {@code log2(epsilon) + 1} is derived;
 *                checked to be {@code > 0} with {@code Utils.validateArg}.
 * @return the product of {@code normalsPseudoinverse} and the masked copy of {@code input}; per the
 *  documented contract an {@code NxS} matrix, where N is the number of samples in the panel and
 *  S the number of count groups.
 */
@VisibleForTesting
public static RealMatrix calculateBetaHats(final RealMatrix normalsPseudoinverse, final RealMatrix input, final double epsilon) {
    Utils.nonNull(normalsPseudoinverse, "Normals inverse matrix cannot be null.");
    Utils.nonNull(input, "Input counts cannot be null.");
    Utils.validateArg(epsilon > 0, String.format("Invalid epsilon value, must be > 0: %f", epsilon));

    // Entries at or below log2(epsilon) + 1 are treated as too low and zeroed out.
    final double log2Epsilon = Math.log(epsilon) / Math.log(2);
    final double maskCutoff = log2Epsilon + 1;

    final DefaultRealMatrixChangingVisitor lowValueMasker = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            if (value > maskCutoff) {
                return value;
            }
            return 0;
        }
    };

    // Work on a copy so that the caller's matrix stays untouched.
    final RealMatrix masked = input.copy();
    masked.walkInOptimizedOrder(lowValueMasker);
    return normalsPseudoinverse.multiply(masked);
}

Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 22 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method normalizeAndLogReadCounts.

/**
 * Last normalization step before building the panel: each count is divided by the median
 * of its column and the logarithm of that ratio is taken (natural log multiplied by
 * {@code INV_LN_2}, i.e. base 2 according to the log message).
 * <p>
 *     Ratios below {@code EPSILON} are raised to {@code EPSILON} before the logarithm is taken.
 *     The matrix returned by {@code readCounts.counts()} is modified directly.
 * </p>
 *
 * @param readCounts the input counts to normalize.
 * @param logger receives an info message once the transformation is done.
 */
@VisibleForTesting
static void normalizeAndLogReadCounts(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Median median = new Median();

    // One median per column, computed up front from the untransformed values.
    final int columnCount = counts.getColumnDimension();
    final double[] columnMedians = new double[columnCount];
    for (int column = 0; column < columnCount; column++) {
        columnMedians[column] = median.evaluate(counts.getColumn(column));
    }

    final DefaultRealMatrixChangingVisitor medianLogTransform = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            final double ratio = value / columnMedians[column];
            final double flooredRatio = Math.max(EPSILON, ratio);
            return Math.log(flooredRatio) * INV_LN_2;
        }
    };
    counts.walkInOptimizedOrder(medianLogTransform);
    logger.info("Counts normalized by the column median and log2'd.");
}

Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) SVD(org.broadinstitute.hellbender.utils.svd.SVD) java.util(java.util) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Pair(org.apache.commons.lang3.tuple.Pair) Median(org.apache.commons.math3.stat.descriptive.rank.Median) HDF5File(org.broadinstitute.hdf5.HDF5File) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) UserException(org.broadinstitute.hellbender.exceptions.UserException) SVDFactory(org.broadinstitute.hellbender.utils.svd.SVDFactory) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 23 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method subtractMedianOfMedians.

/**
 * Centers the counts: takes the median of the per-column medians and subtracts that single
 * value from every entry of the matrix returned by {@code readCounts.counts()}.
 *
 * @param readCounts the input counts to center.
 * @param logger receives an info message reporting the subtracted value.
 * @return the median of medians that has been subtracted from all counts.
 */
@VisibleForTesting
static double subtractMedianOfMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final double center = new Median().evaluate(MatrixSummaryUtils.getColumnMedians(counts));

    final DefaultRealMatrixChangingVisitor centering = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return value - center;
        }
    };
    counts.walkInOptimizedOrder(centering);

    logger.info(String.format("Counts centered around the median of medians %.2f", center));
    return center;
}

Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 24 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class PCATangentNormalizationUtils method composeTangentNormalizationInputMatrix.

/**
 * Builds the matrix used as input for tangent normalization.
 * <p>
 * The transformation is applied per count group (column) on a copy of the input:
 *   <ol>
 *     <li>each count is divided by the mean of its column,</li>
 *     <li>the ratio is passed through {@code truncatedLog2},</li>
 *     <li>and the median of each transformed column is subtracted from that column.</li>
 *   </ol>
 * The {@code matrix} argument itself is left unchanged.
 * </p>
 *
 * @param matrix input matrix.
 * @return the transformed copy of {@code matrix}.
 */
private static RealMatrix composeTangentNormalizationInputMatrix(final RealMatrix matrix) {
    final RealMatrix normalized = matrix.copy();

    // Step 1: scale by the column means of the original values, then take the truncated log2.
    final double[] means = GATKProtectedMathUtils.columnMeans(matrix);
    final DefaultRealMatrixChangingVisitor meanScaledLog = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            final double ratio = value / means[column];
            return truncatedLog2(ratio);
        }
    };
    normalized.walkInOptimizedOrder(meanScaledLog);

    // Step 2: medians are taken from the already log-transformed columns and subtracted.
    final int columnCount = matrix.getColumnDimension();
    final double[] medians = new double[columnCount];
    for (int column = 0; column < columnCount; column++) {
        medians[column] = new Median().evaluate(normalized.getColumn(column));
    }
    final DefaultRealMatrixChangingVisitor medianCentering = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return value - medians[column];
        }
    };
    normalized.walkInOptimizedOrder(medianCentering);

    return normalized;
}

Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) RowMatrix(org.apache.spark.mllib.linalg.distributed.RowMatrix) DenseMatrix(org.apache.spark.mllib.linalg.DenseMatrix) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) List(java.util.List) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) CaseToPoNTargetMapper(org.broadinstitute.hellbender.tools.pon.coverage.CaseToPoNTargetMapper) CoveragePanelOfNormals(org.broadinstitute.hellbender.tools.pon.coverage.CoveragePanelOfNormals) Median(org.apache.commons.math3.stat.descriptive.rank.Median) SparkConverter(org.broadinstitute.hellbender.utils.spark.SparkConverter) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Matrix(org.apache.spark.mllib.linalg.Matrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 25 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class PCATangentNormalizationUtils method calculateBetaHats.

/**
 * Calculates the beta-hats that best fit case read counts given the panel of normals.
 * <p>
 * A copy of {@code input} is masked before the multiplication: any entry that does not exceed
 * {@code log2(epsilon) + 1} is set to zero. The original {@code input} matrix is not altered.
 * </p>
 *
 * @param normalsPseudoinverse the log-normalized or reduced-panel pseudoinverse from a panel of normals;
 *                             passed through {@code Utils.nonNull}.
 * @param input a {@code TxS} matrix where {@code T} is the number of targets and {@code S} the number
 *              of count groups (e.g. case samples); passed through {@code Utils.nonNull}.
 * @param epsilon determines the masking threshold {@code log2(epsilon) + 1}; validated to be
 *                {@code > 0} through {@code Utils.validateArg}.
 * @return {@code normalsPseudoinverse} multiplied by the masked copy of {@code input}; per the
 *  documented contract an {@code NxS} matrix, where N is the number of samples in the panel and
 *  S the number of count groups.
 */
@VisibleForTesting
public static RealMatrix calculateBetaHats(final RealMatrix normalsPseudoinverse, final RealMatrix input, final double epsilon) {
    Utils.nonNull(normalsPseudoinverse, "Normals inverse matrix cannot be null.");
    Utils.nonNull(input, "Input counts cannot be null.");
    final boolean epsilonIsPositive = epsilon > 0;
    Utils.validateArg(epsilonIsPositive, String.format("Invalid epsilon value, must be > 0: %f", epsilon));

    // Threshold expressed in log2 space: log2(epsilon) + 1.
    final double threshold = (Math.log(epsilon) / Math.log(2)) + 1;

    // Mask a copy, never the caller's matrix.
    final RealMatrix maskedCopy = input.copy();
    final DefaultRealMatrixChangingVisitor belowThresholdToZero = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            final boolean keep = value > threshold;
            if (keep) {
                return value;
            } else {
                return 0;
            }
        }
    };
    maskedCopy.walkInOptimizedOrder(belowThresholdToZero);

    return normalsPseudoinverse.multiply(maskedCopy);
}

Aggregations

DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)28 RealMatrix (org.apache.commons.math3.linear.RealMatrix)22 VisibleForTesting (com.google.common.annotations.VisibleForTesting)12 IntStream (java.util.stream.IntStream)10 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)10 Median (org.apache.commons.math3.stat.descriptive.rank.Median)10 Utils (org.broadinstitute.hellbender.utils.Utils)8 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 Logger (org.apache.logging.log4j.Logger)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 Doubles (com.google.common.primitives.Doubles)4 File (java.io.File)4 IOException (java.io.IOException)4 java.util (java.util)4 DoubleStream (java.util.stream.DoubleStream)4 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)4 Pair (org.apache.commons.lang3.tuple.Pair)4 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)4 LogManager (org.apache.logging.log4j.LogManager)4