Search in sources:

Example 56 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class CNLOHCaller method sumOverSegments.

/**
 * Collects the per-segment arrays of the given RDD, stacks them as the rows of a matrix and
 * returns the result of {@code GATKProtectedMathUtils.columnSums} on that matrix.
 *
 * @param eZskBySeg RDD holding one {@code double[]} per segment
 * @return the column sums over all collected per-segment rows
 */
private double[] sumOverSegments(final JavaRDD<double[]> eZskBySeg) {
    // One row per collected element; laid out as S x K.
    final double[][] perSegmentRows = eZskBySeg.collect().toArray(new double[0][]);
    return GATKProtectedMathUtils.columnSums(MatrixUtils.createRealMatrix(perSegmentRows));
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Example 57 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class XHMMSegmentCallerBase method standardizeBySample.

/**
 * Standardizes read counts column by column: every entry has its column mean subtracted and
 * is then divided by its column standard deviation.
 * <p>
 *     The matrix is overwritten in place. Columns presumably correspond to samples, as the
 *     method name suggests.
 * </p>
 *
 * @param counts original read counts; modified by this call
 */
private void standardizeBySample(final RealMatrix counts) {
    final double[] meanByColumn = GATKProtectedMathUtils.columnMeans(counts);
    final double[] stdDevByColumn = GATKProtectedMathUtils.columnStdDevs(counts);
    final DefaultRealMatrixChangingVisitor standardizer = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            // Center on the column mean first, then scale by the column spread.
            final double centered = value - meanByColumn[column];
            return centered / stdDevByColumn[column];
        }
    };
    counts.walkInColumnOrder(standardizer);
}
Also used : DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Example 58 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method calculateTargetVariances.

/**
 * Determine the variance for each target in the PoN (panel targets).
 * <p>
 *     The normals are first tangent-normalized using the reduced counts and reduced
 *     pseudo-inverse taken from {@code reduction}; the row variances of the resulting counts
 *     matrix are then returned.
 * </p>
 *
 * @param normalizedCounts read counts handed to the tangent-normalization step; must not be {@code null}
 * @param panelTargetNames names of the panel targets; must not be {@code null}
 * @param reduction        source of the reduced counts and reduced pseudo-inverse; must not be {@code null}
 * @param ctx              Spark context, forwarded as-is to the tangent-normalization step
 *                         (not null-checked here)
 * @return      array of doubles where each double corresponds to a target in the PoN (panel targets)
 */
private static double[] calculateTargetVariances(final ReadCountCollection normalizedCounts, final List<String> panelTargetNames, final ReductionResult reduction, final JavaSparkContext ctx) {
    Utils.nonNull(panelTargetNames);
    Utils.nonNull(normalizedCounts);
    Utils.nonNull(reduction);
    final RealMatrix tangentNormalizedCounts = PCATangentNormalizationUtils
            .tangentNormalizeNormalsInPoN(normalizedCounts, panelTargetNames, reduction.getReducedCounts(), reduction.getReducedPseudoInverse(), ctx)
            .getTangentNormalized()
            .counts();
    return MatrixSummaryUtils.getRowVariances(tangentNormalizedCounts);
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Example 59 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method normalizeAndLogReadCounts.

/**
 * Last normalization step before the panel is built: each count is divided by the median of
 * its column, and the ratio is then converted to a logarithm scaled by {@code INV_LN_2}
 * (presumably 1/ln(2), i.e. log base 2).
 * <p>
 *     Ratios smaller than {@code EPSILON} are replaced by {@code EPSILON} before the logarithm
 *     is taken. The normalization occurs in-place.
 * </p>
 *
 * @param readCounts the input counts to normalize.
 * @param logger     receives an info message once the counts have been rewritten.
 */
@VisibleForTesting
static void normalizeAndLogReadCounts(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Median medianCalculator = new Median();
    final int columnCount = counts.getColumnDimension();
    final double[] columnMedians = new double[columnCount];
    for (int col = 0; col < columnCount; col++) {
        columnMedians[col] = medianCalculator.evaluate(counts.getColumn(col));
    }
    final DefaultRealMatrixChangingVisitor log2OfMedianRatio = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            final double ratio = value / columnMedians[column];
            // Clamp from below so the logarithm never sees a value under EPSILON.
            return Math.log(Math.max(EPSILON, ratio)) * INV_LN_2;
        }
    };
    counts.walkInOptimizedOrder(log2OfMedianRatio);
    logger.info("Counts normalized by the column median and log2'd.");
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) SVD(org.broadinstitute.hellbender.utils.svd.SVD) java.util(java.util) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Pair(org.apache.commons.lang3.tuple.Pair) Median(org.apache.commons.math3.stat.descriptive.rank.Median) HDF5File(org.broadinstitute.hdf5.HDF5File) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) UserException(org.broadinstitute.hellbender.exceptions.UserException) SVDFactory(org.broadinstitute.hellbender.utils.svd.SVDFactory) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 60 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method subtractMedianOfMedians.

/**
 * Centers the counts in place: takes the median of each column, then the median of those
 * column medians, and subtracts that single value from every entry.
 *
 * @param readCounts the input counts to center.
 * @param logger     receives an info message reporting the subtracted value.
 * @return the median of medians that has been subtracted from all counts.
 */
@VisibleForTesting
static double subtractMedianOfMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final double center = new Median().evaluate(MatrixSummaryUtils.getColumnMedians(counts));
    final DefaultRealMatrixChangingVisitor shiftByCenter = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return value - center;
        }
    };
    counts.walkInOptimizedOrder(shiftByCenter);
    final String message = String.format("Counts centered around the median of medians %.2f", center);
    logger.info(message);
    return center;
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix) 218, Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix) 148, Test (org.testng.annotations.Test) 86, BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 60, IntStream (java.util.stream.IntStream) 50, Collectors (java.util.stream.Collectors) 48, Median (org.apache.commons.math3.stat.descriptive.rank.Median) 42, HDF5File (org.broadinstitute.hdf5.HDF5File) 42, File (java.io.File) 40, DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) 36, SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 36, List (java.util.List) 34, Assert (org.testng.Assert) 32, IOException (java.io.IOException) 30, Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile) 30, ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils) 30, DoubleStream (java.util.stream.DoubleStream) 28, Logger (org.apache.logging.log4j.Logger) 27, Utils (org.broadinstitute.hellbender.utils.Utils) 27, ArrayList (java.util.ArrayList) 26