Search in sources :

Example 16 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.

the class PCATangentNormalizationUtils method composeTangentNormalizationInputMatrix.

/**
     * Prepares the data to perform tangent normalization.
     * <p>
     * This is done by count group or column:
     *   <ol>
     *     </li>we divide counts by the column mean,</li>
     *     </li>then we transform value to their log_2,</li>
     *     </li>and finally we center them around the median.</li>
     *   </ol>
     * </p>
     *
     * @param matrix input matrix.
     * @return never {@code null}.
     */
private static RealMatrix composeTangentNormalizationInputMatrix(final RealMatrix matrix) {
    final RealMatrix result = matrix.copy();
    // step 1: divide by column means and log_2 transform
    final double[] columnMeans = GATKProtectedMathUtils.columnMeans(matrix);
    result.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return truncatedLog2(value / columnMeans[column]);
        }
    });
    // step 2: subtract column medians
    final double[] columnMedians = IntStream.range(0, matrix.getColumnDimension()).mapToDouble(c -> new Median().evaluate(result.getColumn(c))).toArray();
    result.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return value - columnMedians[column];
        }
    });
    return result;
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) RowMatrix(org.apache.spark.mllib.linalg.distributed.RowMatrix) DenseMatrix(org.apache.spark.mllib.linalg.DenseMatrix) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) List(java.util.List) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) CaseToPoNTargetMapper(org.broadinstitute.hellbender.tools.pon.coverage.CaseToPoNTargetMapper) CoveragePanelOfNormals(org.broadinstitute.hellbender.tools.pon.coverage.CoveragePanelOfNormals) Median(org.apache.commons.math3.stat.descriptive.rank.Median) SparkConverter(org.broadinstitute.hellbender.utils.spark.SparkConverter) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Matrix(org.apache.spark.mllib.linalg.Matrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 17 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class GCCorrector method correctCoverage.

/**
     *
     * @param inputCounts raw coverage before GC correction
     * @param gcContentByTarget      array of gc contents, one per target of the input
     * @return              GC-corrected coverage
     */
public static ReadCountCollection correctCoverage(final ReadCountCollection inputCounts, final double[] gcContentByTarget) {
    // each column (sample) has its own GC bias curve, hence its own GC corrector
    final List<GCCorrector> gcCorrectors = IntStream.range(0, inputCounts.columnNames().size()).mapToObj(n -> new GCCorrector(gcContentByTarget, inputCounts.counts().getColumnVector(n))).collect(Collectors.toList());
    // gc correct a copy of the input counts in-place
    final RealMatrix correctedCounts = inputCounts.counts().copy();
    correctedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(int target, int column, double coverage) {
            return gcCorrectors.get(column).correctedCoverage(coverage, gcContentByTarget[target]);
        }
    });
    // we would like the average correction factor to be 1.0 in the sense that average coverage before and after
    // correction should be equal
    final double[] columnNormalizationFactors = IntStream.range(0, inputCounts.columnNames().size()).mapToDouble(c -> inputCounts.counts().getColumnVector(c).getL1Norm() / correctedCounts.getColumnVector(c).getL1Norm()).toArray();
    correctedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(int target, int column, double coverage) {
            return coverage * columnNormalizationFactors[column];
        }
    });
    return new ReadCountCollection(inputCounts.targets(), inputCounts.columnNames(), correctedCounts);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) List(java.util.List) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) RealVector(org.apache.commons.math3.linear.RealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection)

Example 18 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.

the class NormalizeSomaticReadCountsIntegrationTest method assertBetaHatsRobustToOutliers.

/**
     * Asserts that the calculation of beta hats is not significantly affected by zero-coverage outlier counts
     * We perform this check by randomly setting some coverages to zero in copy ratio space (-infinity in log space).
     * betaHats imputes 0 in log space (1 in copy ratio space) whenever coverage is below a certain low threshold
     * and should thus be robust to this type of noise.
     */
private void assertBetaHatsRobustToOutliers(final ReadCountCollection preTangentNormalized, final File ponFile) {
    try (final HDF5File ponReader = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
        final List<String> ponTargets = pon.getPanelTargetNames();
        final RealMatrix input = reorderTargetsToPoNOrder(preTangentNormalized, ponTargets);
        // randomly set some entries to zero in copy-ratio space (-infinity in log space)
        final Random random = new Random(13);
        final double noiseProportion = 0.01;
        final RealMatrix noisyInput = input.copy();
        noisyInput.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

            @Override
            public double visit(final int row, final int column, final double value) {
                return random.nextDouble() < noiseProportion ? Double.NEGATIVE_INFINITY : value;
            }
        });
        final RealMatrix betaHats = PCATangentNormalizationUtils.calculateBetaHats(pon.getReducedPanelPInverseCounts(), input, PCATangentNormalizationUtils.EPSILON);
        final RealMatrix noisyBetaHats = PCATangentNormalizationUtils.calculateBetaHats(pon.getReducedPanelPInverseCounts(), noisyInput, PCATangentNormalizationUtils.EPSILON);
        final RealMatrix difference = betaHats.subtract(noisyBetaHats);
        difference.walkInOptimizedOrder(new DefaultRealMatrixPreservingVisitor() {

            @Override
            public void visit(final int row, int column, double value) {
                Assert.assertEquals(value, 0, 0.01);
            }
        });
    }
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) DefaultRealMatrixPreservingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixPreservingVisitor) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5File(org.broadinstitute.hdf5.HDF5File)

Example 19 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.

the class HDF5LibraryUnitTest method createMatrixOfGaussianValues.

private RealMatrix createMatrixOfGaussianValues(int numRows, int numCols, final double mean, final double sigma) {
    final RealMatrix bigCounts = new Array2DRowRealMatrix(numRows, numCols);
    final RandomDataGenerator randomDataGenerator = new RandomDataGenerator();
    randomDataGenerator.reSeed(337337337);
    bigCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(int row, int column, double value) {
            return randomDataGenerator.nextGaussian(mean, sigma);
        }
    });
    return bigCounts;
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Example 20 with DefaultRealMatrixChangingVisitor

use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.

the class ReadCountCollectionUtils method imputeZeroCountsAsTargetMedians.

/**
     * Impute zero counts to the median of non-zero values in the enclosing target row.
     *
     * <p>The imputation is done in-place, thus the input matrix is well be modified as a result of this call.</p>
     *
     * @param readCounts the input and output read-count matrix.
     */
public static void imputeZeroCountsAsTargetMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final int targetCount = counts.getRowDimension();
    final Median medianCalculator = new Median();
    int totalCounts = counts.getColumnDimension() * counts.getRowDimension();
    // Get the number of zeroes contained in the counts.
    final long totalZeroCounts = IntStream.range(0, targetCount).mapToLong(t -> DoubleStream.of(counts.getRow(t)).filter(c -> c == 0.0).count()).sum();
    // Get the median of each row, not including any entries that are zero.
    final double[] medians = IntStream.range(0, targetCount).mapToDouble(t -> medianCalculator.evaluate(DoubleStream.of(counts.getRow(t)).filter(c -> c != 0.0).toArray())).toArray();
    // Change any zeros in the counts to the median for the row.
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return value != 0 ? value : medians[row];
        }
    });
    if (totalZeroCounts > 0) {
        final double totalZeroCountsPercentage = 100.0 * (totalZeroCounts / totalCounts);
        logger.info(String.format("Some 0.0 counts (%d out of %d, %.2f%%) were imputed to their enclosing target " + "non-zero median", totalZeroCounts, totalZeroCounts, totalZeroCountsPercentage));
    } else {
        logger.info("No count is 0.0 thus no count needed to be imputed.");
    }
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Aggregations

DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)28 RealMatrix (org.apache.commons.math3.linear.RealMatrix)22 VisibleForTesting (com.google.common.annotations.VisibleForTesting)12 IntStream (java.util.stream.IntStream)10 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)10 Median (org.apache.commons.math3.stat.descriptive.rank.Median)10 Utils (org.broadinstitute.hellbender.utils.Utils)8 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 Logger (org.apache.logging.log4j.Logger)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 Doubles (com.google.common.primitives.Doubles)4 File (java.io.File)4 IOException (java.io.IOException)4 java.util (java.util)4 DoubleStream (java.util.stream.DoubleStream)4 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)4 Pair (org.apache.commons.lang3.tuple.Pair)4 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)4 LogManager (org.apache.logging.log4j.LogManager)4