Search in sources :

Example 86 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class PCATangentNormalizationUtils method factorNormalize.

/**
 * Divides every entry of a {@link RealMatrix} by the target factor of the row it sits in.
 * The matrix is modified in-place.
 *
 * @param input the matrix to normalize; checked to be non-{@code null}.
 * @param targetFactors one factor per row of {@code input}; checked to be non-{@code null}
 *                      and to have as many elements as {@code input} has rows.
 */
static void factorNormalize(final RealMatrix input, final double[] targetFactors) {
    Utils.nonNull(input, "Input matrix cannot be null.");
    Utils.nonNull(targetFactors, "Target factors cannot be null.");
    final boolean oneFactorPerRow = targetFactors.length == input.getRowDimension();
    Utils.validateArg(oneFactorPerRow, "Number of target factors does not correspond to the number of rows.");

    // Visitor that replaces each entry by the entry divided by its row's target factor.
    final DefaultRealMatrixChangingVisitor rowFactorDivider = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int rowIndex, final int columnIndex, final double entry) {
            return entry / targetFactors[rowIndex];
        }
    };
    input.walkInOptimizedOrder(rowFactorDivider);
}
Also used : DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Example 87 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class PCATangentNormalizationUtils method composeTangentNormalizationInputMatrix.

/**
 * Builds the input matrix for tangent normalization from a matrix of counts.
 * <p>
 * The transformation is applied to a copy of the input, column by column:
 * </p>
 * <ol>
 *   <li>each value is divided by the mean of its column,</li>
 *   <li>the ratio is passed through {@code truncatedLog2},</li>
 *   <li>and finally the median of the transformed column is subtracted from each value.</li>
 * </ol>
 *
 * @param matrix input matrix.
 * @return the transformed copy of {@code matrix}.
 */
private static RealMatrix composeTangentNormalizationInputMatrix(final RealMatrix matrix) {
    final RealMatrix normalized = matrix.copy();

    // Steps 1 and 2: scale by the column mean, then apply the truncated log_2.
    final double[] meanByColumn = GATKProtectedMathUtils.columnMeans(matrix);
    final DefaultRealMatrixChangingVisitor meanScaledLog2 = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int rowIndex, final int columnIndex, final double entry) {
            final double ratioToMean = entry / meanByColumn[columnIndex];
            return truncatedLog2(ratioToMean);
        }
    };
    normalized.walkInOptimizedOrder(meanScaledLog2);

    // Step 3: center every column on its median, computed from the already transformed values.
    final int columnCount = matrix.getColumnDimension();
    final double[] medianByColumn = new double[columnCount];
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        medianByColumn[columnIndex] = new Median().evaluate(normalized.getColumn(columnIndex));
    }
    final DefaultRealMatrixChangingVisitor medianCentering = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int rowIndex, final int columnIndex, final double entry) {
            return entry - medianByColumn[columnIndex];
        }
    };
    normalized.walkInOptimizedOrder(medianCentering);
    return normalized;
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) RowMatrix(org.apache.spark.mllib.linalg.distributed.RowMatrix) DenseMatrix(org.apache.spark.mllib.linalg.DenseMatrix) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) List(java.util.List) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) CaseToPoNTargetMapper(org.broadinstitute.hellbender.tools.pon.coverage.CaseToPoNTargetMapper) CoveragePanelOfNormals(org.broadinstitute.hellbender.tools.pon.coverage.CoveragePanelOfNormals) Median(org.apache.commons.math3.stat.descriptive.rank.Median) SparkConverter(org.broadinstitute.hellbender.utils.spark.SparkConverter) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Matrix(org.apache.spark.mllib.linalg.Matrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 88 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class SegmentUtilsUnitTest method testUnionSegments.

/**
 * Test for {@link SegmentUtils#unionSegments}.  Expected behavior:
 * <ol>
 *   <li>
 *     On chr1 {@link SegmentUtils#collectBreakpointsByContig} gives:
 *     1, 5, 10, 20, 40, 40, 42, 90, 91, 115, 125, 140.
 *   </li>
 *   <li>
 *     {@link SegmentUtils#constructUntrimmedSegments} then finds the segments:
 *     [1, 4], [5, 10], [11, 19], [20, 40], [41, 41], [42, 89], [90, 91], [92, 114], [115, 125], [126, 140],
 *     and returns the non-empty ones:
 *     [1, 4], [5, 10], [20, 40], [42, 89], [90, 91], [92, 114], [115, 125], [126, 140].
 *   </li>
 *   <li>
 *     {@link SegmentUtils#mergeSpuriousStartsAndEnds} merges the last segment left to form [115, 140],
 *     and {@link SegmentMergeUtils#mergeSpuriousMiddles} randomly merges segment [92, 114] left or right.
 *   </li>
 *   <li>
 *     Finally, {@link SegmentUtils#trimInterval} gives either
 *     [1, 10], [20, 40], [42, 42], [90, 114], [115, 140] (if [92, 114] merged left) or
 *     [1, 10], [20, 40], [42, 42], [90, 91], [92, 140] (if [92, 114] merged right).
 *   </li>
 * </ol>
 * <p>
 * The remaining empty segment on chr2 is retained.
 * </p>
 */
@Test
public void testUnionSegments() {
    // Coverage side of the genome: six targets on chr1 and an all-zero single-sample count matrix.
    final List<Target> targets = new ArrayList<>(Arrays.asList(
            new Target("t1", new SimpleInterval("chr1", 1, 10)),
            new Target("t2", new SimpleInterval("chr1", 20, 30)),
            new Target("t3", new SimpleInterval("chr1", 31, 40)),
            new Target("t4", new SimpleInterval("chr1", 90, 100)),
            new Target("t5", new SimpleInterval("chr1", 110, 120)),
            new Target("t6", new SimpleInterval("chr1", 130, 140))));
    final RealMatrix zeroCoverage = new Array2DRowRealMatrix(targets.size(), 1);
    final ReadCountCollection counts = new ReadCountCollection(
            targets, Collections.singletonList("placeholder_sample_name"), zeroCoverage);

    // SNP side of the genome: six sites on chr1 and a single site on chr2.
    final List<AllelicCount> snps = Arrays.asList(
            new AllelicCount(new SimpleInterval("chr1", 5, 5), 0, 1),
            new AllelicCount(new SimpleInterval("chr1", 40, 40), 0, 1),
            new AllelicCount(new SimpleInterval("chr1", 42, 42), 0, 1),
            new AllelicCount(new SimpleInterval("chr1", 91, 91), 0, 1),
            new AllelicCount(new SimpleInterval("chr1", 115, 115), 0, 1),
            new AllelicCount(new SimpleInterval("chr1", 125, 125), 0, 1),
            new AllelicCount(new SimpleInterval("chr2", 10, 10), 0, 1));

    // Input segmentations to be unioned.
    final List<SimpleInterval> targetSegments = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 90, 140));
    final List<SimpleInterval> snpSegments = Arrays.asList(
            new SimpleInterval("chr1", 5, 40),
            new SimpleInterval("chr1", 42, 91),
            new SimpleInterval("chr1", 115, 125),
            new SimpleInterval("chr2", 10, 10));

    final List<SimpleInterval> unionedSegments =
            SegmentUtils.unionSegments(targetSegments, snpSegments, new Genome(counts, snps));

    // The middle segment [92, 114] is merged in a random direction, so two outcomes are acceptable.
    final List<SimpleInterval> expectedIfMergedLeft = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 42, 42),
            new SimpleInterval("chr1", 90, 114),
            new SimpleInterval("chr1", 115, 140),
            new SimpleInterval("chr2", 10, 10));
    final List<SimpleInterval> expectedIfMergedRight = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 42, 42),
            new SimpleInterval("chr1", 90, 91),
            new SimpleInterval("chr1", 92, 140),
            new SimpleInterval("chr2", 10, 10));
    final boolean matchesLeftMerge = unionedSegments.equals(expectedIfMergedLeft);
    final boolean matchesRightMerge = unionedSegments.equals(expectedIfMergedRight);
    Assert.assertTrue(matchesLeftMerge || matchesRightMerge);
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ArrayList(java.util.ArrayList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 89 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtilsUnitTest method testImputeZeroCounts.

/**
 * Checks {@code ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians}: after the call, every
 * entry that was exactly zero must equal the median of the non-zero entries of its row, and
 * every other entry must be unchanged.
 *
 * @param readCounts the read counts under test, supplied by the "tooManyZerosData" data provider.
 */
@Test(dataProvider = "tooManyZerosData")
public void testImputeZeroCounts(final ReadCountCollection readCounts) {
    final RealMatrix counts = readCounts.counts();
    final int rowCount = counts.getRowDimension();
    final Median median = new Median();

    // Snapshot of the values before imputation; only used to enrich the failure messages.
    final double[][] original = counts.getData();

    // Expected values are derived row by row before the collection is modified.
    final double[][] expected = new double[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        final double[] rowValues = counts.getRow(row);
        final double[] nonZeroValues = DoubleStream.of(rowValues).filter(value -> value != 0.0).toArray();
        final double nonZeroMedian = median.evaluate(nonZeroValues);
        expected[row] = DoubleStream.of(rowValues)
                .map(value -> value == 0.0 ? nonZeroMedian : value)
                .toArray();
    }

    ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians(readCounts, NULL_LOGGER);

    final RealMatrix newCounts = readCounts.counts();
    Assert.assertEquals(newCounts.getColumnDimension(), expected[0].length);
    Assert.assertEquals(newCounts.getRowDimension(), expected.length);
    for (int row = 0; row < expected.length; row++) {
        final double[] expectedRow = expected[row];
        for (int column = 0; column < expectedRow.length; column++) {
            final String failureDetail = "i,j == " + row + "," + column + " " + original[row][column];
            Assert.assertEquals(newCounts.getEntry(row, column), expectedRow[column], failureDetail);
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Test(org.testng.annotations.Test)

Example 90 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtilsUnitTest method testTruncateExtremeCounts.

/**
 * Checks {@code ReadCountCollectionUtils.truncateExtremeCounts}: after the call, every count
 * below the {@code percentile}-th percentile of all counts must equal that percentile, every
 * count above the {@code (100 - percentile)}-th percentile must equal that percentile, and the
 * matrix dimensions must be unchanged.
 *
 * @param readCount the read counts under test, supplied by the "readCountAndPercentileData" data provider.
 * @param percentile the truncation percentile passed to the method under test.
 */
@Test(dataProvider = "readCountAndPercentileData")
public void testTruncateExtremeCounts(final ReadCountCollection readCount, final double percentile) {
    final RealMatrix counts = readCount.counts();
    // Capture the dimensions before truncation so that the dimension assertions below compare
    // against an independent value (they previously compared the new matrix with itself).
    final int expectedRowCount = counts.getRowDimension();
    final int expectedColumnCount = counts.getColumnDimension();

    // Flatten the matrix into a single primitive array without boxing.
    final double[] allCounts = Stream.of(counts.getData()).flatMapToDouble(row -> DoubleStream.of(row)).toArray();
    final double bottom = new Percentile(percentile).evaluate(allCounts);
    final double top = new Percentile(100 - percentile).evaluate(allCounts);

    // Expected values are computed before the collection is modified.
    final double[][] expected = new double[expectedRowCount][];
    for (int i = 0; i < expected.length; i++) {
        expected[i] = DoubleStream.of(counts.getRow(i)).map(d -> d < bottom ? bottom : (d > top) ? top : d).toArray();
    }

    ReadCountCollectionUtils.truncateExtremeCounts(readCount, percentile, NULL_LOGGER);

    final RealMatrix newCounts = readCount.counts();
    Assert.assertEquals(newCounts.getRowDimension(), expectedRowCount);
    Assert.assertEquals(newCounts.getColumnDimension(), expectedColumnCount);
    for (int i = 0; i < expected.length; i++) {
        for (int j = 0; j < expected[i].length; j++) {
            Assert.assertEquals(newCounts.getEntry(i, j), expected[i][j]);
        }
    }
}
Also used : Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Test(org.testng.annotations.Test)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix)259 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)158 Test (org.testng.annotations.Test)86 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)60 IntStream (java.util.stream.IntStream)50 Collectors (java.util.stream.Collectors)48 Median (org.apache.commons.math3.stat.descriptive.rank.Median)42 HDF5File (org.broadinstitute.hdf5.HDF5File)42 File (java.io.File)40 List (java.util.List)37 DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)36 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)36 ArrayList (java.util.ArrayList)32 Assert (org.testng.Assert)32 IOException (java.io.IOException)30 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)30 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)30 DoubleStream (java.util.stream.DoubleStream)28 Logger (org.apache.logging.log4j.Logger)27 Utils (org.broadinstitute.hellbender.utils.Utils)27