Search in sources:

Example 6 with Median

Use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk by broadinstitute.

From the class MatrixSummaryUtils, method getRowMedians.

/**
 * Return an array containing the median for each row in the given matrix.
 *
 * @param m Not {@code null} (checked via {@code Utils.nonNull}).  Size MxN.  If any entry is NaN,
 *          it is disregarded in the calculation.
 * @return array of size M, where element {@code i} is the median of row {@code i}.  Never {@code null}
 */
public static double[] getRowMedians(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final Median medianCalculator = new Median();
    // Map straight from the primitive IntStream: boxing each row index to Integer first
    // (as the previous .boxed() call did) only allocates objects that are unboxed again at once.
    return IntStream.range(0, m.getRowDimension())
            .mapToDouble(row -> medianCalculator.evaluate(m.getRow(row)))
            .toArray();
}
Also used : IntStream(java.util.stream.IntStream) Median(org.apache.commons.math3.stat.descriptive.rank.Median) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 7 with Median

Use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk by broadinstitute.

From the class ReadCountCollectionUtilsUnitTest, method testImputeZeroCounts.

/**
 * Checks {@code ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians} against an independently
 * built expectation: every entry equal to zero is expected to be replaced by the median of the
 * non-zero entries of its own row, and every other entry is expected to be left as it was.
 *
 * @param readCounts the read counts supplied by the {@code tooManyZerosData} provider.
 */
@Test(dataProvider = "tooManyZerosData")
public void testImputeZeroCounts(final ReadCountCollection readCounts) {
    final RealMatrix countsBefore = readCounts.counts();
    final int rowCount = countsBefore.getRowDimension();
    final Median medianCalculator = new Median();

    // Build the expected matrix row by row before the method under test is invoked.
    final double[][] expected = new double[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        final double[] rowValues = countsBefore.getRow(row);
        final double[] nonZeroValues = DoubleStream.of(rowValues).filter(value -> value != 0.0).toArray();
        final double nonZeroMedian = medianCalculator.evaluate(nonZeroValues);
        expected[row] = DoubleStream.of(rowValues)
                .map(value -> value == 0.0 ? nonZeroMedian : value)
                .toArray();
    }
    // Snapshot of the values before imputation; only used to enrich the failure message.
    final double[][] original = countsBefore.getData();

    ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians(readCounts, NULL_LOGGER);

    final RealMatrix newCounts = readCounts.counts();
    Assert.assertEquals(newCounts.getColumnDimension(), expected[0].length);
    Assert.assertEquals(newCounts.getRowDimension(), expected.length);
    for (int row = 0; row < expected.length; row++) {
        final double[] expectedRow = expected[row];
        for (int col = 0; col < expectedRow.length; col++) {
            final String failureMessage = "i,j == " + row + "," + col + " " + original[row][col];
            Assert.assertEquals(newCounts.getEntry(row, col), expectedRow[col], failureMessage);
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Test(org.testng.annotations.Test)

Example 8 with Median

Use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk by broadinstitute.

From the class ReadCountCollectionUtilsUnitTest, method testExtremeMedianColumnsData.

/**
 * Checks {@code ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts} against an
 * independently computed expectation: a column is expected to survive only when its median lies
 * between the {@code percentile} and {@code 100 - percentile} percentiles (both inclusive) of all
 * column medians.  Surviving columns must keep their relative order and their values; removed
 * columns must no longer appear among the result's column names.
 *
 * @param readCount  the read counts supplied by the {@code readCountAndPercentileData} provider.
 * @param percentile the extreme percentile passed on to the method under test.
 */
@Test(dataProvider = "readCountAndPercentileData")
public void testExtremeMedianColumnsData(final ReadCountCollection readCount, final double percentile) {
    final RealMatrix counts = readCount.counts();
    final int columnCount = counts.getColumnDimension();

    final Median medianCalculator = new Median();
    final double[] columnMedians = new double[columnCount];
    for (int col = 0; col < columnCount; col++) {
        columnMedians[col] = medianCalculator.evaluate(counts.getColumn(col));
    }

    final double top = new Percentile(100 - percentile).evaluate(columnMedians);
    final double bottom = new Percentile(percentile).evaluate(columnMedians);

    // Mark the columns whose median falls inside [bottom, top] and count them as we go.
    final boolean[] toBeKept = new boolean[columnCount];
    int toBeKeptCount = 0;
    for (int col = 0; col < columnCount; col++) {
        final double columnMedian = columnMedians[col];
        toBeKept[col] = columnMedian <= top && columnMedian >= bottom;
        if (toBeKept[col]) {
            toBeKeptCount++;
        }
    }

    final ReadCountCollection result = ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.columnNames().size(), toBeKeptCount);

    int nextIndex = 0;
    for (int col = 0; col < toBeKept.length; col++) {
        final int index = result.columnNames().indexOf(readCount.columnNames().get(col));
        if (!toBeKept[col]) {
            // A dropped column must not be present in the result at all.
            Assert.assertEquals(index, -1);
            continue;
        }
        // Kept columns appear in their original relative order with unchanged values.
        Assert.assertEquals(index, nextIndex++);
        Assert.assertEquals(counts.getColumn(col), result.counts().getColumn(index));
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Test(org.testng.annotations.Test)

Example 9 with Median

Use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.

From the class MatrixSummaryUtils, method getColumnMedians.

/**
 * Return an array containing the median for each column in the given matrix.
 *
 * @param m Not {@code null} (checked via {@code Utils.nonNull}).  Size MxN, where neither dimension
 *          is zero.  If any entry is NaN, it is disregarded in the calculation.
 * @return array of size N, where element {@code j} is the median of column {@code j}.  Never {@code null}
 */
public static double[] getColumnMedians(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final Median medianCalculator = new Median();
    // Map straight from the primitive IntStream: boxing each column index to Integer first
    // (as the previous .boxed() call did) only allocates objects that are unboxed again at once.
    return IntStream.range(0, m.getColumnDimension())
            .mapToDouble(column -> medianCalculator.evaluate(m.getColumn(column)))
            .toArray();
}
Also used : IntStream(java.util.stream.IntStream) Median(org.apache.commons.math3.stat.descriptive.rank.Median) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 10 with Median

Use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.

From the class MatrixSummaryUtils, method getRowMedians.

/**
 * Return an array containing the median for each row in the given matrix.
 *
 * @param m Not {@code null} (checked via {@code Utils.nonNull}).  Size MxN.  If any entry is NaN,
 *          it is disregarded in the calculation.
 * @return array of size M, where element {@code i} is the median of row {@code i}.  Never {@code null}
 */
public static double[] getRowMedians(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final Median medianCalculator = new Median();
    // Map straight from the primitive IntStream: boxing each row index to Integer first
    // (as the previous .boxed() call did) only allocates objects that are unboxed again at once.
    return IntStream.range(0, m.getRowDimension())
            .mapToDouble(row -> medianCalculator.evaluate(m.getRow(row)))
            .toArray();
}
Also used : IntStream(java.util.stream.IntStream) Median(org.apache.commons.math3.stat.descriptive.rank.Median) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Aggregations

Median (org.apache.commons.math3.stat.descriptive.rank.Median) 35, RealMatrix (org.apache.commons.math3.linear.RealMatrix) 29, IntStream (java.util.stream.IntStream) 28, Collectors (java.util.stream.Collectors) 24, Logger (org.apache.logging.log4j.Logger) 24, Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile) 22, DoubleStream (java.util.stream.DoubleStream) 20, File (java.io.File) 18, Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix) 17, ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils) 16, List (java.util.List) 15, ArrayList (java.util.ArrayList) 14, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext) 14, UserException (org.broadinstitute.hellbender.exceptions.UserException) 14, ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection) 14, SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 14, VisibleForTesting (com.google.common.annotations.VisibleForTesting) 13, java.util (java.util) 13, DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) 12, LogManager (org.apache.logging.log4j.LogManager) 12