use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk by broadinstitute.
the class MatrixSummaryUtils method getRowMedians.
/**
 * Computes the median of every row of the given matrix.
 *
 * <p>Each row is extracted with {@code m.getRow(i)} and evaluated by a single
 * {@link Median} instance that is reused for all rows.
 *
 * @param m the matrix to summarize, of size MxN. Not {@code null}. If any entry is NaN,
 *          it is disregarded in the calculation.
 * @return a new array of size M whose i-th element is the median of row i. Never {@code null}
 */
public static double[] getRowMedians(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final Median medianCalculator = new Median();
    // Stay on the primitive IntStream: the row index is only used as an int, so boxing it
    // to Integer before mapToDouble would allocate for no benefit.
    return IntStream.range(0, m.getRowDimension())
            .mapToDouble(i -> medianCalculator.evaluate(m.getRow(i)))
            .toArray();
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk by broadinstitute.
the class ReadCountCollectionUtilsUnitTest method testImputeZeroCounts.
/**
 * Checks that {@code ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians} replaces every
 * zero count in a row with the median of that row's non-zero counts and leaves the remaining
 * entries and the matrix dimensions unchanged.
 */
@Test(dataProvider = "tooManyZerosData")
public void testImputeZeroCounts(final ReadCountCollection readCounts) {
    final Median medianCalculator = new Median();
    final RealMatrix countsBefore = readCounts.counts();
    final int rowCount = countsBefore.getRowDimension();

    // Captured before the imputation call; only used to enrich assertion failure messages.
    final double[][] original = countsBefore.getData();

    // Build the expected matrix row by row: zeros become the row's non-zero median.
    final double[][] expected = new double[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        final double[] rowValues = countsBefore.getRow(row);
        final double[] nonZeroValues = DoubleStream.of(rowValues).filter(v -> v != 0.0).toArray();
        final double nonZeroMedian = medianCalculator.evaluate(nonZeroValues);
        expected[row] = DoubleStream.of(rowValues)
                .map(v -> v == 0.0 ? nonZeroMedian : v)
                .toArray();
    }

    ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians(readCounts, NULL_LOGGER);

    final RealMatrix countsAfter = readCounts.counts();
    Assert.assertEquals(countsAfter.getColumnDimension(), expected[0].length);
    Assert.assertEquals(countsAfter.getRowDimension(), expected.length);
    for (int i = 0; i < expected.length; i++) {
        final double[] expectedRow = expected[i];
        for (int j = 0; j < expectedRow.length; j++) {
            Assert.assertEquals(countsAfter.getEntry(i, j), expectedRow[j], "i,j == " + i + "," + j + " " + original[i][j]);
        }
    }
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk by broadinstitute.
the class ReadCountCollectionUtilsUnitTest method testExtremeMedianColumnsData.
/**
 * Checks that {@code ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts} keeps exactly
 * the columns whose median lies between the {@code percentile} and {@code 100 - percentile}
 * percentiles of all column medians (bounds inclusive), preserving their relative order and
 * their count values.
 */
@Test(dataProvider = "readCountAndPercentileData")
public void testExtremeMedianColumnsData(final ReadCountCollection readCount, final double percentile) {
    final RealMatrix counts = readCount.counts();
    final int columnCount = counts.getColumnDimension();

    final Median medianCalculator = new Median();
    final double[] columnMedians = new double[columnCount];
    for (int col = 0; col < columnCount; col++) {
        columnMedians[col] = medianCalculator.evaluate(counts.getColumn(col));
    }

    final double upperBound = new Percentile(100 - percentile).evaluate(columnMedians);
    final double lowerBound = new Percentile(percentile).evaluate(columnMedians);

    // Flag the columns expected to survive the filter and count them as we go.
    final boolean[] expectedKept = new boolean[columnCount];
    int expectedKeptCount = 0;
    for (int col = 0; col < columnCount; col++) {
        final double columnMedian = columnMedians[col];
        expectedKept[col] = columnMedian <= upperBound && columnMedian >= lowerBound;
        if (expectedKept[col]) {
            expectedKeptCount++;
        }
    }

    final ReadCountCollection result = ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.columnNames().size(), expectedKeptCount);

    // Surviving columns must appear in the result in their original relative order.
    int expectedPosition = 0;
    for (int col = 0; col < columnCount; col++) {
        final int actualPosition = result.columnNames().indexOf(readCount.columnNames().get(col));
        if (!expectedKept[col]) {
            Assert.assertEquals(actualPosition, -1);
            continue;
        }
        Assert.assertEquals(actualPosition, expectedPosition++);
        Assert.assertEquals(counts.getColumn(col), result.counts().getColumn(actualPosition));
    }
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.
the class MatrixSummaryUtils method getColumnMedians.
/**
 * Computes the median of every column of the given matrix.
 *
 * <p>Each column is extracted with {@code m.getColumn(i)} and evaluated by a single
 * {@link Median} instance that is reused for all columns.
 *
 * @param m the matrix to summarize, of size MxN, where neither dimension is zero.
 *          Not {@code null}. If any entry is NaN, it is disregarded in the calculation.
 * @return a new array of size N whose i-th element is the median of column i. Never {@code null}
 */
public static double[] getColumnMedians(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final Median medianCalculator = new Median();
    // Stay on the primitive IntStream: the column index is only used as an int, so boxing it
    // to Integer before mapToDouble would allocate for no benefit.
    return IntStream.range(0, m.getColumnDimension())
            .mapToDouble(i -> medianCalculator.evaluate(m.getColumn(i)))
            .toArray();
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.
the class MatrixSummaryUtils method getRowMedians.
/**
 * Computes the median of every row of the given matrix.
 *
 * <p>Each row is extracted with {@code m.getRow(i)} and evaluated by a single
 * {@link Median} instance that is reused for all rows.
 *
 * @param m the matrix to summarize, of size MxN. Not {@code null}. If any entry is NaN,
 *          it is disregarded in the calculation.
 * @return a new array of size M whose i-th element is the median of row i. Never {@code null}
 */
public static double[] getRowMedians(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final Median medianCalculator = new Median();
    // Stay on the primitive IntStream: the row index is only used as an int, so boxing it
    // to Integer before mapToDouble would allocate for no benefit.
    return IntStream.range(0, m.getRowDimension())
            .mapToDouble(i -> medianCalculator.evaluate(m.getRow(i)))
            .toArray();
}
Aggregations