use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.
the class CoveragePoNQCUtils method getContigToMedianCRMap.
/**
 * Builds a map from contig name to the median of that contig's counts after each count
 * {@code d} has been transformed to {@code 2^d} (the "CR space" conversion).
 * <p>
 * Contigs are taken from {@code retrieveAllContigsPresent} and the returned map iterates
 * in that same order.
 * </p>
 *
 * @param readCountCollection the read counts to summarize.
 * @return a map with one entry per contig present in {@code readCountCollection}.
 */
@VisibleForTesting
static Map<String, Double> getContigToMedianCRMap(final ReadCountCollection readCountCollection) {
    final Map<String, Double> medianByContig = new LinkedHashMap<>();
    for (final String contigName : retrieveAllContigsPresent(readCountCollection)) {
        // Restrict the collection to the targets that lie on the current contig.
        final ReadCountCollection contigCounts = readCountCollection.subsetTargets(
                readCountCollection.targets().stream()
                        .filter(target -> target.getContig().equals(contigName))
                        .collect(Collectors.toSet()));

        // Flatten the count matrix; concat yields a fresh array, so it is safe to overwrite in place.
        final double[] values = Doubles.concat(contigCounts.counts().getData());

        // Put into CRSpace
        for (int i = 0; i < values.length; i++) {
            values[i] = Math.pow(2, values[i]);
        }

        medianByContig.put(contigName, new Median().evaluate(values));
    }
    return medianByContig;
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.
the class PCATangentNormalizationUtils method composeTangentNormalizationInputMatrix.
/**
 * Prepares the data to perform tangent normalization.
 * <p>
 * The transformation is applied independently to each count group (column):
 * </p>
 * <ol>
 * <li>every count is divided by its column mean,</li>
 * <li>the quotient is passed through {@code truncatedLog2},</li>
 * <li>and the column median of the transformed values is subtracted from each of them.</li>
 * </ol>
 * <p>
 * The input matrix is left untouched; the result is a modified copy.
 * </p>
 *
 * @param matrix input matrix.
 * @return never {@code null}.
 */
private static RealMatrix composeTangentNormalizationInputMatrix(final RealMatrix matrix) {
    final RealMatrix result = matrix.copy();
    final double[] columnMeans = GATKProtectedMathUtils.columnMeans(matrix);
    final int columnCount = matrix.getColumnDimension();

    for (int column = 0; column < columnCount; column++) {
        // getColumn hands back a copy of the column, which we transform and then write back.
        final double[] values = result.getColumn(column);

        // step 1: divide by the column mean and log_2 transform
        final double mean = columnMeans[column];
        for (int row = 0; row < values.length; row++) {
            values[row] = truncatedLog2(values[row] / mean);
        }

        // step 2: subtract the column median of the transformed values
        final double median = new Median().evaluate(values);
        for (int row = 0; row < values.length; row++) {
            values[row] = values[row] - median;
        }

        result.setColumn(column, values);
    }
    return result;
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.
the class ReadCountCollectionUtilsUnitTest method testImputeZeroCounts.
/**
 * Checks that {@code ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians} replaces every
 * zero count with the median of the non-zero counts in the same row, and leaves all other
 * entries and the matrix dimensions unchanged.
 *
 * @param readCounts the read counts supplied by the {@code tooManyZerosData} provider.
 */
@Test(dataProvider = "tooManyZerosData")
public void testImputeZeroCounts(final ReadCountCollection readCounts) {
    final RealMatrix countsBefore = readCounts.counts();
    // Snapshot of the input values, used only to make failure messages informative.
    final double[][] original = countsBefore.getData();
    final int rowCount = countsBefore.getRowDimension();
    final Median median = new Median();

    // Build the expected matrix row by row before the method under test runs.
    final double[][] expected = new double[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        final double[] rowValues = countsBefore.getRow(row);
        final double nonZeroMedian = median.evaluate(
                DoubleStream.of(rowValues).filter(v -> v != 0.0).toArray());
        expected[row] = DoubleStream.of(rowValues)
                .map(v -> v == 0.0 ? nonZeroMedian : v)
                .toArray();
    }

    ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians(readCounts, NULL_LOGGER);

    final RealMatrix countsAfter = readCounts.counts();
    Assert.assertEquals(countsAfter.getColumnDimension(), expected[0].length);
    Assert.assertEquals(countsAfter.getRowDimension(), expected.length);
    for (int row = 0; row < expected.length; row++) {
        final double[] expectedRow = expected[row];
        for (int column = 0; column < expectedRow.length; column++) {
            Assert.assertEquals(countsAfter.getEntry(row, column), expectedRow[column], "i,j == " + row + "," + column + " " + original[row][column]);
        }
    }
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.
the class ReadCountCollectionUtilsUnitTest method testExtremeMedianColumnsData.
/**
 * Checks that {@code ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts} keeps exactly
 * the columns whose median lies between the {@code percentile} and {@code 100 - percentile}
 * percentiles of all column medians (both bounds inclusive), preserves their relative order and
 * leaves their counts unchanged.
 *
 * @param readCount  the read counts supplied by the {@code readCountAndPercentileData} provider.
 * @param percentile the extreme percentile passed to the method under test.
 */
@Test(dataProvider = "readCountAndPercentileData")
public void testExtremeMedianColumnsData(final ReadCountCollection readCount, final double percentile) {
    final RealMatrix counts = readCount.counts();
    final int columnCount = counts.getColumnDimension();

    final Median median = new Median();
    final double[] columnMedians = new double[columnCount];
    for (int column = 0; column < columnCount; column++) {
        columnMedians[column] = median.evaluate(counts.getColumn(column));
    }

    final double top = new Percentile(100 - percentile).evaluate(columnMedians);
    final double bottom = new Percentile(percentile).evaluate(columnMedians);

    // Mark the columns that are expected to survive and count them.
    final boolean[] expectedKept = new boolean[columnCount];
    int expectedKeptCount = 0;
    for (int column = 0; column < columnCount; column++) {
        final double columnMedian = columnMedians[column];
        expectedKept[column] = columnMedian <= top && columnMedian >= bottom;
        if (expectedKept[column]) {
            expectedKeptCount++;
        }
    }

    final ReadCountCollection result = ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.columnNames().size(), expectedKeptCount);

    // Surviving columns must appear in their original relative order with identical counts.
    int expectedIndex = 0;
    for (int column = 0; column < columnCount; column++) {
        final int actualIndex = result.columnNames().indexOf(readCount.columnNames().get(column));
        if (!expectedKept[column]) {
            Assert.assertEquals(actualIndex, -1);
            continue;
        }
        Assert.assertEquals(actualIndex, expectedIndex++);
        Assert.assertEquals(counts.getColumn(column), result.counts().getColumn(actualIndex));
    }
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project FSensor by KalebKE.
the class CalibrationUtil method getCalibration.
/**
 * Transforms the ellipsoid into a sphere with the offset vector = [0,0,0]
 * and the radii vector = [1,1,1].
 * <p>
 * The diagonal of the returned scaling matrix holds the reciprocals of the fitted radii,
 * assigned to the axes according to the ordering of {@code fitPoints.riv}: the entry with
 * the largest {@code riv} value goes on axis 0, the middle one on axis 1 and the smallest
 * one on axis 2.
 * </p>
 * <p>
 * The three indices are always distinct when {@code riv} has at least three entries, even
 * if some {@code riv} values are equal. (Previously a tie left the "middle" index at its
 * default of 0, so one radius could be applied to two axes while another was ignored.)
 * </p>
 *
 * @param fitPoints the representation of the calibration ellipsoid
 * @return a calibration made of the diagonal scaling matrix and {@code fitPoints.center}
 */
public static Calibration getCalibration(FitPoints fitPoints) {
    // The scalar values to transform the radii vector into [1,1,1]
    RealMatrix scalar = new Array2DRowRealMatrix(3, 3);

    // RIV determines the magnitude of the radii. We have to know the
    // magnitudes because the eigenvalues, and thus the radii, are returned
    // in ascending order. Without knowing the magnitudes, we wouldn't know
    // what radii to apply to what axis.
    final int dimension = fitPoints.riv.getDimension();
    final double[] riv = new double[dimension];
    for (int i = 0; i < dimension; i++) {
        riv[i] = fitPoints.riv.getEntry(i);
    }

    // The indexes of the maximum, median, and minimum radii.
    // Note that these are the opposite of the max and min
    // because a smaller riv value means a greater magnitude.

    // Index of the largest riv value (first occurrence wins on ties).
    int mini = 0;
    for (int i = 1; i < dimension; i++) {
        if (riv[i] > riv[mini]) {
            mini = i;
        }
    }

    // Index of the smallest riv value among the remaining entries (first occurrence wins),
    // so that it can never coincide with mini when more than one entry exists.
    int maxi = -1;
    for (int i = 0; i < dimension; i++) {
        if (i != mini && (maxi < 0 || riv[i] < riv[maxi])) {
            maxi = i;
        }
    }
    if (maxi < 0) {
        // Only one entry available: fall back to the original default.
        maxi = mini;
    }

    // Index of the median radius: an entry strictly between the extremes if one exists...
    int midi = -1;
    for (int i = 0; i < dimension; i++) {
        if (riv[i] < riv[mini] && riv[i] > riv[maxi]) {
            midi = i;
        }
    }
    // ...otherwise (ties) the first index not already used for an extreme.
    if (midi < 0) {
        for (int i = 0; i < dimension; i++) {
            if (i != mini && i != maxi) {
                midi = i;
                break;
            }
        }
    }
    if (midi < 0) {
        // Fewer than three entries: keep the original default index.
        midi = 0;
    }

    // Create the scalar vector in the correct orientation.
    scalar.setEntry(0, 0, 1 / fitPoints.radii.getEntry(mini));
    scalar.setEntry(1, 1, 1 / fitPoints.radii.getEntry(midi));
    scalar.setEntry(2, 2, 1 / fitPoints.radii.getEntry(maxi));
    return new Calibration(scalar, fitPoints.center);
}
Aggregations