use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.
the class RandomDNAUnitTest method checkResults.
/**
 * Asserts that the mean of {@code results} lies strictly within two standard errors of the
 * expected mean {@code n * m / 4}.
 * <p>
 * The standard error is estimated as {@code std / sqrt(n * m)}, where {@code std} is the value
 * returned by {@link StandardDeviation#evaluate(double[])} for the promoted results.
 * </p>
 *
 * @param results the observed values whose mean is checked.
 * @param n first factor of the product that determines the expected mean and the error scale.
 * @param m second factor of the product that determines the expected mean and the error scale.
 */
public void checkResults(final int[] results, final int n, final int m) {
    final double[] dresults = MathUtils.promote(results);
    final double mean = MathUtils.mean(dresults, 0, dresults.length);
    // not really because it's the population not the sample std but it'll do
    final double s = new StandardDeviation().evaluate(dresults);
    // Widen to double before multiplying: n * m evaluated in int arithmetic silently wraps
    // around for large arguments, which would corrupt both the expected mean and the tolerance.
    final double total = (double) n * m;
    final double expectedMean = total / 4.0;
    final double tolerance = 2 * s / Math.sqrt(total);
    Assert.assertTrue(mean < expectedMean + tolerance, "unexpected mean:" + mean);
    Assert.assertTrue(mean > expectedMean - tolerance, "unexpected mean:" + mean);
}
use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.
the class PCATangentNormalizationUtils method composeTangentNormalizationInputMatrix.
/**
 * Builds the matrix that tangent normalization operates on.
 * <p>
 * Every column (count group) of the input is transformed independently:
 * </p>
 * <ol>
 *     <li>each count is divided by the mean of its column,</li>
 *     <li>the resulting ratio is passed through {@code truncatedLog2},</li>
 *     <li>the median of the transformed column is subtracted from each of its values.</li>
 * </ol>
 * <p>
 * All modifications are applied to a copy obtained from {@code matrix.copy()}.
 * </p>
 *
 * @param matrix input matrix.
 * @return never {@code null}.
 */
private static RealMatrix composeTangentNormalizationInputMatrix(final RealMatrix matrix) {
    final RealMatrix normalized = matrix.copy();

    // First pass: scale every entry by its column mean and take the (truncated) log_2.
    final double[] meansByColumn = GATKProtectedMathUtils.columnMeans(matrix);
    normalized.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int rowIndex, final int columnIndex, final double entry) {
            final double ratio = entry / meansByColumn[columnIndex];
            return truncatedLog2(ratio);
        }
    });

    // Second pass: the medians are taken from the already log-transformed columns and then
    // subtracted, which centers each column around zero.
    final int columnCount = matrix.getColumnDimension();
    final double[] mediansByColumn = new double[columnCount];
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        mediansByColumn[columnIndex] = new Median().evaluate(normalized.getColumn(columnIndex));
    }
    normalized.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int rowIndex, final int columnIndex, final double entry) {
            return entry - mediansByColumn[columnIndex];
        }
    });

    return normalized;
}
use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.
the class SliceSamplerUnitTest method testSliceSamplingOfMonotonicBetaDistribution.
/**
 * Slice-samples a monotonic beta distribution (alpha = 10, beta = 1) as an example of a
 * bounded random variable restricted to [0, 1].
 * Draws 10000 samples and asserts that the value reported by {@code relativeError} is within
 * 0.005 (0.5%) for the mean and within 0.02 (2%) for the variance of the distribution.
 */
@Test
public void testSliceSamplingOfMonotonicBetaDistribution() {
    rng.setSeed(RANDOM_SEED);

    // Target distribution Beta(alpha = 10, beta = 1) and the moments the sampler must recover.
    final BetaDistribution target = new BetaDistribution(10., 1.);
    final double expectedMean = target.getNumericalMean();
    final double expectedVariance = target.getNumericalVariance();

    // Sampler over the support [xMin = 0, xMax = 1] with step width 0.1.
    final Function<Double, Double> logDensity = target::logDensity;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, 0., 1., 0.1);

    // 10000 draws, starting the chain at xInitial = 0.5.
    final double[] draws = Doubles.toArray(sampler.sample(0.5, 10000));

    final double observedMean = new Mean().evaluate(draws);
    final double observedVariance = new Variance().evaluate(draws);
    Assert.assertEquals(relativeError(observedMean, expectedMean), 0., 0.005);
    Assert.assertEquals(relativeError(observedVariance, expectedVariance), 0., 0.02);
}
use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.
the class SliceSamplerUnitTest method testSliceSamplingOfPeakedBetaDistribution.
/**
 * Slice-samples a peaked beta distribution (alpha = 10, beta = 4) as an example of a
 * bounded random variable restricted to [0, 1].
 * Draws 10000 samples and asserts that the value reported by {@code relativeError} is within
 * 0.005 (0.5%) for the mean and within 0.02 (2%) for the variance of the distribution.
 */
@Test
public void testSliceSamplingOfPeakedBetaDistribution() {
    rng.setSeed(RANDOM_SEED);

    // Distribution under test and the log-density handed to the sampler.
    final double shapeAlpha = 10.;
    final double shapeBeta = 4.;
    final BetaDistribution target = new BetaDistribution(shapeAlpha, shapeBeta);
    final Function<Double, Double> logDensity = x -> target.logDensity(x);

    // Sampler configuration: support bounds, step width, chain start and chain length.
    final double lowerBound = 0.;
    final double upperBound = 1.;
    final double stepWidth = 0.1;
    final double start = 0.5;
    final int sampleCount = 10000;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerBound, upperBound, stepWidth);
    final double[] draws = Doubles.toArray(sampler.sample(start, sampleCount));

    // Compare the empirical moments against the analytical ones.
    final double meanError = relativeError(new Mean().evaluate(draws), target.getNumericalMean());
    final double varianceError = relativeError(new Variance().evaluate(draws), target.getNumericalVariance());
    Assert.assertEquals(meanError, 0., 0.005);
    Assert.assertEquals(varianceError, 0., 0.02);
}
use of org.apache.commons.math3.stat.descriptive.moment.Mean in project metron by apache.
the class MedianAbsoluteDeviationTest method testLongTailed.
/**
 * Feeds 10000 samples drawn from a t-distribution (seeded generator, 100 degrees of freedom)
 * into the MAD outlier state and checks the resulting scores against the 3.5 threshold:
 * the observed minimum, the observed maximum and the points four standard deviations on either
 * side of the mean must score above it, while the mean itself must not.
 */
@Test
public void testLongTailed() {
    final TDistribution sampleSource = new TDistribution(new MersenneTwister(0L), 100);
    final DescriptiveStatistics summary = new DescriptiveStatistics();
    final List<MedianAbsoluteDeviationFunctions.State> states = new ArrayList<>();

    // initialize the state
    MedianAbsoluteDeviationFunctions.State currentState = (MedianAbsoluteDeviationFunctions.State) run(
            "OUTLIER_MAD_STATE_MERGE(states, NULL)", ImmutableMap.of("states", states));

    for (int i = 0; i < 10000; ++i) {
        final Double d = sampleSource.sample();
        summary.addValue(d);
        run("OUTLIER_MAD_ADD(currentState, data)", ImmutableMap.of("currentState", currentState, "data", d));

        // Merge after samples 1000, 2000, ..., 9000 (index 0 is excluded).
        if (i > 0 && i % 1000 == 0) {
            // Window of at most the five most recent states.
            // NOTE(review): nothing in this method appends to `states`, so this window looks
            // like it is always empty -- confirm whether that is intended.
            final int windowStart = Math.max(0, states.size() - 5);
            final List<MedianAbsoluteDeviationFunctions.State> stateWindow =
                    new ArrayList<>(states.subList(windowStart, states.size()));
            currentState = (MedianAbsoluteDeviationFunctions.State) run(
                    "OUTLIER_MAD_STATE_MERGE(states, currentState)",
                    ImmutableMap.of("states", stateWindow, "currentState", currentState));
        }
    }

    final String scoreExpression = "OUTLIER_MAD_SCORE(currentState, value)";
    final double sampleMean = summary.getMean();
    final double fourSigma = 4 * summary.getStandardDeviation();

    final Double minScore = (Double) run(scoreExpression,
            ImmutableMap.of("currentState", currentState, "value", summary.getMin()));
    Assert.assertTrue("Score: " + minScore + " is not an outlier despite being a minimum.", minScore > 3.5);

    final Double maxScore = (Double) run(scoreExpression,
            ImmutableMap.of("currentState", currentState, "value", summary.getMax()));
    Assert.assertTrue("Score: " + maxScore + " is not an outlier despite being a maximum", maxScore > 3.5);

    final Double upperTailScore = (Double) run(scoreExpression,
            ImmutableMap.of("currentState", currentState, "value", sampleMean + fourSigma));
    Assert.assertTrue("Score: " + upperTailScore + " is not an outlier despite being 4 std deviations away from the mean", upperTailScore > 3.5);

    final Double lowerTailScore = (Double) run(scoreExpression,
            ImmutableMap.of("currentState", currentState, "value", sampleMean - fourSigma));
    Assert.assertTrue("Score: " + lowerTailScore + " is not an outlier despite being 4 std deviations away from the mean", lowerTailScore > 3.5);

    final Double meanScore = (Double) run(scoreExpression,
            ImmutableMap.of("currentState", currentState, "value", sampleMean));
    Assert.assertFalse("Score: " + meanScore + " is an outlier despite being the mean", meanScore > 3.5);
}
Aggregations