Search in sources :

Example 31 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.

the class RandomDNAUnitTest method checkResults.

/**
 * Asserts that the mean of {@code results} lies strictly within two standard errors of the
 * expected mean {@code n * m / 4}.
 * <p>
 * The standard error is approximated as {@code std / sqrt(n * m)}, where {@code std} is the
 * standard deviation computed from {@code results} itself.
 * </p>
 *
 * @param results observed values whose mean is checked.
 * @param n first factor of the product that determines the expected mean and the tolerance.
 * @param m second factor of the product that determines the expected mean and the tolerance.
 */
public void checkResults(final int[] results, final int n, final int m) {
    final double[] dresults = MathUtils.promote(results);
    final double mean = MathUtils.mean(dresults, 0, dresults.length);
    final double std = new StandardDeviation().evaluate(dresults);
    // Widen to double before multiplying: n * m evaluated in int arithmetic silently wraps
    // around for large inputs, which would corrupt both the expected mean and the tolerance.
    final double product = (double) n * m;
    final double expectedMean = product / 4.0;
    // The standard deviation estimated from the results is only an approximation of the
    // theoretical one, but it is good enough for this sanity check.
    final double tolerance = 2 * std / Math.sqrt(product);
    Assert.assertTrue(mean < expectedMean + tolerance, "unexpected mean:" + mean);
    Assert.assertTrue(mean > expectedMean - tolerance, "unexpected mean:" + mean);
}
Also used : StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)

Example 32 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class PCATangentNormalizationUtils method composeTangentNormalizationInputMatrix.

/**
 * Builds the input matrix used for tangent normalization.
 * <p>
 * The transformation is applied per column (count group), on a copy of the input:
 * </p>
 * <ol>
 *   <li>each value is divided by the mean of its column,</li>
 *   <li>the quotient is passed through {@code truncatedLog2},</li>
 *   <li>and finally the median of the transformed column is subtracted.</li>
 * </ol>
 *
 * @param matrix input matrix.
 * @return never {@code null}.
 */
private static RealMatrix composeTangentNormalizationInputMatrix(final RealMatrix matrix) {
    final RealMatrix normalized = matrix.copy();

    // Pass 1: scale every entry by its column mean, then take the truncated log_2.
    final double[] meansByColumn = GATKProtectedMathUtils.columnMeans(matrix);
    final DefaultRealMatrixChangingVisitor scaleAndLog = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int i, final int j, final double entry) {
            final double scaled = entry / meansByColumn[j];
            return truncatedLog2(scaled);
        }
    };
    normalized.walkInOptimizedOrder(scaleAndLog);

    // Pass 2: center every column on the median of its already-transformed values.
    final int columnCount = matrix.getColumnDimension();
    final double[] mediansByColumn = new double[columnCount];
    for (int j = 0; j < columnCount; j++) {
        mediansByColumn[j] = new Median().evaluate(normalized.getColumn(j));
    }
    final DefaultRealMatrixChangingVisitor centerOnMedian = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int i, final int j, final double entry) {
            return entry - mediansByColumn[j];
        }
    };
    normalized.walkInOptimizedOrder(centerOnMedian);

    return normalized;
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) RowMatrix(org.apache.spark.mllib.linalg.distributed.RowMatrix) DenseMatrix(org.apache.spark.mllib.linalg.DenseMatrix) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) List(java.util.List) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) CaseToPoNTargetMapper(org.broadinstitute.hellbender.tools.pon.coverage.CaseToPoNTargetMapper) CoveragePanelOfNormals(org.broadinstitute.hellbender.tools.pon.coverage.CoveragePanelOfNormals) Median(org.apache.commons.math3.stat.descriptive.rank.Median) SparkConverter(org.broadinstitute.hellbender.utils.spark.SparkConverter) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Matrix(org.apache.spark.mllib.linalg.Matrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 33 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class SliceSamplerUnitTest method testSliceSamplingOfMonotonicBetaDistribution.

/**
 * Slice-samples a monotonic beta distribution (alpha = 10, beta = 1) as an example of a
 * bounded random variable. Draws 10000 samples and asserts that the sample mean and sample
 * variance match the distribution's numerical mean and variance to a relative error of
 * 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfMonotonicBetaDistribution() {
    rng.setSeed(RANDOM_SEED);

    // Target distribution and the exact moments the samples are compared against.
    final BetaDistribution target = new BetaDistribution(10., 1.);
    final double expectedMean = target.getNumericalMean();
    final double expectedVariance = target.getNumericalVariance();

    // Sampler restricted to [0, 1] with width 0.1.
    final double lowerBound = 0.;
    final double upperBound = 1.;
    final double sliceWidth = 0.1;
    final Function<Double, Double> logDensity = target::logDensity;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerBound, upperBound, sliceWidth);

    // Draw 10000 samples starting from x = 0.5.
    final int sampleCount = 10000;
    final double startingPoint = 0.5;
    final double[] draws = Doubles.toArray(sampler.sample(startingPoint, sampleCount));

    final double observedMean = new Mean().evaluate(draws);
    final double observedVariance = new Variance().evaluate(draws);
    Assert.assertEquals(relativeError(observedMean, expectedMean), 0., 0.005);
    Assert.assertEquals(relativeError(observedVariance, expectedVariance), 0., 0.02);
}
Also used : BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Test(org.testng.annotations.Test)

Example 34 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class SliceSamplerUnitTest method testSliceSamplingOfPeakedBetaDistribution.

/**
 * Slice-samples a peaked beta distribution (alpha = 10, beta = 4) as an example of a
 * bounded random variable. Draws 10000 samples and asserts that the sample mean and sample
 * variance match the distribution's numerical mean and variance to a relative error of
 * 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfPeakedBetaDistribution() {
    rng.setSeed(RANDOM_SEED);

    // Target distribution and the exact moments the samples are compared against.
    final BetaDistribution target = new BetaDistribution(10., 4.);
    final double expectedMean = target.getNumericalMean();
    final double expectedVariance = target.getNumericalVariance();

    // Sampler restricted to [0, 1] with width 0.1.
    final double lowerBound = 0.;
    final double upperBound = 1.;
    final double sliceWidth = 0.1;
    final Function<Double, Double> logDensity = target::logDensity;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerBound, upperBound, sliceWidth);

    // Draw 10000 samples starting from x = 0.5.
    final int sampleCount = 10000;
    final double startingPoint = 0.5;
    final double[] draws = Doubles.toArray(sampler.sample(startingPoint, sampleCount));

    final double observedMean = new Mean().evaluate(draws);
    final double observedVariance = new Variance().evaluate(draws);
    Assert.assertEquals(relativeError(observedMean, expectedMean), 0., 0.005);
    Assert.assertEquals(relativeError(observedVariance, expectedVariance), 0., 0.02);
}
Also used : BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Test(org.testng.annotations.Test)

Example 35 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project metron by apache.

the class MedianAbsoluteDeviationTest method testLongTailed.

/**
 * Feeds 10000 samples from a seeded t-distribution into the MAD outlier functions and checks
 * that the minimum, the maximum and the points four standard deviations either side of the
 * mean all score above 3.5, while the mean itself does not.
 */
@Test
public void testLongTailed() {
    final String scoreExpression = "OUTLIER_MAD_SCORE(currentState, value)";
    final double outlierThreshold = 3.5;

    final TDistribution source = new TDistribution(new MersenneTwister(0L), 100);
    final DescriptiveStatistics summary = new DescriptiveStatistics();
    final List<MedianAbsoluteDeviationFunctions.State> states = new ArrayList<>();

    // Initial state: merge of the (empty) state list with NULL.
    MedianAbsoluteDeviationFunctions.State currentState = (MedianAbsoluteDeviationFunctions.State) run("OUTLIER_MAD_STATE_MERGE(states, NULL)", ImmutableMap.of("states", states));

    // Counts the samples added since the last merge; incremented at the end of every
    // iteration, including the one that resets it.
    int sinceLastMerge = 0;
    for (int i = 0; i < 10000; ++i) {
        final Double observation = source.sample();
        summary.addValue(observation);
        run("OUTLIER_MAD_ADD(currentState, data)", ImmutableMap.of("currentState", currentState, "data", observation));
        if (sinceLastMerge >= 1000) {
            sinceLastMerge = 0;
            // Merge the current state with at most the last five entries of the state list.
            final int windowStart = Math.max(0, states.size() - 5);
            final List<MedianAbsoluteDeviationFunctions.State> recentStates = new ArrayList<>(states.subList(windowStart, states.size()));
            currentState = (MedianAbsoluteDeviationFunctions.State) run("OUTLIER_MAD_STATE_MERGE(states, currentState)", ImmutableMap.of("states", recentStates, "currentState", currentState));
        }
        ++sinceLastMerge;
    }

    final Double minimumScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", summary.getMin()));
    Assert.assertTrue("Score: " + minimumScore + " is not an outlier despite being a minimum.", minimumScore > outlierThreshold);

    final Double maximumScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", summary.getMax()));
    Assert.assertTrue("Score: " + maximumScore + " is not an outlier despite being a maximum", maximumScore > outlierThreshold);

    final Double upperTailScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", summary.getMean() + 4 * summary.getStandardDeviation()));
    Assert.assertTrue("Score: " + upperTailScore + " is not an outlier despite being 4 std deviations away from the mean", upperTailScore > outlierThreshold);

    final Double lowerTailScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", summary.getMean() - 4 * summary.getStandardDeviation()));
    Assert.assertTrue("Score: " + lowerTailScore + " is not an outlier despite being 4 std deviations away from the mean", lowerTailScore > outlierThreshold);

    final Double meanScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", summary.getMean()));
    Assert.assertFalse("Score: " + meanScore + " is an outlier despite being the mean", meanScore > outlierThreshold);
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) ArrayList(java.util.ArrayList) TDistribution(org.apache.commons.math3.distribution.TDistribution) MersenneTwister(org.apache.commons.math3.random.MersenneTwister) Test(org.junit.Test)

Aggregations

Test (org.testng.annotations.Test)27 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)23 List (java.util.List)17 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)16 RealMatrix (org.apache.commons.math3.linear.RealMatrix)14 ArrayList (java.util.ArrayList)12 Collectors (java.util.stream.Collectors)12 StandardDeviation (org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)12 Utils (org.broadinstitute.hellbender.utils.Utils)12 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)10 Arrays (java.util.Arrays)10 IntStream (java.util.stream.IntStream)10 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)10 WeightedObservedPoint (org.apache.commons.math3.fitting.WeightedObservedPoint)10 Logger (org.apache.logging.log4j.Logger)10 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)10 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)10 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)10 Function (java.util.function.Function)9 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)9