Search in sources :

Example 66 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class SliceSamplerUnitTest method testSliceSamplingOfNormalDistribution.

/**
     * Test slice sampling of a normal distribution.  Checks that input mean and standard deviation are recovered
     * by 10000 samples to a relative error of 0.5% and 2%, respectively.
     */
@Test
public void testSliceSamplingOfNormalDistribution() {
    rng.setSeed(RANDOM_SEED);
    final double mean = 5.;
    final double standardDeviation = 0.75;
    final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation);
    final Function<Double, Double> normalLogPDF = normalDistribution::logDensity;
    final double xInitial = 1.;
    final double xMin = Double.NEGATIVE_INFINITY;
    final double xMax = Double.POSITIVE_INFINITY;
    final double width = 0.5;
    final int numSamples = 10000;
    final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width);
    final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples));
    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.02);
}
Also used : Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) Test(org.testng.annotations.Test)

Example 67 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class HDF5LibraryUnitTest method testCreateLargeMatrix.

@Test
public void testCreateLargeMatrix() {
    // Creates a large PoN of junk values and simply tests that these can be written and read.
    // Make a big, fake set of read counts.
    final int numRows = 2500000;
    final int numCols = 10;
    final double mean = 3e-7;
    final double sigma = 1e-9;
    final RealMatrix bigCounts = createMatrixOfGaussianValues(numRows, numCols, mean, sigma);
    final File tempOutputHD5 = IOUtils.createTempFile("big-ol-", ".hd5");
    final HDF5File hdf5File = new HDF5File(tempOutputHD5, HDF5File.OpenMode.CREATE);
    final String hdf5Path = "/test/m";
    hdf5File.makeDoubleMatrix(hdf5Path, bigCounts.getData());
    hdf5File.close();
    final HDF5File hdf5FileForReading = new HDF5File(tempOutputHD5, HDF5File.OpenMode.READ_ONLY);
    final double[][] result = hdf5FileForReading.readDoubleMatrix(hdf5Path);
    final RealMatrix resultAsRealMatrix = new Array2DRowRealMatrix(result);
    Assert.assertTrue(resultAsRealMatrix.getRowDimension() == numRows);
    Assert.assertTrue(resultAsRealMatrix.getColumnDimension() == numCols);
    final RealMatrix readMatrix = new Array2DRowRealMatrix(result);
    PoNTestUtils.assertEqualsMatrix(readMatrix, bigCounts, false);
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) HDF5File(org.broadinstitute.hdf5.HDF5File) File(java.io.File) HDF5File(org.broadinstitute.hdf5.HDF5File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 68 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class AdaptiveMetropolisSamplerUnitTest method testGaussian.

@Test
public void testGaussian() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    for (final double theoreticalMean : Arrays.asList(0)) {
        for (final double precision : Arrays.asList(1.0)) {
            final double variance = 1 / precision;
            //Note: this is the theoretical standard deviation of the sample mean given uncorrelated
            //samples.  The sample mean will have a greater variance here because samples are correlated.
            final double standardDeviationOfMean = Math.sqrt(variance / NUM_SAMPLES);
            final Function<Double, Double> logPDF = x -> -(precision / 2) * (x - theoreticalMean) * (x - theoreticalMean);
            final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_GAUSSIAN_GUESS, INITIAL_STEP_SIZE, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY);
            final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);
            final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
            final double sampleMeanSquare = samples.stream().mapToDouble(x -> x * x).average().getAsDouble();
            final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean) * NUM_SAMPLES / (NUM_SAMPLES - 1);
            Assert.assertEquals(sampleMean, theoreticalMean, 6 * standardDeviationOfMean);
            Assert.assertEquals(sampleVariance, variance, variance / 10);
        }
    }
}
Also used : Arrays(java.util.Arrays) List(java.util.List) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Test(org.testng.annotations.Test) Random(java.util.Random) Function(java.util.function.Function) Random(java.util.Random) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Test(org.testng.annotations.Test)

Example 69 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class HDF5LibraryUnitTest method createMatrixOfGaussianValues.

private RealMatrix createMatrixOfGaussianValues(int numRows, int numCols, final double mean, final double sigma) {
    final RealMatrix bigCounts = new Array2DRowRealMatrix(numRows, numCols);
    final RandomDataGenerator randomDataGenerator = new RandomDataGenerator();
    randomDataGenerator.reSeed(337337337);
    bigCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(int row, int column, double value) {
            return randomDataGenerator.nextGaussian(mean, sigma);
        }
    });
    return bigCounts;
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Example 70 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.

the class CNLOHCaller method calculateVarianceOfCopyNeutralSegmentMeans.

/**
     * Attempt to get an idea of segment mean variance near copy neutral.
     *
     * @param segments Never {@code null}
     * @return variance of segment mean (in CR space) of segments that are "close enough" to copy neutral.
     *   Zero if no segments are "close enough"
     */
private double calculateVarianceOfCopyNeutralSegmentMeans(final List<ACNVModeledSegment> segments, final double meanBiasInCR) {
    Utils.nonNull(segments);
    // Only consider values "close enough" to copy neutral (CR == 1).
    final double neutralCR = 1 + meanBiasInCR;
    final double[] neutralSegmentMeans = segments.stream().mapToDouble(ACNVModeledSegment::getSegmentMeanInCRSpace).filter(m -> Math.abs(m - neutralCR) < CLOSE_ENOUGH_TO_COPY_NEUTRAL_IN_CR).toArray();
    return new Variance().evaluate(neutralSegmentMeans);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SearchInterval(org.apache.commons.math3.optim.univariate.SearchInterval) RealVector(org.apache.commons.math3.linear.RealVector) Function(java.util.function.Function) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) GammaDistribution(org.apache.commons.math3.distribution.GammaDistribution) Gamma(org.apache.commons.math3.special.Gamma) ACNVModeledSegment(org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment) BaseAbstractUnivariateIntegrator(org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator) GoalType(org.apache.commons.math3.optim.nonlinear.scalar.GoalType) MatrixUtils(org.apache.commons.math3.linear.MatrixUtils) UnivariateObjectiveFunction(org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) SimpsonIntegrator(org.apache.commons.math3.analysis.integration.SimpsonIntegrator) JavaRDD(org.apache.spark.api.java.JavaRDD) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) Pair(org.apache.commons.math3.util.Pair) Collectors(java.util.stream.Collectors) BrentOptimizer(org.apache.commons.math3.optim.univariate.BrentOptimizer) Serializable(java.io.Serializable) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HomoSapiensConstants(org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants) MaxEval(org.apache.commons.math3.optim.MaxEval) LogManager(org.apache.logging.log4j.LogManager) ACNVModeledSegment(org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance)

Aggregations

Test (org.testng.annotations.Test)27 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)23 List (java.util.List)17 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)16 RealMatrix (org.apache.commons.math3.linear.RealMatrix)14 ArrayList (java.util.ArrayList)12 Collectors (java.util.stream.Collectors)12 StandardDeviation (org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)12 Utils (org.broadinstitute.hellbender.utils.Utils)12 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)10 Arrays (java.util.Arrays)10 IntStream (java.util.stream.IntStream)10 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)10 WeightedObservedPoint (org.apache.commons.math3.fitting.WeightedObservedPoint)10 Logger (org.apache.logging.log4j.Logger)10 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)10 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)10 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)10 Function (java.util.function.Function)9 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)9