Search in sources :

Example 41 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk by broadinstitute.

the class GATKProtectedMathUtils method columnStdDevs.

/**
     * Calculates the standard deviation per column from a matrix.
     * @param matrix the input matrix.
     * @return never {@code null}, an array with as many positions as columns in {@code matrix}.
     * @throws IllegalArgumentException if {@code matrix} is {@code null}.
     */
public static double[] columnStdDevs(final RealMatrix matrix) {
    Utils.nonNull(matrix);
    final Variance varianceEvaluator = new Variance();
    return IntStream.range(0, matrix.getColumnDimension()).mapToDouble(c -> Math.sqrt(varianceEvaluator.evaluate(matrix.getColumn(c)))).toArray();
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) Pair(org.apache.commons.math3.util.Pair) MathArrays(org.apache.commons.math3.util.MathArrays) Collection(java.util.Collection) FastMath(org.apache.commons.math3.util.FastMath) EnumeratedDistribution(org.apache.commons.math3.distribution.EnumeratedDistribution) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) FourierLinearOperator(org.broadinstitute.hellbender.tools.coveragemodel.linalg.FourierLinearOperator) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Serializable(java.io.Serializable) List(java.util.List) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Nonnull(javax.annotation.Nonnull) Collections(java.util.Collections) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance)

Example 42 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk by broadinstitute.

the class GATKProtectedMathUtils method rowVariances.

public static double[] rowVariances(final RealMatrix matrix) {
    Utils.nonNull(matrix);
    final Variance varianceEvaluator = new Variance();
    return IntStream.range(0, matrix.getRowDimension()).mapToDouble(r -> varianceEvaluator.evaluate(matrix.getRow(r))).toArray();
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) Pair(org.apache.commons.math3.util.Pair) MathArrays(org.apache.commons.math3.util.MathArrays) Collection(java.util.Collection) FastMath(org.apache.commons.math3.util.FastMath) EnumeratedDistribution(org.apache.commons.math3.distribution.EnumeratedDistribution) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) FourierLinearOperator(org.broadinstitute.hellbender.tools.coveragemodel.linalg.FourierLinearOperator) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Serializable(java.io.Serializable) List(java.util.List) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Nonnull(javax.annotation.Nonnull) Collections(java.util.Collections) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance)

Example 43 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk-protected by broadinstitute.

the class AdaptiveMetropolisSamplerUnitTest method testBeta.

@Test
public void testBeta() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    for (final double a : Arrays.asList(10, 20, 30)) {
        for (final double b : Arrays.asList(10, 20, 30)) {
            final double theoreticalMean = a / (a + b);
            final double theoreticalVariance = a * b / ((a + b) * (a + b) * (a + b + 1));
            //Note: this is the theoretical standard deviation of the sample mean given uncorrelated
            //samples.  The sample mean will have a greater variance here because samples are correlated.
            final double standardDeviationOfMean = Math.sqrt(theoreticalVariance / NUM_SAMPLES);
            final Function<Double, Double> logPDF = x -> (a - 1) * Math.log(x) + (b - 1) * Math.log(1 - x);
            final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_BETA_GUESS, INITIAL_STEP_SIZE, 0, 1);
            final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);
            final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
            final double sampleMeanSquare = samples.stream().mapToDouble(x -> x * x).average().getAsDouble();
            final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean) * NUM_SAMPLES / (NUM_SAMPLES - 1);
            Assert.assertEquals(sampleMean, theoreticalMean, 10 * standardDeviationOfMean);
            Assert.assertEquals(sampleVariance, theoreticalVariance, 10e-4);
        }
    }
}
Also used : Arrays(java.util.Arrays) List(java.util.List) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Test(org.testng.annotations.Test) Random(java.util.Random) Function(java.util.function.Function) Random(java.util.Random) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Test(org.testng.annotations.Test)

Example 44 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk-protected by broadinstitute.

the class GibbsSamplerCopyRatioUnitTest method testRunMCMCOnCopyRatioSegmentedGenome.

/**
     * Tests Bayesian inference of a toy copy-ratio model via MCMC.
     * <p>
     *     Recovery of input values for the variance global parameter and the segment-level mean parameters is checked.
     *     In particular, the mean and standard deviation of the posterior for the variance must be recovered to within
     *     a relative error of 1% and 5%, respectively, in 500 samples (after 250 burn-in samples have been discarded).
     * </p>
     * <p>
     *     Furthermore, the number of truth values for the segment-level means falling outside confidence intervals of
     *     1-sigma, 2-sigma, and 3-sigma given by the posteriors in each segment should be roughly consistent with
     *     a normal distribution (i.e., ~32, ~5, and ~0, respectively; we allow for errors of 10, 5, and 2).
     *     Finally, the mean of the standard deviations of the posteriors for the segment-level means should be
     *     recovered to within a relative error of 5%.
     * </p>
     * <p>
     *     With these specifications, this unit test is not overly brittle (i.e., it should pass for a large majority
     *     of randomly generated data sets), but it is still brittle enough to check for correctness of the sampling
     *     (for example, specifying a sufficiently incorrect likelihood will cause the test to fail).
     * </p>
     */
@Test
public void testRunMCMCOnCopyRatioSegmentedGenome() {
    //Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
    final CopyRatioModeller modeller = new CopyRatioModeller(VARIANCE_INITIAL, MEAN_INITIAL, COVERAGES_FILE, NUM_TARGETS_PER_SEGMENT_FILE);
    //Create a GibbsSampler, passing the total number of samples (including burn-in samples)
    //and the model held by the Modeller.
    final GibbsSampler<CopyRatioParameter, CopyRatioState, CopyRatioDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
    //Run the MCMC.
    gibbsSampler.runMCMC();
    //Check that the statistics---i.e., the mean and standard deviation---of the variance posterior
    //agree with those found by emcee/analytically to a relative error of 1% and 5%, respectively.
    final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(CopyRatioParameter.VARIANCE, Double.class, NUM_BURN_IN));
    final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
    final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
    Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
    //Check statistics---i.e., the mean and standard deviation---of the segment-level mean posteriors.
    //In particular, check that the number of segments where the true mean falls outside confidence intervals
    //is roughly consistent with a normal distribution.
    final List<Double> meansTruth = loadList(MEANS_TRUTH_FILE, Double::parseDouble);
    final int numSegments = meansTruth.size();
    final List<SegmentMeans> meansSamples = gibbsSampler.getSamples(CopyRatioParameter.SEGMENT_MEANS, SegmentMeans.class, NUM_BURN_IN);
    int numMeansOutsideOneSigma = 0;
    int numMeansOutsideTwoSigma = 0;
    int numMeansOutsideThreeSigma = 0;
    final List<Double> meanPosteriorStandardDeviations = new ArrayList<>();
    for (int segment = 0; segment < numSegments; segment++) {
        final int j = segment;
        final double[] meanInSegmentSamples = Doubles.toArray(meansSamples.stream().map(s -> s.get(j)).collect(Collectors.toList()));
        final double meanPosteriorCenter = new Mean().evaluate(meanInSegmentSamples);
        final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanInSegmentSamples);
        meanPosteriorStandardDeviations.add(meanPosteriorStandardDeviation);
        final double absoluteDifferenceFromTruth = Math.abs(meanPosteriorCenter - meansTruth.get(segment));
        if (absoluteDifferenceFromTruth > meanPosteriorStandardDeviation) {
            numMeansOutsideOneSigma++;
        }
        if (absoluteDifferenceFromTruth > 2 * meanPosteriorStandardDeviation) {
            numMeansOutsideTwoSigma++;
        }
        if (absoluteDifferenceFromTruth > 3 * meanPosteriorStandardDeviation) {
            numMeansOutsideThreeSigma++;
        }
    }
    final double meanPosteriorStandardDeviationsMean = new Mean().evaluate(Doubles.toArray(meanPosteriorStandardDeviations));
    Assert.assertEquals(numMeansOutsideOneSigma, 100 - 68, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA);
    Assert.assertEquals(numMeansOutsideTwoSigma, 100 - 95, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA);
    Assert.assertTrue(numMeansOutsideThreeSigma <= DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA);
    Assert.assertEquals(relativeError(meanPosteriorStandardDeviationsMean, MEAN_POSTERIOR_STANDARD_DEVIATION_MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
Also used : Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) ArrayList(java.util.ArrayList) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 45 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk-protected by broadinstitute.

the class GibbsSamplerSingleGaussianUnitTest method testRunMCMCOnSingleGaussianModel.

/**
     * Tests Bayesian inference of a Gaussian model via MCMC.  Recovery of input values for the variance and mean
     * global parameters is checked.  In particular, the mean and standard deviation of the posteriors for
     * both parameters must be recovered to within a relative error of 1% and 10%, respectively, in 250 samples
     * (after 250 burn-in samples have been discarded).
     */
@Test
public void testRunMCMCOnSingleGaussianModel() {
    //Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
    final GaussianModeller modeller = new GaussianModeller(VARIANCE_INITIAL, MEAN_INITIAL, datapointsList);
    //Create a GibbsSampler, passing the total number of samples (including burn-in samples)
    //and the model held by the Modeller.
    final GibbsSampler<GaussianParameter, ParameterizedState<GaussianParameter>, GaussianDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
    //Run the MCMC.
    gibbsSampler.runMCMC();
    //Get the samples of each of the parameter posteriors (discarding burn-in samples) by passing the
    //parameter name, type, and burn-in number to the getSamples method.
    final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.VARIANCE, Double.class, NUM_BURN_IN));
    final double[] meanSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.MEAN, Double.class, NUM_BURN_IN));
    //Check that the statistics---i.e., the means and standard deviations---of the posteriors
    //agree with those found by emcee/analytically to a relative error of 1% and 10%, respectively.
    final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
    final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
    Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
    final double meanPosteriorCenter = new Mean().evaluate(meanSamples);
    final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanSamples);
    Assert.assertEquals(relativeError(meanPosteriorCenter, MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(relativeError(meanPosteriorStandardDeviation, MEAN_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
Also used : Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

Collectors (java.util.stream.Collectors)24 IntStream (java.util.stream.IntStream)24 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)22 Nonnull (javax.annotation.Nonnull)20 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)18 Variance (org.apache.commons.math3.stat.descriptive.moment.Variance)17 List (java.util.List)16 FastMath (org.apache.commons.math3.util.FastMath)16 Utils (org.broadinstitute.hellbender.utils.Utils)16 INDArray (org.nd4j.linalg.api.ndarray.INDArray)16 Arrays (java.util.Arrays)14 Function (java.util.function.Function)14 Nullable (javax.annotation.Nullable)14 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)14 Logger (org.apache.logging.log4j.Logger)14 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)14 UserException (org.broadinstitute.hellbender.exceptions.UserException)14 Nd4j (org.nd4j.linalg.factory.Nd4j)14 NDArrayIndex (org.nd4j.linalg.indexing.NDArrayIndex)14 VisibleForTesting (com.google.common.annotations.VisibleForTesting)12