Search in sources :

Example 21 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk-protected by broadinstitute.

the class CoverageModelParameters method generateRandomModel.

/**
     * Generates random coverage model parameters.
     *
     * @param targetList list of targets
     * @param numLatents number of latent variables
     * @param seed random seed
     * @param randomMeanLogBiasStandardDeviation std of mean log bias (mean is set to 0)
     * @param randomBiasCovariatesStandardDeviation std of bias covariates (mean is set to 0)
     * @param randomMaxUnexplainedVariance max value of unexplained variance (samples are taken from a uniform
     *                                     distribution [0, {@code randomMaxUnexplainedVariance}])
     * @param initialBiasCovariatesARDCoefficients initial row vector of ARD coefficients
     * @return an instance of {@link CoverageModelParameters}
     */
public static CoverageModelParameters generateRandomModel(final List<Target> targetList, final int numLatents, final long seed, final double randomMeanLogBiasStandardDeviation, final double randomBiasCovariatesStandardDeviation, final double randomMaxUnexplainedVariance, final INDArray initialBiasCovariatesARDCoefficients) {
    Utils.validateArg(numLatents >= 0, "Dimension of the bias space must be non-negative");
    Utils.validateArg(randomBiasCovariatesStandardDeviation >= 0, "Standard deviation of random bias covariates" + " must be non-negative");
    Utils.validateArg(randomMeanLogBiasStandardDeviation >= 0, "Standard deviation of random mean log bias" + " must be non-negative");
    Utils.validateArg(randomMaxUnexplainedVariance >= 0, "Max random unexplained variance must be non-negative");
    Utils.validateArg(initialBiasCovariatesARDCoefficients == null || numLatents > 0 && initialBiasCovariatesARDCoefficients.length() == numLatents, "If ARD is enabled, the dimension" + " of the bias latent space must be positive and match the length of ARD coeffecient vector");
    final boolean biasCovariatesEnabled = numLatents > 0;
    final int numTargets = targetList.size();
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(seed));
    /* Gaussian random for mean log bias */
    final INDArray initialMeanLogBias = Nd4j.create(getNormalRandomNumbers(numTargets, 0, randomMeanLogBiasStandardDeviation, rng), new int[] { 1, numTargets });
    /* Uniform random for unexplained variance */
    final INDArray initialUnexplainedVariance = Nd4j.create(getUniformRandomNumbers(numTargets, 0, randomMaxUnexplainedVariance, rng), new int[] { 1, numTargets });
    final INDArray initialMeanBiasCovariates;
    if (biasCovariatesEnabled) {
        /* Gaussian random for bias covariates */
        initialMeanBiasCovariates = Nd4j.create(getNormalRandomNumbers(numTargets * numLatents, 0, randomBiasCovariatesStandardDeviation, rng), new int[] { numTargets, numLatents });
    } else {
        initialMeanBiasCovariates = null;
    }
    return new CoverageModelParameters(targetList, initialMeanLogBias, initialUnexplainedVariance, initialMeanBiasCovariates, initialBiasCovariatesARDCoefficients);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) RandomGenerator(org.apache.commons.math3.random.RandomGenerator)

Example 22 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk by broadinstitute.

the class SliceSamplerUnitTest method testSliceSamplingOfPeakedBetaDistribution.

/**
     * Test slice sampling of a peaked beta distribution as an example of sampling of a bounded random variable.
     * Checks that input mean and variance are recovered by 10000 samples to a relative error of 0.5% and 2%,
     * respectively.
     */
@Test
public void testSliceSamplingOfPeakedBetaDistribution() {
    rng.setSeed(RANDOM_SEED);
    final double alpha = 10.;
    final double beta = 4.;
    final BetaDistribution betaDistribution = new BetaDistribution(alpha, beta);
    final Function<Double, Double> betaLogPDF = betaDistribution::logDensity;
    final double xInitial = 0.5;
    final double xMin = 0.;
    final double xMax = 1.;
    final double width = 0.1;
    final int numSamples = 10000;
    final SliceSampler betaSampler = new SliceSampler(rng, betaLogPDF, xMin, xMax, width);
    final double[] samples = Doubles.toArray(betaSampler.sample(xInitial, numSamples));
    final double mean = betaDistribution.getNumericalMean();
    final double variance = betaDistribution.getNumericalVariance();
    final double sampleMean = new Mean().evaluate(samples);
    final double sampleVariance = new Variance().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
    Assert.assertEquals(relativeError(sampleVariance, variance), 0., 0.02);
}
Also used : BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Test(org.testng.annotations.Test)

Example 23 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk by broadinstitute.

the class AdaptiveMetropolisSamplerUnitTest method testBeta.

@Test
public void testBeta() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    for (final double a : Arrays.asList(10, 20, 30)) {
        for (final double b : Arrays.asList(10, 20, 30)) {
            final double theoreticalMean = a / (a + b);
            final double theoreticalVariance = a * b / ((a + b) * (a + b) * (a + b + 1));
            //Note: this is the theoretical standard deviation of the sample mean given uncorrelated
            //samples.  The sample mean will have a greater variance here because samples are correlated.
            final double standardDeviationOfMean = Math.sqrt(theoreticalVariance / NUM_SAMPLES);
            final Function<Double, Double> logPDF = x -> (a - 1) * Math.log(x) + (b - 1) * Math.log(1 - x);
            final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_BETA_GUESS, INITIAL_STEP_SIZE, 0, 1);
            final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);
            final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
            final double sampleMeanSquare = samples.stream().mapToDouble(x -> x * x).average().getAsDouble();
            final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean) * NUM_SAMPLES / (NUM_SAMPLES - 1);
            Assert.assertEquals(sampleMean, theoreticalMean, 10 * standardDeviationOfMean);
            Assert.assertEquals(sampleVariance, theoreticalVariance, 10e-4);
        }
    }
}
Also used : Arrays(java.util.Arrays) List(java.util.List) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Test(org.testng.annotations.Test) Random(java.util.Random) Function(java.util.function.Function) Random(java.util.Random) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Test(org.testng.annotations.Test)

Example 24 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk by broadinstitute.

the class AdaptiveMetropolisSamplerUnitTest method testGaussian.

@Test
public void testGaussian() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    for (final double theoreticalMean : Arrays.asList(0)) {
        for (final double precision : Arrays.asList(1.0)) {
            final double variance = 1 / precision;
            //Note: this is the theoretical standard deviation of the sample mean given uncorrelated
            //samples.  The sample mean will have a greater variance here because samples are correlated.
            final double standardDeviationOfMean = Math.sqrt(variance / NUM_SAMPLES);
            final Function<Double, Double> logPDF = x -> -(precision / 2) * (x - theoreticalMean) * (x - theoreticalMean);
            final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_GAUSSIAN_GUESS, INITIAL_STEP_SIZE, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY);
            final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);
            final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
            final double sampleMeanSquare = samples.stream().mapToDouble(x -> x * x).average().getAsDouble();
            final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean) * NUM_SAMPLES / (NUM_SAMPLES - 1);
            Assert.assertEquals(sampleMean, theoreticalMean, 6 * standardDeviationOfMean);
            Assert.assertEquals(sampleVariance, variance, variance / 10);
        }
    }
}
Also used : Arrays(java.util.Arrays) List(java.util.List) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Test(org.testng.annotations.Test) Random(java.util.Random) Function(java.util.function.Function) Random(java.util.Random) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Test(org.testng.annotations.Test)

Example 25 with Variance

use of org.apache.commons.math3.stat.descriptive.moment.Variance in project gatk-protected by broadinstitute.

the class CNLOHCaller method calculateVarianceOfCopyNeutralSegmentMeans.

/**
     * Attempt to get an idea of segment mean variance near copy neutral.
     *
     * @param segments Never {@code null}
     * @return variance of segment mean (in CR space) of segments that are "close enough" to copy neutral.
     *   Zero if no segments are "close enough"
     */
private double calculateVarianceOfCopyNeutralSegmentMeans(final List<ACNVModeledSegment> segments, final double meanBiasInCR) {
    Utils.nonNull(segments);
    // Only consider values "close enough" to copy neutral (CR == 1).
    final double neutralCR = 1 + meanBiasInCR;
    final double[] neutralSegmentMeans = segments.stream().mapToDouble(ACNVModeledSegment::getSegmentMeanInCRSpace).filter(m -> Math.abs(m - neutralCR) < CLOSE_ENOUGH_TO_COPY_NEUTRAL_IN_CR).toArray();
    return new Variance().evaluate(neutralSegmentMeans);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SearchInterval(org.apache.commons.math3.optim.univariate.SearchInterval) RealVector(org.apache.commons.math3.linear.RealVector) Function(java.util.function.Function) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) GammaDistribution(org.apache.commons.math3.distribution.GammaDistribution) Gamma(org.apache.commons.math3.special.Gamma) ACNVModeledSegment(org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment) BaseAbstractUnivariateIntegrator(org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator) GoalType(org.apache.commons.math3.optim.nonlinear.scalar.GoalType) MatrixUtils(org.apache.commons.math3.linear.MatrixUtils) UnivariateObjectiveFunction(org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) SimpsonIntegrator(org.apache.commons.math3.analysis.integration.SimpsonIntegrator) JavaRDD(org.apache.spark.api.java.JavaRDD) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) Pair(org.apache.commons.math3.util.Pair) Collectors(java.util.stream.Collectors) BrentOptimizer(org.apache.commons.math3.optim.univariate.BrentOptimizer) Serializable(java.io.Serializable) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HomoSapiensConstants(org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants) MaxEval(org.apache.commons.math3.optim.MaxEval) LogManager(org.apache.logging.log4j.LogManager) ACNVModeledSegment(org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance)

Aggregations

Collectors (java.util.stream.Collectors)24 IntStream (java.util.stream.IntStream)24 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)22 Nonnull (javax.annotation.Nonnull)20 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)18 Variance (org.apache.commons.math3.stat.descriptive.moment.Variance)18 List (java.util.List)16 FastMath (org.apache.commons.math3.util.FastMath)16 Utils (org.broadinstitute.hellbender.utils.Utils)16 INDArray (org.nd4j.linalg.api.ndarray.INDArray)16 Function (java.util.function.Function)15 Arrays (java.util.Arrays)14 Nullable (javax.annotation.Nullable)14 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)14 RealMatrix (org.apache.commons.math3.linear.RealMatrix)14 Logger (org.apache.logging.log4j.Logger)14 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)14 UserException (org.broadinstitute.hellbender.exceptions.UserException)14 Nd4j (org.nd4j.linalg.factory.Nd4j)14 NDArrayIndex (org.nd4j.linalg.indexing.NDArrayIndex)14