use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk-protected by broadinstitute.
the class GibbsSamplerSingleGaussianUnitTest method testRunMCMCOnSingleGaussianModel.
/**
* Tests Bayesian inference of a Gaussian model via MCMC. Recovery of input values for the variance and mean
* global parameters is checked. In particular, the mean and standard deviation of the posteriors for
* both parameters must be recovered to within a relative error of 1% and 10%, respectively, in 250 samples
* (after 250 burn-in samples have been discarded).
*/
@Test
public void testRunMCMCOnSingleGaussianModel() {
//Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
final GaussianModeller modeller = new GaussianModeller(VARIANCE_INITIAL, MEAN_INITIAL, datapointsList);
//Create a GibbsSampler, passing the total number of samples (including burn-in samples)
//and the model held by the Modeller.
final GibbsSampler<GaussianParameter, ParameterizedState<GaussianParameter>, GaussianDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
//Run the MCMC.
gibbsSampler.runMCMC();
//Get the samples of each of the parameter posteriors (discarding burn-in samples) by passing the
//parameter name, type, and burn-in number to the getSamples method.
final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.VARIANCE, Double.class, NUM_BURN_IN));
final double[] meanSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.MEAN, Double.class, NUM_BURN_IN));
//Check that the statistics---i.e., the means and standard deviations---of the posteriors
//agree with those found by emcee/analytically to a relative error of 1% and 10%, respectively.
final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
final double meanPosteriorCenter = new Mean().evaluate(meanSamples);
final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanSamples);
Assert.assertEquals(relativeError(meanPosteriorCenter, MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
Assert.assertEquals(relativeError(meanPosteriorStandardDeviation, MEAN_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk-protected by broadinstitute.
the class SliceSamplerUnitTest method testInitialPointOutOfRange.
@Test(expectedExceptions = IllegalArgumentException.class)
public void testInitialPointOutOfRange() {
rng.setSeed(RANDOM_SEED);
final double mean = 5.;
final double standardDeviation = 0.75;
final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation);
final Function<Double, Double> normalLogPDF = normalDistribution::logDensity;
final double xInitial = -10.;
final double xMin = 0.;
final double xMax = 1.;
final double width = 0.5;
final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width);
normalSampler.sample(xInitial);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk-protected by broadinstitute.
the class SliceSamplerUnitTest method testSliceSamplingOfNormalDistribution.
/**
* Test slice sampling of a normal distribution. Checks that input mean and standard deviation are recovered
* by 10000 samples to a relative error of 0.5% and 2%, respectively.
*/
@Test
public void testSliceSamplingOfNormalDistribution() {
rng.setSeed(RANDOM_SEED);
final double mean = 5.;
final double standardDeviation = 0.75;
final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation);
final Function<Double, Double> normalLogPDF = normalDistribution::logDensity;
final double xInitial = 1.;
final double xMin = Double.NEGATIVE_INFINITY;
final double xMax = Double.POSITIVE_INFINITY;
final double width = 0.5;
final int numSamples = 10000;
final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width);
final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples));
final double sampleMean = new Mean().evaluate(samples);
final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.02);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.
the class GibbsSamplerCopyRatioUnitTest method testRunMCMCOnCopyRatioSegmentedGenome.
/**
* Tests Bayesian inference of a toy copy-ratio model via MCMC.
* <p>
* Recovery of input values for the variance global parameter and the segment-level mean parameters is checked.
* In particular, the mean and standard deviation of the posterior for the variance must be recovered to within
* a relative error of 1% and 5%, respectively, in 500 samples (after 250 burn-in samples have been discarded).
* </p>
* <p>
* Furthermore, the number of truth values for the segment-level means falling outside confidence intervals of
* 1-sigma, 2-sigma, and 3-sigma given by the posteriors in each segment should be roughly consistent with
* a normal distribution (i.e., ~32, ~5, and ~0, respectively; we allow for errors of 10, 5, and 2).
* Finally, the mean of the standard deviations of the posteriors for the segment-level means should be
* recovered to within a relative error of 5%.
* </p>
* <p>
* With these specifications, this unit test is not overly brittle (i.e., it should pass for a large majority
* of randomly generated data sets), but it is still brittle enough to check for correctness of the sampling
* (for example, specifying a sufficiently incorrect likelihood will cause the test to fail).
* </p>
*/
@Test
public void testRunMCMCOnCopyRatioSegmentedGenome() {
//Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
final CopyRatioModeller modeller = new CopyRatioModeller(VARIANCE_INITIAL, MEAN_INITIAL, COVERAGES_FILE, NUM_TARGETS_PER_SEGMENT_FILE);
//Create a GibbsSampler, passing the total number of samples (including burn-in samples)
//and the model held by the Modeller.
final GibbsSampler<CopyRatioParameter, CopyRatioState, CopyRatioDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
//Run the MCMC.
gibbsSampler.runMCMC();
//Check that the statistics---i.e., the mean and standard deviation---of the variance posterior
//agree with those found by emcee/analytically to a relative error of 1% and 5%, respectively.
final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(CopyRatioParameter.VARIANCE, Double.class, NUM_BURN_IN));
final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
//Check statistics---i.e., the mean and standard deviation---of the segment-level mean posteriors.
//In particular, check that the number of segments where the true mean falls outside confidence intervals
//is roughly consistent with a normal distribution.
final List<Double> meansTruth = loadList(MEANS_TRUTH_FILE, Double::parseDouble);
final int numSegments = meansTruth.size();
final List<SegmentMeans> meansSamples = gibbsSampler.getSamples(CopyRatioParameter.SEGMENT_MEANS, SegmentMeans.class, NUM_BURN_IN);
int numMeansOutsideOneSigma = 0;
int numMeansOutsideTwoSigma = 0;
int numMeansOutsideThreeSigma = 0;
final List<Double> meanPosteriorStandardDeviations = new ArrayList<>();
for (int segment = 0; segment < numSegments; segment++) {
final int j = segment;
final double[] meanInSegmentSamples = Doubles.toArray(meansSamples.stream().map(s -> s.get(j)).collect(Collectors.toList()));
final double meanPosteriorCenter = new Mean().evaluate(meanInSegmentSamples);
final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanInSegmentSamples);
meanPosteriorStandardDeviations.add(meanPosteriorStandardDeviation);
final double absoluteDifferenceFromTruth = Math.abs(meanPosteriorCenter - meansTruth.get(segment));
if (absoluteDifferenceFromTruth > meanPosteriorStandardDeviation) {
numMeansOutsideOneSigma++;
}
if (absoluteDifferenceFromTruth > 2 * meanPosteriorStandardDeviation) {
numMeansOutsideTwoSigma++;
}
if (absoluteDifferenceFromTruth > 3 * meanPosteriorStandardDeviation) {
numMeansOutsideThreeSigma++;
}
}
final double meanPosteriorStandardDeviationsMean = new Mean().evaluate(Doubles.toArray(meanPosteriorStandardDeviations));
Assert.assertEquals(numMeansOutsideOneSigma, 100 - 68, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA);
Assert.assertEquals(numMeansOutsideTwoSigma, 100 - 95, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA);
Assert.assertTrue(numMeansOutsideThreeSigma <= DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA);
Assert.assertEquals(relativeError(meanPosteriorStandardDeviationsMean, MEAN_POSTERIOR_STANDARD_DEVIATION_MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.
the class GibbsSamplerSingleGaussianUnitTest method testRunMCMCOnSingleGaussianModel.
/**
* Tests Bayesian inference of a Gaussian model via MCMC. Recovery of input values for the variance and mean
* global parameters is checked. In particular, the mean and standard deviation of the posteriors for
* both parameters must be recovered to within a relative error of 1% and 10%, respectively, in 250 samples
* (after 250 burn-in samples have been discarded).
*/
@Test
public void testRunMCMCOnSingleGaussianModel() {
//Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
final GaussianModeller modeller = new GaussianModeller(VARIANCE_INITIAL, MEAN_INITIAL, datapointsList);
//Create a GibbsSampler, passing the total number of samples (including burn-in samples)
//and the model held by the Modeller.
final GibbsSampler<GaussianParameter, ParameterizedState<GaussianParameter>, GaussianDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
//Run the MCMC.
gibbsSampler.runMCMC();
//Get the samples of each of the parameter posteriors (discarding burn-in samples) by passing the
//parameter name, type, and burn-in number to the getSamples method.
final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.VARIANCE, Double.class, NUM_BURN_IN));
final double[] meanSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.MEAN, Double.class, NUM_BURN_IN));
//Check that the statistics---i.e., the means and standard deviations---of the posteriors
//agree with those found by emcee/analytically to a relative error of 1% and 10%, respectively.
final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
final double meanPosteriorCenter = new Mean().evaluate(meanSamples);
final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanSamples);
Assert.assertEquals(relativeError(meanPosteriorCenter, MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
Assert.assertEquals(relativeError(meanPosteriorStandardDeviation, MEAN_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
Aggregations