use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk-protected by broadinstitute.
the class PosteriorSummaryUtils method calculatePosteriorMode.
/**
 * Estimates the mode of a posterior distribution from a list of samples.
 * <p>
 * The posterior is approximated by mllib kernel density estimation (see {@link KernelDensity}), with the
 * bandwidth derived from the sample standard deviation via Silverman's rule; the maximum of that
 * approximation is then located with a {@link BrentOptimizer}, searching within the sample range and
 * starting from the sample mean.
 * <p>
 * The estimate may be poor when the number of samples is small (which degrades the kernel density
 * estimate) or when the posterior is not unimodal or is otherwise sufficiently pathological.
 * If the samples contain {@link Double#NaN}, {@link Double#NaN} will be returned.
 *
 * @param samples posterior samples, cannot be {@code null} and number of samples must be greater than 0
 * @param ctx     {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation
 * @return estimate of the posterior mode; the sample mean is returned directly when the sample standard
 *         deviation is zero or the sample mean is {@link Double#NaN}
 */
public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) {
    Utils.nonNull(samples);
    Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero.");

    //summary statistics of the samples: range, mean, and standard deviation
    final double lowerBound = Collections.min(samples);
    final double upperBound = Collections.max(samples);
    final double[] values = Doubles.toArray(samples);
    final double center = new Mean().evaluate(values);
    final double spread = new StandardDeviation().evaluate(values);

    //nothing to optimize when the mean is NaN or when there is no spread; the mean is the answer
    final boolean isDegenerate = Double.isNaN(center) || spread == 0.;
    if (isDegenerate) {
        return center;
    }

    //Silverman's rule gives the kernel bandwidth from the sample standard deviation and the sample count
    //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
    final double sampleCountFactor = Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT);
    final double bandwidth = SILVERMANS_RULE_CONSTANT * spread * sampleCountFactor;

    //kernel density estimate standing in for the posterior
    final KernelDensity density = new KernelDensity()
            .setSample(ctx.parallelize(samples, 1))
            .setBandwidth(bandwidth);

    //Brent optimization of the estimated density; absolute tolerance scales with the sample range
    final double absoluteTolerance = RELATIVE_TOLERANCE * (upperBound - lowerBound);
    final BrentOptimizer brent = new BrentOptimizer(RELATIVE_TOLERANCE, absoluteTolerance);
    final UnivariateObjectiveFunction densityAt =
            new UnivariateObjectiveFunction(x -> density.estimate(new double[] { x })[0]);

    //maximize over [min, max] of the samples, with the sample mean as the starting point
    final SearchInterval range = new SearchInterval(lowerBound, upperBound, center);
    return brent.optimize(densityAt, GoalType.MAXIMIZE, range, BRENT_MAX_EVAL).getPoint();
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.
the class ReCapSegCaller method calculateT.
/**
 * Computes the standard deviation of the per-target values belonging to segments whose mean is below
 * {@code COPY_NEUTRAL_CUTOFF}, after discarding targets that lie {@code Z_THRESHOLD} or more standard
 * deviations away from the mean of those targets.
 *
 * @param tangentNormalizedCoverage coverage whose records supply the per-target values
 * @param segments                  segments from which the likely copy-neutral ones are selected
 * @return standard deviation of the outlier-filtered values of the targets in the selected segments
 */
private static double calculateT(final ReadCountCollection tangentNormalizedCoverage, final List<ModeledSegment> segments) {
    // Select the segments that are likely copy neutral.
    // Note: the segment mean is compared as-is (no Math.abs) in order to mimic the python implementation.
    final List<ModeledSegment> neutralSegments = segments.stream()
            .filter(segment -> segment.getSegmentMean() < COPY_NEUTRAL_CUTOFF)
            .collect(Collectors.toList());

    // Build a target collection over the coverage records so that targets can be looked up per segment.
    final List<ReadCountRecord.SingleSampleRecord> coverageRecords = tangentNormalizedCoverage.records().stream()
            .map(ReadCountRecord::asSingleSampleRecord)
            .collect(Collectors.toList());
    final TargetCollection<ReadCountRecord.SingleSampleRecord> coverageTargets =
            new HashedListTargetCollection<>(coverageRecords);

    // Gather the values of every target falling in a selected segment.  Individual targets, due to
    // noise, can be far away from copy neutral even though their segment is not.
    final double[] neutralValues = neutralSegments.stream()
            .flatMap(segment -> coverageTargets.targets(segment).stream())
            .mapToDouble(ReadCountRecord.SingleSampleRecord::getCount)
            .toArray();

    final double center = new Mean().evaluate(neutralValues);
    final double sigma = new StandardDeviation().evaluate(neutralValues);

    // Drop outliers: keep only the values strictly within Z_THRESHOLD standard deviations of the mean.
    final double maxDeviation = sigma * Z_THRESHOLD;
    final double[] inlierValues = Arrays.stream(neutralValues)
            .filter(value -> Math.abs(value - center) < maxDeviation)
            .toArray();

    return new StandardDeviation().evaluate(inlierValues);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.
the class SliceSamplerUnitTest method testInitialPointOutOfRange.
/**
 * Builds a slice sampler restricted to the range [0, 1] and asks it to sample starting from -10,
 * which lies outside that range.  The test passes only if an {@link IllegalArgumentException} is thrown.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testInitialPointOutOfRange() {
    rng.setSeed(RANDOM_SEED);

    // target density: normal distribution with mean 5 and standard deviation 0.75
    final double targetMean = 5.;
    final double targetStandardDeviation = 0.75;
    final NormalDistribution target = new NormalDistribution(targetMean, targetStandardDeviation);
    final Function<Double, Double> logDensity = target::logDensity;

    // sampler range and step width
    final double lowerLimit = 0.;
    final double upperLimit = 1.;
    final double stepWidth = 0.5;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerLimit, upperLimit, stepWidth);

    // starting point deliberately placed below the lower limit
    final double outOfRangeStart = -10.;
    sampler.sample(outOfRangeStart);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.
the class SliceSamplerUnitTest method testSliceSamplingOfNormalDistribution.
/**
 * Slice-samples a normal distribution (mean 5, standard deviation 0.75) over an unbounded range and
 * verifies that 10000 samples reproduce the input mean to a relative error of 0.5% and the input
 * standard deviation to a relative error of 2%.
 */
@Test
public void testSliceSamplingOfNormalDistribution() {
    rng.setSeed(RANDOM_SEED);

    // target density
    final double targetMean = 5.;
    final double targetStandardDeviation = 0.75;
    final NormalDistribution target = new NormalDistribution(targetMean, targetStandardDeviation);
    final Function<Double, Double> logDensity = target::logDensity;

    // unbounded sampler with step width 0.5
    final double lowerLimit = Double.NEGATIVE_INFINITY;
    final double upperLimit = Double.POSITIVE_INFINITY;
    final double stepWidth = 0.5;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerLimit, upperLimit, stepWidth);

    // draw the samples, starting from x = 1
    final double start = 1.;
    final int sampleCount = 10000;
    final double[] draws = Doubles.toArray(sampler.sample(start, sampleCount));

    // recovered statistics
    final double recoveredMean = new Mean().evaluate(draws);
    final double recoveredStandardDeviation = new StandardDeviation().evaluate(draws);

    final double meanTolerance = 0.005;
    final double standardDeviationTolerance = 0.02;
    Assert.assertEquals(relativeError(recoveredMean, targetMean), 0., meanTolerance);
    Assert.assertEquals(relativeError(recoveredStandardDeviation, targetStandardDeviation), 0., standardDeviationTolerance);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.
the class RandomDNAUnitTest method checkResults.
/**
 * Asserts that the mean of {@code results} lies strictly within
 * {@code 2 * std / sqrt(n * m)} of the expected mean {@code n * m / 4}, where {@code std} is the
 * standard deviation of {@code results}.
 *
 * @param results values whose mean is checked
 * @param n       first factor of the total count used for the expected mean and the tolerance
 * @param m       second factor of the total count used for the expected mean and the tolerance
 */
public void checkResults(final int[] results, final int n, final int m) {
    final double[] dresults = MathUtils.promote(results);
    final double mean = MathUtils.mean(dresults, 0, dresults.length);
    final double std = new StandardDeviation().evaluate(dresults);
    // Form the product in double precision: an int product n * m silently wraps around once it
    // exceeds Integer.MAX_VALUE, which would corrupt the expected mean and turn the square root
    // below into NaN for a negative wrapped value.
    final double total = (double) n * m;
    final double expectedMean = total / 4.0;
    // not really because it's the population not the sample std but it'll do
    final double tolerance = 2 * std / Math.sqrt(total);
    Assert.assertTrue(mean < expectedMean + tolerance, "unexpected mean:" + mean);
    Assert.assertTrue(mean > expectedMean - tolerance, "unexpected mean:" + mean);
}
Aggregations