use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project presto by prestodb.
the class TestKHyperLogLog method testCardinality.
@Test
public void testCardinality() throws Exception {
int trials = 1000;
for (int indexBits = 4; indexBits <= 12; indexBits++) {
Map<Integer, StandardDeviation> errors = new HashMap<>();
int numberOfBuckets = 1 << indexBits;
int maxCardinality = numberOfBuckets * 2;
for (int trial = 0; trial < trials; trial++) {
KHyperLogLog khll = new KHyperLogLog();
for (int cardinality = 1; cardinality <= maxCardinality; cardinality++) {
khll.add(ThreadLocalRandom.current().nextLong(), 0L);
if (cardinality % (numberOfBuckets / 10) == 0) {
// only do this a few times, since computing the cardinality is currently not
// as cheap as it should be
double error = (khll.cardinality() - cardinality) * 1.0 / cardinality;
StandardDeviation stdev = errors.computeIfAbsent(cardinality, k -> new StandardDeviation());
stdev.increment(error);
}
}
}
double expectedStandardError = 1.04 / Math.sqrt(1 << indexBits);
for (Map.Entry<Integer, StandardDeviation> entry : errors.entrySet()) {
// Give an extra error margin. This is mostly a sanity check to catch egregious errors
double realStandardError = entry.getValue().getResult();
assertTrue(realStandardError <= expectedStandardError * 1.1, String.format("Failed at p = %s, cardinality = %s. Expected std error = %s, actual = %s", indexBits, entry.getKey(), expectedStandardError, realStandardError));
}
}
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project presto by prestodb.
the class TestLongStdDevAggregation method getExpectedValue.
@Override
public Number getExpectedValue(int start, int length) {
if (length < 2) {
return null;
}
double[] values = new double[length];
for (int i = 0; i < length; i++) {
values[i] = start + i;
}
StandardDeviation stdDev = new StandardDeviation();
return stdDev.evaluate(values);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project ontrack by nemerosa.
the class JobScatteringTest method scatteringInSameType.
@Test
public void scatteringInSameType() {
// Scheduler
DefaultJobScheduler scheduler = new DefaultJobScheduler(NOPJobDecorator.INSTANCE, new SynchronousScheduledExecutorService(), NOPJobListener.INSTANCE, false, true, 1.0);
// Creates a list of jobs with a weak key
List<TestJob> jobs = TestUtils.range(1, 100).stream().map(i -> TestJob.of(String.format("%d", i))).collect(Collectors.toList());
// Orchestration of all those jobs every 6 hours
Collection<JobOrchestratorSupplier> jobOrchestratorSupplier = Collections.singletonList(() -> jobs.stream().map(j -> JobRegistration.of(j).everyMinutes(6 * 60)));
// Orchestrator
JobOrchestrator orchestrator = new JobOrchestrator(scheduler, "Orchestrator", jobOrchestratorSupplier);
// Scheduling the orchestrator (manual mode)
scheduler.schedule(orchestrator, Schedule.NONE);
// Launching the orchestrator (manually)
orchestrator.orchestrate(JobRunListener.out());
// Getting the actual schedules of the jobs
List<Schedule> actualSchedules = jobs.stream().map(job -> scheduler.getJobStatus(job.getKey())).filter(Optional::isPresent).map(Optional::get).map(JobStatus::getActualSchedule).collect(Collectors.toList());
List<Long> initialPeriods = actualSchedules.stream().map(Schedule::getInitialPeriod).collect(Collectors.toList());
initialPeriods.forEach(l -> System.out.format("--> %d%n", l));
// Checks that all jobs have been scheduled
assertEquals("All jobs have been scheduled", jobs.size(), initialPeriods.size());
// Checks that all schedules more or less different
DescriptiveStatistics stats = new DescriptiveStatistics();
initialPeriods.forEach(stats::addValue);
// Gets the std deviation
double standardDeviation = stats.getStandardDeviation();
double max = stats.getMax();
// Gets this in minutes (this was returned in ms)
double stdDevMinutes = TimeUnit.MINUTES.convert((long) standardDeviation, TimeUnit.MILLISECONDS);
double maxMinutes = TimeUnit.MINUTES.convert((long) max, TimeUnit.MILLISECONDS);
// It must be >> 0
assertTrue("Std deviation must be >> 0", stdDevMinutes > 60.0);
System.out.println("Max = " + maxMinutes);
assertTrue("Max is <= period", maxMinutes <= 6 * 60.0);
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.
the class MatrixSummaryUtils method getRowVariances.
/**
* Return an array containing the variance for each row in the given matrix.
* @param m Not {@code null}. Size MxN. If any entry is NaN, the corresponding rows will have a
* variance of NaN.
* @return array of size M. Never {@code null} IF there is only one column (or only one entry
*/
public static double[] getRowVariances(final RealMatrix m) {
Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
final StandardDeviation std = new StandardDeviation();
return IntStream.range(0, m.getRowDimension()).boxed().mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2)).toArray();
}
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk-protected by broadinstitute.
the class GibbsSamplerCopyRatioUnitTest method testRunMCMCOnCopyRatioSegmentedGenome.
/**
* Tests Bayesian inference of a toy copy-ratio model via MCMC.
* <p>
* Recovery of input values for the variance global parameter and the segment-level mean parameters is checked.
* In particular, the mean and standard deviation of the posterior for the variance must be recovered to within
* a relative error of 1% and 5%, respectively, in 500 samples (after 250 burn-in samples have been discarded).
* </p>
* <p>
* Furthermore, the number of truth values for the segment-level means falling outside confidence intervals of
* 1-sigma, 2-sigma, and 3-sigma given by the posteriors in each segment should be roughly consistent with
* a normal distribution (i.e., ~32, ~5, and ~0, respectively; we allow for errors of 10, 5, and 2).
* Finally, the mean of the standard deviations of the posteriors for the segment-level means should be
* recovered to within a relative error of 5%.
* </p>
* <p>
* With these specifications, this unit test is not overly brittle (i.e., it should pass for a large majority
* of randomly generated data sets), but it is still brittle enough to check for correctness of the sampling
* (for example, specifying a sufficiently incorrect likelihood will cause the test to fail).
* </p>
*/
@Test
public void testRunMCMCOnCopyRatioSegmentedGenome() {
//Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
final CopyRatioModeller modeller = new CopyRatioModeller(VARIANCE_INITIAL, MEAN_INITIAL, COVERAGES_FILE, NUM_TARGETS_PER_SEGMENT_FILE);
//Create a GibbsSampler, passing the total number of samples (including burn-in samples)
//and the model held by the Modeller.
final GibbsSampler<CopyRatioParameter, CopyRatioState, CopyRatioDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
//Run the MCMC.
gibbsSampler.runMCMC();
//Check that the statistics---i.e., the mean and standard deviation---of the variance posterior
//agree with those found by emcee/analytically to a relative error of 1% and 5%, respectively.
final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(CopyRatioParameter.VARIANCE, Double.class, NUM_BURN_IN));
final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
//Check statistics---i.e., the mean and standard deviation---of the segment-level mean posteriors.
//In particular, check that the number of segments where the true mean falls outside confidence intervals
//is roughly consistent with a normal distribution.
final List<Double> meansTruth = loadList(MEANS_TRUTH_FILE, Double::parseDouble);
final int numSegments = meansTruth.size();
final List<SegmentMeans> meansSamples = gibbsSampler.getSamples(CopyRatioParameter.SEGMENT_MEANS, SegmentMeans.class, NUM_BURN_IN);
int numMeansOutsideOneSigma = 0;
int numMeansOutsideTwoSigma = 0;
int numMeansOutsideThreeSigma = 0;
final List<Double> meanPosteriorStandardDeviations = new ArrayList<>();
for (int segment = 0; segment < numSegments; segment++) {
final int j = segment;
final double[] meanInSegmentSamples = Doubles.toArray(meansSamples.stream().map(s -> s.get(j)).collect(Collectors.toList()));
final double meanPosteriorCenter = new Mean().evaluate(meanInSegmentSamples);
final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanInSegmentSamples);
meanPosteriorStandardDeviations.add(meanPosteriorStandardDeviation);
final double absoluteDifferenceFromTruth = Math.abs(meanPosteriorCenter - meansTruth.get(segment));
if (absoluteDifferenceFromTruth > meanPosteriorStandardDeviation) {
numMeansOutsideOneSigma++;
}
if (absoluteDifferenceFromTruth > 2 * meanPosteriorStandardDeviation) {
numMeansOutsideTwoSigma++;
}
if (absoluteDifferenceFromTruth > 3 * meanPosteriorStandardDeviation) {
numMeansOutsideThreeSigma++;
}
}
final double meanPosteriorStandardDeviationsMean = new Mean().evaluate(Doubles.toArray(meanPosteriorStandardDeviations));
Assert.assertEquals(numMeansOutsideOneSigma, 100 - 68, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA);
Assert.assertEquals(numMeansOutsideTwoSigma, 100 - 95, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA);
Assert.assertTrue(numMeansOutsideThreeSigma <= DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA);
Assert.assertEquals(relativeError(meanPosteriorStandardDeviationsMean, MEAN_POSTERIOR_STANDARD_DEVIATION_MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
Aggregations