Search in sources :

Example 6 with StandardDeviation

use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project presto by prestodb.

the class TestKHyperLogLog method testCardinality.

@Test
public void testCardinality() throws Exception {
    int trials = 1000;
    for (int indexBits = 4; indexBits <= 12; indexBits++) {
        Map<Integer, StandardDeviation> errors = new HashMap<>();
        int numberOfBuckets = 1 << indexBits;
        int maxCardinality = numberOfBuckets * 2;
        for (int trial = 0; trial < trials; trial++) {
            KHyperLogLog khll = new KHyperLogLog();
            for (int cardinality = 1; cardinality <= maxCardinality; cardinality++) {
                khll.add(ThreadLocalRandom.current().nextLong(), 0L);
                if (cardinality % (numberOfBuckets / 10) == 0) {
                    // only do this a few times, since computing the cardinality is currently not
                    // as cheap as it should be
                    double error = (khll.cardinality() - cardinality) * 1.0 / cardinality;
                    StandardDeviation stdev = errors.computeIfAbsent(cardinality, k -> new StandardDeviation());
                    stdev.increment(error);
                }
            }
        }
        double expectedStandardError = 1.04 / Math.sqrt(1 << indexBits);
        for (Map.Entry<Integer, StandardDeviation> entry : errors.entrySet()) {
            // Give an extra error margin. This is mostly a sanity check to catch egregious errors
            double realStandardError = entry.getValue().getResult();
            assertTrue(realStandardError <= expectedStandardError * 1.1, String.format("Failed at p = %s, cardinality = %s. Expected std error = %s, actual = %s", indexBits, entry.getKey(), expectedStandardError, realStandardError));
        }
    }
}
Also used : HashMap(java.util.HashMap) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.testng.annotations.Test)

Example 7 with StandardDeviation

use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project presto by prestodb.

the class TestLongStdDevAggregation method getExpectedValue.

@Override
public Number getExpectedValue(int start, int length) {
    if (length < 2) {
        return null;
    }
    double[] values = new double[length];
    for (int i = 0; i < length; i++) {
        values[i] = start + i;
    }
    StandardDeviation stdDev = new StandardDeviation();
    return stdDev.evaluate(values);
}
Also used : StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)

Example 8 with StandardDeviation

use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project ontrack by nemerosa.

the class JobScatteringTest method scatteringInSameType.

@Test
public void scatteringInSameType() {
    // Scheduler
    DefaultJobScheduler scheduler = new DefaultJobScheduler(NOPJobDecorator.INSTANCE, new SynchronousScheduledExecutorService(), NOPJobListener.INSTANCE, false, true, 1.0);
    // Creates a list of jobs with a weak key
    List<TestJob> jobs = TestUtils.range(1, 100).stream().map(i -> TestJob.of(String.format("%d", i))).collect(Collectors.toList());
    // Orchestration of all those jobs every 6 hours
    Collection<JobOrchestratorSupplier> jobOrchestratorSupplier = Collections.singletonList(() -> jobs.stream().map(j -> JobRegistration.of(j).everyMinutes(6 * 60)));
    // Orchestrator
    JobOrchestrator orchestrator = new JobOrchestrator(scheduler, "Orchestrator", jobOrchestratorSupplier);
    // Scheduling the orchestrator (manual mode)
    scheduler.schedule(orchestrator, Schedule.NONE);
    // Launching the orchestrator (manually)
    orchestrator.orchestrate(JobRunListener.out());
    // Getting the actual schedules of the jobs
    List<Schedule> actualSchedules = jobs.stream().map(job -> scheduler.getJobStatus(job.getKey())).filter(Optional::isPresent).map(Optional::get).map(JobStatus::getActualSchedule).collect(Collectors.toList());
    List<Long> initialPeriods = actualSchedules.stream().map(Schedule::getInitialPeriod).collect(Collectors.toList());
    initialPeriods.forEach(l -> System.out.format("--> %d%n", l));
    // Checks that all jobs have been scheduled
    assertEquals("All jobs have been scheduled", jobs.size(), initialPeriods.size());
    // Checks that all schedules more or less different
    DescriptiveStatistics stats = new DescriptiveStatistics();
    initialPeriods.forEach(stats::addValue);
    // Gets the std deviation
    double standardDeviation = stats.getStandardDeviation();
    double max = stats.getMax();
    // Gets this in minutes (this was returned in ms)
    double stdDevMinutes = TimeUnit.MINUTES.convert((long) standardDeviation, TimeUnit.MILLISECONDS);
    double maxMinutes = TimeUnit.MINUTES.convert((long) max, TimeUnit.MILLISECONDS);
    // It must be >> 0
    assertTrue("Std deviation must be >> 0", stdDevMinutes > 60.0);
    System.out.println("Max = " + maxMinutes);
    assertTrue("Max is <= period", maxMinutes <= 6 * 60.0);
}
Also used : JobOrchestrator(net.nemerosa.ontrack.job.orchestrator.JobOrchestrator) MessageDigest(java.security.MessageDigest) Collection(java.util.Collection) Test(org.junit.Test) Collectors(java.util.stream.Collectors) TestUtils(net.nemerosa.ontrack.test.TestUtils) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) JobOrchestratorSupplier(net.nemerosa.ontrack.job.orchestrator.JobOrchestratorSupplier) Optional(java.util.Optional) Assert(org.junit.Assert) Collections(java.util.Collections) net.nemerosa.ontrack.job(net.nemerosa.ontrack.job) JobOrchestrator(net.nemerosa.ontrack.job.orchestrator.JobOrchestrator) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) Optional(java.util.Optional) JobOrchestratorSupplier(net.nemerosa.ontrack.job.orchestrator.JobOrchestratorSupplier) Test(org.junit.Test)

Example 9 with StandardDeviation

use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.

the class MatrixSummaryUtils method getRowVariances.

/**
     * Return an array containing the variance for each row in the given matrix.
     * @param m Not {@code null}.  Size MxN.    If any entry is NaN, the corresponding rows will have a
     *          variance of NaN.
     * @return array of size M.  Never {@code null}  IF there is only one column (or only one entry
     */
public static double[] getRowVariances(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final StandardDeviation std = new StandardDeviation();
    return IntStream.range(0, m.getRowDimension()).boxed().mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2)).toArray();
}
Also used : IntStream(java.util.stream.IntStream) Median(org.apache.commons.math3.stat.descriptive.rank.Median) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) RealMatrix(org.apache.commons.math3.linear.RealMatrix) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)

Example 10 with StandardDeviation

use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk-protected by broadinstitute.

the class GibbsSamplerCopyRatioUnitTest method testRunMCMCOnCopyRatioSegmentedGenome.

/**
     * Tests Bayesian inference of a toy copy-ratio model via MCMC.
     * <p>
     *     Recovery of input values for the variance global parameter and the segment-level mean parameters is checked.
     *     In particular, the mean and standard deviation of the posterior for the variance must be recovered to within
     *     a relative error of 1% and 5%, respectively, in 500 samples (after 250 burn-in samples have been discarded).
     * </p>
     * <p>
     *     Furthermore, the number of truth values for the segment-level means falling outside confidence intervals of
     *     1-sigma, 2-sigma, and 3-sigma given by the posteriors in each segment should be roughly consistent with
     *     a normal distribution (i.e., ~32, ~5, and ~0, respectively; we allow for errors of 10, 5, and 2).
     *     Finally, the mean of the standard deviations of the posteriors for the segment-level means should be
     *     recovered to within a relative error of 5%.
     * </p>
     * <p>
     *     With these specifications, this unit test is not overly brittle (i.e., it should pass for a large majority
     *     of randomly generated data sets), but it is still brittle enough to check for correctness of the sampling
     *     (for example, specifying a sufficiently incorrect likelihood will cause the test to fail).
     * </p>
     */
@Test
public void testRunMCMCOnCopyRatioSegmentedGenome() {
    //Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
    final CopyRatioModeller modeller = new CopyRatioModeller(VARIANCE_INITIAL, MEAN_INITIAL, COVERAGES_FILE, NUM_TARGETS_PER_SEGMENT_FILE);
    //Create a GibbsSampler, passing the total number of samples (including burn-in samples)
    //and the model held by the Modeller.
    final GibbsSampler<CopyRatioParameter, CopyRatioState, CopyRatioDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
    //Run the MCMC.
    gibbsSampler.runMCMC();
    //Check that the statistics---i.e., the mean and standard deviation---of the variance posterior
    //agree with those found by emcee/analytically to a relative error of 1% and 5%, respectively.
    final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(CopyRatioParameter.VARIANCE, Double.class, NUM_BURN_IN));
    final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
    final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
    Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
    //Check statistics---i.e., the mean and standard deviation---of the segment-level mean posteriors.
    //In particular, check that the number of segments where the true mean falls outside confidence intervals
    //is roughly consistent with a normal distribution.
    final List<Double> meansTruth = loadList(MEANS_TRUTH_FILE, Double::parseDouble);
    final int numSegments = meansTruth.size();
    final List<SegmentMeans> meansSamples = gibbsSampler.getSamples(CopyRatioParameter.SEGMENT_MEANS, SegmentMeans.class, NUM_BURN_IN);
    int numMeansOutsideOneSigma = 0;
    int numMeansOutsideTwoSigma = 0;
    int numMeansOutsideThreeSigma = 0;
    final List<Double> meanPosteriorStandardDeviations = new ArrayList<>();
    for (int segment = 0; segment < numSegments; segment++) {
        final int j = segment;
        final double[] meanInSegmentSamples = Doubles.toArray(meansSamples.stream().map(s -> s.get(j)).collect(Collectors.toList()));
        final double meanPosteriorCenter = new Mean().evaluate(meanInSegmentSamples);
        final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanInSegmentSamples);
        meanPosteriorStandardDeviations.add(meanPosteriorStandardDeviation);
        final double absoluteDifferenceFromTruth = Math.abs(meanPosteriorCenter - meansTruth.get(segment));
        if (absoluteDifferenceFromTruth > meanPosteriorStandardDeviation) {
            numMeansOutsideOneSigma++;
        }
        if (absoluteDifferenceFromTruth > 2 * meanPosteriorStandardDeviation) {
            numMeansOutsideTwoSigma++;
        }
        if (absoluteDifferenceFromTruth > 3 * meanPosteriorStandardDeviation) {
            numMeansOutsideThreeSigma++;
        }
    }
    final double meanPosteriorStandardDeviationsMean = new Mean().evaluate(Doubles.toArray(meanPosteriorStandardDeviations));
    Assert.assertEquals(numMeansOutsideOneSigma, 100 - 68, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA);
    Assert.assertEquals(numMeansOutsideTwoSigma, 100 - 95, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA);
    Assert.assertTrue(numMeansOutsideThreeSigma <= DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA);
    Assert.assertEquals(relativeError(meanPosteriorStandardDeviationsMean, MEAN_POSTERIOR_STANDARD_DEVIATION_MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
Also used : Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) ArrayList(java.util.ArrayList) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

StandardDeviation (org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)20 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)11 Test (org.testng.annotations.Test)9 Collectors (java.util.stream.Collectors)4 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)4 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)4 List (java.util.List)3 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 IntStream (java.util.stream.IntStream)2 RealMatrix (org.apache.commons.math3.linear.RealMatrix)2 BrentOptimizer (org.apache.commons.math3.optim.univariate.BrentOptimizer)2 SearchInterval (org.apache.commons.math3.optim.univariate.SearchInterval)2 UnivariateObjectiveFunction (org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction)2 Median (org.apache.commons.math3.stat.descriptive.rank.Median)2 LogManager (org.apache.logging.log4j.LogManager)2 Logger (org.apache.logging.log4j.Logger)2 KernelDensity (org.apache.spark.mllib.stat.KernelDensity)2 Utils (org.broadinstitute.hellbender.utils.Utils)2 StandardFormat (de.bioforscher.jstructure.StandardFormat)1