Search in sources:

Example 71 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project urban-eureka by errir503.

From the class AbstractTestApproximateCountDistinct, the method testMultiplePositions.

@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError) {
    // Collect the relative estimation error over many random samples.
    DescriptiveStatistics errorStats = new DescriptiveStatistics();
    final int trials = 500;
    for (int trial = 0; trial < trials; trial++) {
        int uniques = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
        int sampleSize = (int) (uniques * 1.5);
        List<Object> values = createRandomSample(uniques, sampleSize);
        long estimate = estimateGroupByCount(values, maxStandardError);
        double relativeError = (estimate - uniques) * 1.0 / uniques;
        errorStats.addValue(relativeError);
    }
    // The estimator should be close to unbiased, and its spread should stay
    // within the configured standard-error bound (plus a small tolerance).
    assertLessThan(errorStats.getMean(), 1.0e-2);
    assertLessThan(errorStats.getStandardDeviation(), 1.0e-2 + maxStandardError);
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) Test(org.testng.annotations.Test)

Example 72 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project concourse by cinchapi.

From the class TrackingMultimap, the method spread.

/**
 * Return a relative measure of the statistical dispersion in this data.
 * <p>
 * There are several ways to measure statistical dispersion, so callers
 * should not rely on a specific underlying implementation because it may
 * change over time. This method simply offers a value that allows for
 * comparison of dispersion across data sets.
 * </p>
 * <p>
 * A larger dispersion value means that the data is more spread out whereas
 * a smaller dispersion value indicates the opposite.
 * </p>
 *
 * @return the dispersion value for this data
 */
public double spread() {
    // Collect the per-key record counts (frequencies) into a flat array.
    double[] frequencies = new double[size()];
    AtomicInteger index = new AtomicInteger(0);
    data.values().forEach(records -> frequencies[index.getAndIncrement()] = records.size());
    DescriptiveStatistics stats = new DescriptiveStatistics(frequencies);
    // Quartile coefficient of dispersion: a cross-dataset mechanism for
    // comparing the relative dispersion of data.
    double q1 = stats.getPercentile(25);
    double q3 = stats.getPercentile(75);
    double coefficientOfDispersion = (q3 - q1) / (q3 + q1);
    // Hoisted: DescriptiveStatistics#getMean() recomputes over all stored
    // values on each call, so calling it inside the loop below was O(n^2).
    double mean = stats.getMean();
    // Coefficient of variation (stddev relative to the mean).
    double coefficientOfVariance = stats.getStandardDeviation() / mean;
    // Average absolute deviation from the mean, normalized by the mean.
    double[] deviations = new double[frequencies.length];
    for (int i = 0; i < deviations.length; ++i) {
        deviations[i] = Math.abs(frequencies[i] - mean);
    }
    double averageAbsoluteDeviation = StatUtils.mean(deviations) / mean;
    // Weighted blend of the three dispersion measures.
    // NOTE(review): if the mean or (q3 + q1) is zero this yields NaN/Infinity;
    // presumably frequencies are always positive here — confirm with callers.
    return (0.50 * coefficientOfDispersion) + (0.40 * coefficientOfVariance) + (0.10 * averageAbsoluteDeviation);
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)

Example 73 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project SensorThingsProcessor by FraunhoferIOSB.

From the class Aggregator, the method calculateAggregateResultFromOriginalLists.

/**
 * Aggregates the list-valued results of the given observations that overlap
 * {@code interval} into four values: [mean, min, max, standard deviation].
 * <p>
 * Observations whose result is not a {@code List}, or whose phenomenon time
 * is not an interval, are logged and skipped.
 *
 * @param interval the aggregation window
 * @param sourceObs the observations whose list results are aggregated
 * @return a list of four BigDecimals: mean, min, max, standard deviation
 */
public List<BigDecimal> calculateAggregateResultFromOriginalLists(Interval interval, List<Observation> sourceObs) {
    List<BigDecimal> result;
    // Largest decimal scale seen in the inputs; the aggregates are rounded
    // back to at most this scale so they are not reported with false precision.
    int scale = 0;
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Sentinels: any real value compares below/above these.
    // NOTE(review): if no valid values are found, these sentinels are returned
    // unchanged and mean/stddev are NaN — confirm callers handle that case.
    BigDecimal min = new BigDecimal(Double.MAX_VALUE);
    BigDecimal max = new BigDecimal(-Double.MAX_VALUE);
    for (Observation obs : sourceObs) {
        Object obsResultObj = obs.getResult();
        if (!(obsResultObj instanceof List)) {
            LOGGER.error("Expected list result, got {}", obsResultObj == null ? obsResultObj : obsResultObj.getClass().getName());
            continue;
        }
        List list = (List) obsResultObj;
        TimeObject phenomenonTime = obs.getPhenomenonTime();
        if (!phenomenonTime.isInterval()) {
            LOGGER.error("Expected phenTime to be an interval.");
            continue;
        }
        Interval phenInterval = phenomenonTime.getAsInterval();
        int itemCount = list.size();
        int firstItem = 0;
        int lastItem = itemCount - 1;
        // Assumes the list items are evenly spaced across the phenomenon
        // interval — itemDistMillis is the time covered by one item.
        double itemDistMillis = ((double) phenInterval.toDuration().toMillis()) / itemCount;
        // Drop items that fall before the start of the requested interval.
        if (phenInterval.getStart().isBefore(interval.getStart())) {
            long skipMillis = Duration.between(phenInterval.getStart(), interval.getStart()).toMillis();
            firstItem = (int) (skipMillis / itemDistMillis);
        }
        // Drop items that fall after the end of the requested interval.
        if (phenInterval.getEnd().isAfter(interval.getEnd())) {
            long skipMillis = Duration.between(interval.getEnd(), phenInterval.getEnd()).toMillis();
            int skipEnd = (int) (skipMillis / itemDistMillis);
            lastItem -= skipEnd;
        }
        for (int i = firstItem; i <= lastItem && i < itemCount; i++) {
            BigDecimal number = handleResult(list.get(i));
            if (number == null) {
                LOGGER.warn("Empty result in {}", obs);
                continue;
            }
            scale = Math.max(getScale(number), scale);
            stats.addValue(number.doubleValue());
            min = number.compareTo(min) < 0 ? number : min;
            max = number.compareTo(max) > 0 ? number : max;
        }
    }
    // Mean and stddev come from the double-based statistics and are rounded
    // to at most the largest input scale; min/max keep their original scale.
    BigDecimal avg = new BigDecimal(stats.getMean());
    BigDecimal dev = new BigDecimal(stats.getStandardDeviation());
    result = new ArrayList<>(4);
    result.add(avg.setScale(Math.min(scale, avg.scale()), RoundingMode.HALF_UP));
    result.add(min);
    result.add(max);
    result.add(dev.setScale(Math.min(scale, dev.scale()), RoundingMode.HALF_UP));
    return result;
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) Observation(de.fraunhofer.iosb.ilt.sta.model.Observation) TimeObject(de.fraunhofer.iosb.ilt.sta.model.TimeObject) TimeObject(de.fraunhofer.iosb.ilt.sta.model.TimeObject) List(java.util.List) ArrayList(java.util.ArrayList) BigDecimal(java.math.BigDecimal) Interval(org.threeten.extra.Interval)

Example 74 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project ozone by apache.

From the class TestContainerPlacement, the method testCapacityPlacementYieldsBetterDataDistribution.

/**
 * This test simulates lots of Cluster I/O and updates the metadata in SCM.
 * We simulate adding and removing containers from the cluster. It asserts
 * that our placement algorithm has taken the capacity of nodes into
 * consideration by asserting that the standard deviation of used space on
 * these nodes has improved.
 */
@Test
public void testCapacityPlacementYieldsBetterDataDistribution() throws SCMException {
    final int opsCount = 200 * 1000;
    final int nodesRequired = 3;
    Random random = new Random();
    // The nature of init code in MockNodeManager yields similar clusters,
    // so the two managers start from comparable layouts.
    MockNodeManager nodeManagerCapacity = new MockNodeManager(true, 100);
    MockNodeManager nodeManagerRandom = new MockNodeManager(true, 100);
    DescriptiveStatistics beforeCapacity = computeStatistics(nodeManagerCapacity);
    DescriptiveStatistics beforeRandom = computeStatistics(nodeManagerRandom);
    // Assert that our initial layout of clusters are similar.
    assertEquals(beforeCapacity.getStandardDeviation(), beforeRandom.getStandardDeviation(), 0.001);
    SCMContainerPlacementCapacity capacityPlacer = new SCMContainerPlacementCapacity(nodeManagerCapacity, new OzoneConfiguration(), null, true, null);
    SCMContainerPlacementRandom randomPlacer = new SCMContainerPlacementRandom(nodeManagerRandom, new OzoneConfiguration(), null, true, null);
    // Drive identical workloads through both placers.
    for (int x = 0; x < opsCount; x++) {
        long containerSize = random.nextInt(10) * OzoneConsts.GB;
        long metadataSize = random.nextInt(10) * OzoneConsts.GB;
        List<DatanodeDetails> nodesCapacity = capacityPlacer.chooseDatanodes(new ArrayList<>(), null, nodesRequired, metadataSize, containerSize);
        assertEquals(nodesRequired, nodesCapacity.size());
        List<DatanodeDetails> nodesRandom = randomPlacer.chooseDatanodes(nodesCapacity, null, nodesRequired, metadataSize, containerSize);
        // One fifth of all calls are delete
        if (x % 5 == 0) {
            deleteContainer(nodeManagerCapacity, nodesCapacity, containerSize);
            deleteContainer(nodeManagerRandom, nodesRandom, containerSize);
        } else {
            createContainer(nodeManagerCapacity, nodesCapacity, containerSize);
            createContainer(nodeManagerRandom, nodesRandom, containerSize);
        }
    }
    DescriptiveStatistics postCapacity = computeStatistics(nodeManagerCapacity);
    DescriptiveStatistics postRandom = computeStatistics(nodeManagerRandom);
    // This is a very bold claim, and needs large number of I/O operations.
    // The claim in this assertion is that we improved the data distribution
    // of this cluster in relation to the start state of the cluster.
    Assert.assertTrue(beforeCapacity.getStandardDeviation() > postCapacity.getStandardDeviation());
    // This asserts that Capacity placement yields a better placement
    // algorithm than random placement, since both cluster started at an
    // identical state.
    Assert.assertTrue(postRandom.getStandardDeviation() > postCapacity.getStandardDeviation());
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) Random(java.util.Random) SCMContainerPlacementRandom(org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRandom) SCMContainerPlacementCapacity(org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity) SCMContainerPlacementRandom(org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRandom) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) MockNodeManager(org.apache.hadoop.hdds.scm.container.MockNodeManager) Test(org.junit.Test)

Example 75 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project scout.rt by eclipse-scout.

From the class TableLoadingPerfTest, the method assertMean.

/**
 * Asserts that the trimmed mean of the given durations is below the
 * expected threshold.
 *
 * @param durations measured durations; sorted in place by this method
 * @param expectedMean exclusive upper bound for the trimmed mean
 */
private void assertMean(Long[] durations, int expectedMean) {
    // Sort so the fastest and slowest measurements can be excluded as outliers.
    Arrays.sort(durations);
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Trimmed mean: skip the first (minimum) and last (maximum) measurement.
    for (int i = 1; i < durations.length - 1; i++) {
        stats.addValue(durations[i]);
    }
    double avgDuration = stats.getMean();
    // Fixed: the failure message previously hard-coded "Mean<100" even though
    // the actual threshold is the expectedMean parameter.
    assertTrue(String.format("Expected Mean<%s Mean:%s Variance:%s", expectedMean, avgDuration, stats.getVariance()), avgDuration < expectedMean);
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)

Aggregations

DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)176 ArrayList (java.util.ArrayList)21 List (java.util.List)15 Test (org.testng.annotations.Test)15 Test (org.junit.Test)13 Test (org.junit.jupiter.api.Test)12 File (java.io.File)11 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)8 TException (org.apache.thrift.TException)7 Result (de.dagere.kopeme.generated.Result)6 Plot (ij.gui.Plot)6 HashMap (java.util.HashMap)6 AbstractMagmaTest (org.obiba.magma.test.AbstractMagmaTest)6 IOException (java.io.IOException)5 LinkedList (java.util.LinkedList)5 Map (java.util.Map)5 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)5 PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer)4 ImagePlus (ij.ImagePlus)4 ImageStack (ij.ImageStack)4