Use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project urban-eureka by errir503:
the class AbstractTestApproximateCountDistinct, method testMultiplePositions.
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError) {
    // Collect the relative estimation error over many random samples, then
    // verify both the bias (mean) and the spread (standard deviation) are small.
    DescriptiveStatistics errorStats = new DescriptiveStatistics();
    final int trials = 500;
    for (int trial = 0; trial < trials; ++trial) {
        int cardinality = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
        // Sample 1.5x as many values as there are distinct ones, so duplicates occur.
        List<Object> sample = createRandomSample(cardinality, (int) (cardinality * 1.5));
        long estimate = estimateGroupByCount(sample, maxStandardError);
        double relativeError = (estimate - cardinality) * 1.0 / cardinality;
        errorStats.addValue(relativeError);
    }
    // The estimator should be nearly unbiased...
    assertLessThan(errorStats.getMean(), 1.0e-2);
    // ...and its spread should stay within the configured standard error plus slack.
    assertLessThan(errorStats.getStandardDeviation(), 1.0e-2 + maxStandardError);
}
Use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project concourse by cinchapi:
the class TrackingMultimap, method spread.
/**
 * Return a relative measure of the statistical dispersion in this data.
 * <p>
 * There are several ways to measure statistical dispersion, so callers
 * should not rely on a specific underlying implementation because it may
 * change over time. This method simply offers a value that allows for
 * comparison of dispersion across data sets.
 * </p>
 * <p>
 * A larger dispersion value means that the data is more spread out whereas
 * a smaller dispersion value indicates the opposite.
 * </p>
 *
 * @return the dispersion value for this data
 */
public double spread() {
    // Get the quartile coefficient of dispersion, which is a cross
    // dataset mechanism for comparing the relative dispersion of data.
    double[] frequencies = new double[size()];
    // AtomicInteger is used as a mutable index because the lambda below
    // requires effectively-final captures.
    AtomicInteger index = new AtomicInteger(0);
    data.values().forEach(records -> frequencies[index.getAndIncrement()] = records.size());
    DescriptiveStatistics stats = new DescriptiveStatistics(frequencies);
    // Hoist the mean: it was previously recomputed on every loop iteration
    // and in two further expressions, even though it never changes.
    double mean = stats.getMean();
    double p1 = stats.getPercentile(25);
    double p3 = stats.getPercentile(75);
    double coefficientOfDispersion = (p3 - p1) / (p3 + p1);
    // Grab the coefficient of variance (stddev relative to the mean).
    double coefficientOfVariance = stats.getStandardDeviation() / mean;
    // Calculate the average absolute deviation from the mean
    double[] deviations = new double[frequencies.length];
    for (int i = 0; i < deviations.length; ++i) {
        deviations[i] = Math.abs(frequencies[i] - mean);
    }
    double averageAbsoluteDeviation = StatUtils.mean(deviations) / mean;
    // Apply a weighting to the various components
    return (0.50 * coefficientOfDispersion) + (0.40 * coefficientOfVariance) + (0.10 * averageAbsoluteDeviation);
}
Use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project SensorThingsProcessor by FraunhoferIOSB:
the class Aggregator, method calculateAggregateResultFromOriginalLists.
/**
 * Aggregates the list-valued results of the given source observations over the
 * given interval, returning [average, minimum, maximum, standard deviation].
 * Observations whose result is not a list, or whose phenomenon time is not an
 * interval, are skipped with an error log.
 *
 * @param interval the aggregation window
 * @param sourceObs the observations whose (list) results are aggregated
 * @return a four-element list: avg, min, max, stddev
 */
public List<BigDecimal> calculateAggregateResultFromOriginalLists(Interval interval, List<Observation> sourceObs) {
    List<BigDecimal> result;
    int scale = 0;
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Sentinels: every real observation value compares below/above these.
    BigDecimal min = new BigDecimal(Double.MAX_VALUE);
    BigDecimal max = new BigDecimal(-Double.MAX_VALUE);
    for (Observation obs : sourceObs) {
        Object obsResultObj = obs.getResult();
        if (!(obsResultObj instanceof List)) {
            LOGGER.error("Expected list result, got {}", obsResultObj == null ? obsResultObj : obsResultObj.getClass().getName());
            continue;
        }
        // Wildcard instead of a raw type; elements are handled as Object below.
        List<?> list = (List<?>) obsResultObj;
        TimeObject phenomenonTime = obs.getPhenomenonTime();
        if (!phenomenonTime.isInterval()) {
            LOGGER.error("Expected phenTime to be an interval.");
            continue;
        }
        Interval phenInterval = phenomenonTime.getAsInterval();
        int itemCount = list.size();
        int firstItem = 0;
        int lastItem = itemCount - 1;
        // Items are treated as evenly spaced over the observation's phenomenon interval.
        double itemDistMillis = ((double) phenInterval.toDuration().toMillis()) / itemCount;
        if (phenInterval.getStart().isBefore(interval.getStart())) {
            // Skip items that fall before the aggregation window.
            long skipMillis = Duration.between(phenInterval.getStart(), interval.getStart()).toMillis();
            firstItem = (int) (skipMillis / itemDistMillis);
        }
        if (phenInterval.getEnd().isAfter(interval.getEnd())) {
            // Skip items that fall after the aggregation window.
            long skipMillis = Duration.between(interval.getEnd(), phenInterval.getEnd()).toMillis();
            int skipEnd = (int) (skipMillis / itemDistMillis);
            lastItem -= skipEnd;
        }
        for (int i = firstItem; i <= lastItem && i < itemCount; i++) {
            BigDecimal number = handleResult(list.get(i));
            if (number == null) {
                LOGGER.warn("Empty result in {}", obs);
                continue;
            }
            // Track the widest scale seen, so the aggregate is not reported
            // with more precision than the inputs had.
            scale = Math.max(getScale(number), scale);
            stats.addValue(number.doubleValue());
            min = number.compareTo(min) < 0 ? number : min;
            max = number.compareTo(max) > 0 ? number : max;
        }
    }
    // NOTE(review): if no values were aggregated, getMean()/getStandardDeviation()
    // return NaN and BigDecimal creation throws NumberFormatException — confirm
    // callers never supply an empty window.
    // valueOf uses the canonical decimal form of the double instead of the exact
    // binary expansion produced by the deprecated-in-practice new BigDecimal(double).
    BigDecimal avg = BigDecimal.valueOf(stats.getMean());
    BigDecimal dev = BigDecimal.valueOf(stats.getStandardDeviation());
    result = new ArrayList<>(4);
    result.add(avg.setScale(Math.min(scale, avg.scale()), RoundingMode.HALF_UP));
    result.add(min);
    result.add(max);
    result.add(dev.setScale(Math.min(scale, dev.scale()), RoundingMode.HALF_UP));
    return result;
}
Use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project ozone by apache:
the class TestContainerPlacement, method testCapacityPlacementYieldsBetterDataDistribution.
/**
 * Simulates a large amount of cluster I/O and updates the metadata in SCM
 * by repeatedly creating and deleting containers. Asserts that the
 * capacity-aware placement policy takes node capacity into account by
 * checking that the standard deviation of used space has improved.
 */
@Test
public void testCapacityPlacementYieldsBetterDataDistribution() throws SCMException {
    final int opsCount = 200 * 1000;
    final int nodesRequired = 3;
    Random random = new Random();
    // MockNodeManager's init code yields similar clusters by nature.
    MockNodeManager capacityManager = new MockNodeManager(true, 100);
    MockNodeManager randomManager = new MockNodeManager(true, 100);
    DescriptiveStatistics capacityBaseline = computeStatistics(capacityManager);
    DescriptiveStatistics randomBaseline = computeStatistics(randomManager);
    // Both clusters must start out with a near-identical layout.
    assertEquals(capacityBaseline.getStandardDeviation(), randomBaseline.getStandardDeviation(), 0.001);
    SCMContainerPlacementCapacity capacityPlacer = new SCMContainerPlacementCapacity(capacityManager, new OzoneConfiguration(), null, true, null);
    SCMContainerPlacementRandom randomPlacer = new SCMContainerPlacementRandom(randomManager, new OzoneConfiguration(), null, true, null);
    for (int op = 0; op < opsCount; op++) {
        long containerSize = random.nextInt(10) * OzoneConsts.GB;
        long metadataSize = random.nextInt(10) * OzoneConsts.GB;
        List<DatanodeDetails> capacityChoice = capacityPlacer.chooseDatanodes(new ArrayList<>(), null, nodesRequired, metadataSize, containerSize);
        assertEquals(nodesRequired, capacityChoice.size());
        List<DatanodeDetails> randomChoice = randomPlacer.chooseDatanodes(capacityChoice, null, nodesRequired, metadataSize, containerSize);
        // Every fifth operation is a delete; the rest are creates.
        if (op % 5 == 0) {
            deleteContainer(capacityManager, capacityChoice, containerSize);
            deleteContainer(randomManager, randomChoice, containerSize);
        } else {
            createContainer(capacityManager, capacityChoice, containerSize);
            createContainer(randomManager, randomChoice, containerSize);
        }
    }
    DescriptiveStatistics capacityAfter = computeStatistics(capacityManager);
    DescriptiveStatistics randomAfter = computeStatistics(randomManager);
    // This is a bold claim that needs a large number of I/O operations: the
    // data distribution of the capacity-managed cluster improved relative to
    // the cluster's starting state.
    Assert.assertTrue(capacityBaseline.getStandardDeviation() > capacityAfter.getStandardDeviation());
    // Capacity placement also beats random placement, since both clusters
    // started from an identical state.
    Assert.assertTrue(randomAfter.getStandardDeviation() > capacityAfter.getStandardDeviation());
}
Use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project scout.rt by eclipse-scout:
the class TableLoadingPerfTest, method assertMean.
/**
 * Asserts that the trimmed mean of the given durations is below the expected
 * bound. The durations are sorted and the smallest and largest samples are
 * discarded to reduce the influence of outliers.
 *
 * @param durations the measured durations; sorted in place as a side effect
 * @param expectedMean the exclusive upper bound for the trimmed mean
 */
private void assertMean(Long[] durations, int expectedMean) {
    Arrays.sort(durations);
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Drop the first and last element (min and max) as outliers.
    for (int i = 1; i < durations.length - 1; i++) {
        stats.addValue(durations[i]);
    }
    double avgDuration = stats.getMean();
    // Bug fix: the failure message previously hard-coded "Mean<100" even though
    // the actual bound is the expectedMean parameter.
    assertTrue(String.format("Expected Mean<%s Mean:%s Variance:%s", expectedMean, avgDuration, stats.getVariance()), avgDuration < expectedMean);
}
Aggregations