use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class QuantileSketchTest method testApproximateQuantilesWithNumberOfPoints.
/**
 * Checks the metadata and the approximate quantile records produced by a QUANTILE
 * distribution sketch after it has been fed 100 evenly spaced values.
 */
@Test
public void testApproximateQuantilesWithNumberOfPoints() {
    QuantileSketch sketch = new QuantileSketch(32, 2, Distribution.Type.QUANTILE, 11);
    // Feed exactly 100 values into the sketch: 0.1, 0.2, ..., 10.0.
    for (int i = 1; i < 101; i++) {
        sketch.update(i * 0.1);
    }

    Clip clip = sketch.getResult("meta", ALL_METADATA);
    Map<String, Object> sketchMeta = (Map<String, Object>) clip.getMeta().asMap().get("meta");

    // Metadata: seven entries are expected under the "meta" key.
    Assert.assertEquals(sketchMeta.size(), 7);
    Assert.assertTrue((Boolean) sketchMeta.get("isEst"));
    Assert.assertEquals((String) sketchMeta.get("family"), Family.QUANTILES.getFamilyName());
    Assert.assertTrue((Integer) sketchMeta.get("size") >= 100);

    // The reported normalized rank error must match what the sketch library computes for 32.
    double rankError = DoublesSketch.getNormalizedRankError(32);
    Assert.assertEquals(sketchMeta.get("nre"), rankError);
    Assert.assertEquals(sketchMeta.get("n"), 100L);
    assertApproxEquals((Double) sketchMeta.get("min"), 0.1);
    assertApproxEquals((Double) sketchMeta.get("max"), 10.0);

    // The inputs are the 100 values 0.1, 0.2, ..., 10.0, so the value at quantile q is roughly q * 10.
    // With a normalized rank error of ~6.3% (per the original note), the 50th percentile, for example,
    // is only known to lie between the true 43rd and 57th percentiles, i.e. roughly between 4.3 and 5.7.
    // Scaling the rank error by 10 therefore gives the epsilon that should hold with high probability.
    List<BulletRecord> quantileRecords = clip.getRecords();
    double tolerance = rankError * 10;
    for (BulletRecord quantileRecord : quantileRecords) {
        Double quantile = (Double) quantileRecord.get(QUANTILE_FIELD);
        Double value = (Double) quantileRecord.get(VALUE_FIELD);
        assertApproxEquals(value, quantile * 10, tolerance);
    }

    // The piecewise accessors must agree with the combined result.
    Assert.assertEquals(sketch.getRecords(), quantileRecords);
    Assert.assertEquals(sketch.getMetadata("meta", ALL_METADATA).asMap(), clip.getMeta().asMap());
}
use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class QuantileSketchTest method testNoDataCDFDistribution.
/**
 * Checks the metadata and the two placeholder records produced by a CDF distribution
 * sketch that never received any data.
 */
@Test
public void testNoDataCDFDistribution() {
    // No update() calls: the sketch stays empty.
    QuantileSketch emptySketch = new QuantileSketch(64, 2, Distribution.Type.CDF, 10);
    Clip clip = emptySketch.getResult("meta", ALL_METADATA);

    Map<String, Object> sketchMeta = (Map<String, Object>) clip.getMeta().asMap().get("meta");
    Assert.assertEquals(sketchMeta.size(), 7);
    Assert.assertFalse((Boolean) sketchMeta.get("isEst"));
    Assert.assertEquals(sketchMeta.get("n"), 0L);
    // With no items, min and max are expected to be the inverted infinities.
    Assert.assertEquals(sketchMeta.get("min"), Double.POSITIVE_INFINITY);
    Assert.assertEquals(sketchMeta.get("max"), Double.NEGATIVE_INFINITY);

    List<BulletRecord> cdfRecords = clip.getRecords();
    Assert.assertEquals(cdfRecords.size(), 2);

    // First record: exclusive range ending at positive infinity, with NaN probability and count.
    BulletRecord expectedExclusive = RecordBox.get()
            .add(RANGE_FIELD, NEGATIVE_INFINITY_START + SEPARATOR + Double.POSITIVE_INFINITY + END_EXCLUSIVE)
            .add(PROBABILITY_FIELD, Double.NaN)
            .add(COUNT_FIELD, Double.NaN)
            .getRecord();
    // Second record: the full range from negative to positive infinity, also NaN.
    BulletRecord expectedFullRange = RecordBox.get()
            .add(RANGE_FIELD, NEGATIVE_INFINITY_START + SEPARATOR + POSITIVE_INFINITY_END)
            .add(PROBABILITY_FIELD, Double.NaN)
            .add(COUNT_FIELD, Double.NaN)
            .getRecord();
    Assert.assertEquals(cdfRecords.get(0), expectedExclusive);
    Assert.assertEquals(cdfRecords.get(1), expectedFullRange);

    // The piecewise accessors must agree with the combined result.
    Assert.assertEquals(emptySketch.getRecords(), cdfRecords);
    Assert.assertEquals(emptySketch.getMetadata("meta", ALL_METADATA).asMap(), clip.getMeta().asMap());
}
use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class QuantileSketchTest method testRounding.
/**
 * Checks that the range end points reported by a CDF distribution come out as the
 * expected one-decimal strings, plus the positive infinity end.
 */
@Test
public void testRounding() {
    QuantileSketch sketch = new QuantileSketch(64, 6, Distribution.Type.CDF, 10);
    // 30 values cycling through 0..9, followed by 30 values cycling through 0..2.
    for (int i = 0; i < 30; i++) {
        sketch.update(i % 10);
    }
    for (int i = 0; i < 30; i++) {
        sketch.update(i % 3);
    }

    Clip clip = sketch.getResult(null, null);

    // Collect the distinct end point of every reported range.
    Set<String> actualRangeEnds = new HashSet<>();
    for (BulletRecord rangeRecord : clip.getRecords()) {
        String range = (String) rangeRecord.get(RANGE_FIELD);
        actualRangeEnds.add(QuantileSketchTest.getEnd(range));
    }

    Set<String> expectedRangeEnds = new HashSet<>(
            Arrays.asList("0.0", "1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0", POSITIVE_INFINITY));
    Assert.assertEquals(actualRangeEnds, expectedRangeEnds);
}
use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class QuantileSketchTest method testResetting.
/**
 * Checks that a CDF sketch reports the expected cumulative counts after combining its own
 * updates with a unioned sketch, and that {@code reset()} discards all of that state so a
 * subsequent single update is the only data reflected in the result.
 */
@Test
public void testResetting() {
    // First sketch: 30 values cycling through 0..9 (three of each).
    QuantileSketch sketch = new QuantileSketch(64, Distribution.Type.CDF, makePoints(0.0, 9.0, 1.0));
    IntStream.range(0, 30).forEach(i -> sketch.update(i % 10));

    // Second sketch: 30 values cycling through 0..2 (ten of each). These go into anotherSketch
    // so that the union below merges real data instead of an empty sketch.
    QuantileSketch anotherSketch = new QuantileSketch(64, Distribution.Type.CDF, makePoints(0.0, 9.0, 1.0));
    IntStream.range(0, 30).forEach(i -> anotherSketch.update(i % 3));

    sketch.union(anotherSketch.serialize());

    Clip result = sketch.getResult(null, null);
    List<BulletRecord> records = result.getRecords();
    for (BulletRecord record : records) {
        String range = (String) record.get(RANGE_FIELD);
        double count = (Double) record.get(COUNT_FIELD);
        String rangeEnd = getEnd(range);
        if (rangeEnd.equals(POSITIVE_INFINITY)) {
            // The unbounded range covers all 60 items.
            Assert.assertEquals(count, 60.0);
        } else {
            double end = Double.parseDouble(rangeEnd);
            if (end <= 3.0) {
                // Each of 0, 1 and 2 appears 3 + 10 = 13 times in the combined data.
                Assert.assertEquals(count, end * 13.0);
            } else {
                // Beyond 3, every further integer contributes only the 3 items from the first sketch.
                Assert.assertEquals(count, 39.0 + (end - 3) * 3.0);
            }
        }
    }

    // After a reset, only the single new value should be visible.
    sketch.reset();
    sketch.update(1.0);

    result = sketch.getResult(null, null);
    records = result.getRecords();
    for (BulletRecord record : records) {
        String range = (String) record.get(RANGE_FIELD);
        double count = (Double) record.get(COUNT_FIELD);
        String rangeEnd = getEnd(range);
        if (rangeEnd.equals(POSITIVE_INFINITY)) {
            Assert.assertEquals(count, 1.0);
        } else {
            double end = Double.parseDouble(rangeEnd);
            if (end <= 1.0) {
                // Ranges ending at or below 1.0 are expected not to include the value 1.0.
                Assert.assertEquals(count, 0.0);
            } else {
                Assert.assertEquals(count, 1.0);
            }
        }
    }
}
use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class ThetaSketchTest method testUnioning.
/**
 * Unions two ALPHA family sketches holding disjoint key sets into a QUICKSELECT sketch and
 * checks the resulting metadata, the standard deviation bounds and the single count record.
 */
@Test
public void testUnioning() {
    // First sketch: the keys "0" .. "1023".
    ThetaSketch positives = new ThetaSketch(ResizeFactor.X4, Family.ALPHA, 1.0f, 512);
    for (int i = 0; i < 1024; i++) {
        positives.update(String.valueOf(i));
    }
    // Second sketch: the keys "-1024" .. "-1", disjoint from the first.
    ThetaSketch negatives = new ThetaSketch(ResizeFactor.X4, Family.ALPHA, 1.0f, 512);
    for (int i = -1024; i < 0; i++) {
        negatives.update(String.valueOf(i));
    }

    ThetaSketch unionSketch = new ThetaSketch(ResizeFactor.X4, Family.QUICKSELECT, 1.0f, 512);
    unionSketch.union(positives.serialize());
    unionSketch.union(negatives.serialize());

    Clip clip = unionSketch.getResult("meta", ALL_METADATA);
    Map<String, Object> actualMeta = clip.getMeta().asMap();
    Assert.assertTrue(actualMeta.containsKey("meta"));

    Map<String, Object> stats = (Map<String, Object>) actualMeta.get("meta");
    Assert.assertEquals(stats.size(), 5);
    Assert.assertTrue((Boolean) stats.get("isEst"));
    Assert.assertTrue((Double) stats.get("theta") < 1.0);
    // 2048 unique keys were inserted in total; the reported size should exceed 512.
    Assert.assertTrue((Integer) stats.get("size") > 512);
    // The reported family is that of the sketch doing the union, not of its inputs.
    Assert.assertEquals((String) stats.get("family"), Family.QUICKSELECT.getFamilyName());

    // Pull the lower and upper bounds for one, two and three standard deviations.
    Map<String, Map<String, Double>> standardDeviations = (Map<String, Map<String, Double>>) stats.get("stddev");
    Map<String, Double> oneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1);
    double upperOneSigma = oneSigma.get(KMVSketch.META_STD_DEV_UB);
    double lowerOneSigma = oneSigma.get(KMVSketch.META_STD_DEV_LB);
    Map<String, Double> twoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2);
    double upperTwoSigma = twoSigma.get(KMVSketch.META_STD_DEV_UB);
    double lowerTwoSigma = twoSigma.get(KMVSketch.META_STD_DEV_LB);
    Map<String, Double> threeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3);
    double upperThreeSigma = threeSigma.get(KMVSketch.META_STD_DEV_UB);
    double lowerThreeSigma = threeSigma.get(KMVSketch.META_STD_DEV_LB);

    // Exactly one record carrying the estimated count, which must sit inside every band.
    Assert.assertEquals(clip.getRecords().size(), 1);
    double estimate = (Double) clip.getRecords().get(0).get(ThetaSketch.COUNT_FIELD);
    Assert.assertTrue(estimate >= lowerOneSigma);
    Assert.assertTrue(estimate <= upperOneSigma);
    Assert.assertTrue(estimate >= lowerTwoSigma);
    Assert.assertTrue(estimate <= upperTwoSigma);
    Assert.assertTrue(estimate >= lowerThreeSigma);
    Assert.assertTrue(estimate <= upperThreeSigma);

    // The piecewise accessors must agree with the combined result.
    Assert.assertEquals(unionSketch.getRecords(), clip.getRecords());
    Assert.assertEquals(unionSketch.getMetadata("meta", ALL_METADATA).asMap(), actualMeta);
}
Aggregations