use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class QuantileSketchTest method testExactCDFWithProvidedPoints.
/**
 * Verifies the exact CDF reported when the split points are supplied explicitly.
 *
 * The sketch receives i % 10 for 30 values of i (0 through 9, three times each) and then i % 3 for
 * another 30 values (0 through 2, ten more times each). That makes 13 occurrences each of 0, 1 and 2,
 * 3 occurrences each of 3 through 9, and 60 items overall. Each record's count and probability are
 * compared against the totals implied by the end of its range.
 */
@Test
public void testExactCDFWithProvidedPoints() {
    QuantileSketch sketch = new QuantileSketch(64, Distribution.Type.CDF, makePoints(0.0, 9.0, 1.0));
    for (int i = 0; i < 30; i++) {
        sketch.update(i % 10);
    }
    for (int i = 0; i < 30; i++) {
        sketch.update(i % 3);
    }

    Clip clip = sketch.getResult(null, null);
    for (BulletRecord row : clip.getRecords()) {
        String range = (String) row.get(RANGE_FIELD);
        double actualCount = (Double) row.get(COUNT_FIELD);
        double actualProbability = (Double) row.get(PROBABILITY_FIELD);
        String upperBound = getEnd(range);

        double expectedCount;
        if (upperBound.equals(POSITIVE_INFINITY)) {
            // The unbounded range covers every one of the 60 items
            expectedCount = 60.0;
        } else {
            double end = Double.parseDouble(upperBound);
            // 13 items per unit up to 3 (39 in total there), then 3 items per unit afterwards
            expectedCount = end <= 3.0 ? end * 13.0 : 39.0 + (end - 3) * 3.0;
        }

        Assert.assertEquals(actualCount, expectedCount);
        // The probability is the expected count as a fraction of the 60 items
        Assert.assertEquals(actualProbability, expectedCount / 60);
    }
}
use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class QuantileSketchTest method testExactPMFWithProvidedPoints.
/**
 * Verifies the exact PMF reported when the split points are supplied explicitly.
 *
 * The sketch receives i % 10 for 30 values of i (0 through 9, three times each) and then i % 3 for
 * another 30 values (0 through 2, ten more times each). That makes 13 occurrences each of 0, 1 and 2,
 * 3 occurrences each of 3 through 9, and 60 items overall. Each record is checked according to where
 * its range starts and ends.
 */
@Test
public void testExactPMFWithProvidedPoints() {
    QuantileSketch sketch = new QuantileSketch(64, Distribution.Type.PMF, makePoints(0.0, 9.0, 1.0));
    for (int i = 0; i < 30; i++) {
        sketch.update(i % 10);
    }
    for (int i = 0; i < 30; i++) {
        sketch.update(i % 3);
    }

    Clip clip = sketch.getResult(null, null);
    for (BulletRecord row : clip.getRecords()) {
        String range = (String) row.get(RANGE_FIELD);
        double actualCount = (Double) row.get(COUNT_FIELD);
        double actualProbability = (Double) row.get(PROBABILITY_FIELD);
        String lowerBound = getStart(range);
        String upperBound = getEnd(range);

        if (lowerBound.equals(NEGATIVE_INFINITY)) {
            // Nothing is expected in the range that starts at negative infinity
            Assert.assertEquals(actualCount, 0.0);
            Assert.assertEquals(actualProbability, 0.0);
            continue;
        }

        // Finite ranges starting at 2.0 or below are expected to hold 13 items; every other range
        // (including the one ending at positive infinity) is expected to hold 3. The start is only
        // parsed when the range does not end at positive infinity.
        boolean frequent = !upperBound.equals(POSITIVE_INFINITY) && Double.parseDouble(lowerBound) <= 2.0;
        double expectedCount = frequent ? 13.0 : 3.0;
        double expectedProbability = frequent ? 13.0 / 60 : 1.0 / 20;

        Assert.assertEquals(actualCount, expectedCount);
        Assert.assertEquals(actualProbability, expectedProbability);
    }
}
use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class TupleSketchTest method testApproximateMetrics.
/**
 * Checks the metadata and records produced after 128 updates keyed by the strings "0" through "63"
 * (each key twice).
 *
 * The metadata is expected under the "meta" key with five entries, flagged as an estimate with theta
 * below 1.0, and with the estimate inside the one, two and three standard deviation bounds. Sixteen
 * records are expected, each with a count of 2.
 */
@Test
public void testApproximateMetrics() {
    TupleSketch sketch = new TupleSketch(ResizeFactor.X4, 1.0f, 32, 16);
    // Two updates for each of the keys "0" through "63"
    for (int i = 0; i < 128; i++) {
        sketch.update(addToData(String.valueOf(i % 64), 1, data), data);
    }

    Clip result = sketch.getResult("meta", ALL_METADATA);
    Map<String, Object> actualMeta = result.getMeta().asMap();
    Assert.assertTrue(actualMeta.containsKey("meta"));

    Map<String, Object> stats = (Map<String, Object>) actualMeta.get("meta");
    Assert.assertEquals(stats.size(), 5);
    Assert.assertTrue((Boolean) stats.get("isEst"));
    double theta = (Double) stats.get("theta");
    Assert.assertTrue(theta < 1.0);
    Assert.assertEquals((String) stats.get("family"), Family.TUPLE.getFamilyName());

    // Pull out the upper and lower bounds for each of the three standard deviations
    Map<String, Map<String, Double>> deviations = (Map<String, Map<String, Double>>) stats.get("stddev");
    Map<String, Double> oneSigma = deviations.get(KMVSketch.META_STD_DEV_1);
    Map<String, Double> twoSigma = deviations.get(KMVSketch.META_STD_DEV_2);
    Map<String, Double> threeSigma = deviations.get(KMVSketch.META_STD_DEV_3);
    double upperOne = oneSigma.get(KMVSketch.META_STD_DEV_UB);
    double lowerOne = oneSigma.get(KMVSketch.META_STD_DEV_LB);
    double upperTwo = twoSigma.get(KMVSketch.META_STD_DEV_UB);
    double lowerTwo = twoSigma.get(KMVSketch.META_STD_DEV_LB);
    double upperThree = threeSigma.get(KMVSketch.META_STD_DEV_UB);
    double lowerThree = threeSigma.get(KMVSketch.META_STD_DEV_LB);

    // The estimate has to sit inside every one of the reported intervals
    double estimate = (Double) stats.get("est");
    Assert.assertTrue(lowerOne <= estimate);
    Assert.assertTrue(upperOne >= estimate);
    Assert.assertTrue(lowerTwo <= estimate);
    Assert.assertTrue(upperTwo >= estimate);
    Assert.assertTrue(lowerThree <= estimate);
    Assert.assertTrue(upperThree >= estimate);

    List<BulletRecord> records = result.getRecords();
    Assert.assertEquals(records.size(), 16);
    for (BulletRecord row : records) {
        String fieldA = row.get("A").toString();
        String fieldB = row.get("B").toString();
        Long count = (Long) row.get("cnt");
        Double sumB = (Double) row.get("sumB");
        Double averageA = (Double) row.get("avgA");

        Assert.assertTrue(Integer.parseInt(fieldA) < 64);
        Assert.assertEquals(Double.valueOf(fieldB), 1.0);
        Assert.assertEquals(count, Long.valueOf(2));
        Assert.assertEquals(sumB, 2.0);
        // A is always below 64, so its average stays below 64 whatever the count is
        Assert.assertTrue(averageA < 64.0);
    }

    // Asking the sketch directly must agree with what the Clip carried
    Assert.assertEquals(sketch.getRecords(), records);
    Assert.assertEquals(sketch.getMetadata("meta", ALL_METADATA).asMap(), actualMeta);
}
use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class GroupByTest method testCombining.
/**
 * Checks that the serialized data of two separately fed GroupBy instances can be combined into a third,
 * freshly made instance, and that the combined result holds the merged counts and price sums.
 *
 * The expected records use the string "null" for the fields that were added as nulls.
 */
@Test
public void testCombining() {
    List<String> fields = asList("fieldA", "fieldB");

    // First instance: 30 copies of (foo, bar, 3) and 10 copies of ("null", bar, 1), interleaved
    GroupBy first = makeGroupBy(fields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord recordA = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordB = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add("price", 1).getRecord();
    for (int i = 0; i < 10; i++) {
        first.consume(recordA);
    }
    for (int i = 0; i < 9; i++) {
        first.consume(recordB);
    }
    for (int i = 0; i < 20; i++) {
        first.consume(recordA);
    }
    first.consume(recordB);
    byte[] firstSerialized = first.getData();

    // Second instance: 30 copies of (foo, bar, 3) and 10 copies of a record with both fields null
    GroupBy second = makeGroupBy(fields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord recordC = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordD = RecordBox.get().addNull("fieldA").addNull("fieldB").add("price", 10).getRecord();
    for (int i = 0; i < 30; i++) {
        second.consume(recordC);
    }
    for (int i = 0; i < 10; i++) {
        second.consume(recordD);
    }
    byte[] secondSerialized = second.getData();

    // Third instance consumes nothing itself; it only combines the two serialized states
    GroupBy merged = makeGroupBy(fields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    merged.combine(firstSerialized);
    merged.combine(secondSerialized);

    Clip aggregate = merged.getResult();
    Assert.assertNotNull(aggregate);
    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 3);

    // (foo, bar): count = 10 + 20 + 30, price = 10*3 + 20*3 + 30*3
    BulletRecord expectedA = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add(COUNT.getName(), 60L).add("priceSum", 180.0).getRecord();
    // ("null", bar): count = 9 + 1, price = 9*1 + 1*1
    BulletRecord expectedB = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();
    // Both fields null: count = 10, price = 10*10
    BulletRecord expectedC = RecordBox.get().add("fieldA", "null").add("fieldB", "null").add(COUNT.getName(), 10L).add("priceSum", 100.0).getRecord();
    assertContains(records, expectedA);
    assertContains(records, expectedB);
    assertContains(records, expectedC);

    // Asking the combined instance directly must agree with what the Clip carried
    Assert.assertEquals(merged.getRecords(), records);
    Assert.assertEquals(merged.getMetadata().asMap(), aggregate.getMeta().asMap());
}
use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.
the class GroupByTest method testGroupByOperations.
/**
 * Checks a COUNT and a SUM of "price" (reported as "priceSum") grouped by fieldA and fieldB when the
 * records are consumed directly by a single GroupBy.
 *
 * The expected records use the string "null" for the field that was added as a null.
 */
@Test
public void testGroupByOperations() {
    List<String> fields = asList("fieldA", "fieldB");
    GroupBy groupBy = makeGroupBy(fields, 3, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord fooBar = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord nullBar = RecordBox.get().addNull("fieldA").add("fieldB", "bar").add("price", 1).getRecord();

    // 30 copies of the first record and 10 of the second, interleaved
    for (int i = 0; i < 10; i++) {
        groupBy.consume(fooBar);
    }
    for (int i = 0; i < 9; i++) {
        groupBy.consume(nullBar);
    }
    for (int i = 0; i < 20; i++) {
        groupBy.consume(fooBar);
    }
    groupBy.consume(nullBar);

    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);
    Map<String, Object> meta = aggregate.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 2);

    // (foo, bar): count = 10 + 20, price = 10*3 + 20*3
    BulletRecord expectedFooBar = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add(COUNT.getName(), 30L).add("priceSum", 90.0).getRecord();
    // ("null", bar): count = 9 + 1, price = 9*1 + 1*1
    BulletRecord expectedNullBar = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();
    assertContains(records, expectedFooBar);
    assertContains(records, expectedNullBar);

    // Asking the GroupBy directly must agree with what the Clip carried
    Assert.assertEquals(groupBy.getRecords(), records);
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Aggregations