Use of com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT in the project bullet-core by yahoo.
From the class GroupByTest, method testResetting.
/**
 * Checks that a {@code GroupBy} produces correct groups both before and after {@code reset()}.
 *
 * <p>Before the reset, 30 copies of one record and 10 copies of another are consumed and two
 * groups are expected. After the reset, three distinct groups are consumed (one of them with
 * null group fields) and three groups are expected, with the counts and sums reflecting only
 * the records consumed after the reset. The null group fields are expected back as the
 * string "null".
 */
@Test
public void testResetting() {
    List<String> groupFields = asList("fieldA", "fieldB");
    GroupBy groupBy = makeGroupBy(groupFields, 5,
                                  makeGroupOperation(COUNT, null, null),
                                  makeGroupOperation(SUM, "price", "priceSum"));

    BulletRecord fooBar = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord nullStringBar = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add("price", 1).getRecord();

    // First pass: two groups.
    for (int i = 0; i < 30; i++) {
        groupBy.consume(fooBar);
    }
    for (int i = 0; i < 10; i++) {
        groupBy.consume(nullStringBar);
    }

    BulletRecord firstPassFooBar = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar")
                                            .add(COUNT.getName(), 30L).add("priceSum", 90.0).getRecord();
    BulletRecord firstPassNullBar = RecordBox.get().add("fieldA", "null").add("fieldB", "bar")
                                             .add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();

    Clip firstResult = groupBy.getResult();
    Assert.assertNotNull(firstResult);
    List<BulletRecord> firstRecords = firstResult.getRecords();
    Assert.assertEquals(firstRecords.size(), 2);
    assertContains(firstRecords, firstPassFooBar);
    assertContains(firstRecords, firstPassNullBar);
    Assert.assertEquals(groupBy.getRecords(), firstResult.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), firstResult.getMeta().asMap());

    groupBy.reset();

    // Second pass: the same two groups plus one whose group fields are null.
    BulletRecord fooBarAgain = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord nullFields = RecordBox.get().addNull("fieldA").addNull("fieldB").add("price", 10).getRecord();
    for (int i = 0; i < 10; i++) {
        groupBy.consume(nullStringBar);
    }
    for (int i = 0; i < 30; i++) {
        groupBy.consume(fooBarAgain);
    }
    for (int i = 0; i < 10; i++) {
        groupBy.consume(nullFields);
    }

    Clip secondResult = groupBy.getResult();
    Assert.assertNotNull(secondResult);
    List<BulletRecord> secondRecords = secondResult.getRecords();
    Assert.assertEquals(secondRecords.size(), 3);

    BulletRecord secondPassNullBar = RecordBox.get().add("fieldA", "null").add("fieldB", "bar")
                                              .add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();
    BulletRecord secondPassFooBar = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar")
                                             .add(COUNT.getName(), 30L).add("priceSum", 90.0).getRecord();
    BulletRecord secondPassNullNull = RecordBox.get().add("fieldA", "null").add("fieldB", "null")
                                               .add(COUNT.getName(), 10L).add("priceSum", 100.0).getRecord();
    assertContains(secondRecords, secondPassNullBar);
    assertContains(secondRecords, secondPassFooBar);
    assertContains(secondRecords, secondPassNullNull);
    Assert.assertEquals(groupBy.getRecords(), secondResult.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), secondResult.getMeta().asMap());
}
Use of com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT in the project bullet-core by yahoo.
From the class GroupByTest, method testMetadata.
/**
 * Checks the metadata a {@code GroupBy} reports when there are more groups than it keeps.
 *
 * <p>256 records covering 64 distinct {@code fieldA} values are consumed by a GroupBy built
 * with a configuration of 32 and an aggregation size of 32. The result is expected to hold
 * 32 records and a single {@code aggregate_stats} metadata entry with four keys: it must be
 * flagged as an estimate, carry a theta no greater than 1.0, and expose three standard
 * deviation bands that each bracket the reported {@code uniquesApprox} value.
 */
@Test
public void testMetadata() {
    Map<String, String> fields = singletonMap("fieldA", null);
    // Nominal Entries is 32. Aggregation size is also 32
    GroupBy groupBy = makeGroupBy(makeConfiguration(32), fields, 32, singletonList(makeGroupOperation(COUNT, null, null)), ALL_METADATA);
    // Generate 4 batches of 64 records with 0 - 63 in fieldA.
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i % 64).getRecord()).forEach(groupBy::consume);

    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);
    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 32);
    // parseInt avoids the needless boxing of Integer.valueOf before the comparison.
    records.forEach(r -> Assert.assertTrue(Integer.parseInt(r.get("fieldA").toString()) < 64));

    Map<String, Object> meta = aggregate.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    // The metadata map is untyped; the cast is checked indirectly by the assertions below.
    @SuppressWarnings("unchecked")
    Map<String, Object> stats = (Map<String, Object>) meta.get("aggregate_stats");
    Assert.assertEquals(stats.size(), 4);
    Assert.assertTrue((Boolean) stats.get("isEstimate"));
    double theta = (Double) stats.get("theta");
    Assert.assertTrue(theta <= 1.0);
    double groupEstimate = (Double) stats.get("uniquesApprox");

    Assert.assertTrue(stats.containsKey("stddev"));
    @SuppressWarnings("unchecked")
    Map<String, Map<String, Double>> standardDeviations = (Map<String, Map<String, Double>>) stats.get("stddev");
    Assert.assertEquals(standardDeviations.size(), 3);
    double upperOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_UB);
    double lowerOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_LB);
    double upperTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_UB);
    double lowerTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_LB);
    double upperThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_UB);
    double lowerThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_LB);

    // The estimate must lie inside every band; each bound is checked exactly once.
    Assert.assertTrue(groupEstimate >= lowerOneSigma);
    Assert.assertTrue(groupEstimate <= upperOneSigma);
    Assert.assertTrue(groupEstimate >= lowerTwoSigma);
    Assert.assertTrue(groupEstimate <= upperTwoSigma);
    Assert.assertTrue(groupEstimate >= lowerThreeSigma);
    Assert.assertTrue(groupEstimate <= upperThreeSigma);

    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Use of com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT in the project bullet-core by yahoo.
From the class GroupByTest, method testMoreGroupsThanNominalEntries.
/**
 * Checks the records returned when more groups are consumed than the GroupBy keeps.
 *
 * <p>{@code fieldA} is grouped under the new name {@code A}. 256 records covering 64 distinct
 * values are consumed; the result is expected to contain 32 records, each for a different
 * group and each with a count of 4, plus exactly one metadata entry.
 */
@Test
public void testMoreGroupsThanNominalEntries() {
    Map<String, String> renamedFields = singletonMap("fieldA", "A");
    // Nominal Entries is 32. Aggregation size is also 32
    GroupBy groupBy = makeGroupBy(makeConfiguration(32), renamedFields, 32,
                                  singletonList(makeGroupOperation(COUNT, null, null)), ALL_METADATA);

    // Generate 4 batches of 64 records with 0 - 63 in fieldA.
    for (int i = 0; i < 256; i++) {
        groupBy.consume(RecordBox.get().add("fieldA", i % 64).getRecord());
    }

    Clip result = groupBy.getResult();
    Assert.assertNotNull(result);

    Map<String, Object> metadata = result.getMeta().asMap();
    Assert.assertEquals(metadata.size(), 1);

    List<BulletRecord> grouped = result.getRecords();
    Assert.assertEquals(grouped.size(), 32);

    // Collect the group names while checking each group's count.
    Set<String> seenGroups = new HashSet<>();
    grouped.forEach(entry -> {
        seenGroups.add((String) entry.get("A"));
        Assert.assertEquals(entry.get(COUNT.getName()), 4L);
    });
    Assert.assertEquals(seenGroups.size(), 32);

    Assert.assertEquals(groupBy.getRecords(), result.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), result.getMeta().asMap());
}
Use of com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT in the project bullet-core by yahoo.
From the class GroupByTest, method testCombiningAndConsuming.
/**
 * Checks that a {@code GroupBy} can both consume records and combine serialized data.
 *
 * <p>A first GroupBy consumes two groups and is serialized with {@code getData()}. A second,
 * freshly made GroupBy consumes an overlapping group plus a group with null fields, then
 * combines the serialized bytes. The merged result is expected to hold three groups, with
 * the overlapping group's count and sum added together.
 */
@Test
public void testCombiningAndConsuming() {
    List<String> groupFields = asList("fieldA", "fieldB");

    GroupBy source = makeGroupBy(groupFields, 5,
                                 makeGroupOperation(COUNT, null, null),
                                 makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord fooBar = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord nullStringBar = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add("price", 1).getRecord();
    for (int i = 0; i < 30; i++) {
        source.consume(fooBar);
    }
    for (int i = 0; i < 10; i++) {
        source.consume(nullStringBar);
    }
    byte[] serialized = source.getData();

    // Remake it
    GroupBy groupBy = makeGroupBy(groupFields, 5,
                                  makeGroupOperation(COUNT, null, null),
                                  makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord fooBarAgain = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord nullFields = RecordBox.get().addNull("fieldA").addNull("fieldB").add("price", 10).getRecord();
    for (int i = 0; i < 30; i++) {
        groupBy.consume(fooBarAgain);
    }
    for (int i = 0; i < 10; i++) {
        groupBy.consume(nullFields);
    }
    groupBy.combine(serialized);

    Clip result = groupBy.getResult();
    Assert.assertNotNull(result);
    List<BulletRecord> merged = result.getRecords();
    Assert.assertEquals(merged.size(), 3);

    BulletRecord expectedFooBar = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar")
                                           .add(COUNT.getName(), 60L).add("priceSum", 180.0).getRecord();
    BulletRecord expectedNullBar = RecordBox.get().add("fieldA", "null").add("fieldB", "bar")
                                            .add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();
    BulletRecord expectedNullNull = RecordBox.get().add("fieldA", "null").add("fieldB", "null")
                                             .add(COUNT.getName(), 10L).add("priceSum", 100.0).getRecord();
    assertContains(merged, expectedFooBar);
    assertContains(merged, expectedNullBar);
    assertContains(merged, expectedNullNull);

    Assert.assertEquals(groupBy.getRecords(), result.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), result.getMeta().asMap());
}
Use of com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT in the project bullet-storm by yahoo.
From the class JoinBoltTest, method testCountDistinct.
/**
 * Checks that the join bolt merges two serialized {@code CountDistinct} payloads.
 *
 * <p>Two CountDistinct instances consume the values 0-255 and 128-255 of {@code field}
 * respectively. Their serialized data is sent to the bolt for query "42", and a single result
 * of 256.0 is expected. The result must not be emitted on any tick inside the loop and must
 * be emitted after the final tick, together with exactly one COMPLETE feedback tuple.
 */
@Test
public void testCountDistinct() {
    BulletConfig sketchConfig = CountDistinctTest.makeConfiguration(8, 512);

    CountDistinct fullRange = CountDistinctTest.makeCountDistinct(sketchConfig, singletonList("field"));
    for (int value = 0; value < 256; value++) {
        fullRange.consume(RecordBox.get().add("field", value).getRecord());
    }
    byte[] first = fullRange.getData();

    CountDistinct upperHalf = CountDistinctTest.makeCountDistinct(sketchConfig, singletonList("field"));
    for (int value = 128; value < 256; value++) {
        upperHalf.consume(RecordBox.get().add("field", value).getRecord());
    }
    byte[] second = upperHalf.getData();

    // Send generated data to JoinBolt
    bolt = new DonableJoinBolt(config, 2, true);
    setup(bolt);

    Tuple query = TupleUtils.makeIDTuple(TupleClassifier.Type.QUERY_TUPLE, "42",
                                         makeAggregationQuery(COUNT_DISTINCT, 1, null, Pair.of("field", "field")),
                                         EMPTY);
    bolt.execute(query);
    sendRawByteTuplesTo(bolt, "42", asList(first, second));

    BulletRecord distinctCount = RecordBox.get().add(CountDistinct.DEFAULT_NEW_NAME, 256.0).getRecord();
    List<BulletRecord> result = singletonList(distinctCount);
    Tuple expected = TupleUtils.makeTuple(TupleClassifier.Type.RESULT_TUPLE, "42", Clip.of(result).asJSON(), COMPLETED);

    Tuple tick = TupleUtils.makeTuple(TupleClassifier.Type.TICK_TUPLE);
    bolt.execute(tick);
    // Same number of iterations as counting from 0 up to (timeout - 1) exclusive.
    for (int ticksSent = 1; ticksSent < BulletStormConfig.DEFAULT_JOIN_BOLT_QUERY_TICK_TIMEOUT; ticksSent++) {
        bolt.execute(tick);
        Assert.assertFalse(wasResultEmittedTo(TopologyConstants.RESULT_STREAM, expected));
    }
    bolt.execute(tick);
    Assert.assertTrue(wasResultEmittedTo(TopologyConstants.RESULT_STREAM, expected));

    Tuple metadata = TupleUtils.makeTuple(TupleClassifier.Type.FEEDBACK_TUPLE, "42",
                                          new Metadata(Metadata.Signal.COMPLETE, null));
    Assert.assertTrue(wasMetadataEmittedTo(TopologyConstants.FEEDBACK_STREAM, metadata));
    Assert.assertEquals(collector.getAllEmittedTo(TopologyConstants.RESULT_STREAM).count(), 1);
    Assert.assertEquals(collector.getAllEmittedTo(TopologyConstants.FEEDBACK_STREAM).count(), 1);
}
Aggregations