use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.
the class ThetaSketchingStrategyTest method testResetting.
/**
 * Checks that a count distinct strategy can be reused after {@code reset()}.
 *
 * Consumes 256 records with distinct values for "field", verifies the single result record
 * reports 256 under "myCount", resets the strategy, consumes 15 distinct values and verifies
 * the result now reports 15. In both phases the strategy's own records and metadata are
 * compared against what the returned {@link Clip} carries.
 */
@Test
public void testResetting() {
    // NOTE(review): the two arguments are presumably sketch sizing parameters - confirm against makeConfiguration
    BulletConfig config = makeConfiguration(4, 1024);
    ThetaSketchingStrategy countDistinct = makeCountDistinct(config, Collections.singletonList("field"), "myCount");

    // First phase: 256 distinct values
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("field", i).getRecord()).forEach(countDistinct::consume);

    Clip clip = countDistinct.getResult();
    Map<String, Object> meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 0);
    Assert.assertEquals(clip.getRecords().size(), 1);

    BulletRecord actual = clip.getRecords().get(0);
    BulletRecord expected = RecordBox.get().add("myCount", 256L).getRecord();
    Assert.assertEquals(actual, expected);

    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());

    // Second phase: after a reset, only the 15 new values should be counted
    countDistinct.reset();
    IntStream.range(0, 15).mapToObj(i -> RecordBox.get().add("field", i).getRecord()).forEach(countDistinct::consume);

    clip = countDistinct.getResult();
    meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 0);
    Assert.assertEquals(clip.getRecords().size(), 1);

    actual = clip.getRecords().get(0);
    expected = RecordBox.get().add("myCount", 15L).getRecord();
    Assert.assertEquals(actual, expected);

    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.
the class ThetaSketchingStrategyTest method testNewNamingOfResult.
/**
 * Checks that the result field and the metadata keys use the configured names.
 *
 * The strategy is created with the result named "myCount", the sketch metadata mapped to
 * "stats" and the estimated-result flag mapped to "est". After consuming 1000 distinct values,
 * the metadata must contain only "stats", which in turn contains only a false "est" flag, and
 * the single result record must report 1000 under "myCount".
 */
@Test
public void testNewNamingOfResult() {
    // NOTE(review): the two arguments are presumably sketch sizing parameters - confirm against makeConfiguration
    BulletConfig config = makeConfiguration(4, 1024);
    ThetaSketchingStrategy countDistinct = makeCountDistinct(config, Collections.singletonList("field"), "myCount",
                                                             Pair.of(Concept.SKETCH_METADATA, "stats"),
                                                             Pair.of(Concept.SKETCH_ESTIMATED_RESULT, "est"));

    IntStream.range(0, 1000).mapToObj(i -> RecordBox.get().add("field", i).getRecord()).forEach(countDistinct::consume);

    Clip clip = countDistinct.getResult();
    Map<String, Object> meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    Assert.assertTrue(meta.containsKey("stats"));

    Map<String, Object> stats = (Map<String, Object>) meta.get("stats");
    Assert.assertEquals(stats.size(), 1);
    Assert.assertFalse((Boolean) stats.get("est"));

    Assert.assertEquals(clip.getRecords().size(), 1);
    BulletRecord actual = clip.getRecords().get(0);
    BulletRecord expected = RecordBox.get().add("myCount", 1000L).getRecord();
    Assert.assertEquals(actual, expected);

    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.
the class QuerierTest method testMetadataDisabled.
/**
 * Checks that a querier exposes no metadata when result metadata is disabled.
 *
 * Builds a count distinct query on "foo" over a tumbling window, runs it in partition mode with
 * {@code RESULT_METADATA_ENABLE} set to false, consumes one record and asserts that the
 * querier's metadata map is empty.
 */
@Test
public void testMetadataDisabled() {
    BulletConfig metadataOffConfig = new BulletConfig();
    metadataOffConfig.set(BulletConfig.RESULT_METADATA_ENABLE, false);
    // validate() should clear out the default metadata now that it is disabled
    metadataOffConfig.validate();

    CountDistinct countDistinctOnFoo = new CountDistinct(Collections.singletonList("foo"), "count");
    Window tumblingWindow = WindowUtils.makeTumblingWindow(1);
    Query countQuery = new Query(new Projection(), null, countDistinctOnFoo, null, tumblingWindow, null);
    countQuery.configure(metadataOffConfig);

    Querier partitionQuerier = make(Querier.Mode.PARTITION, countQuery, metadataOffConfig);
    partitionQuerier.consume(RecordBox.get().add("foo", "A").getRecord());

    Assert.assertTrue(partitionQuerier.getMetadata().asMap().isEmpty());
}
use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.
the class ThetaSketchingStrategyTest method testMultipleFieldsCountDistinct.
/**
 * Checks count distinct over two fields when every combination is seen twice.
 *
 * The same 256 ("fieldA", "fieldB") pairs are consumed two times; the single result record
 * must still report 256 under "myCount". The strategy's own records and metadata are compared
 * against what the returned {@link Clip} carries.
 */
@Test
public void testMultipleFieldsCountDistinct() {
    // NOTE(review): the two arguments are presumably sketch sizing parameters - confirm against makeConfiguration
    BulletConfig config = makeConfiguration(4, 512);
    ThetaSketchingStrategy countDistinct = makeCountDistinct(config, asList("fieldA", "fieldB"), "myCount");

    // Feed the identical set of pairs twice; duplicates must not inflate the count
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i).add("fieldB", 255 - i).getRecord()).forEach(countDistinct::consume);
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i).add("fieldB", 255 - i).getRecord()).forEach(countDistinct::consume);

    Clip clip = countDistinct.getResult();
    Assert.assertEquals(clip.getRecords().size(), 1);

    BulletRecord actual = clip.getRecords().get(0);
    BulletRecord expected = RecordBox.get().add("myCount", 256L).getRecord();
    Assert.assertEquals(actual, expected);

    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.
the class ThetaSketchingStrategyTest method testSingleFieldApproximateCountDistinctWithMetadata.
/**
 * Checks the metadata and the estimate produced when the count distinct is approximate.
 *
 * The strategy is created with names for the sketch metadata, family, size, theta,
 * estimated-result flag and standard deviations. After consuming 1000 distinct values, the
 * test verifies that exactly those five entries appear under "aggregate_stats", that the
 * result is flagged as an estimate, and that the reported count lies inside the lower and
 * upper bounds for one, two and three standard deviations.
 */
@Test
public void testSingleFieldApproximateCountDistinctWithMetadata() {
    // NOTE(review): the two arguments are presumably sketch sizing parameters - confirm against makeConfiguration
    BulletConfig config = makeConfiguration(4, 512);
    ThetaSketchingStrategy countDistinct = makeCountDistinct(config, Collections.singletonList("field"), DEFAULT_NAME,
                                                             Pair.of(Concept.SKETCH_METADATA, "aggregate_stats"),
                                                             Pair.of(Concept.SKETCH_FAMILY, "family"),
                                                             Pair.of(Concept.SKETCH_SIZE, "size"),
                                                             Pair.of(Concept.SKETCH_THETA, "theta"),
                                                             Pair.of(Concept.SKETCH_ESTIMATED_RESULT, "isEstimate"),
                                                             Pair.of(Concept.SKETCH_STANDARD_DEVIATIONS, "stddev"));

    IntStream.range(0, 1000).mapToObj(i -> RecordBox.get().add("field", i).getRecord()).forEach(countDistinct::consume);
    Assert.assertNotNull(countDistinct.getData());

    Clip clip = countDistinct.getResult();
    Map<String, Object> meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    Assert.assertTrue(meta.containsKey("aggregate_stats"));

    Map<String, Object> stats = (Map<String, Object>) meta.get("aggregate_stats");
    Assert.assertEquals(stats.size(), 5);
    Assert.assertTrue((Boolean) stats.get("isEstimate"));
    Assert.assertEquals(stats.get("family").toString(), Family.ALPHA.getFamilyName());

    int size = (Integer) stats.get("size");
    // We inserted more than 512 unique entries
    Assert.assertTrue(size > 512);

    double theta = (Double) stats.get("theta");
    Assert.assertTrue(theta <= 1.0);

    Assert.assertTrue(stats.containsKey("stddev"));
    Map<String, Map<String, Double>> standardDeviations = (Map<String, Map<String, Double>>) stats.get("stddev");
    Assert.assertEquals(standardDeviations.size(), 3);

    Assert.assertEquals(clip.getRecords().size(), 1);
    BulletRecord actual = clip.getRecords().get(0);
    // The value is unboxed from Long and widened to double for the bound comparisons below
    double roundedEstimate = (Long) actual.typedGet(DEFAULT_NAME).getValue();

    double upperOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_UB);
    double lowerOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_LB);
    double upperTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_UB);
    double lowerTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_LB);
    double upperThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_UB);
    double lowerThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_LB);

    // Should be ok since the bounds are relatively big
    Assert.assertTrue(roundedEstimate >= lowerOneSigma);
    Assert.assertTrue(roundedEstimate <= upperOneSigma);
    Assert.assertTrue(roundedEstimate >= lowerTwoSigma);
    Assert.assertTrue(roundedEstimate <= upperTwoSigma);
    Assert.assertTrue(roundedEstimate >= lowerThreeSigma);
    Assert.assertTrue(roundedEstimate <= upperThreeSigma);

    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
Aggregations