Search in sources :

Example 6 with CountDistinct

use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.

the class ThetaSketchingStrategyTest method testResetting.

/**
 * Checks that the strategy can be reused after {@code reset()}.
 *
 * <p>First consumes 256 records with distinct integer values for "field" and expects a single
 * result record with "myCount" equal to 256 and no metadata. After {@code reset()}, consumes 15
 * distinct records and expects the count to be 15, i.e. nothing from the first batch remains.
 * In both phases the separately fetched records and metadata must agree with the {@link Clip}
 * returned by {@code getResult()}.
 */
@Test
public void testResetting() {
    BulletConfig config = makeConfiguration(4, 1024);
    ThetaSketchingStrategy countDistinct = makeCountDistinct(config, Collections.singletonList("field"), "myCount");

    // Phase 1: 256 distinct values
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("field", i).getRecord()).forEach(countDistinct::consume);
    Clip clip = countDistinct.getResult();
    Map<String, Object> meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 0);
    Assert.assertEquals(clip.getRecords().size(), 1);
    BulletRecord actual = clip.getRecords().get(0);
    BulletRecord expected = RecordBox.get().add("myCount", 256L).getRecord();
    Assert.assertEquals(actual, expected);
    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());

    countDistinct.reset();

    // Phase 2: only the 15 values consumed after the reset should be counted
    IntStream.range(0, 15).mapToObj(i -> RecordBox.get().add("field", i).getRecord()).forEach(countDistinct::consume);
    clip = countDistinct.getResult();
    meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 0);
    Assert.assertEquals(clip.getRecords().size(), 1);
    actual = clip.getRecords().get(0);
    expected = RecordBox.get().add("myCount", 15L).getRecord();
    Assert.assertEquals(actual, expected);
    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) BulletRecord(com.yahoo.bullet.record.BulletRecord) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) Family(com.yahoo.sketches.Family) KMVSketch(com.yahoo.bullet.querying.aggregations.sketches.KMVSketch) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) CountDistinct(com.yahoo.bullet.query.aggregations.CountDistinct) Arrays.asList(java.util.Arrays.asList) BulletConfig(com.yahoo.bullet.common.BulletConfig) Map(java.util.Map) Collections(java.util.Collections) ResizeFactor(com.yahoo.sketches.ResizeFactor) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) BulletConfig(com.yahoo.bullet.common.BulletConfig) Test(org.testng.annotations.Test)

Example 7 with CountDistinct

use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.

the class ThetaSketchingStrategyTest method testNewNamingOfResult.

/**
 * Checks that custom names are used for the result field and for the metadata keys.
 *
 * <p>The strategy is created with the result named "myCount", the sketch metadata concept mapped
 * to "stats" and the estimated-result concept mapped to "est". After consuming 1000 distinct
 * values, the test expects a single metadata entry "stats" containing only "est" (false), and a
 * single result record with "myCount" equal to 1000.
 */
@Test
public void testNewNamingOfResult() {
    BulletConfig config = makeConfiguration(4, 1024);
    ThetaSketchingStrategy countDistinct = makeCountDistinct(config, Collections.singletonList("field"), "myCount", Pair.of(Concept.SKETCH_METADATA, "stats"), Pair.of(Concept.SKETCH_ESTIMATED_RESULT, "est"));
    IntStream.range(0, 1000).mapToObj(i -> RecordBox.get().add("field", i).getRecord()).forEach(countDistinct::consume);

    Clip clip = countDistinct.getResult();
    Map<String, Object> meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    Assert.assertTrue(meta.containsKey("stats"));
    Map<String, Object> stats = (Map<String, Object>) meta.get("stats");
    Assert.assertEquals(stats.size(), 1);
    Assert.assertFalse((Boolean) stats.get("est"));

    Assert.assertEquals(clip.getRecords().size(), 1);
    BulletRecord actual = clip.getRecords().get(0);
    BulletRecord expected = RecordBox.get().add("myCount", 1000L).getRecord();
    Assert.assertEquals(actual, expected);

    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) BulletRecord(com.yahoo.bullet.record.BulletRecord) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) Family(com.yahoo.sketches.Family) KMVSketch(com.yahoo.bullet.querying.aggregations.sketches.KMVSketch) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) CountDistinct(com.yahoo.bullet.query.aggregations.CountDistinct) Arrays.asList(java.util.Arrays.asList) BulletConfig(com.yahoo.bullet.common.BulletConfig) Map(java.util.Map) Collections(java.util.Collections) ResizeFactor(com.yahoo.sketches.ResizeFactor) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Map(java.util.Map) BulletConfig(com.yahoo.bullet.common.BulletConfig) Test(org.testng.annotations.Test)

Example 8 with CountDistinct

use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.

the class QuerierTest method testMetadataDisabled.

/**
 * Checks that a querier exposes no metadata when {@code BulletConfig.RESULT_METADATA_ENABLE}
 * is set to false.
 *
 * <p>Builds a count-distinct query on "foo" with a tumbling window, runs it through a querier in
 * {@code PARTITION} mode, consumes one record and expects the querier's metadata map to be empty.
 */
@Test
public void testMetadataDisabled() {
    BulletConfig metadataOffConfig = new BulletConfig();
    metadataOffConfig.set(BulletConfig.RESULT_METADATA_ENABLE, false);
    // Validating after disabling metadata is expected to drop the default metadata entries
    metadataOffConfig.validate();

    CountDistinct distinctFoo = new CountDistinct(Collections.singletonList("foo"), "count");
    Window tumbling = WindowUtils.makeTumblingWindow(1);
    Query distinctQuery = new Query(new Projection(), null, distinctFoo, null, tumbling, null);
    distinctQuery.configure(metadataOffConfig);

    Querier partitionQuerier = make(Querier.Mode.PARTITION, distinctQuery, metadataOffConfig);
    partitionQuerier.consume(RecordBox.get().add("foo", "A").getRecord());

    Assert.assertTrue(partitionQuerier.getMetadata().asMap().isEmpty());
}
Also used : Window(com.yahoo.bullet.query.Window) Query(com.yahoo.bullet.query.Query) Projection(com.yahoo.bullet.query.Projection) CountDistinct(com.yahoo.bullet.query.aggregations.CountDistinct) BulletConfig(com.yahoo.bullet.common.BulletConfig) Test(org.testng.annotations.Test) BulletConfigTest(com.yahoo.bullet.common.BulletConfigTest)

Example 9 with CountDistinct

use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.

the class ThetaSketchingStrategyTest method testMultipleFieldsCountDistinct.

/**
 * Checks count distinct over the combination of two fields.
 *
 * <p>The same 256 ("fieldA", "fieldB") pairs are consumed twice, so the expected distinct count
 * is 256 rather than 512. The separately fetched records and metadata must agree with the
 * {@link Clip} returned by {@code getResult()}.
 */
@Test
public void testMultipleFieldsCountDistinct() {
    BulletConfig config = makeConfiguration(4, 512);
    ThetaSketchingStrategy countDistinct = makeCountDistinct(config, asList("fieldA", "fieldB"), "myCount");
    // Feed the identical batch twice; duplicates must not increase the count
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i).add("fieldB", 255 - i).getRecord()).forEach(countDistinct::consume);
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i).add("fieldB", 255 - i).getRecord()).forEach(countDistinct::consume);

    Clip clip = countDistinct.getResult();
    Assert.assertEquals(clip.getRecords().size(), 1);
    BulletRecord actual = clip.getRecords().get(0);
    BulletRecord expected = RecordBox.get().add("myCount", 256L).getRecord();
    Assert.assertEquals(actual, expected);

    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) BulletRecord(com.yahoo.bullet.record.BulletRecord) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) Family(com.yahoo.sketches.Family) KMVSketch(com.yahoo.bullet.querying.aggregations.sketches.KMVSketch) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) CountDistinct(com.yahoo.bullet.query.aggregations.CountDistinct) Arrays.asList(java.util.Arrays.asList) BulletConfig(com.yahoo.bullet.common.BulletConfig) Map(java.util.Map) Collections(java.util.Collections) ResizeFactor(com.yahoo.sketches.ResizeFactor) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) BulletConfig(com.yahoo.bullet.common.BulletConfig) Test(org.testng.annotations.Test)

Example 10 with CountDistinct

use of com.yahoo.bullet.query.aggregations.CountDistinct in project bullet-core by yahoo.

the class ThetaSketchingStrategyTest method testSingleFieldApproximateCountDistinctWithMetadata.

/**
 * Checks the approximate count distinct result and its sketch metadata for a single field.
 *
 * <p>1000 distinct values are consumed by a strategy built with {@code makeConfiguration(4, 512)},
 * and the test expects the result to be flagged as an estimate. It verifies the custom metadata
 * key names, the reported family, size and theta, and that the rounded estimate lies within the
 * lower/upper bounds reported for one, two and three standard deviations. Finally, the separately
 * fetched records and metadata must agree with the {@link Clip} returned by {@code getResult()}.
 */
@Test
public void testSingleFieldApproximateCountDistinctWithMetadata() {
    BulletConfig config = makeConfiguration(4, 512);
    ThetaSketchingStrategy countDistinct = makeCountDistinct(config, Collections.singletonList("field"), DEFAULT_NAME, Pair.of(Concept.SKETCH_METADATA, "aggregate_stats"), Pair.of(Concept.SKETCH_FAMILY, "family"), Pair.of(Concept.SKETCH_SIZE, "size"), Pair.of(Concept.SKETCH_THETA, "theta"), Pair.of(Concept.SKETCH_ESTIMATED_RESULT, "isEstimate"), Pair.of(Concept.SKETCH_STANDARD_DEVIATIONS, "stddev"));
    IntStream.range(0, 1000).mapToObj(i -> RecordBox.get().add("field", i).getRecord()).forEach(countDistinct::consume);
    Assert.assertNotNull(countDistinct.getData());

    Clip clip = countDistinct.getResult();
    Map<String, Object> meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    Assert.assertTrue(meta.containsKey("aggregate_stats"));
    Map<String, Object> stats = (Map<String, Object>) meta.get("aggregate_stats");
    // family, size, theta, isEstimate and stddev
    Assert.assertEquals(stats.size(), 5);
    Assert.assertTrue((Boolean) stats.get("isEstimate"));
    Assert.assertEquals(stats.get("family").toString(), Family.ALPHA.getFamilyName());
    int size = (Integer) stats.get("size");
    // We inserted more than 512 unique entries
    Assert.assertTrue(size > 512);
    double theta = (Double) stats.get("theta");
    Assert.assertTrue(theta <= 1.0);
    Assert.assertTrue(stats.containsKey("stddev"));
    Map<String, Map<String, Double>> standardDeviations = (Map<String, Map<String, Double>>) stats.get("stddev");
    Assert.assertEquals(standardDeviations.size(), 3);

    Assert.assertEquals(clip.getRecords().size(), 1);
    BulletRecord actual = clip.getRecords().get(0);
    double roundedEstimate = (Long) actual.typedGet(DEFAULT_NAME).getValue();
    double upperOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_UB);
    double lowerOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_LB);
    double upperTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_UB);
    double lowerTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_LB);
    double upperThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_UB);
    double lowerThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_LB);
    // Should be ok since the bounds are relatively big
    Assert.assertTrue(roundedEstimate >= lowerOneSigma);
    Assert.assertTrue(roundedEstimate <= upperOneSigma);
    Assert.assertTrue(roundedEstimate >= lowerTwoSigma);
    Assert.assertTrue(roundedEstimate <= upperTwoSigma);
    Assert.assertTrue(roundedEstimate >= lowerThreeSigma);
    Assert.assertTrue(roundedEstimate <= upperThreeSigma);

    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) BulletRecord(com.yahoo.bullet.record.BulletRecord) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) Family(com.yahoo.sketches.Family) KMVSketch(com.yahoo.bullet.querying.aggregations.sketches.KMVSketch) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) CountDistinct(com.yahoo.bullet.query.aggregations.CountDistinct) Arrays.asList(java.util.Arrays.asList) BulletConfig(com.yahoo.bullet.common.BulletConfig) Map(java.util.Map) Collections(java.util.Collections) ResizeFactor(com.yahoo.sketches.ResizeFactor) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Map(java.util.Map) BulletConfig(com.yahoo.bullet.common.BulletConfig) Test(org.testng.annotations.Test)

Aggregations

BulletConfig (com.yahoo.bullet.common.BulletConfig)11 CountDistinct (com.yahoo.bullet.query.aggregations.CountDistinct)11 Test (org.testng.annotations.Test)9 TestHelpers.addMetadata (com.yahoo.bullet.TestHelpers.addMetadata)8 KMVSketch (com.yahoo.bullet.querying.aggregations.sketches.KMVSketch)8 BulletRecord (com.yahoo.bullet.record.BulletRecord)8 Clip (com.yahoo.bullet.result.Clip)8 Concept (com.yahoo.bullet.result.Meta.Concept)8 RecordBox (com.yahoo.bullet.result.RecordBox)8 Family (com.yahoo.sketches.Family)8 ResizeFactor (com.yahoo.sketches.ResizeFactor)8 Arrays.asList (java.util.Arrays.asList)8 Collections (java.util.Collections)8 List (java.util.List)8 Map (java.util.Map)8 IntStream (java.util.stream.IntStream)8 Pair (org.apache.commons.lang3.tuple.Pair)8 Assert (org.testng.Assert)8 BulletConfigTest (com.yahoo.bullet.common.BulletConfigTest)1 Projection (com.yahoo.bullet.query.Projection)1