Search in sources :

Example 11 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class ThetaSketchTest method testUpdatingForApproximateResult.

/**
 * Pushes 1024 distinct string keys into a ThetaSketch constructed with 512 as its size
 * argument and checks the approximate result: the metadata must flag the result as an
 * estimate, the single record must carry a count within 50% of 1024, and getResult must
 * agree with what getRecords and getMetadata return on their own.
 */
@Test
public void testUpdatingForApproximateResult() {
    ThetaSketch sketch = new ThetaSketch(ResizeFactor.X4, Family.ALPHA, 1.0f, 512);
    for (int key = 0; key < 1024; key++) {
        sketch.update(String.valueOf(key));
    }

    // Request only the "estimated result" concept and have it reported under the name "isEst"
    Map<String, String> metaKeys = new HashMap<>();
    metaKeys.put(Concept.SKETCH_ESTIMATED_RESULT.getName(), "isEst");

    Clip result = sketch.getResult("meta", metaKeys);
    Map<String, Object> actualMeta = result.getMeta().asMap();
    Assert.assertTrue(actualMeta.containsKey("meta"));

    Map<String, Object> sketchStats = (Map<String, Object>) actualMeta.get("meta");
    Assert.assertEquals(sketchStats.size(), 1);
    Assert.assertTrue((Boolean) sketchStats.get("isEst"));

    Assert.assertEquals(result.getRecords().size(), 1);
    Object count = result.getRecords().get(0).get(ThetaSketch.COUNT_FIELD);
    double estimate = (Double) count;
    // We better be at least 50% accurate with 512 entries and 1024 uniques
    Assert.assertTrue(estimate > 512);
    Assert.assertTrue(estimate < 1536);

    // The combined result must match the individually fetched records and metadata
    Assert.assertEquals(sketch.getRecords(), result.getRecords());
    Assert.assertEquals(sketch.getMetadata("meta", metaKeys).asMap(), actualMeta);
}
Also used : Clip(com.yahoo.bullet.result.Clip) HashMap(java.util.HashMap) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.testng.annotations.Test)

Example 12 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class GroupByTest method testResetting.

/**
 * Checks that a GroupBy can be reused after reset(). Two groups are consumed and verified,
 * the aggregation is reset, and three groups are consumed afterwards. The second result must
 * only reflect what was consumed after the reset (the "foo"/"bar" group is back at 30, not 60).
 */
@Test
public void testResetting() {
    List<String> groupFields = asList("fieldA", "fieldB");
    GroupBy groupBy = makeGroupBy(groupFields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));

    // First round: 30 x (foo, bar, price 3) and 10 x ("null", bar, price 1)
    BulletRecord recordA = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordB = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add("price", 1).getRecord();
    IntStream.range(0, 30).forEach(i -> groupBy.consume(recordA));
    IntStream.range(0, 10).forEach(i -> groupBy.consume(recordB));

    BulletRecord fooBarBefore = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add(COUNT.getName(), 30L).add("priceSum", 90.0).getRecord();
    BulletRecord nullBarBefore = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();

    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);
    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 2);
    assertContains(records, fooBarBefore);
    assertContains(records, nullBarBefore);
    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());

    groupBy.reset();

    // Second round: recordB again, a fresh (foo, bar) record and a record with both group fields null
    BulletRecord recordC = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordD = RecordBox.get().addNull("fieldA").addNull("fieldB").add("price", 10).getRecord();
    IntStream.range(0, 10).forEach(i -> groupBy.consume(recordB));
    IntStream.range(0, 30).forEach(i -> groupBy.consume(recordC));
    IntStream.range(0, 10).forEach(i -> groupBy.consume(recordD));

    aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);
    records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 3);

    BulletRecord nullBarAfter = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();
    BulletRecord fooBarAfter = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add(COUNT.getName(), 30L).add("priceSum", 90.0).getRecord();
    // The record with null group fields is expected to show up with the string "null" in both fields
    BulletRecord nullNullAfter = RecordBox.get().add("fieldA", "null").add("fieldB", "null").add(COUNT.getName(), 10L).add("priceSum", 100.0).getRecord();
    assertContains(records, nullBarAfter);
    assertContains(records, fooBarAfter);
    assertContains(records, nullNullAfter);
    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) AVG(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.AVG) GROUP(com.yahoo.bullet.parsing.Aggregation.Type.GROUP) COUNT(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT) BulletError(com.yahoo.bullet.common.BulletError) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) AggregationUtils.makeGroupOperation(com.yahoo.bullet.parsing.AggregationUtils.makeGroupOperation) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) SUM(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.SUM) Collections.singletonList(java.util.Collections.singletonList) HashSet(java.util.HashSet) COUNT_FIELD(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT_FIELD) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) AggregationUtils.makeAttributes(com.yahoo.bullet.parsing.AggregationUtils.makeAttributes) Collections.singletonMap(java.util.Collections.singletonMap) TestHelpers.assertContains(com.yahoo.bullet.TestHelpers.assertContains) AggregationUtils.makeGroupFields(com.yahoo.bullet.parsing.AggregationUtils.makeGroupFields) Collections.emptyMap(java.util.Collections.emptyMap) BulletRecord(com.yahoo.bullet.record.BulletRecord) Aggregation(com.yahoo.bullet.parsing.Aggregation) KMVSketch(com.yahoo.bullet.aggregations.sketches.KMVSketch) Set(java.util.Set) GroupOperation(com.yahoo.bullet.aggregations.grouping.GroupOperation) List(java.util.List) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) BulletConfig(com.yahoo.bullet.common.BulletConfig) Optional(java.util.Optional) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Test(org.testng.annotations.Test)

Example 13 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class GroupByTest method testMetadata.

/**
 * Consumes 256 records spread over 64 distinct values of "fieldA" into a GroupBy configured
 * with 32 (see the comment on makeGroupBy below) and verifies the metadata that comes back:
 * a single "aggregate_stats" entry with four statistics, the estimate flag set, theta at most
 * 1.0, and the uniques estimate lying inside each of the one, two and three standard
 * deviation bounds. Also checks that getResult agrees with getRecords and getMetadata.
 */
@Test
public void testMetadata() {
    // NOTE(review): the null map value presumably means "no alias" since the results are read
    // back under "fieldA" - confirm against makeGroupBy
    Map<String, String> fields = singletonMap("fieldA", null);
    // Nominal Entries is 32. Aggregation size is also 32
    GroupBy groupBy = makeGroupBy(makeConfiguration(32), fields, 32, singletonList(makeGroupOperation(COUNT, null, null)), ALL_METADATA);
    // Generate 4 batches of 64 records with 0 - 63 in fieldA.
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i % 64).getRecord()).forEach(groupBy::consume);
    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);
    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 32);
    // parseInt yields a primitive directly; no need to box through Integer.valueOf for a comparison
    records.forEach(r -> Assert.assertTrue(Integer.parseInt(r.get("fieldA").toString()) < 64));
    Map<String, Object> meta = aggregate.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    Map<String, Object> stats = (Map<String, Object>) meta.get("aggregate_stats");
    Assert.assertEquals(stats.size(), 4);
    Assert.assertTrue((Boolean) stats.get("isEstimate"));
    double theta = (Double) stats.get("theta");
    Assert.assertTrue(theta <= 1.0);
    double groupEstimate = (Double) stats.get("uniquesApprox");
    Assert.assertTrue(stats.containsKey("stddev"));
    Map<String, Map<String, Double>> standardDeviations = (Map<String, Map<String, Double>>) stats.get("stddev");
    Assert.assertEquals(standardDeviations.size(), 3);
    double upperOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_UB);
    double lowerOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_LB);
    double upperTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_UB);
    double lowerTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_LB);
    double upperThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_UB);
    double lowerThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_LB);
    // The estimate has to sit inside every reported confidence interval
    Assert.assertTrue(groupEstimate >= lowerOneSigma);
    Assert.assertTrue(groupEstimate <= upperOneSigma);
    Assert.assertTrue(groupEstimate >= lowerTwoSigma);
    Assert.assertTrue(groupEstimate <= upperTwoSigma);
    Assert.assertTrue(groupEstimate >= lowerThreeSigma);
    Assert.assertTrue(groupEstimate <= upperThreeSigma);
    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) AVG(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.AVG) GROUP(com.yahoo.bullet.parsing.Aggregation.Type.GROUP) COUNT(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT) BulletError(com.yahoo.bullet.common.BulletError) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) AggregationUtils.makeGroupOperation(com.yahoo.bullet.parsing.AggregationUtils.makeGroupOperation) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) SUM(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.SUM) Collections.singletonList(java.util.Collections.singletonList) HashSet(java.util.HashSet) COUNT_FIELD(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT_FIELD) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) AggregationUtils.makeAttributes(com.yahoo.bullet.parsing.AggregationUtils.makeAttributes) Collections.singletonMap(java.util.Collections.singletonMap) TestHelpers.assertContains(com.yahoo.bullet.TestHelpers.assertContains) AggregationUtils.makeGroupFields(com.yahoo.bullet.parsing.AggregationUtils.makeGroupFields) Collections.emptyMap(java.util.Collections.emptyMap) BulletRecord(com.yahoo.bullet.record.BulletRecord) Aggregation(com.yahoo.bullet.parsing.Aggregation) KMVSketch(com.yahoo.bullet.aggregations.sketches.KMVSketch) Set(java.util.Set) GroupOperation(com.yahoo.bullet.aggregations.grouping.GroupOperation) List(java.util.List) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) BulletConfig(com.yahoo.bullet.common.BulletConfig) Optional(java.util.Optional) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Map(java.util.Map) Collections.singletonMap(java.util.Collections.singletonMap) Collections.emptyMap(java.util.Collections.emptyMap) Test(org.testng.annotations.Test)

Example 14 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class GroupByTest method testMoreGroupsThanNominalEntries.

/**
 * Consumes 256 records covering 64 distinct values of "fieldA" (read back under the name "A")
 * into a GroupBy configured with 32, and checks that exactly 32 distinct groups are returned,
 * each with a count of 4, alongside a single metadata entry.
 */
@Test
public void testMoreGroupsThanNominalEntries() {
    Map<String, String> fields = singletonMap("fieldA", "A");
    // Nominal Entries is 32. Aggregation size is also 32
    GroupBy groupBy = makeGroupBy(makeConfiguration(32), fields, 32, singletonList(makeGroupOperation(COUNT, null, null)), ALL_METADATA);

    // Generate 4 batches of 64 records with 0 - 63 in fieldA.
    IntStream.range(0, 256)
             .mapToObj(i -> RecordBox.get().add("fieldA", i % 64).getRecord())
             .forEach(groupBy::consume);

    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);

    Map<String, Object> meta = aggregate.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);

    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 32);

    // Every returned group must be distinct and must have seen each of its 4 occurrences
    Set<String> distinctGroups = new HashSet<>();
    records.forEach(record -> {
        distinctGroups.add((String) record.get("A"));
        Assert.assertEquals(record.get(COUNT.getName()), 4L);
    });
    Assert.assertEquals(distinctGroups.size(), 32);

    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) AVG(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.AVG) GROUP(com.yahoo.bullet.parsing.Aggregation.Type.GROUP) COUNT(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT) BulletError(com.yahoo.bullet.common.BulletError) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) AggregationUtils.makeGroupOperation(com.yahoo.bullet.parsing.AggregationUtils.makeGroupOperation) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) SUM(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.SUM) Collections.singletonList(java.util.Collections.singletonList) HashSet(java.util.HashSet) COUNT_FIELD(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT_FIELD) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) AggregationUtils.makeAttributes(com.yahoo.bullet.parsing.AggregationUtils.makeAttributes) Collections.singletonMap(java.util.Collections.singletonMap) TestHelpers.assertContains(com.yahoo.bullet.TestHelpers.assertContains) AggregationUtils.makeGroupFields(com.yahoo.bullet.parsing.AggregationUtils.makeGroupFields) Collections.emptyMap(java.util.Collections.emptyMap) BulletRecord(com.yahoo.bullet.record.BulletRecord) Aggregation(com.yahoo.bullet.parsing.Aggregation) KMVSketch(com.yahoo.bullet.aggregations.sketches.KMVSketch) Set(java.util.Set) GroupOperation(com.yahoo.bullet.aggregations.grouping.GroupOperation) List(java.util.List) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) BulletConfig(com.yahoo.bullet.common.BulletConfig) Optional(java.util.Optional) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 15 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class GroupByTest method testCombiningAndConsuming.

/**
 * Checks that a GroupBy merges serialized data from another GroupBy with records it consumed
 * itself. One instance consumes two groups and is serialized with getData(); a second instance
 * consumes two groups of its own (one overlapping) and then combines the serialized data. The
 * overlapping "foo"/"bar" group must end up with the summed count and price.
 */
@Test
public void testCombiningAndConsuming() {
    List<String> groupFields = asList("fieldA", "fieldB");

    // First instance: only used to produce the serialized data
    GroupBy producer = makeGroupBy(groupFields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord recordA = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordB = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add("price", 1).getRecord();
    IntStream.range(0, 30).forEach(i -> producer.consume(recordA));
    IntStream.range(0, 10).forEach(i -> producer.consume(recordB));
    byte[] serialized = producer.getData();

    // Remake it
    GroupBy groupBy = makeGroupBy(groupFields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord recordC = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordD = RecordBox.get().addNull("fieldA").addNull("fieldB").add("price", 10).getRecord();
    IntStream.range(0, 30).forEach(i -> groupBy.consume(recordC));
    IntStream.range(0, 10).forEach(i -> groupBy.consume(recordD));
    groupBy.combine(serialized);

    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);
    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 3);

    // foo/bar was seen 30 times on each side; the other two groups come from one side each
    BulletRecord fooBar = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add(COUNT.getName(), 60L).add("priceSum", 180.0).getRecord();
    BulletRecord nullBar = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();
    BulletRecord nullNull = RecordBox.get().add("fieldA", "null").add("fieldB", "null").add(COUNT.getName(), 10L).add("priceSum", 100.0).getRecord();
    assertContains(records, fooBar);
    assertContains(records, nullBar);
    assertContains(records, nullNull);

    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) AVG(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.AVG) GROUP(com.yahoo.bullet.parsing.Aggregation.Type.GROUP) COUNT(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT) BulletError(com.yahoo.bullet.common.BulletError) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) AggregationUtils.makeGroupOperation(com.yahoo.bullet.parsing.AggregationUtils.makeGroupOperation) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) SUM(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.SUM) Collections.singletonList(java.util.Collections.singletonList) HashSet(java.util.HashSet) COUNT_FIELD(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT_FIELD) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) AggregationUtils.makeAttributes(com.yahoo.bullet.parsing.AggregationUtils.makeAttributes) Collections.singletonMap(java.util.Collections.singletonMap) TestHelpers.assertContains(com.yahoo.bullet.TestHelpers.assertContains) AggregationUtils.makeGroupFields(com.yahoo.bullet.parsing.AggregationUtils.makeGroupFields) Collections.emptyMap(java.util.Collections.emptyMap) BulletRecord(com.yahoo.bullet.record.BulletRecord) Aggregation(com.yahoo.bullet.parsing.Aggregation) KMVSketch(com.yahoo.bullet.aggregations.sketches.KMVSketch) Set(java.util.Set) GroupOperation(com.yahoo.bullet.aggregations.grouping.GroupOperation) List(java.util.List) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) BulletConfig(com.yahoo.bullet.common.BulletConfig) Optional(java.util.Optional) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Test(org.testng.annotations.Test)

Aggregations

Clip (com.yahoo.bullet.result.Clip)66 Test (org.testng.annotations.Test)55 BulletRecord (com.yahoo.bullet.record.BulletRecord)48 Map (java.util.Map)43 List (java.util.List)33 IntStream (java.util.stream.IntStream)33 Assert (org.testng.Assert)33 BulletConfig (com.yahoo.bullet.common.BulletConfig)32 HashMap (java.util.HashMap)30 BulletError (com.yahoo.bullet.common.BulletError)29 TestHelpers.addMetadata (com.yahoo.bullet.TestHelpers.addMetadata)28 Aggregation (com.yahoo.bullet.parsing.Aggregation)28 AggregationUtils.makeAttributes (com.yahoo.bullet.parsing.AggregationUtils.makeAttributes)28 Concept (com.yahoo.bullet.result.Meta.Concept)28 RecordBox (com.yahoo.bullet.result.RecordBox)28 Family (com.yahoo.sketches.Family)28 Arrays.asList (java.util.Arrays.asList)28 Optional (java.util.Optional)28 Pair (org.apache.commons.lang3.tuple.Pair)28 HashSet (java.util.HashSet)23