Search in sources :

Example 1 with Aggregation

use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.

the class GroupByTest method testMetadata.

@Test
public void testMetadata() {
    Map<String, String> fields = singletonMap("fieldA", null);
    // Nominal Entries is 32. Aggregation size is also 32
    GroupBy groupBy = makeGroupBy(makeConfiguration(32), fields, 32, singletonList(makeGroupOperation(COUNT, null, null)), ALL_METADATA);
    // Generate 4 batches of 64 records with 0 - 63 in fieldA.
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i % 64).getRecord()).forEach(groupBy::consume);
    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);
    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 32);
    records.forEach(r -> Assert.assertTrue(Integer.valueOf(r.get("fieldA").toString()) < 64));
    Map<String, Object> meta = aggregate.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    Map<String, Object> stats = (Map<String, Object>) meta.get("aggregate_stats");
    Assert.assertEquals(stats.size(), 4);
    Assert.assertTrue((Boolean) stats.get("isEstimate"));
    double theta = (Double) stats.get("theta");
    Assert.assertTrue(theta <= 1.0);
    double groupEstimate = (Double) stats.get("uniquesApprox");
    Assert.assertTrue(stats.containsKey("stddev"));
    Map<String, Map<String, Double>> standardDeviations = (Map<String, Map<String, Double>>) stats.get("stddev");
    Assert.assertEquals(standardDeviations.size(), 3);
    double upperOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_UB);
    double lowerOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_LB);
    double upperTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_UB);
    double lowerTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_LB);
    double upperThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_UB);
    double lowerThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_LB);
    Assert.assertTrue(groupEstimate >= lowerOneSigma);
    Assert.assertTrue(groupEstimate <= upperOneSigma);
    Assert.assertTrue(groupEstimate >= lowerTwoSigma);
    Assert.assertTrue(groupEstimate <= upperTwoSigma);
    Assert.assertTrue(groupEstimate >= lowerThreeSigma);
    Assert.assertTrue(groupEstimate <= upperThreeSigma);
    Assert.assertTrue(groupEstimate <= upperThreeSigma);
    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) AVG(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.AVG) GROUP(com.yahoo.bullet.parsing.Aggregation.Type.GROUP) COUNT(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT) BulletError(com.yahoo.bullet.common.BulletError) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) AggregationUtils.makeGroupOperation(com.yahoo.bullet.parsing.AggregationUtils.makeGroupOperation) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) SUM(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.SUM) Collections.singletonList(java.util.Collections.singletonList) HashSet(java.util.HashSet) COUNT_FIELD(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT_FIELD) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) AggregationUtils.makeAttributes(com.yahoo.bullet.parsing.AggregationUtils.makeAttributes) Collections.singletonMap(java.util.Collections.singletonMap) TestHelpers.assertContains(com.yahoo.bullet.TestHelpers.assertContains) AggregationUtils.makeGroupFields(com.yahoo.bullet.parsing.AggregationUtils.makeGroupFields) Collections.emptyMap(java.util.Collections.emptyMap) BulletRecord(com.yahoo.bullet.record.BulletRecord) Aggregation(com.yahoo.bullet.parsing.Aggregation) KMVSketch(com.yahoo.bullet.aggregations.sketches.KMVSketch) Set(java.util.Set) GroupOperation(com.yahoo.bullet.aggregations.grouping.GroupOperation) List(java.util.List) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) BulletConfig(com.yahoo.bullet.common.BulletConfig) Optional(java.util.Optional) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Map(java.util.Map) Collections.singletonMap(java.util.Collections.singletonMap) Collections.emptyMap(java.util.Collections.emptyMap) Test(org.testng.annotations.Test)

Example 2 with Aggregation

use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.

the class GroupByTest method testMoreGroupsThanNominalEntries.

@Test
public void testMoreGroupsThanNominalEntries() {
    Map<String, String> fields = singletonMap("fieldA", "A");
    // Nominal Entries is 32. Aggregation size is also 32
    GroupBy groupBy = makeGroupBy(makeConfiguration(32), fields, 32, singletonList(makeGroupOperation(COUNT, null, null)), ALL_METADATA);
    // Generate 4 batches of 64 records with 0 - 63 in fieldA.
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i % 64).getRecord()).forEach(groupBy::consume);
    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);
    Map<String, Object> meta = aggregate.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 32);
    Set<String> groups = new HashSet<>();
    for (BulletRecord record : records) {
        groups.add((String) record.get("A"));
        Assert.assertEquals(record.get(COUNT.getName()), 4L);
    }
    Assert.assertEquals(groups.size(), 32);
    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) AVG(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.AVG) GROUP(com.yahoo.bullet.parsing.Aggregation.Type.GROUP) COUNT(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT) BulletError(com.yahoo.bullet.common.BulletError) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) AggregationUtils.makeGroupOperation(com.yahoo.bullet.parsing.AggregationUtils.makeGroupOperation) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) SUM(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.SUM) Collections.singletonList(java.util.Collections.singletonList) HashSet(java.util.HashSet) COUNT_FIELD(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT_FIELD) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) AggregationUtils.makeAttributes(com.yahoo.bullet.parsing.AggregationUtils.makeAttributes) Collections.singletonMap(java.util.Collections.singletonMap) TestHelpers.assertContains(com.yahoo.bullet.TestHelpers.assertContains) AggregationUtils.makeGroupFields(com.yahoo.bullet.parsing.AggregationUtils.makeGroupFields) Collections.emptyMap(java.util.Collections.emptyMap) BulletRecord(com.yahoo.bullet.record.BulletRecord) Aggregation(com.yahoo.bullet.parsing.Aggregation) KMVSketch(com.yahoo.bullet.aggregations.sketches.KMVSketch) Set(java.util.Set) GroupOperation(com.yahoo.bullet.aggregations.grouping.GroupOperation) List(java.util.List) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) BulletConfig(com.yahoo.bullet.common.BulletConfig) Optional(java.util.Optional) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 3 with Aggregation

use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.

the class DistributionTest method testInitialize.

@Test
public void testInitialize() {
    Optional<List<BulletError>> optionalErrors;
    List<BulletError> errors;
    Aggregation aggregation = new Aggregation();
    aggregation.setSize(20);
    Distribution distribution = new Distribution(aggregation, new BulletConfig());
    optionalErrors = distribution.initialize();
    Assert.assertTrue(optionalErrors.isPresent());
    errors = optionalErrors.get();
    Assert.assertEquals(errors.size(), 1);
    Assert.assertEquals(errors.get(0), Distribution.REQUIRES_ONE_FIELD_ERROR);
    aggregation.setFields(Collections.singletonMap("foo", "bar"));
    distribution = new Distribution(aggregation, new BulletConfig());
    optionalErrors = distribution.initialize();
    Assert.assertTrue(optionalErrors.isPresent());
    errors = optionalErrors.get();
    Assert.assertEquals(errors.size(), 1);
    Assert.assertEquals(errors.get(0), Distribution.REQUIRES_TYPE_ERROR);
    aggregation.setAttributes(Collections.singletonMap(Distribution.TYPE, "foo"));
    distribution = new Distribution(aggregation, new BulletConfig());
    optionalErrors = distribution.initialize();
    Assert.assertTrue(optionalErrors.isPresent());
    errors = optionalErrors.get();
    Assert.assertEquals(errors.size(), 1);
    Assert.assertEquals(errors.get(0), Distribution.REQUIRES_TYPE_ERROR);
    Map<String, Object> attributes = new HashMap<>();
    attributes.put(Distribution.TYPE, Distribution.Type.CDF.getName());
    attributes.put(Distribution.NUMBER_OF_POINTS, 10L);
    aggregation.setAttributes(attributes);
    distribution = new Distribution(aggregation, new BulletConfig());
    Assert.assertFalse(distribution.initialize().isPresent());
}
Also used : Aggregation(com.yahoo.bullet.parsing.Aggregation) HashMap(java.util.HashMap) Arrays.asList(java.util.Arrays.asList) List(java.util.List) BulletError(com.yahoo.bullet.common.BulletError) BulletConfig(com.yahoo.bullet.common.BulletConfig) Test(org.testng.annotations.Test)

Example 4 with Aggregation

use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.

the class DistributionTest method testProvidedPointsInitialization.

@Test
public void testProvidedPointsInitialization() {
    Aggregation aggregation = new Aggregation();
    aggregation.setSize(20);
    aggregation.setFields(Collections.singletonMap("foo", "bar"));
    Distribution distribution = new Distribution(aggregation, new BulletConfig());
    Optional<List<BulletError>> optionalErrors;
    List<BulletError> errors;
    aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, asList(0.4, 0.03, 0.99, 0.5, 14.0)));
    optionalErrors = distribution.initialize();
    Assert.assertTrue(optionalErrors.isPresent());
    errors = optionalErrors.get();
    Assert.assertEquals(errors.size(), 2);
    Assert.assertEquals(errors.get(0), Distribution.REQUIRES_POINTS_ERROR);
    Assert.assertEquals(errors.get(1), Distribution.REQUIRES_POINTS_PROPER_RANGE);
    aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, null));
    optionalErrors = distribution.initialize();
    Assert.assertTrue(optionalErrors.isPresent());
    errors = optionalErrors.get();
    Assert.assertEquals(errors.size(), 2);
    Assert.assertEquals(errors.get(0), Distribution.REQUIRES_POINTS_ERROR);
    Assert.assertEquals(errors.get(1), Distribution.REQUIRES_POINTS_PROPER_RANGE);
    aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, Collections.emptyList()));
    optionalErrors = distribution.initialize();
    Assert.assertTrue(optionalErrors.isPresent());
    errors = optionalErrors.get();
    Assert.assertEquals(errors.size(), 2);
    Assert.assertEquals(errors.get(0), Distribution.REQUIRES_POINTS_ERROR);
    Assert.assertEquals(errors.get(1), Distribution.REQUIRES_POINTS_PROPER_RANGE);
    aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, Collections.singletonList(2.0)));
    optionalErrors = distribution.initialize();
    Assert.assertTrue(optionalErrors.isPresent());
    errors = optionalErrors.get();
    Assert.assertEquals(errors.size(), 2);
    Assert.assertEquals(errors.get(0), Distribution.REQUIRES_POINTS_ERROR);
    Assert.assertEquals(errors.get(1), Distribution.REQUIRES_POINTS_PROPER_RANGE);
    aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, Collections.singletonList(1.0)));
    optionalErrors = distribution.initialize();
    Assert.assertFalse(distribution.initialize().isPresent());
    aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, asList(0.4, 0.03, 0.99, 0.5, 0.35)));
    optionalErrors = distribution.initialize();
    Assert.assertFalse(optionalErrors.isPresent());
    aggregation.setAttributes(makeAttributes(Distribution.Type.PMF, Collections.singletonList(0.4)));
    optionalErrors = distribution.initialize();
    Assert.assertFalse(optionalErrors.isPresent());
    aggregation.setAttributes(makeAttributes(Distribution.Type.PMF, asList(0.4, 0.03, 0.99, 0.5, 14.0)));
    optionalErrors = distribution.initialize();
    Assert.assertFalse(optionalErrors.isPresent());
}
Also used : Aggregation(com.yahoo.bullet.parsing.Aggregation) Arrays.asList(java.util.Arrays.asList) List(java.util.List) BulletError(com.yahoo.bullet.common.BulletError) BulletConfig(com.yahoo.bullet.common.BulletConfig) Test(org.testng.annotations.Test)

Example 5 with Aggregation

use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.

the class DistributionTest method testNegativeSize.

@Test
public void testNegativeSize() {
    // MAX_POINTS is configured to -1 and we will use the min BulletConfig.DEFAULT_DISTRIBUTION_AGGREGATION_MAX_POINTS
    // and aggregation size, which is 1
    Distribution distribution = makeDistribution(makeConfiguration(-1, 128), makeAttributes(Distribution.Type.PMF, 10L), "field", 1, ALL_METADATA);
    IntStream.range(0, 100).mapToDouble(i -> i).mapToObj(d -> RecordBox.get().add("field", d).getRecord()).forEach(distribution::consume);
    Clip result = distribution.getResult();
    Map<String, Object> metadata = (Map<String, Object>) result.getMeta().asMap().get("meta");
    Assert.assertEquals(metadata.size(), 7);
    Assert.assertFalse((Boolean) metadata.get("isEst"));
    List<BulletRecord> records = result.getRecords();
    Assert.assertEquals(records.size(), 2);
    BulletRecord expectedA = RecordBox.get().add(RANGE_FIELD, NEGATIVE_INFINITY_START + SEPARATOR + 0.0 + END_EXCLUSIVE).add(COUNT_FIELD, 0.0).add(PROBABILITY_FIELD, 0.0).getRecord();
    BulletRecord expectedB = RecordBox.get().add(RANGE_FIELD, START_INCLUSIVE + 0.0 + SEPARATOR + POSITIVE_INFINITY_END).add(COUNT_FIELD, 100.0).add(PROBABILITY_FIELD, 1.0).getRecord();
    Assert.assertEquals(records.get(0), expectedA);
    Assert.assertEquals(records.get(1), expectedB);
    Assert.assertEquals(distribution.getRecords(), result.getRecords());
    Assert.assertEquals(distribution.getMetadata().asMap(), result.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) BulletError(com.yahoo.bullet.common.BulletError) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) HashMap(java.util.HashMap) RecordBox(com.yahoo.bullet.result.RecordBox) PROBABILITY_FIELD(com.yahoo.bullet.aggregations.sketches.QuantileSketch.PROBABILITY_FIELD) Clip(com.yahoo.bullet.result.Clip) Family(com.yahoo.sketches.Family) SEPARATOR(com.yahoo.bullet.aggregations.sketches.QuantileSketch.SEPARATOR) HashSet(java.util.HashSet) VALUE_FIELD(com.yahoo.bullet.aggregations.sketches.QuantileSketch.VALUE_FIELD) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) AggregationUtils.makeAttributes(com.yahoo.bullet.parsing.AggregationUtils.makeAttributes) QUANTILE_FIELD(com.yahoo.bullet.aggregations.sketches.QuantileSketch.QUANTILE_FIELD) BulletRecord(com.yahoo.bullet.record.BulletRecord) DoublesSketch(com.yahoo.sketches.quantiles.DoublesSketch) Aggregation(com.yahoo.bullet.parsing.Aggregation) COUNT_FIELD(com.yahoo.bullet.aggregations.sketches.QuantileSketch.COUNT_FIELD) POSITIVE_INFINITY_END(com.yahoo.bullet.aggregations.sketches.QuantileSketch.POSITIVE_INFINITY_END) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) TestHelpers.assertApproxEquals(com.yahoo.bullet.TestHelpers.assertApproxEquals) START_INCLUSIVE(com.yahoo.bullet.aggregations.sketches.QuantileSketch.START_INCLUSIVE) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) NEGATIVE_INFINITY_START(com.yahoo.bullet.aggregations.sketches.QuantileSketch.NEGATIVE_INFINITY_START) BulletConfig(com.yahoo.bullet.common.BulletConfig) Optional(java.util.Optional) RANGE_FIELD(com.yahoo.bullet.aggregations.sketches.QuantileSketch.RANGE_FIELD) Collections(java.util.Collections) END_EXCLUSIVE(com.yahoo.bullet.aggregations.sketches.QuantileSketch.END_EXCLUSIVE) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.testng.annotations.Test)

Aggregations

Aggregation (com.yahoo.bullet.parsing.Aggregation)39 Test (org.testng.annotations.Test)31 BulletConfig (com.yahoo.bullet.common.BulletConfig)30 Query (com.yahoo.bullet.parsing.Query)12 List (java.util.List)11 BulletError (com.yahoo.bullet.common.BulletError)10 Arrays.asList (java.util.Arrays.asList)10 QueryUtils.makeAggregationQuery (com.yahoo.bullet.parsing.QueryUtils.makeAggregationQuery)7 QueryUtils.makeProjectionFilterQuery (com.yahoo.bullet.parsing.QueryUtils.makeProjectionFilterQuery)5 QueryUtils.makeRawFullQuery (com.yahoo.bullet.parsing.QueryUtils.makeRawFullQuery)5 Window (com.yahoo.bullet.parsing.Window)5 Map (java.util.Map)4 TestHelpers.addMetadata (com.yahoo.bullet.TestHelpers.addMetadata)3 AggregationUtils.makeAttributes (com.yahoo.bullet.parsing.AggregationUtils.makeAttributes)3 BulletRecord (com.yahoo.bullet.record.BulletRecord)3 Clip (com.yahoo.bullet.result.Clip)3 Concept (com.yahoo.bullet.result.Meta.Concept)3 RecordBox (com.yahoo.bullet.result.RecordBox)3 Collections.singletonList (java.util.Collections.singletonList)3 Collections.singletonMap (java.util.Collections.singletonMap)3