use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.
the class GroupByTest method testMetadata.
/**
 * Checks the metadata emitted by a {@link GroupBy} that is given 64 distinct groups while configured with
 * 32 nominal entries and an aggregation size of 32. The result must contain 32 records, be flagged as an
 * estimate, and report an approximate group count that lies within each of the one, two and three standard
 * deviation bounds found in the metadata.
 */
@Test
public void testMetadata() {
    Map<String, String> fields = singletonMap("fieldA", null);
    // Nominal Entries is 32. Aggregation size is also 32
    GroupBy groupBy = makeGroupBy(makeConfiguration(32), fields, 32,
                                  singletonList(makeGroupOperation(COUNT, null, null)), ALL_METADATA);
    // Generate 4 batches of 64 records with 0 - 63 in fieldA.
    IntStream.range(0, 256).mapToObj(i -> RecordBox.get().add("fieldA", i % 64).getRecord()).forEach(groupBy::consume);

    Clip aggregate = groupBy.getResult();
    Assert.assertNotNull(aggregate);

    List<BulletRecord> records = aggregate.getRecords();
    Assert.assertEquals(records.size(), 32);
    // Every surviving group must be one of the generated values (0 - 63)
    records.forEach(r -> Assert.assertTrue(Integer.parseInt(r.get("fieldA").toString()) < 64));

    Map<String, Object> meta = aggregate.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);

    Map<String, Object> stats = (Map<String, Object>) meta.get("aggregate_stats");
    Assert.assertEquals(stats.size(), 4);
    Assert.assertTrue((Boolean) stats.get("isEstimate"));

    double theta = (Double) stats.get("theta");
    Assert.assertTrue(theta <= 1.0);

    double groupEstimate = (Double) stats.get("uniquesApprox");

    Assert.assertTrue(stats.containsKey("stddev"));
    Map<String, Map<String, Double>> standardDeviations = (Map<String, Map<String, Double>>) stats.get("stddev");
    Assert.assertEquals(standardDeviations.size(), 3);

    double upperOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_UB);
    double lowerOneSigma = standardDeviations.get(KMVSketch.META_STD_DEV_1).get(KMVSketch.META_STD_DEV_LB);
    double upperTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_UB);
    double lowerTwoSigma = standardDeviations.get(KMVSketch.META_STD_DEV_2).get(KMVSketch.META_STD_DEV_LB);
    double upperThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_UB);
    double lowerThreeSigma = standardDeviations.get(KMVSketch.META_STD_DEV_3).get(KMVSketch.META_STD_DEV_LB);

    // The estimate has to fall inside every reported [lower, upper] bound
    Assert.assertTrue(groupEstimate >= lowerOneSigma);
    Assert.assertTrue(groupEstimate <= upperOneSigma);
    Assert.assertTrue(groupEstimate >= lowerTwoSigma);
    Assert.assertTrue(groupEstimate <= upperTwoSigma);
    Assert.assertTrue(groupEstimate >= lowerThreeSigma);
    Assert.assertTrue(groupEstimate <= upperThreeSigma);

    // The individual accessors must agree with the combined result
    Assert.assertEquals(groupBy.getRecords(), aggregate.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), aggregate.getMeta().asMap());
}
use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.
the class GroupByTest method testMoreGroupsThanNominalEntries.
/**
 * Feeds 64 distinct values of fieldA (each seen 4 times) into a {@link GroupBy} configured with 32 nominal
 * entries and an aggregation size of 32, and expects 32 distinct groups back under the alias "A", each with
 * a count of 4.
 */
@Test
public void testMoreGroupsThanNominalEntries() {
    Map<String, String> groupFields = singletonMap("fieldA", "A");
    // Both the nominal entries and the aggregation size are 32
    GroupBy groupBy = makeGroupBy(makeConfiguration(32), groupFields, 32,
                                  singletonList(makeGroupOperation(COUNT, null, null)), ALL_METADATA);

    // 256 records in total: the values 0 - 63 in fieldA, cycled through 4 times
    for (int i = 0; i < 256; ++i) {
        groupBy.consume(RecordBox.get().add("fieldA", i % 64).getRecord());
    }

    Clip result = groupBy.getResult();
    Assert.assertNotNull(result);

    Map<String, Object> resultMeta = result.getMeta().asMap();
    Assert.assertEquals(resultMeta.size(), 1);

    List<BulletRecord> grouped = result.getRecords();
    Assert.assertEquals(grouped.size(), 32);

    // Collect the group names while checking the count of each group
    Set<String> seenGroups = new HashSet<>();
    grouped.forEach(record -> {
        seenGroups.add((String) record.get("A"));
        Assert.assertEquals(record.get(COUNT.getName()), 4L);
    });
    Assert.assertEquals(seenGroups.size(), 32);

    // The individual accessors must agree with the combined result
    Assert.assertEquals(groupBy.getRecords(), result.getRecords());
    Assert.assertEquals(groupBy.getMetadata().asMap(), result.getMeta().asMap());
}
use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.
the class DistributionTest method testInitialize.
/**
 * Walks {@link Distribution#initialize()} through four configurations of the same {@link Aggregation}:
 * no fields, a field but no type, a field with an unrecognized type, and finally a field with the CDF type
 * and a number of points. Only the last one is expected to initialize without errors.
 */
@Test
public void testInitialize() {
    Aggregation aggregation = new Aggregation();
    aggregation.setSize(20);

    // Only a size is set: a field is expected to be demanded
    Distribution distribution = new Distribution(aggregation, new BulletConfig());
    Optional<List<BulletError>> outcome = distribution.initialize();
    Assert.assertTrue(outcome.isPresent());
    List<BulletError> problems = outcome.get();
    Assert.assertEquals(problems.size(), 1);
    Assert.assertEquals(problems.get(0), Distribution.REQUIRES_ONE_FIELD_ERROR);

    // A field but no attributes: a type is expected to be demanded
    aggregation.setFields(Collections.singletonMap("foo", "bar"));
    distribution = new Distribution(aggregation, new BulletConfig());
    outcome = distribution.initialize();
    Assert.assertTrue(outcome.isPresent());
    problems = outcome.get();
    Assert.assertEquals(problems.size(), 1);
    Assert.assertEquals(problems.get(0), Distribution.REQUIRES_TYPE_ERROR);

    // A type value of "foo" is expected to produce the same type error
    aggregation.setAttributes(Collections.singletonMap(Distribution.TYPE, "foo"));
    distribution = new Distribution(aggregation, new BulletConfig());
    outcome = distribution.initialize();
    Assert.assertTrue(outcome.isPresent());
    problems = outcome.get();
    Assert.assertEquals(problems.size(), 1);
    Assert.assertEquals(problems.get(0), Distribution.REQUIRES_TYPE_ERROR);

    // The CDF type together with a number of points: no errors expected
    Map<String, Object> validAttributes = new HashMap<>();
    validAttributes.put(Distribution.TYPE, Distribution.Type.CDF.getName());
    validAttributes.put(Distribution.NUMBER_OF_POINTS, 10L);
    aggregation.setAttributes(validAttributes);
    distribution = new Distribution(aggregation, new BulletConfig());
    Assert.assertFalse(distribution.initialize().isPresent());
}
use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.
the class DistributionTest method testProvidedPointsInitialization.
@Test
public void testProvidedPointsInitialization() {
Aggregation aggregation = new Aggregation();
aggregation.setSize(20);
aggregation.setFields(Collections.singletonMap("foo", "bar"));
Distribution distribution = new Distribution(aggregation, new BulletConfig());
Optional<List<BulletError>> optionalErrors;
List<BulletError> errors;
aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, asList(0.4, 0.03, 0.99, 0.5, 14.0)));
optionalErrors = distribution.initialize();
Assert.assertTrue(optionalErrors.isPresent());
errors = optionalErrors.get();
Assert.assertEquals(errors.size(), 2);
Assert.assertEquals(errors.get(0), Distribution.REQUIRES_POINTS_ERROR);
Assert.assertEquals(errors.get(1), Distribution.REQUIRES_POINTS_PROPER_RANGE);
aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, null));
optionalErrors = distribution.initialize();
Assert.assertTrue(optionalErrors.isPresent());
errors = optionalErrors.get();
Assert.assertEquals(errors.size(), 2);
Assert.assertEquals(errors.get(0), Distribution.REQUIRES_POINTS_ERROR);
Assert.assertEquals(errors.get(1), Distribution.REQUIRES_POINTS_PROPER_RANGE);
aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, Collections.emptyList()));
optionalErrors = distribution.initialize();
Assert.assertTrue(optionalErrors.isPresent());
errors = optionalErrors.get();
Assert.assertEquals(errors.size(), 2);
Assert.assertEquals(errors.get(0), Distribution.REQUIRES_POINTS_ERROR);
Assert.assertEquals(errors.get(1), Distribution.REQUIRES_POINTS_PROPER_RANGE);
aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, Collections.singletonList(2.0)));
optionalErrors = distribution.initialize();
Assert.assertTrue(optionalErrors.isPresent());
errors = optionalErrors.get();
Assert.assertEquals(errors.size(), 2);
Assert.assertEquals(errors.get(0), Distribution.REQUIRES_POINTS_ERROR);
Assert.assertEquals(errors.get(1), Distribution.REQUIRES_POINTS_PROPER_RANGE);
aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, Collections.singletonList(1.0)));
optionalErrors = distribution.initialize();
Assert.assertFalse(distribution.initialize().isPresent());
aggregation.setAttributes(makeAttributes(Distribution.Type.QUANTILE, asList(0.4, 0.03, 0.99, 0.5, 0.35)));
optionalErrors = distribution.initialize();
Assert.assertFalse(optionalErrors.isPresent());
aggregation.setAttributes(makeAttributes(Distribution.Type.PMF, Collections.singletonList(0.4)));
optionalErrors = distribution.initialize();
Assert.assertFalse(optionalErrors.isPresent());
aggregation.setAttributes(makeAttributes(Distribution.Type.PMF, asList(0.4, 0.03, 0.99, 0.5, 14.0)));
optionalErrors = distribution.initialize();
Assert.assertFalse(optionalErrors.isPresent());
}
use of com.yahoo.bullet.parsing.Aggregation in project bullet-core by yahoo.
the class DistributionTest method testNegativeSize.
@Test
public void testNegativeSize() {
// MAX_POINTS is configured to -1 and we will use the min BulletConfig.DEFAULT_DISTRIBUTION_AGGREGATION_MAX_POINTS
// and aggregation size, which is 1
Distribution distribution = makeDistribution(makeConfiguration(-1, 128), makeAttributes(Distribution.Type.PMF, 10L), "field", 1, ALL_METADATA);
IntStream.range(0, 100).mapToDouble(i -> i).mapToObj(d -> RecordBox.get().add("field", d).getRecord()).forEach(distribution::consume);
Clip result = distribution.getResult();
Map<String, Object> metadata = (Map<String, Object>) result.getMeta().asMap().get("meta");
Assert.assertEquals(metadata.size(), 7);
Assert.assertFalse((Boolean) metadata.get("isEst"));
List<BulletRecord> records = result.getRecords();
Assert.assertEquals(records.size(), 2);
BulletRecord expectedA = RecordBox.get().add(RANGE_FIELD, NEGATIVE_INFINITY_START + SEPARATOR + 0.0 + END_EXCLUSIVE).add(COUNT_FIELD, 0.0).add(PROBABILITY_FIELD, 0.0).getRecord();
BulletRecord expectedB = RecordBox.get().add(RANGE_FIELD, START_INCLUSIVE + 0.0 + SEPARATOR + POSITIVE_INFINITY_END).add(COUNT_FIELD, 100.0).add(PROBABILITY_FIELD, 1.0).getRecord();
Assert.assertEquals(records.get(0), expectedA);
Assert.assertEquals(records.get(1), expectedB);
Assert.assertEquals(distribution.getRecords(), result.getRecords());
Assert.assertEquals(distribution.getMetadata().asMap(), result.getMeta().asMap());
}
Aggregations