use of com.yahoo.bullet.record.BulletRecord in project bullet-core by yahoo.
the class CountDistinctTest method testSingleFieldExactCountDistinctWithDuplicates.
/**
 * Consumes the integers 0..999 twice on a single-field count distinct and checks that the
 * duplicates are not counted: the only result record must report exactly 1000.0 under the
 * default result name.
 */
@Test
public void testSingleFieldExactCountDistinctWithDuplicates() {
    CountDistinct countDistinct = makeCountDistinct(asList("field"));

    // Two identical passes over the same values; the second pass contributes only duplicates.
    for (int pass = 0; pass < 2; pass++) {
        IntStream.range(0, 1000)
                 .mapToObj(i -> RecordBox.get().add("field", i).getRecord())
                 .forEach(countDistinct::consume);
    }

    Assert.assertNotNull(countDistinct.getData());

    List<BulletRecord> aggregate = countDistinct.getResult().getRecords();
    Assert.assertEquals(aggregate.size(), 1);

    BulletRecord actual = aggregate.get(0);
    BulletRecord expected = RecordBox.get().add(CountDistinct.DEFAULT_NEW_NAME, 1000.0).getRecord();
    Assert.assertEquals(actual, expected);

    // The piecewise accessors must agree with what getResult() reports.
    Assert.assertEquals(countDistinct.getRecords(), aggregate);
    // Compare against the result's metadata; comparing getMetadata() with itself can never fail.
    Assert.assertEquals(countDistinct.getMetadata().asMap(), countDistinct.getResult().getMeta().asMap());
}
use of com.yahoo.bullet.record.BulletRecord in project bullet-core by yahoo.
the class CountDistinctTest method testMultipleFieldsCountDistinctAmbiguity.
/**
 * Checks the known ambiguity of a multi-field count distinct when the field values themselves
 * are made of the composite field separator: two records with different field values look the
 * same to the sketch, so three consumed records yield a count of 2.0 instead of 3.0.
 */
@Test
public void testMultipleFieldsCountDistinctAmbiguity() {
    BulletConfig config = makeConfiguration(4, 512);
    String s = BulletConfig.DEFAULT_AGGREGATION_COMPOSITE_FIELD_SEPARATOR;
    CountDistinct countDistinct = makeCountDistinct(config, makeAttributes("myCount"), asList("fieldA", "fieldB"));

    BulletRecord first = RecordBox.get().add("fieldA", s).add("fieldB", s + s).getRecord();
    BulletRecord second = RecordBox.get().add("fieldA", s + s).add("fieldB", s).getRecord();
    // first and second will look the same to the Sketch. third will not
    BulletRecord third = RecordBox.get().add("fieldA", s + s).add("fieldB", s + s).getRecord();

    countDistinct.consume(first);
    countDistinct.consume(second);
    countDistinct.consume(third);

    Clip clip = countDistinct.getResult();
    Assert.assertEquals(clip.getRecords().size(), 1);

    BulletRecord actual = clip.getRecords().get(0);
    BulletRecord expected = RecordBox.get().add("myCount", 2.0).getRecord();
    Assert.assertEquals(actual, expected);

    // The piecewise accessors must agree with what getResult() reports.
    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail.
    Assert.assertEquals(countDistinct.getMetadata().asMap(), clip.getMeta().asMap());
}
use of com.yahoo.bullet.record.BulletRecord in project bullet-core by yahoo.
the class CountDistinctTest method testCombiningExact.
/**
 * Combines the serialized data of two count distincts with overlapping inputs (0..511 and
 * 256..767) into a third one and checks that the union reports 768.0 distinct values, that the
 * metadata is published under the configured "stats" key, and that the result is flagged as not
 * estimated under the configured "est" key.
 */
@Test
public void testCombiningExact() {
    BulletConfig config = makeConfiguration(4, 1024);

    CountDistinct countDistinct = makeCountDistinct(config, makeAttributes("myCount"), asList("field"));
    IntStream.range(0, 512)
             .mapToObj(i -> RecordBox.get().add("field", i).getRecord())
             .forEach(countDistinct::consume);
    byte[] firstAggregate = countDistinct.getData();

    // Another one
    countDistinct = makeCountDistinct(config, makeAttributes("myCount"), asList("field"));
    IntStream.range(256, 768)
             .mapToObj(i -> RecordBox.get().add("field", i).getRecord())
             .forEach(countDistinct::consume);
    byte[] secondAggregate = countDistinct.getData();

    // Final one
    countDistinct = makeCountDistinct(config, makeAttributes("myCount"), asList("field"),
                                      Pair.of(Concept.SKETCH_METADATA, "stats"),
                                      Pair.of(Concept.SKETCH_ESTIMATED_RESULT, "est"));
    countDistinct.combine(firstAggregate);
    countDistinct.combine(secondAggregate);

    Clip clip = countDistinct.getResult();

    Map<String, Object> meta = clip.getMeta().asMap();
    Assert.assertEquals(meta.size(), 1);
    Assert.assertTrue(meta.containsKey("stats"));
    Map<String, Object> stats = (Map<String, Object>) meta.get("stats");
    Assert.assertEquals(stats.size(), 1);
    Assert.assertFalse((Boolean) stats.get("est"));

    Assert.assertEquals(clip.getRecords().size(), 1);
    BulletRecord actual = clip.getRecords().get(0);
    BulletRecord expected = RecordBox.get().add("myCount", 768.0).getRecord();
    Assert.assertEquals(actual, expected);

    // The piecewise accessors must agree with what getResult() reports.
    Assert.assertEquals(countDistinct.getRecords(), clip.getRecords());
    // Compare against the clip's metadata; comparing getMetadata() with itself can never fail.
    Assert.assertEquals(countDistinct.getMetadata().asMap(), meta);
}
use of com.yahoo.bullet.record.BulletRecord in project bullet-core by yahoo.
the class DistributionTest method testCasting.
/**
 * Feeds a PMF distribution split at 50.0 with 25 numeric strings ("0".."24"), two
 * non-numeric strings and 50 doubles (50.0..99.0), then expects the two ranges to hold 25 and
 * 50 items respectively, i.e. the non-numeric strings are not counted.
 */
@Test
public void testCasting() {
    Distribution pmf = makeDistribution(Distribution.Type.PMF, Collections.singletonList(50.0));

    // Numbers supplied as strings
    for (int i = 0; i < 25; i++) {
        String asText = String.valueOf(i);
        pmf.consume(RecordBox.get().add("field", asText).getRecord());
    }

    // Values that are not numbers
    pmf.consume(RecordBox.get().add("field", "garbage").getRecord());
    pmf.consume(RecordBox.get().add("field", "1.0 garbage").getRecord());

    // Numbers supplied as doubles
    for (int i = 50; i < 100; i++) {
        double asDouble = i;
        pmf.consume(RecordBox.get().add("field", asDouble).getRecord());
    }

    Clip clip = pmf.getResult();

    Map<String, Object> sketchMeta = (Map<String, Object>) clip.getMeta().asMap().get("meta");
    Assert.assertEquals(sketchMeta.size(), 7);
    Assert.assertFalse((Boolean) sketchMeta.get("isEst"));

    List<BulletRecord> rows = clip.getRecords();
    Assert.assertEquals(rows.size(), 2);

    BulletRecord belowFifty = RecordBox.get()
                                       .add(RANGE_FIELD, NEGATIVE_INFINITY_START + SEPARATOR + 50.0 + END_EXCLUSIVE)
                                       .add(COUNT_FIELD, 25.0)
                                       .add(PROBABILITY_FIELD, 1.0 / 3)
                                       .getRecord();
    BulletRecord fiftyAndAbove = RecordBox.get()
                                          .add(RANGE_FIELD, START_INCLUSIVE + 50.0 + SEPARATOR + POSITIVE_INFINITY_END)
                                          .add(COUNT_FIELD, 50.0)
                                          .add(PROBABILITY_FIELD, 2.0 / 3)
                                          .getRecord();
    Assert.assertEquals(rows.get(0), belowFifty);
    Assert.assertEquals(rows.get(1), fiftyAndAbove);

    // The piecewise accessors must agree with what getResult() reports.
    Assert.assertEquals(pmf.getRecords(), clip.getRecords());
    Assert.assertEquals(pmf.getMetadata().asMap(), clip.getMeta().asMap());
}
use of com.yahoo.bullet.record.BulletRecord in project bullet-core by yahoo.
the class DistributionTest method testCombining.
/**
 * Builds two CDF distributions over the points 5.0 and 2.5 — one fed 0.0..2.4 (25 values), the
 * other 5.0..9.9 (50 values) — combines their serialized data into a third distribution and
 * checks the cumulative counts and probabilities of the union.
 */
@Test
public void testCombining() {
    // First input: 25 values, i * 0.1 for i in [0, 25)
    Distribution lower = makeDistribution(Distribution.Type.CDF, asList(5.0, 2.5));
    for (int i = 0; i < 25; i++) {
        double value = i * 0.1;
        lower.consume(RecordBox.get().add("field", value).getRecord());
    }

    // Second input: 50 values, i * 0.1 for i in [50, 100)
    Distribution upper = makeDistribution(Distribution.Type.CDF, asList(5.0, 2.5));
    for (int i = 50; i < 100; i++) {
        double value = i * 0.1;
        upper.consume(RecordBox.get().add("field", value).getRecord());
    }

    // Merge both serialized inputs into a fresh distribution
    Distribution union = makeDistribution(Distribution.Type.CDF, asList(5.0, 2.5));
    union.combine(lower.getData());
    union.combine(upper.getData());

    Clip clip = union.getResult();

    Map<String, Object> sketchMeta = (Map<String, Object>) clip.getMeta().asMap().get("meta");
    Assert.assertEquals(sketchMeta.size(), 7);
    Assert.assertFalse((Boolean) sketchMeta.get("isEst"));

    List<BulletRecord> rows = clip.getRecords();
    Assert.assertEquals(rows.size(), 3);

    BulletRecord upToTwoPointFive = RecordBox.get()
                                             .add(RANGE_FIELD, NEGATIVE_INFINITY_START + SEPARATOR + 2.5 + END_EXCLUSIVE)
                                             .add(COUNT_FIELD, 25.0)
                                             .add(PROBABILITY_FIELD, 1.0 / 3)
                                             .getRecord();
    BulletRecord upToFive = RecordBox.get()
                                     .add(RANGE_FIELD, NEGATIVE_INFINITY_START + SEPARATOR + 5.0 + END_EXCLUSIVE)
                                     .add(COUNT_FIELD, 25.0)
                                     .add(PROBABILITY_FIELD, 1.0 / 3)
                                     .getRecord();
    BulletRecord everything = RecordBox.get()
                                       .add(RANGE_FIELD, NEGATIVE_INFINITY_START + SEPARATOR + POSITIVE_INFINITY_END)
                                       .add(COUNT_FIELD, 75.0)
                                       .add(PROBABILITY_FIELD, 1.0)
                                       .getRecord();
    Assert.assertEquals(rows.get(0), upToTwoPointFive);
    Assert.assertEquals(rows.get(1), upToFive);
    Assert.assertEquals(rows.get(2), everything);

    // The piecewise accessors must agree with what getResult() reports.
    Assert.assertEquals(union.getRecords(), rows);
    Assert.assertEquals(union.getMetadata().asMap(), clip.getMeta().asMap());
}
Aggregations