Search in sources:

Example 41 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class QuantileSketchTest method testExactCDFWithProvidedPoints.

@Test
public void testExactCDFWithProvidedPoints() {
    // Input is 60 values: 0 - 9 three times each (i % 10) followed by 0 - 2 ten more times each (i % 3).
    // So 0, 1 and 2 occur 13 times each, while 3 - 9 occur 3 times each.
    QuantileSketch cdfSketch = new QuantileSketch(64, Distribution.Type.CDF, makePoints(0.0, 9.0, 1.0));
    for (int i = 0; i < 30; ++i) {
        cdfSketch.update(i % 10);
    }
    for (int i = 0; i < 30; ++i) {
        cdfSketch.update(i % 3);
    }

    Clip clip = cdfSketch.getResult(null, null);
    List<BulletRecord> rows = clip.getRecords();
    for (BulletRecord row : rows) {
        String range = (String) row.get(RANGE_FIELD);
        double count = (Double) row.get(COUNT_FIELD);
        double probability = (Double) row.get(PROBABILITY_FIELD);
        String upperBound = getEnd(range);

        // Work out the cumulative count this range should report, then check both fields against it
        double expectedCount;
        if (upperBound.equals(POSITIVE_INFINITY)) {
            // The unbounded range covers every value that was inserted
            expectedCount = 60.0;
        } else {
            double end = Double.parseDouble(upperBound);
            // 13 per point up to an end of 3; beyond that, the 39 from 0 - 2 plus 3 for every further point
            expectedCount = end <= 3.0 ? end * 13.0 : 39.0 + (end - 3) * 3.0;
        }
        Assert.assertEquals(count, expectedCount);
        Assert.assertEquals(probability, expectedCount / 60);
    }
}
Also used : Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Test(org.testng.annotations.Test)

Example 42 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class QuantileSketchTest method testExactPMFWithProvidedPoints.

@Test
public void testExactPMFWithProvidedPoints() {
    // Input is 60 values: 0 - 9 three times each (i % 10) followed by 0 - 2 ten more times each (i % 3).
    // So 0, 1 and 2 occur 13 times each, while 3 - 9 occur 3 times each.
    QuantileSketch pmfSketch = new QuantileSketch(64, Distribution.Type.PMF, makePoints(0.0, 9.0, 1.0));
    for (int i = 0; i < 30; ++i) {
        pmfSketch.update(i % 10);
    }
    for (int i = 0; i < 30; ++i) {
        pmfSketch.update(i % 3);
    }

    Clip clip = pmfSketch.getResult(null, null);
    List<BulletRecord> rows = clip.getRecords();
    for (BulletRecord row : rows) {
        String range = (String) row.get(RANGE_FIELD);
        double count = (Double) row.get(COUNT_FIELD);
        double probability = (Double) row.get(PROBABILITY_FIELD);
        String lowerBound = getStart(range);
        String upperBound = getEnd(range);

        double expectedCount;
        double expectedProbability;
        if (lowerBound.equals(NEGATIVE_INFINITY)) {
            // Nothing was inserted below the first split point
            expectedCount = 0.0;
            expectedProbability = 0.0;
        } else if (upperBound.equals(POSITIVE_INFINITY) || !(Double.parseDouble(lowerBound) <= 2.0)) {
            // The open-ended last range and every range starting above 2 hold a single value seen 3 times.
            // The start is only parsed when the end is finite, thanks to the short-circuit.
            expectedCount = 3.0;
            expectedProbability = 1.0 / 20;
        } else {
            // Ranges starting at 0, 1 or 2 hold a value that was seen 13 times
            expectedCount = 13.0;
            expectedProbability = 13.0 / 60;
        }
        Assert.assertEquals(count, expectedCount);
        Assert.assertEquals(probability, expectedProbability);
    }
}
Also used : Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Test(org.testng.annotations.Test)

Example 43 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class TupleSketchTest method testApproximateMetrics.

@Test
public void testApproximateMetrics() {
    TupleSketch sketch = new TupleSketch(ResizeFactor.X4, 1.0f, 32, 16);
    // 128 updates whose key cycles through 0 - 63, so every key is presented exactly twice
    for (int i = 0; i < 128; ++i) {
        sketch.update(addToData(String.valueOf(i % 64), 1, data), data);
    }

    Clip result = sketch.getResult("meta", ALL_METADATA);
    Map<String, Object> actualMeta = result.getMeta().asMap();
    Assert.assertTrue(actualMeta.containsKey("meta"));

    // The sketch statistics live under the "meta" key that was passed to getResult
    Map<String, Object> sketchStats = (Map<String, Object>) actualMeta.get("meta");
    Assert.assertEquals(sketchStats.size(), 5);
    Assert.assertTrue((Boolean) sketchStats.get("isEst"));
    Assert.assertTrue((Double) sketchStats.get("theta") < 1.0);
    Assert.assertEquals((String) sketchStats.get("family"), Family.TUPLE.getFamilyName());

    // Pull out every bound before asserting anything about them
    Map<String, Map<String, Double>> deviations = (Map<String, Map<String, Double>>) sketchStats.get("stddev");
    Map<String, Double> oneSigma = deviations.get(KMVSketch.META_STD_DEV_1);
    double oneSigmaUpper = oneSigma.get(KMVSketch.META_STD_DEV_UB);
    double oneSigmaLower = oneSigma.get(KMVSketch.META_STD_DEV_LB);
    Map<String, Double> twoSigma = deviations.get(KMVSketch.META_STD_DEV_2);
    double twoSigmaUpper = twoSigma.get(KMVSketch.META_STD_DEV_UB);
    double twoSigmaLower = twoSigma.get(KMVSketch.META_STD_DEV_LB);
    Map<String, Double> threeSigma = deviations.get(KMVSketch.META_STD_DEV_3);
    double threeSigmaUpper = threeSigma.get(KMVSketch.META_STD_DEV_UB);
    double threeSigmaLower = threeSigma.get(KMVSketch.META_STD_DEV_LB);

    // The estimate has to sit inside each of the three reported intervals
    double estimate = (Double) sketchStats.get("est");
    Assert.assertTrue(estimate >= oneSigmaLower);
    Assert.assertTrue(estimate <= oneSigmaUpper);
    Assert.assertTrue(estimate >= twoSigmaLower);
    Assert.assertTrue(estimate <= twoSigmaUpper);
    Assert.assertTrue(estimate >= threeSigmaLower);
    Assert.assertTrue(estimate <= threeSigmaUpper);

    Assert.assertEquals(result.getRecords().size(), 16);
    for (BulletRecord row : result.getRecords()) {
        String a = row.get("A").toString();
        String b = row.get("B").toString();
        Long occurrences = (Long) row.get("cnt");
        Double totalB = (Double) row.get("sumB");
        Double meanA = (Double) row.get("avgA");

        Assert.assertTrue(Integer.parseInt(a) < 64);
        Assert.assertEquals(Double.valueOf(b), 1.0);
        Assert.assertEquals(occurrences, Long.valueOf(2));
        Assert.assertEquals(totalB, 2.0);
        // Every A is below 64, so its average is below 64 whatever the count
        Assert.assertTrue(meanA < 64.0);
    }

    // The standalone accessors must agree with what the Clip carried
    Assert.assertEquals(sketch.getRecords(), result.getRecords());
    Assert.assertEquals(sketch.getMetadata("meta", ALL_METADATA).asMap(), actualMeta);
}
Also used : Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.testng.annotations.Test)

Example 44 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class GroupByTest method testCombining.

@Test
public void testCombining() {
    List<String> fields = asList("fieldA", "fieldB");

    // First partial aggregate: 30 x recordA and 10 x recordB, interleaved
    GroupBy first = makeGroupBy(fields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord recordA = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordB = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add("price", 1).getRecord();
    for (int i = 0; i < 10; ++i) {
        first.consume(recordA);
    }
    for (int i = 0; i < 9; ++i) {
        first.consume(recordB);
    }
    for (int i = 0; i < 20; ++i) {
        first.consume(recordA);
    }
    first.consume(recordB);
    byte[] firstSerialized = first.getData();

    // Second partial aggregate, built from scratch: 30 x recordC and 10 x recordD
    GroupBy second = makeGroupBy(fields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord recordC = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordD = RecordBox.get().addNull("fieldA").addNull("fieldB").add("price", 10).getRecord();
    for (int i = 0; i < 30; ++i) {
        second.consume(recordC);
    }
    for (int i = 0; i < 10; ++i) {
        second.consume(recordD);
    }
    byte[] secondSerialized = second.getData();

    // A third, empty instance only ever sees the two serialized partials
    GroupBy merged = makeGroupBy(fields, 5, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    merged.combine(firstSerialized);
    merged.combine(secondSerialized);

    Clip aggregate = merged.getResult();
    Assert.assertNotNull(aggregate);
    List<BulletRecord> groups = aggregate.getRecords();
    Assert.assertEquals(groups.size(), 3);

    // foo/bar: count = 10 + 20 + 30, price = 10*3 + 20*3 + 30*3
    BulletRecord expectedA = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add(COUNT.getName(), 60L).add("priceSum", 180.0).getRecord();
    // "null"/bar: count = 9 + 1, price = 9*1 + 1*1
    BulletRecord expectedB = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();
    // recordD had both fields added as nulls and is expected under "null"/"null": count = 10, price = 10*10
    BulletRecord expectedC = RecordBox.get().add("fieldA", "null").add("fieldB", "null").add(COUNT.getName(), 10L).add("priceSum", 100.0).getRecord();
    assertContains(groups, expectedA);
    assertContains(groups, expectedB);
    assertContains(groups, expectedC);

    // The standalone accessors must agree with what the Clip carried
    Assert.assertEquals(merged.getRecords(), aggregate.getRecords());
    Assert.assertEquals(merged.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Also used : IntStream(java.util.stream.IntStream) AVG(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.AVG) GROUP(com.yahoo.bullet.parsing.Aggregation.Type.GROUP) COUNT(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT) BulletError(com.yahoo.bullet.common.BulletError) Concept(com.yahoo.bullet.result.Meta.Concept) Test(org.testng.annotations.Test) AggregationUtils.makeGroupOperation(com.yahoo.bullet.parsing.AggregationUtils.makeGroupOperation) RecordBox(com.yahoo.bullet.result.RecordBox) Clip(com.yahoo.bullet.result.Clip) SUM(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.SUM) Collections.singletonList(java.util.Collections.singletonList) HashSet(java.util.HashSet) COUNT_FIELD(com.yahoo.bullet.aggregations.grouping.GroupOperation.GroupOperationType.COUNT_FIELD) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) AggregationUtils.makeAttributes(com.yahoo.bullet.parsing.AggregationUtils.makeAttributes) Collections.singletonMap(java.util.Collections.singletonMap) TestHelpers.assertContains(com.yahoo.bullet.TestHelpers.assertContains) AggregationUtils.makeGroupFields(com.yahoo.bullet.parsing.AggregationUtils.makeGroupFields) Collections.emptyMap(java.util.Collections.emptyMap) BulletRecord(com.yahoo.bullet.record.BulletRecord) Aggregation(com.yahoo.bullet.parsing.Aggregation) KMVSketch(com.yahoo.bullet.aggregations.sketches.KMVSketch) Set(java.util.Set) GroupOperation(com.yahoo.bullet.aggregations.grouping.GroupOperation) List(java.util.List) TestHelpers.addMetadata(com.yahoo.bullet.TestHelpers.addMetadata) BulletConfig(com.yahoo.bullet.common.BulletConfig) Optional(java.util.Optional) Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Test(org.testng.annotations.Test)

Example 45 with Clip

use of com.yahoo.bullet.result.Clip in project bullet-core by yahoo.

the class GroupByTest method testGroupByOperations.

@Test
public void testGroupByOperations() {
    List<String> fields = asList("fieldA", "fieldB");
    GroupBy grouper = makeGroupBy(fields, 3, makeGroupOperation(COUNT, null, null), makeGroupOperation(SUM, "price", "priceSum"));
    BulletRecord recordA = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add("price", 3).getRecord();
    BulletRecord recordB = RecordBox.get().addNull("fieldA").add("fieldB", "bar").add("price", 1).getRecord();

    // Feed 30 x recordA and 10 x recordB, interleaved
    for (int i = 0; i < 10; ++i) {
        grouper.consume(recordA);
    }
    for (int i = 0; i < 9; ++i) {
        grouper.consume(recordB);
    }
    for (int i = 0; i < 20; ++i) {
        grouper.consume(recordA);
    }
    grouper.consume(recordB);

    Clip aggregate = grouper.getResult();
    Assert.assertNotNull(aggregate);
    Map<String, Object> metadata = aggregate.getMeta().asMap();
    Assert.assertEquals(metadata.size(), 1);
    List<BulletRecord> groups = aggregate.getRecords();
    Assert.assertEquals(groups.size(), 2);

    // foo/bar: count = 10 + 20, price = 10*3 + 20*3
    BulletRecord expectedA = RecordBox.get().add("fieldA", "foo").add("fieldB", "bar").add(COUNT.getName(), 30L).add("priceSum", 90.0).getRecord();
    // recordB had fieldA added as a null and is expected under the string "null": count = 9 + 1, price = 9*1 + 1*1
    BulletRecord expectedB = RecordBox.get().add("fieldA", "null").add("fieldB", "bar").add(COUNT.getName(), 10L).add("priceSum", 10.0).getRecord();
    assertContains(groups, expectedA);
    assertContains(groups, expectedB);

    // The standalone accessors must agree with what the Clip carried
    Assert.assertEquals(grouper.getRecords(), aggregate.getRecords());
    Assert.assertEquals(grouper.getMetadata().asMap(), aggregate.getMeta().asMap());
}
Also used : Clip(com.yahoo.bullet.result.Clip) BulletRecord(com.yahoo.bullet.record.BulletRecord) Test(org.testng.annotations.Test)

Aggregations

Clip (com.yahoo.bullet.result.Clip): 66; Test (org.testng.annotations.Test): 55; BulletRecord (com.yahoo.bullet.record.BulletRecord): 48; Map (java.util.Map): 43; List (java.util.List): 33; IntStream (java.util.stream.IntStream): 33; Assert (org.testng.Assert): 33; BulletConfig (com.yahoo.bullet.common.BulletConfig): 32; HashMap (java.util.HashMap): 30; BulletError (com.yahoo.bullet.common.BulletError): 29; TestHelpers.addMetadata (com.yahoo.bullet.TestHelpers.addMetadata): 28; Aggregation (com.yahoo.bullet.parsing.Aggregation): 28; AggregationUtils.makeAttributes (com.yahoo.bullet.parsing.AggregationUtils.makeAttributes): 28; Concept (com.yahoo.bullet.result.Meta.Concept): 28; RecordBox (com.yahoo.bullet.result.RecordBox): 28; Family (com.yahoo.sketches.Family): 28; Arrays.asList (java.util.Arrays.asList): 28; Optional (java.util.Optional): 28; Pair (org.apache.commons.lang3.tuple.Pair): 28; HashSet (java.util.HashSet): 23