Search in sources :

Example 21 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

The method ingestingSketches of the class ArrayOfDoublesSketchAggregationTest.

@Test
public void ingestingSketches() throws Exception {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_sketch_data.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\"],", "      \"dimensionExclusions\": [],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"product\", \"sketch\"]", "  }", "}"), String.join("\n", "[", "  {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024},", "  {\"type\": \"arrayOfDoublesSketch\", \"name\": \"non_existing_sketch\", \"fieldName\": \"non_existing_sketch\"}", "]"), // minTimestamp
    0, Granularities.NONE, // maxRowCount
    10, String.join("\n", "{", "  \"queryType\": \"groupBy\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"dimensions\": [],", "  \"aggregations\": [", "    {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024},", "    {\"type\": \"arrayOfDoublesSketch\", \"name\": \"non_existing_sketch\", \"fieldName\": \"non_existing_sketch\"}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToEstimateAndBounds\", \"name\": \"estimateAndBounds\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, \"numStdDevs\": 2},", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"union\",", "      \"operation\": \"UNION\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"intersection\",", "      \"operation\": \"INTERSECT\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"anotb\",", "      \"operation\": \"NOT\",", "      \"nominalEntries\": 1024,", "      \"fields\": 
[{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToString\", \"name\": \"summary\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToVariances\", \"name\": \"variances\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", "  ],", "  \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", "}"));
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    ResultRow row = results.get(0);
    Assert.assertEquals("sketch", 40.0, (double) row.get(0), 0);
    Assert.assertEquals("non_existing_sketch", 0, (double) row.get(1), 0);
    Assert.assertEquals("estimate", 40.0, (double) row.get(2), 0);
    Assert.assertArrayEquals("estimateAndBounds", new double[] { 40.0, 40.0, 40.0 }, (double[]) row.get(3), 0);
    Assert.assertEquals("union", 40.0, (double) row.get(5), 0);
    Assert.assertEquals("intersection", 40.0, (double) row.get(6), 0);
    Assert.assertEquals("anotb", 0, (double) row.get(7), 0);
    Assert.assertArrayEquals("variances", new double[] { 0.0 }, (double[]) row.get(9), 0);
    // quantiles-sketch
    Object obj = row.get(4);
    Assert.assertTrue(obj instanceof DoublesSketch);
    DoublesSketch ds = (DoublesSketch) obj;
    Assert.assertEquals(40, ds.getN());
    Assert.assertEquals(1.0, ds.getMinValue(), 0);
    Assert.assertEquals(1.0, ds.getMaxValue(), 0);
    final String expectedSummary = "### HeapArrayOfDoublesCompactSketch SUMMARY: \n" + "   Estimate                : 40.0\n" + "   Upper Bound, 95% conf   : 40.0\n" + "   Lower Bound, 95% conf   : 40.0\n" + "   Theta (double)          : 1.0\n" + "   Theta (long)            : 9223372036854775807\n" + "   EstMode?                : false\n" + "   Empty?                  : false\n" + "   Retained Entries        : 40\n" + "   Seed Hash               : 93cc | 37836\n" + "### END SKETCH SUMMARY\n";
    Assert.assertEquals("summary", expectedSummary, row.get(8));
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) File(java.io.File) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Example 22 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

The method buildingSketchesAtIngestionTime of the class ArrayOfDoublesSketchAggregationTest.

/**
 * Builds an ArrayOfDoubles sketch at ingestion time from the {@code key} and {@code value}
 * columns of {@code tuple/array_of_doubles_build_data.tsv}, then runs a groupBy query over the
 * segment and checks the aggregation and post-aggregation results.
 *
 * @throws Exception if indexing the input file or running the query fails
 */
@Test
public void buildingSketchesAtIngestionTime() throws Exception {
    final File dataFile = new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data.tsv").getFile());

    // Parser spec: TSV input with the columns timestamp, product, key, key_num and value.
    final String parserSpec = String.join(
        "\n",
        "{",
        "  \"type\": \"string\",",
        "  \"parseSpec\": {",
        "    \"format\": \"tsv\",",
        "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
        "    \"dimensionsSpec\": {",
        "      \"dimensions\": [\"product\"],",
        "      \"dimensionExclusions\": [],",
        "      \"spatialDimensions\": []",
        "    },",
        "    \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value\"]",
        "  }",
        "}"
    );

    // Single ingestion-time aggregator building the sketch from "key" with "value" as metric column.
    final String ingestionAggregators = String.join(
        "\n",
        "[",
        "  {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024}",
        "]"
    );

    // groupBy query: one aggregation followed by five post-aggregations.
    final String groupByQuery = String.join(
        "\n",
        "{",
        "  \"queryType\": \"groupBy\",",
        "  \"dataSource\": \"test_datasource\",",
        "  \"granularity\": \"ALL\",",
        "  \"dimensions\": [],",
        "  \"aggregations\": [",
        "    {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"size\": 1024}",
        "  ],",
        "  \"postAggregations\": [",
        "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
        "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
        "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {",
        "      \"type\": \"arrayOfDoublesSketchSetOp\",",
        "      \"name\": \"union\",",
        "      \"operation\": \"UNION\",",
        "      \"nominalEntries\": 1024,",
        "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]",
        "    }},",
        "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {",
        "      \"type\": \"arrayOfDoublesSketchSetOp\",",
        "      \"name\": \"intersection\",",
        "      \"operation\": \"INTERSECT\",",
        "      \"nominalEntries\": 1024,",
        "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]",
        "    }},",
        "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {",
        "      \"type\": \"arrayOfDoublesSketchSetOp\",",
        "      \"name\": \"anotb\",",
        "      \"operation\": \"NOT\",",
        "      \"nominalEntries\": 1024,",
        "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]",
        "    }}",
        "  ],",
        "  \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]",
        "}"
    );

    final Sequence<ResultRow> resultSequence = helper.createIndexAndRunQueryOnSegment(
        dataFile,
        parserSpec,
        ingestionAggregators,
        0, // minTimestamp
        Granularities.NONE,
        10, // maxRowCount
        groupByQuery
    );

    final List<ResultRow> rows = resultSequence.toList();
    Assert.assertEquals(1, rows.size());
    final ResultRow onlyRow = rows.get(0);

    // Row positions checked below: 0 is the aggregation, 1-5 the post-aggregations,
    // in the order they are listed in the query.
    Assert.assertEquals("sketch", 40.0, (double) onlyRow.get(0), 0);
    Assert.assertEquals("estimate", 40.0, (double) onlyRow.get(1), 0);
    Assert.assertEquals("union", 40.0, (double) onlyRow.get(3), 0);
    Assert.assertEquals("intersection", 40.0, (double) onlyRow.get(4), 0);
    Assert.assertEquals("anotb", 0, (double) onlyRow.get(5), 0);

    // quantiles-sketch
    final Object quantilesValue = onlyRow.get(2);
    Assert.assertTrue(quantilesValue instanceof DoublesSketch);
    final DoublesSketch quantilesSketch = (DoublesSketch) quantilesValue;
    Assert.assertEquals(40, quantilesSketch.getN());
    Assert.assertEquals(1.0, quantilesSketch.getMinValue(), 0);
    Assert.assertEquals(1.0, quantilesSketch.getMaxValue(), 0);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) File(java.io.File) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Example 23 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

The method testComparator of the class ArrayOfDoublesSketchToQuantilesSketchPostAggregatorTest.

/**
 * Checks that the comparator exposed by the post-aggregator orders the quantiles sketch derived
 * from a one-entry tuple sketch before the one derived from a two-entry tuple sketch.
 */
@Test
public void testComparator() {
    // Tuple sketch with a single key.
    final ArrayOfDoublesUpdatableSketch singleEntrySketch =
        new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(16).setNumberOfValues(2).build();
    singleEntrySketch.update("foo", new double[] { 1.0, 2.0 });

    // Tuple sketch with two distinct keys.
    final ArrayOfDoublesUpdatableSketch twoEntrySketch =
        new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(16).setNumberOfValues(2).build();
    twoEntrySketch.update("foo", new double[] { 2.0, 2.0 });
    twoEntrySketch.update("bar", new double[] { 3.0, 4.0 });

    // Mocked input fields that hand back the prepared sketches for any argument map.
    final PostAggregator singleEntryField = EasyMock.createMock(PostAggregator.class);
    EasyMock.expect(singleEntryField.compute(EasyMock.anyObject(Map.class))).andReturn(singleEntrySketch).anyTimes();
    final PostAggregator twoEntryField = EasyMock.createMock(PostAggregator.class);
    EasyMock.expect(twoEntryField.compute(EasyMock.anyObject(Map.class))).andReturn(twoEntrySketch).anyTimes();
    EasyMock.replay(singleEntryField, twoEntryField);

    final ArrayOfDoublesSketchToQuantilesSketchPostAggregator smallerPostAgg =
        new ArrayOfDoublesSketchToQuantilesSketchPostAggregator("a", singleEntryField, null, null);
    final ArrayOfDoublesSketchToQuantilesSketchPostAggregator largerPostAgg =
        new ArrayOfDoublesSketchToQuantilesSketchPostAggregator("a", twoEntryField, null, null);

    Comparator sketchComparator = smallerPostAgg.getComparator();
    final DoublesSketch smallerQuantiles = smallerPostAgg.compute(ImmutableMap.of());
    final DoublesSketch largerQuantiles = largerPostAgg.compute(ImmutableMap.of());

    // The comparator compares getN(), which is 1 for the first sketch and 2 for the second.
    Assert.assertEquals(-1, sketchComparator.compare(smallerQuantiles, largerQuantiles));
}
Also used : DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) ArrayOfDoublesUpdatableSketchBuilder(org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder) ArrayOfDoublesUpdatableSketch(org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) ConstantPostAggregator(org.apache.druid.query.aggregation.post.ConstantPostAggregator) Comparator(java.util.Comparator) Test(org.junit.Test)

Example 24 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

The method testExtractorOnPositiveNumber of the class DoublesSketchComplexMetricSerdeTest.

/**
 * Checks that the serde's extractor turns the string value {@code "777"} into a sketch that
 * retains one item and reports a maximum value of 777.
 */
@Test
public void testExtractorOnPositiveNumber() {
    final ComplexMetricExtractor metricExtractor = new DoublesSketchComplexMetricSerde().getExtractor();

    // Input row with timestamp 0, no dimensions and a single field "foo" holding a numeric string.
    final MapBasedInputRow inputRow = new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "777"));

    final DoublesSketch extracted = (DoublesSketch) metricExtractor.extractValue(inputRow, "foo");

    Assert.assertEquals(1, extracted.getRetainedItems());
    Assert.assertEquals(777d, extracted.getMaxValue(), 0.01d);
}
Also used : DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) ComplexMetricExtractor(org.apache.druid.segment.serde.ComplexMetricExtractor) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Aggregations

DoublesSketch (org.apache.datasketches.quantiles.DoublesSketch) 24, Test (org.junit.Test) 17, File (java.io.File) 11, GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest) 11, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 11, ResultRow (org.apache.druid.query.groupby.ResultRow) 9, MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow) 5, ComplexMetricExtractor (org.apache.druid.segment.serde.ComplexMetricExtractor) 5, DoublesUnion (org.apache.datasketches.quantiles.DoublesUnion) 3, Result (org.apache.druid.query.Result) 2, TimeseriesResultValue (org.apache.druid.query.timeseries.TimeseriesResultValue) 2, Comparator (java.util.Comparator) 1, UpdateDoublesSketch (org.apache.datasketches.quantiles.UpdateDoublesSketch) 1, ArrayOfDoublesUpdatableSketch (org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch) 1, ArrayOfDoublesUpdatableSketchBuilder (org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder) 1, IAE (org.apache.druid.java.util.common.IAE) 1, ObjectAggregateCombiner (org.apache.druid.query.aggregation.ObjectAggregateCombiner) 1, PostAggregator (org.apache.druid.query.aggregation.PostAggregator) 1, ConstantPostAggregator (org.apache.druid.query.aggregation.post.ConstantPostAggregator) 1, BaseDoubleColumnValueSelector (org.apache.druid.segment.BaseDoubleColumnValueSelector) 1