Search in sources :

Example 11 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

the class ArrayOfDoublesSketchAggregationTest method ingestingSketchesTwoValues.

/**
 * Indexes {@code tuple/array_of_doubles_sketch_data_two_values.tsv} with an
 * {@code arrayOfDoublesSketch} aggregator configured for two values per key, runs a
 * {@code groupBy} query with granularity ALL over the resulting segment, and checks the
 * single result row.
 *
 * <p>The row is read by position. The indices used below line up with the order in which
 * the query declares its aggregator ({@code sketch}) and then its post-aggregators
 * ({@code estimate}, {@code quantiles-sketch}, {@code union}, {@code intersection},
 * {@code anotb}, {@code means}), so reordering the query JSON requires updating the indices.
 *
 * <p>NOTE(review): the TSV presumably contains already-serialized sketches in its
 * {@code sketch} column (the method name and the ingestion spec suggest so) - confirm
 * against the fixture.
 *
 * <p>NOTE(review): in this listing the query string is wrapped onto a second physical line
 * in the middle of a string literal; verify against the upstream file before compiling.
 *
 * @throws Exception if building the index or running the query fails
 */
@Test
public void ingestingSketchesTwoValues() throws Exception {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_sketch_data_two_values.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\"],", "      \"dimensionExclusions\": [],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"product\", \"sketch\"]", "  }", "}"), String.join("\n", "[", "  {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", "]"), // minTimestamp
    0, Granularities.NONE, // maxRowCount
    10, String.join("\n", "{", "  \"queryType\": \"groupBy\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"dimensions\": [],", "  \"aggregations\": [", "    {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"union\",", "      \"operation\": \"UNION\",", "      \"nominalEntries\": 1024,", "      \"numberOfValues\": 2,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"intersection\",", "      \"operation\": \"INTERSECT\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"anotb\",", "      \"operation\": \"NOT\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {", "      \"type\": \"arrayOfDoublesSketchToMeans\",", "      \"name\": \"means\",", "      \"field\": {\"type\": 
\"fieldAccess\", \"fieldName\": \"sketch\"}", "    }", "  ],", "  \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", "}"));
    // Granularity ALL with no dimensions is expected to yield exactly one row.
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    ResultRow row = results.get(0);
    // Every set operation combines the sketch with itself, so union and intersection are
    // expected to match the plain estimate (40) and A-not-B is expected to be 0.
    Assert.assertEquals("sketch", 40.0, (double) row.get(0), 0);
    Assert.assertEquals("estimate", 40.0, (double) row.get(1), 0);
    Assert.assertEquals("union", 40.0, (double) row.get(3), 0);
    Assert.assertEquals("intersection", 40.0, (double) row.get(4), 0);
    Assert.assertEquals("anotb", 0, (double) row.get(5), 0);
    // means: one entry per value column (two here), expected to be 1.0 and 2.0
    Object meansObj = row.get(6);
    Assert.assertTrue(meansObj instanceof double[]);
    double[] means = (double[]) meansObj;
    Assert.assertEquals(2, means.length);
    Assert.assertEquals(1.0, means[0], 0);
    Assert.assertEquals(2.0, means[1], 0);
    // quantiles-sketch: the post-aggregator is declared without a "column" property; the
    // resulting sketch is expected to hold 40 items with min == max == 1.0
    Object quantilesObj = row.get(2);
    Assert.assertTrue(quantilesObj instanceof DoublesSketch);
    DoublesSketch ds = (DoublesSketch) quantilesObj;
    Assert.assertEquals(40, ds.getN());
    Assert.assertEquals(1.0, ds.getMinValue(), 0);
    Assert.assertEquals(1.0, ds.getMaxValue(), 0);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) File(java.io.File) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Example 12 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

the class ArrayOfDoublesSketchAggregationTest method buildingSketchesAtIngestionTimeThreeValuesAndNulls.

/**
 * Builds three-value sketches at ingestion time from
 * {@code tuple/array_of_doubles_build_data_three_values_and_nulls.tsv} (key column
 * {@code key}, metric columns {@code value1}, {@code value2}, {@code value3}), then runs a
 * {@code groupBy} query with granularity ALL and checks the single result row.
 *
 * <p>Several expectations depend on {@code NullHandling.replaceWithDefault()}: 40 when it
 * returns {@code true}, 30 otherwise.
 * NOTE(review): presumably ten input rows carry a null metric and are dropped when nulls are
 * not replaced with defaults - confirm against the fixture.
 *
 * <p>The row is read by position; indices follow the query's declaration order:
 * {@code sketch} (0), {@code estimate} (1), {@code quantiles-sketch} (2), {@code union} (3),
 * {@code intersection} (4), {@code anotb} (5), {@code means} (6),
 * {@code quantiles-sketch-with-nulls} (7).
 *
 * <p>NOTE(review): in this listing the query string is wrapped onto a second physical line
 * in the middle of a string literal; verify against the upstream file before compiling.
 *
 * @throws Exception if building the index or running the query fails
 */
@Test
public void buildingSketchesAtIngestionTimeThreeValuesAndNulls() throws Exception {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data_three_values_and_nulls.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\"],", "      \"dimensionExclusions\": [],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"product\", \"key\", \"value1\", \"value2\", \"value3\"]", "  }", "}"), String.join("\n", "[", "  {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\", \"value3\" ], \"nominalEntries\": 1024}", "]"), // minTimestamp
    0, Granularities.NONE, // maxRowCount
    10, String.join("\n", "{", "  \"queryType\": \"groupBy\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"dimensions\": [],", "  \"aggregations\": [", "    {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 3}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"union\",", "      \"operation\": \"UNION\",", "      \"nominalEntries\": 1024,", "      \"numberOfValues\": 3,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"intersection\",", "      \"operation\": \"INTERSECT\",", "      \"nominalEntries\": 1024,", "      \"numberOfValues\": 3,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"anotb\",", "      \"operation\": \"NOT\",", "      \"nominalEntries\": 1024,", "      \"numberOfValues\": 3,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {", "      \"type\": 
\"arrayOfDoublesSketchToMeans\",", "      \"name\": \"means\",", "      \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", "    },", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch-with-nulls\", \"column\": 3, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", "  ],", "  \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", "}"));
    // Granularity ALL with no dimensions is expected to yield exactly one row.
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    ResultRow row = results.get(0);
    // Expected counts switch between 40 and 30 with the null-handling mode. The set
    // operations combine the sketch with itself, so union/intersection mirror the estimate
    // and A-not-B is expected to be 0 in both modes.
    Assert.assertEquals("sketch", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(0), 0);
    Assert.assertEquals("estimate", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(1), 0);
    Assert.assertEquals("union", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(3), 0);
    Assert.assertEquals("intersection", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(4), 0);
    Assert.assertEquals("anotb", 0, (double) row.get(5), 0);
    // means: one entry per value column (three here); the third mean is compared with a
    // tolerance of 0.1 and its expected value depends on the null-handling mode
    Object meansObj = row.get(6);
    Assert.assertTrue(meansObj instanceof double[]);
    double[] means = (double[]) meansObj;
    Assert.assertEquals(3, means.length);
    Assert.assertEquals(1.0, means[0], 0);
    Assert.assertEquals(2.0, means[1], 0);
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 2.25 : 3.0, means[2], 0.1);
    // quantiles-sketch: declared with "column": 2; min and max are both expected to be 2.0
    Object obj = row.get(2);
    Assert.assertTrue(obj instanceof DoublesSketch);
    DoublesSketch ds = (DoublesSketch) obj;
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds.getN());
    Assert.assertEquals(2.0, ds.getMinValue(), 0);
    Assert.assertEquals(2.0, ds.getMaxValue(), 0);
    // quantiles-sketch-with-nulls: declared with "column": 3; the expected minimum is 0.0
    // when nulls are replaced with defaults and 3.0 otherwise
    Object objSketch2 = row.get(7);
    Assert.assertTrue(objSketch2 instanceof DoublesSketch);
    DoublesSketch ds2 = (DoublesSketch) objSketch2;
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds2.getN());
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 3.0, ds2.getMinValue(), 0);
    Assert.assertEquals(3.0, ds2.getMaxValue(), 0);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) File(java.io.File) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Example 13 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

the class ArrayOfDoublesSketchAggregationTest method buildingSketchesAtQueryTimeWithNullsTest.

// Three buckets with null values
/**
 * Indexes {@code tuple/array_of_doubles_build_data_three_values_and_nulls.tsv} with plain
 * {@code doubleSum} metrics and builds the sketches at query time instead of at ingestion.
 *
 * <p>The query defines two {@code arrayOfDoublesSketch} aggregators over the same key:
 * <ul>
 *   <li>{@code sketch} uses {@code value1}, {@code value2}, {@code value3} directly;</li>
 *   <li>{@code sketchNoNulls} swaps {@code value3} for the virtual column {@code nonulls3},
 *       defined by the expression {@code nvl(value3, 0.0)}.</li>
 * </ul>
 * Expectations for {@code sketch} depend on {@code NullHandling.replaceWithDefault()}
 * (40 vs 30), while those for {@code sketchNoNulls} are 40 in both modes.
 *
 * <p>The row is read by position; indices follow the query's declaration order:
 * {@code sketch} (0), {@code sketchNoNulls} (1), {@code estimate} (2),
 * {@code estimateNoNulls} (3), {@code quantiles-sketch} (4), {@code union} (5),
 * {@code intersection} (6), {@code anotb} (7), {@code means} (8),
 * {@code quantiles-sketch-with-nulls} (9), {@code quantiles-sketch-with-no-nulls} (10).
 *
 * <p>NOTE(review): in this listing the query string is wrapped onto a second physical line
 * in the middle of a string literal; verify against the upstream file before compiling.
 *
 * @throws Exception if building the index or running the query fails
 */
@Test
public void buildingSketchesAtQueryTimeWithNullsTest() throws Exception {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data_three_values_and_nulls.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\", \"key\"],", "      \"dimensionExclusions\": [],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"product\", \"key\", \"value1\", \"value2\", \"value3\"]", "  }", "}"), String.join("\n", "[", "  {\"type\": \"doubleSum\", \"name\": \"value1\", \"fieldName\": \"value1\"},", "  {\"type\": \"doubleSum\", \"name\": \"value2\", \"fieldName\": \"value2\"},", "  {\"type\": \"doubleSum\", \"name\": \"value3\", \"fieldName\": \"value3\"}", "]"), // minTimestamp
    0, Granularities.NONE, // maxRowCount
    10, String.join("\n", "{", "  \"queryType\": \"groupBy\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"dimensions\": [],", "  \"virtualColumns\": [{\"type\": \"expression\",\"name\": \"nonulls3\",\"expression\": \"nvl(value3, 0.0)\",\"outputType\": \"DOUBLE\"}],", "  \"aggregations\": [", "   {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\", \"value3\" ], \"nominalEntries\": 1024},", "   {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketchNoNulls\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\", \"nonulls3\" ], \"nominalEntries\": 1024}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimateNoNulls\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchNoNulls\"}},", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"union\",", "      \"operation\": \"UNION\",", "      \"nominalEntries\": 1024,", "      \"numberOfValues\": 3,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"intersection\",", "      \"operation\": \"INTERSECT\",", "      \"nominalEntries\": 1024,", "      \"numberOfValues\": 3,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": 
\"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"anotb\",", "      \"operation\": \"NOT\",", "      \"nominalEntries\": 1024,", "      \"numberOfValues\": 3,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {", "      \"type\": \"arrayOfDoublesSketchToMeans\",", "      \"name\": \"means\",", "      \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", "    },", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch-with-nulls\", \"column\": 3, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch-with-no-nulls\", \"column\": 3, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchNoNulls\"}}", "  ],", "  \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", "}"));
    // Granularity ALL with no dimensions is expected to yield exactly one row.
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    ResultRow row = results.get(0);
    // "sketch" expectations vary with the null-handling mode; "sketchNoNulls" is expected
    // to be 40 regardless because its third metric goes through nvl(value3, 0.0).
    Assert.assertEquals("sketch", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(0), 0);
    Assert.assertEquals("sketchNoNulls", 40.0, (double) row.get(1), 0);
    Assert.assertEquals("estimate", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(2), 0);
    Assert.assertEquals("estimateNoNulls", 40.0, (double) row.get(3), 0);
    // The set operations combine "sketch" with itself: union/intersection mirror the
    // estimate and A-not-B is expected to be 0.
    Assert.assertEquals("union", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(5), 0);
    Assert.assertEquals("intersection", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(6), 0);
    Assert.assertEquals("anotb", 0, (double) row.get(7), 0);
    // means: computed from "sketch"; the third mean is compared with a tolerance of 0.1
    // and its expected value depends on the null-handling mode
    Object meansObj = row.get(8);
    Assert.assertTrue(meansObj instanceof double[]);
    double[] means = (double[]) meansObj;
    Assert.assertEquals(3, means.length);
    Assert.assertEquals(1.0, means[0], 0);
    Assert.assertEquals(2.0, means[1], 0);
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 2.25 : 3.0, means[2], 0.1);
    // quantiles-sketch: declared with "column": 2 over "sketch"; min == max == 2.0 expected
    Object obj = row.get(4);
    Assert.assertTrue(obj instanceof DoublesSketch);
    DoublesSketch ds = (DoublesSketch) obj;
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds.getN());
    Assert.assertEquals(2.0, ds.getMinValue(), 0);
    Assert.assertEquals(2.0, ds.getMaxValue(), 0);
    // quantiles-sketch-with-nulls: declared with "column": 3 over "sketch"; the expected
    // minimum is 0.0 when nulls are replaced with defaults and 3.0 otherwise
    Object objSketch2 = row.get(9);
    Assert.assertTrue(objSketch2 instanceof DoublesSketch);
    DoublesSketch ds2 = (DoublesSketch) objSketch2;
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds2.getN());
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 3.0, ds2.getMinValue(), 0);
    Assert.assertEquals(3.0, ds2.getMaxValue(), 0);
    // quantiles-sketch-with-no-nulls: declared with "column": 3 over "sketchNoNulls";
    // expected to hold 40 items ranging from 0.0 to 3.0 in both null-handling modes
    Object objSketch3 = row.get(10);
    Assert.assertTrue(objSketch3 instanceof DoublesSketch);
    DoublesSketch ds3 = (DoublesSketch) objSketch3;
    Assert.assertEquals(40, ds3.getN());
    Assert.assertEquals(0.0, ds3.getMinValue(), 0);
    Assert.assertEquals(3.0, ds3.getMaxValue(), 0);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) File(java.io.File) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Example 14 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

the class ArrayOfDoublesSketchAggregationTest method buildingSketchesAtQueryTimeUsingNumericalTimeseries.

/**
 * Indexes {@code tuple/array_of_doubles_build_data.tsv} with a {@code doubleSum} metric and
 * builds an {@code arrayOfDoublesSketch} at query time in a {@code timeseries} query, using
 * the numeric dimension {@code key_num} (declared with type {@code long} in the dimensions
 * spec) as the sketch key and {@code value} as the only metric column.
 *
 * <p>Unlike the groupBy-based tests, results are read by metric name through
 * {@code TimeseriesResultValue}, so the assertions do not depend on declaration order.
 *
 * @throws Exception if building the index or running the query fails
 */
@Test
public void buildingSketchesAtQueryTimeUsingNumericalTimeseries() throws Exception {
    Sequence<Result<TimeseriesResultValue>> seq = tsHelper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\", \"key\", {\"type\": \"long\", \"name\": \"key_num\"}],", "      \"dimensionExclusions\": [],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value\"]", "  }", "}"), String.join("\n", "[", "  {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", "]"), // minTimestamp
    0, Granularities.NONE, // maxRowCount
    40, String.join("\n", "{", "  \"queryType\": \"timeseries\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"aggregations\": [", "    {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key_num\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", "    {\"type\": \"count\", \"name\":\"cnt\"}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"union\",", "      \"operation\": \"UNION\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"intersection\",", "      \"operation\": \"INTERSECT\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"anotb\",", "      \"operation\": \"NOT\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }}", "  ],", "  \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", "}"));
    // Granularity ALL is expected to collapse the interval into a single result.
    List<Result<TimeseriesResultValue>> results = seq.toList();
    Assert.assertEquals(1, results.size());
    TimeseriesResultValue row = results.get(0).getValue();
    // The row count and the sketch estimate are both expected to be 40. The set operations
    // combine the sketch with itself, so union/intersection mirror the estimate and
    // A-not-B is expected to be 0.
    Assert.assertEquals("cnt", 40.0, row.getDoubleMetric("cnt"), 0);
    Assert.assertEquals("sketch", 40.0, row.getDoubleMetric("sketch"), 0);
    Assert.assertEquals("estimate", 40.0, row.getDoubleMetric("estimate"), 0);
    Assert.assertEquals("union", 40.0, row.getDoubleMetric("union"), 0);
    Assert.assertEquals("intersection", 40.0, row.getDoubleMetric("intersection"), 0);
    Assert.assertEquals("anotb", 0, row.getDoubleMetric("anotb"), 0);
    // quantiles-sketch: declared without a "column" property; expected to hold 40 items
    // with min == max == 1.0
    Object obj = row.getMetric("quantiles-sketch");
    Assert.assertTrue(obj instanceof DoublesSketch);
    DoublesSketch ds = (DoublesSketch) obj;
    Assert.assertEquals(40, ds.getN());
    Assert.assertEquals(1.0, ds.getMinValue(), 0);
    Assert.assertEquals(1.0, ds.getMaxValue(), 0);
}
Also used : TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) File(java.io.File) Result(org.apache.druid.query.Result) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Example 15 with DoublesSketch

use of org.apache.datasketches.quantiles.DoublesSketch in project druid by druid-io.

the class ArrayOfDoublesSketchAggregationTest method buildingSketchesAtQueryTimeTimeseries.

/**
 * Indexes {@code tuple/array_of_doubles_build_data.tsv} with a {@code doubleSum} metric and
 * builds an {@code arrayOfDoublesSketch} at query time in a {@code timeseries} query, using
 * the dimension {@code key} (listed as a plain name in the dimensions spec) as the sketch
 * key and {@code value} as the only metric column.
 *
 * <p>Results are read by metric name through {@code TimeseriesResultValue}, so the
 * assertions do not depend on the order in which the query declares its aggregators.
 *
 * @throws Exception if building the index or running the query fails
 */
@Test
public void buildingSketchesAtQueryTimeTimeseries() throws Exception {
    Sequence<Result<TimeseriesResultValue>> seq = tsHelper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\", \"key\", {\"type\": \"long\", \"name\": \"key_num\"}],", "      \"dimensionExclusions\": [],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value\"]", "  }", "}"), String.join("\n", "[", "  {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", "]"), // minTimestamp
    0, Granularities.NONE, // maxRowCount
    40, String.join("\n", "{", "  \"queryType\": \"timeseries\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"aggregations\": [", "    {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", "    {\"type\": \"count\", \"name\":\"cnt\"}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"union\",", "      \"operation\": \"UNION\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"intersection\",", "      \"operation\": \"INTERSECT\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }},", "    {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", "      \"type\": \"arrayOfDoublesSketchSetOp\",", "      \"name\": \"anotb\",", "      \"operation\": \"NOT\",", "      \"nominalEntries\": 1024,", "      \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", "    }}", "  ],", "  \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", "}"));
    // Granularity ALL is expected to collapse the interval into a single result.
    List<Result<TimeseriesResultValue>> results = seq.toList();
    Assert.assertEquals(1, results.size());
    TimeseriesResultValue row = results.get(0).getValue();
    // The row count and the sketch estimate are both expected to be 40. The set operations
    // combine the sketch with itself, so union/intersection mirror the estimate and
    // A-not-B is expected to be 0.
    Assert.assertEquals("cnt", 40.0, row.getDoubleMetric("cnt"), 0);
    Assert.assertEquals("sketch", 40.0, row.getDoubleMetric("sketch"), 0);
    Assert.assertEquals("estimate", 40.0, row.getDoubleMetric("estimate"), 0);
    Assert.assertEquals("union", 40.0, row.getDoubleMetric("union"), 0);
    Assert.assertEquals("intersection", 40.0, row.getDoubleMetric("intersection"), 0);
    Assert.assertEquals("anotb", 0, row.getDoubleMetric("anotb"), 0);
    // quantiles-sketch: declared without a "column" property; expected to hold 40 items
    // with min == max == 1.0
    Object obj = row.getMetric("quantiles-sketch");
    Assert.assertTrue(obj instanceof DoublesSketch);
    DoublesSketch ds = (DoublesSketch) obj;
    Assert.assertEquals(40, ds.getN());
    Assert.assertEquals(1.0, ds.getMinValue(), 0);
    Assert.assertEquals(1.0, ds.getMaxValue(), 0);
}
Also used : TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) File(java.io.File) Result(org.apache.druid.query.Result) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Aggregations

DoublesSketch (org.apache.datasketches.quantiles.DoublesSketch) 24, Test (org.junit.Test) 17, File (java.io.File) 11, GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest) 11, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 11, ResultRow (org.apache.druid.query.groupby.ResultRow) 9, MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow) 5, ComplexMetricExtractor (org.apache.druid.segment.serde.ComplexMetricExtractor) 5, DoublesUnion (org.apache.datasketches.quantiles.DoublesUnion) 3, Result (org.apache.druid.query.Result) 2, TimeseriesResultValue (org.apache.druid.query.timeseries.TimeseriesResultValue) 2, Comparator (java.util.Comparator) 1, UpdateDoublesSketch (org.apache.datasketches.quantiles.UpdateDoublesSketch) 1, ArrayOfDoublesUpdatableSketch (org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch) 1, ArrayOfDoublesUpdatableSketchBuilder (org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder) 1, IAE (org.apache.druid.java.util.common.IAE) 1, ObjectAggregateCombiner (org.apache.druid.query.aggregation.ObjectAggregateCombiner) 1, PostAggregator (org.apache.druid.query.aggregation.PostAggregator) 1, ConstantPostAggregator (org.apache.druid.query.aggregation.post.ConstantPostAggregator) 1, BaseDoubleColumnValueSelector (org.apache.druid.segment.BaseDoubleColumnValueSelector) 1