Example 41 with ResultRow

Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

From the class TDigestSketchAggregatorTest, the method testIngestingSketches:

@Test
public void testIngestingSketches() throws Exception {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("doubles_sketch_data.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\"],", "      \"dimensionExclusions\": [],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"product\", \"sketch\"]", "  }", "}"), String.join("\n", "[", "  {\"type\": \"tDigestSketch\", \"name\": \"first_level_merge_sketch\", \"fieldName\": \"sketch\", " + "\"compression\": " + "200}", "]"), // minTimestamp
    0, Granularities.NONE, // maxRowCount
    10, String.join("\n", "{", "  \"queryType\": \"groupBy\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"dimensions\": [],", "  \"aggregations\": [", "    {\"type\": \"tDigestSketch\", \"name\": \"second_level_merge_sketch\", \"fieldName\": " + "\"first_level_merge_sketch\", \"compression\": " + "200}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"quantilesFromTDigestSketch\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"second_level_merge_sketch\"}}", "  ],", "  \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", "}"));
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    ResultRow row = results.get(0);
    // post agg: "quantiles"
    Object quantilesObject = row.get(1);
    Assert.assertTrue(quantilesObject instanceof double[]);
    double[] quantiles = (double[]) quantilesObject;
    // min value
    Assert.assertEquals(0.001, quantiles[0], 0.0006);
    // median value
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.47 : 0.5, quantiles[1], 0.05);
    // max value
    Assert.assertEquals(1, quantiles[2], 0.05);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) File(java.io.File) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)
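
In this result the row is positional: with granularity "ALL" and no dimensions, index 0 holds the "second_level_merge_sketch" aggregator and index 1 the "quantiles" post-aggregator, which is why the test reads row.get(1). A minimal sketch of a reusable check for such a quantiles column follows; the index parameter and the monotonicity check are illustrative additions, not part of the test above.

import java.util.List;
import org.apache.druid.query.groupby.ResultRow;

public class QuantilesResultChecks {

    /**
     * Pulls a double[] post-aggregator value out of the only ResultRow and
     * verifies the fractions come back in non-decreasing order.
     * Assumes a single-row, ALL-granularity result where the quantiles
     * post-agg sits at the given positional index.
     */
    public static double[] extractQuantiles(List<ResultRow> results, int quantilesIndex) {
        if (results.size() != 1) {
            throw new IllegalStateException("expected exactly one result row, got " + results.size());
        }
        Object value = results.get(0).get(quantilesIndex);
        if (!(value instanceof double[])) {
            throw new IllegalStateException("expected double[] at index " + quantilesIndex);
        }
        double[] quantiles = (double[]) value;
        for (int i = 1; i < quantiles.length; i++) {
            if (quantiles[i] < quantiles[i - 1]) {
                throw new IllegalStateException("quantiles are not non-decreasing at position " + i);
            }
        }
        return quantiles;
    }
}

Usage against the test above would be double[] q = QuantilesResultChecks.extractQuantiles(seq.toList(), 1);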

Example 42 with ResultRow

Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

From the class TDigestSketchAggregatorTest, the method buildingSketchesAtIngestionTime:

@Test
public void buildingSketchesAtIngestionTime() throws Exception {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("doubles_build_data.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\"],", "      \"dimensionExclusions\": [ \"sequenceNumber\"],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", "  }", "}"), "[{\"type\": \"tDigestSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"compression\": 200}]", // minTimestamp
    0, Granularities.NONE, // maxRowCount
    10, String.join("\n", "{", "  \"queryType\": \"groupBy\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"dimensions\": [],", "  \"aggregations\": [", "    {\"type\": \"tDigestSketch\", \"name\": \"merged_sketch\", \"fieldName\": \"sketch\", " + "\"compression\": " + "200}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"quantilesFromTDigestSketch\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], " + "\"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"merged_sketch\"}}", "  ],", "  \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", "}"));
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    ResultRow row = results.get(0);
    // post agg: "quantiles"
    Object quantilesObject = row.get(1);
    Assert.assertTrue(quantilesObject instanceof double[]);
    double[] quantiles = (double[]) quantilesObject;
    // min value
    Assert.assertEquals(0.001, quantiles[0], 0.0006);
    // median value
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.47 : 0.5, quantiles[1], 0.05);
    // max value
    Assert.assertEquals(1, quantiles[2], 0.05);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) File(java.io.File) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)
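
For orientation, the tDigestSketch aggregation is built on the t-digest data structure. Below is a minimal standalone sketch of the same quantile computation using the com.tdunning t-digest library directly; the uniform [0, 1] input is a stand-in for the test's "value" column, and treating this library as what the extension wraps is an assumption, not the extension's own code path.

import com.tdunning.math.stats.TDigest;

public class TDigestQuantileSketch {
    public static void main(String[] args) {
        // Same compression as the "compression": 200 setting in the JSON spec above.
        TDigest digest = TDigest.createMergingDigest(200);

        // Stand-in for the "value" column of doubles_build_data.tsv:
        // 400 values spread uniformly over [0, 1].
        for (int i = 0; i < 400; i++) {
            digest.add(i / 399.0);
        }

        // Roughly the min, median and max that the
        // quantilesFromTDigestSketch post-aggregator reports.
        System.out.printf("q0=%.3f q0.5=%.3f q1=%.3f%n",
            digest.quantile(0.0), digest.quantile(0.5), digest.quantile(1.0));
    }
}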

Example 43 with ResultRow

Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

From the class MapVirtualColumnGroupByTest, the method testWithSubColumn:

@Test
public void testWithSubColumn() {
    final GroupByQuery query = new GroupByQuery(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))), VirtualColumns.create(ImmutableList.of(new MapVirtualColumn("keys", "values", "params"))), null, Granularities.ALL, ImmutableList.of(new DefaultDimensionSpec("params.key3", "params.key3")), ImmutableList.of(new CountAggregatorFactory("count")), null, null, null, null, null);
    final List<ResultRow> result = runner.run(QueryPlus.wrap(query)).toList();
    final List<ResultRow> expected = ImmutableList.of(new MapBasedRow(DateTimes.of("2011-01-12T00:00:00.000Z"), MapVirtualColumnTestBase.mapOf("count", 1L, "params.key3", "value3")), new MapBasedRow(DateTimes.of("2011-01-12T00:00:00.000Z"), MapVirtualColumnTestBase.mapOf("count", 2L))).stream().map(row -> ResultRow.fromLegacyRow(row, query)).collect(Collectors.toList());
    Assert.assertEquals(expected, result);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) MapBasedRow(org.apache.druid.data.input.MapBasedRow) QueryPlus(org.apache.druid.query.QueryPlus) Intervals(org.apache.druid.java.util.common.Intervals) StupidPool(org.apache.druid.collections.StupidPool) DefaultBlockingPool(org.apache.druid.collections.DefaultBlockingPool) ByteBuffer(java.nio.ByteBuffer) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ImmutableList(com.google.common.collect.ImmutableList) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) QueryRunner(org.apache.druid.query.QueryRunner) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) GroupByStrategySelector(org.apache.druid.query.groupby.strategy.GroupByStrategySelector) ExpectedException(org.junit.rules.ExpectedException) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) Before(org.junit.Before) DateTimes(org.apache.druid.java.util.common.DateTimes) GroupByQueryRunnerFactory(org.apache.druid.query.groupby.GroupByQueryRunnerFactory) GroupByStrategyV2(org.apache.druid.query.groupby.strategy.GroupByStrategyV2) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IOException(java.io.IOException) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) Collectors(java.util.stream.Collectors) TableDataSource(org.apache.druid.query.TableDataSource) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Granularities(org.apache.druid.java.util.common.granularity.Granularities) List(java.util.List) Rule(org.junit.Rule) QueryRunnerTestHelper(org.apache.druid.query.QueryRunnerTestHelper) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) SegmentId(org.apache.druid.timeline.SegmentId) Assert(org.junit.Assert)
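
The expected rows here are built from MapBasedRow instances and converted with ResultRow.fromLegacyRow. A hedged alternative sketch follows, assuming the positional layout for an ALL-granularity query is [dimensions, then aggregators] with no timestamp column, and that ResultRow.of builds rows positionally; it is illustrative, not the test's own approach.

import com.google.common.collect.ImmutableList;
import java.util.List;
import org.apache.druid.query.groupby.ResultRow;

public class ExpectedRowsSketch {
    // Positional equivalent of the MapBasedRow-based expectation above, assuming
    // the layout [params.key3, count] for an ALL-granularity query with one
    // dimension and one aggregator (no timestamp column is prepended in that case).
    public static List<ResultRow> expectedRows() {
        return ImmutableList.of(
            ResultRow.of("value3", 1L),  // rows whose "values" map contains key3
            ResultRow.of(null, 2L)       // rows without key3 group under a null dimension value
        );
    }
}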

Example 44 with ResultRow

Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

From the class DoublesSketchAggregatorTest, the method buildingSketchesAtIngestionTime:

@Test
public void buildingSketchesAtIngestionTime() throws Exception {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"product\"],", "      \"dimensionExclusions\": [ \"sequenceNumber\"],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\", \"valueWithNulls\"]", "  }", "}"), "[{\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128}," + "{\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 128}]", // minTimestamp
    0, Granularities.NONE, // maxRowCount
    10, String.join("\n", "{", "  \"queryType\": \"groupBy\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"dimensions\": [],", "  \"aggregations\": [", "    {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 128},", "    {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"sketchWithNulls\", \"k\": 128},", "    {\"type\": \"quantilesDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 128}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", "    {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", "  ],", "  \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", "}"));
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    ResultRow row = results.get(0);
    Object sketchObject = row.get(0);
    Assert.assertTrue(sketchObject instanceof Long);
    long sketchValue = (long) sketchObject;
    Assert.assertEquals(400, sketchValue);
    Object sketchObjectWithNulls = row.get(1);
    Assert.assertTrue(sketchObjectWithNulls instanceof Long);
    long sketchValueWithNulls = (long) sketchObjectWithNulls;
    Assert.assertEquals(377, sketchValueWithNulls);
    // post agg
    Object quantilesObject = row.get(3);
    Assert.assertTrue(quantilesObject instanceof double[]);
    double[] quantiles = (double[]) quantilesObject;
    // min value
    Assert.assertEquals(0, quantiles[0], 0.05);
    // median value
    Assert.assertEquals(0.5, quantiles[1], 0.05);
    // max value
    Assert.assertEquals(1, quantiles[2], 0.05);
    // post agg
    Object histogramObject = row.get(4);
    Assert.assertTrue(histogramObject instanceof double[]);
    double[] histogram = (double[]) histogramObject;
    Assert.assertEquals(4, histogram.length);
    for (final double bin : histogram) {
        // 400 items uniformly distributed into 4 bins
        Assert.assertEquals(100, bin, 100 * 0.2);
    }
    // post agg with nulls
    Object quantilesObjectWithNulls = row.get(5);
    Assert.assertTrue(quantilesObjectWithNulls instanceof double[]);
    double[] quantilesWithNulls = (double[]) quantilesObjectWithNulls;
    // min value
    Assert.assertEquals(5.0, quantilesWithNulls[0], 0.05);
    // median value
    Assert.assertEquals(7.55, quantilesWithNulls[1], 0.05);
    // max value
    Assert.assertEquals(10.0, quantilesWithNulls[2], 0.05);
    // post agg with nulls
    Object histogramObjectWithNulls = row.get(6);
    Assert.assertTrue(histogramObjectWithNulls instanceof double[]);
    double[] histogramWithNulls = (double[]) histogramObjectWithNulls;
    Assert.assertEquals(4, histogramWithNulls.length);
    for (final double bin : histogramWithNulls) {
        // distribution is skewed due to nulls
        Assert.assertEquals(100, bin, 50);
    }
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) File(java.io.File) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
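
The assertions above address columns by position (row.get(3), row.get(4), and so on), which tracks the declaration order of aggregators and post-aggregators. Below is a sketch of name-based access for cases where the GroupByQuery object is available; here the query exists only as a JSON string, so this helper is hypothetical for this particular test.

import org.apache.druid.data.input.MapBasedRow;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.ResultRow;

public class NamedColumnAccessSketch {

    /**
     * Converts a positional ResultRow back into a name-keyed row so that
     * post-aggregator outputs can be read by name instead of by index.
     * Requires the GroupByQuery object that produced the row.
     */
    public static double[] quantilesByName(ResultRow row, GroupByQuery query, String postAggName) {
        MapBasedRow named = row.toMapBasedRow(query);
        return (double[]) named.getRaw(postAggName);  // e.g. "quantiles" or "quantilesWithNulls"
    }
}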

Example 45 with ResultRow

Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

From the class DoublesSketchAggregatorTest, the method queryingDataWithFieldNameValueAsFloatInsteadOfSketch:

@Test
public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Exception {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()), String.join("\n", "{", "  \"type\": \"string\",", "  \"parseSpec\": {", "    \"format\": \"tsv\",", "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", "    \"dimensionsSpec\": {", "      \"dimensions\": [\"sequenceNumber\", \"product\"],", "      \"dimensionExclusions\": [],", "      \"spatialDimensions\": []", "    },", "    \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", "  }", "}"), "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", // minTimestamp
    0, Granularities.NONE, // maxRowCount
    10, String.join("\n", "{", "  \"queryType\": \"groupBy\",", "  \"dataSource\": \"test_datasource\",", "  \"granularity\": \"ALL\",", "  \"dimensions\": [],", "  \"aggregations\": [", "    {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128}", "  ],", "  \"postAggregations\": [", "    {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", "    {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", "  ],", "  \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", "}"));
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    ResultRow row = results.get(0);
    Object sketchObject = row.get(0);
    Assert.assertTrue(sketchObject instanceof Long);
    long sketchValue = (long) sketchObject;
    Assert.assertEquals(400, sketchValue);
    // post agg
    Object quantileObject = row.get(1);
    Assert.assertTrue(quantileObject instanceof Double);
    // median value
    Assert.assertEquals(0.5, (double) quantileObject, 0.05);
    // post agg
    Object quantilesObject = row.get(2);
    Assert.assertTrue(quantilesObject instanceof double[]);
    double[] quantiles = (double[]) quantilesObject;
    // min value
    Assert.assertEquals(0, quantiles[0], 0.05);
    // median value
    Assert.assertEquals(0.5, quantiles[1], 0.05);
    // max value
    Assert.assertEquals(1, quantiles[2], 0.05);
    // post agg
    Object histogramObject = row.get(3);
    Assert.assertTrue(histogramObject instanceof double[]);
    double[] histogram = (double[]) histogramObject;
    for (final double bin : histogram) {
        // 400 items uniformly distributed into 4 bins
        Assert.assertEquals(100, bin, 100 * 0.2);
    }
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) File(java.io.File) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
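
The aggregator column read as a Long is the sketch's stream length (400), and the post-aggregators derive a quantile, a quantiles array, and a histogram from that same sketch. Below is a minimal standalone illustration of what the quantilesDoublesSketch aggregation computes, using the Apache DataSketches quantiles library directly; the package and builder calls assume a recent datasketches-java, and the uniform input is a stand-in for the test's "value" column.

import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.datasketches.quantiles.UpdateDoublesSketch;

public class DoublesSketchQuantileDemo {
    public static void main(String[] args) {
        // Same k as the "k": 128 setting in the JSON aggregator spec above.
        UpdateDoublesSketch sketch = DoublesSketch.builder().setK(128).build();

        // Stand-in for the 400 "value" doubles spread uniformly over [0, 1].
        for (int i = 0; i < 400; i++) {
            sketch.update(i / 399.0);
        }

        // getN() is the stream length the test sees when it reads the
        // aggregator column as a Long.
        System.out.println("n = " + sketch.getN());                 // 400
        System.out.println("median ~ " + sketch.getQuantile(0.5));  // close to 0.5
    }
}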

Aggregations

ResultRow (org.apache.druid.query.groupby.ResultRow): 129
Test (org.junit.Test): 81
GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 65
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 59
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 58
File (java.io.File): 39
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 37
QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment): 34
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 24
Benchmark (org.openjdk.jmh.annotations.Benchmark): 21
BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode): 21
OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit): 21
IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 20
LegacySegmentSpec (org.apache.druid.query.spec.LegacySegmentSpec): 18
List (java.util.List): 17
DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 15
ArrayList (java.util.ArrayList): 14
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 14
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 13
ByteBuffer (java.nio.ByteBuffer): 12