Search in sources :

Example 46 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class DoublesSketchAggregatorTest method testFailureWhenMaxStreamLengthHit.

/**
 * Runs a groupBy query whose quantilesDoublesSketch aggregator is configured with
 * {@code "maxStreamLength": 10} and expects an {@link IllegalStateException} (matched anywhere in
 * the cause chain via {@code RecursiveExceptionMatcher}) carrying the message
 * "NullPointerException was thrown while updating Doubles sketch".
 *
 * Both strategy branches use the same data file, parser spec, ingestion aggregators and query; they
 * differ only in whether the expectation is registered before the query call (V1) or before the
 * result sequence is materialized (all other strategies).
 */
@Test
public void testFailureWhenMaxStreamLengthHit() throws Exception {
    // Shared, side-effect-free inputs for both branches.
    final String parserJson = String.join(
        "\n",
        "{",
        "  \"type\": \"string\",",
        "  \"parseSpec\": {",
        "    \"format\": \"tsv\",",
        "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
        "    \"dimensionsSpec\": {",
        "      \"dimensions\": [\"sequenceNumber\", \"product\"],",
        "      \"dimensionExclusions\": [],",
        "      \"spatialDimensions\": []",
        "    },",
        "    \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]",
        "  }",
        "}"
    );
    final String ingestionAggregatorsJson = "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]";
    final String groupByQueryJson = String.join(
        "\n",
        "{",
        "  \"queryType\": \"groupBy\",",
        "  \"dataSource\": \"test_datasource\",",
        "  \"granularity\": \"ALL\",",
        "  \"dimensions\": [],",
        "  \"aggregations\": [",
        "    {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128, \"maxStreamLength\": 10}",
        "  ],",
        "  \"postAggregations\": [",
        "    {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
        "    {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
        "    {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}",
        "  ],",
        "  \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
        "}"
    );

    final boolean usesV1Strategy = GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy());
    if (usesV1Strategy) {
        // Expectation is armed before the call; presumably V1 fails while the query is being run
        // rather than when results are consumed -- TODO confirm.
        expectedException.expect(new RecursiveExceptionMatcher(IllegalStateException.class));
        expectedException.expectMessage("NullPointerException was thrown while updating Doubles sketch");
        helper.createIndexAndRunQueryOnSegment(
            new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()),
            parserJson,
            ingestionAggregatorsJson,
            0, // minTimestamp
            Granularities.NONE,
            10, // maxRowCount
            groupByQueryJson
        );
    } else {
        final Sequence<ResultRow> pendingResults = helper.createIndexAndRunQueryOnSegment(
            new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()),
            parserJson,
            ingestionAggregatorsJson,
            0, // minTimestamp
            Granularities.NONE,
            10, // maxRowCount
            groupByQueryJson
        );
        // Expectation is armed only after the sequence has been obtained, so the failure must come
        // from materializing it.
        expectedException.expect(new RecursiveExceptionMatcher(IllegalStateException.class));
        expectedException.expectMessage("NullPointerException was thrown while updating Doubles sketch");
        pendingResults.toList();
    }
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) File(java.io.File) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 47 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class DoublesSketchAggregatorTest method buildingSketchesAtQueryTime.

/**
 * Ingests {@code quantiles/doubles_build_data.tsv} with plain doubleSum metrics, then builds
 * quantiles sketches at query time over both the "value" and "valueWithNulls" columns and checks
 * the sketch sizes and the quantile / quantiles / histogram post-aggregations for each.
 *
 * Expectations for the "WithNulls" column depend on {@code NullHandling.replaceWithDefault()}.
 */
@Test
public void buildingSketchesAtQueryTime() throws Exception {
    final File dataFile = new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile());
    final String parserJson = String.join(
        "\n",
        "{",
        "  \"type\": \"string\",",
        "  \"parseSpec\": {",
        "    \"format\": \"tsv\",",
        "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
        "    \"dimensionsSpec\": {",
        "      \"dimensions\": [\"sequenceNumber\", \"product\"],",
        "      \"dimensionExclusions\": [],",
        "      \"spatialDimensions\": []",
        "    },",
        "    \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\", \"valueWithNulls\"]",
        "  }",
        "}"
    );
    final String ingestionAggregatorsJson =
        "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"},"
        + "{\"type\": \"doubleSum\", \"name\": \"valueWithNulls\", \"fieldName\": \"valueWithNulls\"}]";
    final String groupByQueryJson = String.join(
        "\n",
        "{",
        "  \"queryType\": \"groupBy\",",
        "  \"dataSource\": \"test_datasource\",",
        "  \"granularity\": \"ALL\",",
        "  \"dimensions\": [],",
        "  \"aggregations\": [",
        "    {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128},",
        "    {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 128}",
        "  ],",
        "  \"postAggregations\": [",
        "    {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
        "    {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
        "    {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
        "    {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantileWithNulls\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},",
        "    {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},",
        "    {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}",
        "  ],",
        "  \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
        "}"
    );

    final Sequence<ResultRow> resultSequence = helper.createIndexAndRunQueryOnSegment(
        dataFile,
        parserJson,
        ingestionAggregatorsJson,
        0, // minTimestamp
        Granularities.NONE,
        10, // maxRowCount
        groupByQueryJson
    );

    final List<ResultRow> rows = resultSequence.toList();
    Assert.assertEquals(1, rows.size());
    final ResultRow onlyRow = rows.get(0);

    // Row positions 0 and 1 presumably line up with the two aggregators in query order -- verify
    // against the ResultRow layout.
    final Object sketchSize = onlyRow.get(0);
    Assert.assertTrue(sketchSize instanceof Long);
    Assert.assertEquals(400, (long) sketchSize);

    final Object sketchSizeWithNulls = onlyRow.get(1);
    Assert.assertTrue(sketchSizeWithNulls instanceof Long);
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 400 : 377, (long) sketchSizeWithNulls);

    // post agg: median value
    final Object median = onlyRow.get(2);
    Assert.assertTrue(median instanceof Double);
    Assert.assertEquals(0.5, (double) median, 0.05);

    // post agg: min, median and max values
    final Object quantileArray = onlyRow.get(3);
    Assert.assertTrue(quantileArray instanceof double[]);
    final double[] minMedianMax = (double[]) quantileArray;
    Assert.assertEquals(0, minMedianMax[0], 0.05);
    Assert.assertEquals(0.5, minMedianMax[1], 0.05);
    Assert.assertEquals(1, minMedianMax[2], 0.05);

    // post agg: 400 items uniformly distributed into 4 bins
    final Object histogramArray = onlyRow.get(4);
    Assert.assertTrue(histogramArray instanceof double[]);
    final double[] binCounts = (double[]) histogramArray;
    for (final double binCount : binCounts) {
        Assert.assertEquals(100, binCount, 100 * 0.2);
    }

    // post agg with nulls: median value
    final Object medianWithNulls = onlyRow.get(5);
    Assert.assertTrue(medianWithNulls instanceof Double);
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 7.4 : 7.5, (double) medianWithNulls, 0.1);

    // post agg with nulls: min, median and max values
    final Object quantileArrayWithNulls = onlyRow.get(6);
    Assert.assertTrue(quantileArrayWithNulls instanceof double[]);
    final double[] minMedianMaxWithNulls = (double[]) quantileArrayWithNulls;
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 5.0, minMedianMaxWithNulls[0], 0.05);
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 7.4 : 7.5, minMedianMaxWithNulls[1], 0.1);
    Assert.assertEquals(10.0, minMedianMaxWithNulls[2], 0.05);

    // post agg with nulls: 4 bins, but the distribution is skewed due to nulls/0s, hence the
    // much wider tolerance
    final Object histogramArrayWithNulls = onlyRow.get(7);
    Assert.assertTrue(histogramArrayWithNulls instanceof double[]);
    final double[] binCountsWithNulls = (double[]) histogramArrayWithNulls;
    for (final double binCount : binCountsWithNulls) {
        Assert.assertEquals(100, binCount, 80);
    }
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) File(java.io.File) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 48 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class SketchAggregationTest method testRetentionDataIngestAndGpByQuery.

/**
 * Ingests {@code retention_test_data.tsv}, runs the groupBy query loaded from
 * {@code retention_test_data_group_by_query.json}, and checks that exactly one row comes back
 * with the expected sketch estimates for "product_1".
 */
@Test
public void testRetentionDataIngestAndGpByQuery() throws Exception {
    final GroupByQuery groupByQuery = readQueryFromClasspath(
        "retention_test_data_group_by_query.json",
        helper.getObjectMapper(),
        vectorize
    );

    final File dataFile = new File(this.getClass().getClassLoader().getResource("retention_test_data.tsv").getFile());
    final String parserJson = readFileFromClasspathAsString("simple_test_data_record_parser.json");
    final String ingestionAggregatorsJson = readFileFromClasspathAsString("simple_test_data_aggregators.json");

    final Sequence<ResultRow> resultSequence = helper.createIndexAndRunQueryOnSegment(
        dataFile,
        parserJson,
        ingestionAggregatorsJson,
        0,
        Granularities.NONE,
        5,
        groupByQuery
    );

    final List<ResultRow> actualRows = resultSequence.toList();
    Assert.assertEquals(1, actualRows.size());

    // The expectation is written as a legacy map-based row and converted into the array-based
    // ResultRow form using the query that produced the results.
    final MapBasedRow expectedLegacyRow = new MapBasedRow(
        DateTimes.of("2014-10-19T00:00:00.000Z"),
        ImmutableMap.<String, Object>builder()
                    .put("product", "product_1")
                    .put("p1_unique_country_day_1", 20.0)
                    .put("p1_unique_country_day_2", 20.0)
                    .put("p1_unique_country_day_3", 10.0)
                    .put("sketchEstimatePostAgg", 20.0)
                    .put("sketchIntersectionPostAggEstimate1", 10.0)
                    .put("sketchIntersectionPostAggEstimate2", 5.0)
                    .put("non_existing_col_validation", 0.0)
                    .build()
    );
    final List<ResultRow> expectedRows = ImmutableList.of(ResultRow.fromLegacyRow(expectedLegacyRow, groupByQuery));
    Assert.assertEquals(expectedRows, actualRows);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) MapBasedRow(org.apache.druid.data.input.MapBasedRow) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) File(java.io.File) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) Test(org.junit.Test)

Example 49 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class SketchAggregationTest method testEmptySketchAggregateCombine.

/**
 * Ingests {@code empty_sketch_data.tsv}, runs the groupBy query loaded from
 * {@code empty_sketch_group_by_query.json}, and checks that the single resulting row reports
 * a {@code sketch_count} of 0.0 for "product_b".
 */
@Test
public void testEmptySketchAggregateCombine() throws Exception {
    final GroupByQuery groupByQuery = readQueryFromClasspath(
        "empty_sketch_group_by_query.json",
        helper.getObjectMapper(),
        vectorize
    );

    final File dataFile = new File(SketchAggregationTest.class.getClassLoader().getResource("empty_sketch_data.tsv").getFile());
    final String parserJson = readFileFromClasspathAsString("empty_sketch_data_record_parser.json");
    final String ingestionAggregatorsJson = readFileFromClasspathAsString("empty_sketch_test_data_aggregators.json");

    final Sequence<ResultRow> resultSequence = helper.createIndexAndRunQueryOnSegment(
        dataFile,
        parserJson,
        ingestionAggregatorsJson,
        0,
        Granularities.NONE,
        5,
        groupByQuery
    );

    final List<ResultRow> actualRows = resultSequence.toList();
    Assert.assertEquals(1, actualRows.size());

    // Expected value is expressed as a legacy map-based row, then converted with the same query.
    final MapBasedRow expectedLegacyRow = new MapBasedRow(
        DateTimes.of("2019-07-14T00:00:00.000Z"),
        ImmutableMap.<String, Object>builder()
                    .put("product", "product_b")
                    .put("sketch_count", 0.0)
                    .build()
    );
    final ResultRow expectedRow = ResultRow.fromLegacyRow(expectedLegacyRow, groupByQuery);
    Assert.assertEquals(expectedRow, actualRows.get(0));
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) MapBasedRow(org.apache.druid.data.input.MapBasedRow) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) File(java.io.File) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) Test(org.junit.Test)

Example 50 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class HllSketchAggregatorTest method buildSketchesAtQueryTime.

/**
 * Ingests {@code hll/hll_raw.tsv} with no ingestion-time aggregators, builds an HLL sketch over
 * the "id" column at query time (rounding disabled via {@code !ROUND}), and checks that the
 * single result row's first value is 200 within a tolerance of 0.1.
 */
@Test
public void buildSketchesAtQueryTime() throws Exception {
    final File dataFile = new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile());
    final String parserJson = buildParserJson(
        Arrays.asList("dim", "multiDim", "id"),
        Arrays.asList("timestamp", "dim", "multiDim", "id")
    );
    final String groupByQueryJson = buildGroupByQueryJson("HLLSketchBuild", "id", !ROUND);

    final Sequence<ResultRow> resultSequence = helper.createIndexAndRunQueryOnSegment(
        dataFile,
        parserJson,
        "[]", // no ingestion-time aggregators
        0, // minTimestamp
        Granularities.NONE,
        200, // maxRowCount
        groupByQueryJson
    );

    final List<ResultRow> rows = resultSequence.toList();
    Assert.assertEquals(1, rows.size());

    final ResultRow onlyRow = rows.get(0);
    Assert.assertEquals(200, (double) onlyRow.get(0), 0.1);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) File(java.io.File) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

ResultRow (org.apache.druid.query.groupby.ResultRow): 129
Test (org.junit.Test): 81
GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 65
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 59
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 58
File (java.io.File): 39
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 37
QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment): 34
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 24
Benchmark (org.openjdk.jmh.annotations.Benchmark): 21
BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode): 21
OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit): 21
IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 20
LegacySegmentSpec (org.apache.druid.query.spec.LegacySegmentSpec): 18
List (java.util.List): 17
DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 15
ArrayList (java.util.ArrayList): 14
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 14
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 13
ByteBuffer (java.nio.ByteBuffer): 12