
Example 71 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class CalciteQueryTest method testExactCountDistinctWithFilter.

@Test
public void testExactCountDistinctWithFilter() throws Exception {
    final String sqlQuery = "SELECT COUNT(DISTINCT foo.dim1) FILTER(WHERE foo.cnt = 1), SUM(foo.cnt) FROM druid.foo";
    // With useGroupingSetForExactDistinct=false, planning fails due
    // to a bug in Calcite's rule (AggregateExpandDistinctAggregatesRule).
    try {
        testQuery(
            PLANNER_CONFIG_NO_HLL.withOverrides(
                ImmutableMap.of(PlannerConfig.CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT, "false")), // Enable exact count distinct
            sqlQuery,
            CalciteTests.REGULAR_USER_AUTH_RESULT,
            ImmutableList.of(),
            ImmutableList.of());
        Assert.fail("query execution should fail");
    } catch (RuntimeException e) {
        Assert.assertTrue(e.getMessage().contains("Error while applying rule AggregateExpandDistinctAggregatesRule"));
    }
    requireMergeBuffers(3);
    testQuery(
        PLANNER_CONFIG_NO_HLL.withOverrides(
            ImmutableMap.of(PlannerConfig.CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT, "true")),
        sqlQuery,
        CalciteTests.REGULAR_USER_AUTH_RESULT,
        ImmutableList.of(
            GroupByQuery.builder()
                .setDataSource(new QueryDataSource(
                    GroupByQuery.builder()
                        .setDataSource(CalciteTests.DATASOURCE1)
                        .setInterval(querySegmentSpec(Filtration.eternity()))
                        .setGranularity(Granularities.ALL)
                        .setVirtualColumns(expressionVirtualColumn(
                            "v0",
                            NullHandling.replaceWithDefault() ? "(\"cnt\" == 1)" : "((\"cnt\" == 1) > 0)",
                            ColumnType.LONG))
                        .setDimensions(dimensions(
                            new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING),
                            new DefaultDimensionSpec("v0", "d1", ColumnType.LONG)))
                        .setAggregatorSpecs(aggregators(
                            new LongSumAggregatorFactory("a0", "cnt"),
                            new GroupingAggregatorFactory("a1", Arrays.asList("dim1", "v0"))))
                        .setSubtotalsSpec(ImmutableList.of(ImmutableList.of("d0", "d1"), ImmutableList.of()))
                        .build()))
                .setInterval(querySegmentSpec(Filtration.eternity()))
                .setGranularity(Granularities.ALL)
                .setAggregatorSpecs(aggregators(
                    new FilteredAggregatorFactory(
                        new CountAggregatorFactory("_a0"),
                        and(not(selector("d0", null, null)), selector("a1", "0", null))),
                    new FilteredAggregatorFactory(
                        new LongMinAggregatorFactory("_a1", "a0"),
                        selector("a1", "3", null))))
                .setContext(QUERY_CONTEXT_DEFAULT)
                .build()),
        ImmutableList.of(new Object[]{NullHandling.replaceWithDefault() ? 5L : 6L, 6L}));
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) LongMinAggregatorFactory(org.apache.druid.query.aggregation.LongMinAggregatorFactory) Test(org.junit.Test)
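
For orientation, the grouping-set strategy toggled by this test is controlled by two SQL context parameters. The snippet below is a minimal sketch, not part of the test above: the two context keys are Druid's actual planner keys, but the map itself is illustrative.

import com.google.common.collect.ImmutableMap;
import java.util.Map;

// Sketch: a query context requesting exact COUNT(DISTINCT ...) planned via grouping sets,
// mirroring what PLANNER_CONFIG_NO_HLL plus the override does in the passing branch above.
Map<String, Object> context = ImmutableMap.of(
    "useApproximateCountDistinct", false,  // disable HLL approximation; count distinct is exact
    "useGroupingSetForExactDistinct", true // plan the exact distinct as a grouping-set groupBy
);

The requireMergeBuffers(3) call in the test reflects that this grouping-set plan needs additional merge buffers at execution time.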

Example 72 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class TestClusterQuerySegmentWalker method getQueryRunnerForSegments.

@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
    final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
    if (factory == null) {
        throw new ISE("Unknown query type[%s].", query.getClass());
    }
    final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
    if (!analysis.isConcreteTableBased()) {
        throw new ISE("Cannot handle datasource: %s", query.getDataSource());
    }
    final String dataSourceName = ((TableDataSource) analysis.getBaseDataSource()).getName();
    final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
    // Make sure this query type can handle the subquery, if present.
    if (analysis.isQuery() && !toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery())) {
        throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
    }
    final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
        analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
        analysis.getPreJoinableClauses(),
        new AtomicLong(),
        analysis.getBaseQuery().orElse(query));
    final QueryRunner<T> baseRunner = new FinalizeResultsQueryRunner<>(
        toolChest.postMergeQueryDecoration(
            toolChest.mergeResults(
                toolChest.preMergeQueryDecoration(
                    makeTableRunner(toolChest, factory, getSegmentsForTable(dataSourceName, specs), segmentMapFn)))),
        toolChest);
    // Return a runner that registers the remaining-response counter, pins the query to the
    // requested segments, and goes through the scheduler (when configured) to actually serve the queries.
    return (theQuery, responseContext) -> {
        responseContext.initializeRemainingResponses();
        responseContext.addRemainingResponse(theQuery.getQuery().getMostSpecificId(), 0);
        if (scheduler != null) {
            Set<SegmentServerSelector> segments = new HashSet<>();
            specs.forEach(spec -> segments.add(new SegmentServerSelector(spec)));
            return scheduler.run(
                scheduler.prioritizeAndLaneQuery(theQuery, segments),
                new LazySequence<>(() -> baseRunner.run(
                    theQuery.withQuery(Queries.withSpecificSegments(theQuery.getQuery(), ImmutableList.copyOf(specs))),
                    responseContext)));
        } else {
            return baseRunner.run(
                theQuery.withQuery(Queries.withSpecificSegments(theQuery.getQuery(), ImmutableList.copyOf(specs))),
                responseContext);
        }
    };
}
Also used : DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) JoinableFactory(org.apache.druid.segment.join.JoinableFactory) Function(java.util.function.Function) NoopQueryRunner(org.apache.druid.query.NoopQueryRunner) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SegmentReference(org.apache.druid.segment.SegmentReference) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) SpecificSegmentQueryRunner(org.apache.druid.query.spec.SpecificSegmentQueryRunner) Map(java.util.Map) SegmentServerSelector(org.apache.druid.client.SegmentServerSelector) QueryRunner(org.apache.druid.query.QueryRunner) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) Nullable(javax.annotation.Nullable) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) Execs(org.apache.druid.java.util.common.concurrent.Execs) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) QueryToolChest(org.apache.druid.query.QueryToolChest) JoinableFactoryWrapper(org.apache.druid.segment.join.JoinableFactoryWrapper) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) SpecificSegmentSpec(org.apache.druid.query.spec.SpecificSegmentSpec) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) TableDataSource(org.apache.druid.query.TableDataSource) Queries(org.apache.druid.query.Queries) AtomicLong(java.util.concurrent.atomic.AtomicLong) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) ReferenceCountingSegmentQueryRunner(org.apache.druid.query.ReferenceCountingSegmentQueryRunner) QueryRunnerFactory(org.apache.druid.query.QueryRunnerFactory) Preconditions(com.google.common.base.Preconditions) FunctionalIterable(org.apache.druid.java.util.common.guava.FunctionalIterable) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Filters(org.apache.druid.segment.filter.Filters) Collections(java.util.Collections)
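
A minimal usage sketch for the walker above. The helper, walker, query, and specs names are assumed from a surrounding test harness; QueryPlus.wrap, ResponseContext.createEmpty, and Sequence.toList are standard Druid query APIs.

import java.util.List;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.query.context.ResponseContext;

// Hypothetical helper: run a query against specific segments through the walker and collect results.
static <T> List<T> runForSegments(QuerySegmentWalker walker, Query<T> query, Iterable<SegmentDescriptor> specs) {
    QueryRunner<T> runner = walker.getQueryRunnerForSegments(query, specs);
    // The lambda returned above expects a QueryPlus and registers the remaining-response counter itself.
    Sequence<T> results = runner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
    return results.toList(); // materialize the lazy sequence
}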

Example 73 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class CalciteJoinQueryTest method testExactTopNOnInnerJoinWithLimit.

@Test
public void testExactTopNOnInnerJoinWithLimit() throws Exception {
    // Adjust topN threshold, so that the topN engine keeps only 1 slot for aggregates, which should be enough
    // to compute the query with limit 1.
    minTopNThreshold = 1;
    Map<String, Object> context = new HashMap<>(QUERY_CONTEXT_DEFAULT);
    context.put(PlannerConfig.CTX_KEY_USE_APPROXIMATE_TOPN, false);
    testQuery("select f1.\"dim4\", sum(\"m1\") from numfoo f1 inner join (\n" + "  select \"dim4\" from numfoo where dim4 <> 'a' group by 1\n" + ") f2 on f1.\"dim4\" = f2.\"dim4\" group by 1 limit 1", // turn on exact topN
    context, ImmutableList.of(new TopNQueryBuilder().intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).dimension(new DefaultDimensionSpec("dim4", "_d0")).aggregators(new DoubleSumAggregatorFactory("a0", "m1")).metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)).threshold(1).dataSource(JoinDataSource.create(new TableDataSource("numfoo"), new QueryDataSource(GroupByQuery.builder().setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setDimFilter(new NotDimFilter(new SelectorDimFilter("dim4", "a", null))).setDataSource(new TableDataSource("numfoo")).setDimensions(new DefaultDimensionSpec("dim4", "_d0")).setContext(context).build()), "j0.", "(\"dim4\" == \"j0._d0\")", JoinType.INNER, null, ExprMacroTable.nil())).context(context).build()), ImmutableList.of(new Object[] { "b", 15.0 }));
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) NotDimFilter(org.apache.druid.query.filter.NotDimFilter) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) HashMap(java.util.HashMap) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)
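
For reference, the JoinDataSource.create(...) arguments in the native query above are, in order, as glossed below. The inline comments are descriptive annotations from the call site, not Druid's actual parameter names.

JoinDataSource.create(
    new TableDataSource("numfoo"),              // left side: the base table
    new QueryDataSource(/* groupBy on dim4 */), // right side: the filtered, grouped subquery producing j0._d0
    "j0.",                                      // prefix applied to right-hand columns
    "(\"dim4\" == \"j0._d0\")",                 // join condition expression
    JoinType.INNER,                             // join type
    null,                                       // base (left) filter: none here
    ExprMacroTable.nil());                      // macro table used to parse the condition expression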

Example 74 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class CalciteJoinQueryTest method testGroupByJoinAsNativeQueryWithUnoptimizedFilter.

@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testGroupByJoinAsNativeQueryWithUnoptimizedFilter(Map<String, Object> queryContext) {
    // The query below is the same as the inner groupBy on a join datasource from the test
    // testNestedGroupByOnInlineDataSourceWithFilter, except that the selector filter
    // dim1=def has been rewritten into an unoptimized filter, dim1 IN (def).
    // 
    // The unoptimized filter will be optimized into dim1=def by the query toolchests in their
    // pre-merge decoration function, when it calls DimFilter.optimize().
    // 
    // This test's goal is to ensure that the join filter rewrites function correctly when there are
    // unoptimized filters in the join query. The rewrite logic must apply to the optimized form of the filters,
    // as this is what will be passed to HashJoinSegmentAdapter.makeCursors(), where the result of the join
    // filter pre-analysis is used.
    // 
    // A native query is used because the filter types where we support optimization are the AND/OR/NOT and
    // IN filters. However, when expressed in a SQL query, our SQL planning layer is smart enough to already apply
    // these optimizations in the native query it generates, making it impossible to test the unoptimized filter forms
    // using SQL queries.
    // 
    // The test method is placed here for convenience as this class provides the necessary setup.
    Query query = GroupByQuery.builder()
        .setDataSource(join(
            new QueryDataSource(
                newScanQueryBuilder()
                    .dataSource(CalciteTests.DATASOURCE1)
                    .intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
                    .columns("dim1")
                    .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                    .context(queryContext)
                    .build()),
            new QueryDataSource(
                newScanQueryBuilder()
                    .dataSource(CalciteTests.DATASOURCE1)
                    .intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
                    .columns("dim1", "m2")
                    .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                    .context(queryContext)
                    .build()),
            "j0.",
            equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.dim1")),
            JoinType.INNER))
        .setGranularity(Granularities.ALL)
        .setInterval(querySegmentSpec(Filtration.eternity()))
        // provide an unoptimized IN filter
        .setDimFilter(in("dim1", Collections.singletonList("def"), null))
        .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0")))
        .setVirtualColumns(expressionVirtualColumn("v0", "'def'", ColumnType.STRING))
        .build();
    QueryLifecycleFactory qlf = CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate);
    QueryLifecycle ql = qlf.factorize();
    Sequence seq = ql.runSimple(query, CalciteTests.SUPER_USER_AUTH_RESULT, Access.OK);
    List<Object> results = seq.toList();
    Assert.assertEquals(ImmutableList.of(ResultRow.of("def")), results);
}
Also used : QueryLifecycle(org.apache.druid.server.QueryLifecycle) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) QueryDataSource(org.apache.druid.query.QueryDataSource) QueryLifecycleFactory(org.apache.druid.server.QueryLifecycleFactory) Sequence(org.apache.druid.java.util.common.guava.Sequence) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Parameters(junitparams.Parameters) Test(org.junit.Test)
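
The filter optimization described in the comments above can be reproduced directly on the filter objects. A minimal sketch, assuming the 0.22-era no-argument DimFilter.optimize() signature (constructor overloads vary across Druid versions):

import com.google.common.collect.ImmutableSet;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.InDimFilter;

// A single-value IN filter, like the one produced by in("dim1", Collections.singletonList("def"), null) above.
DimFilter unoptimized = new InDimFilter("dim1", ImmutableSet.of("def"), null);

// The toolchest's pre-merge decoration calls optimize(); a one-element IN collapses to the
// equivalent of selector("dim1", "def", null), which is the form the join filter rewrite
// logic must be able to handle.
DimFilter optimized = unoptimized.optimize();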

Example 75 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class CalciteJoinQueryTest method testNestedGroupByOnInlineDataSourceWithFilter.

@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testNestedGroupByOnInlineDataSourceWithFilter(Map<String, Object> queryContext) throws Exception {
    // Cannot vectorize due to virtual columns.
    cannotVectorize();
    testQuery("with abc as" + "(" + "  SELECT dim1, m2 from druid.foo where \"__time\" >= '2001-01-02'" + ")" + ", def as" + "(" + "  SELECT t1.dim1, SUM(t2.m2) as \"metricSum\" " + "  from abc as t1 inner join abc as t2 on t1.dim1 = t2.dim1" + "  where t1.dim1='def'" + "  group by 1" + ")" + "SELECT count(*) from def", queryContext, ImmutableList.of(GroupByQuery.builder().setDataSource(GroupByQuery.builder().setDataSource(join(new QueryDataSource(newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z"))).columns("dim1").resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST).context(queryContext).build()), new QueryDataSource(newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z"))).columns("dim1", "m2").resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST).context(queryContext).build()), "j0.", equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.dim1")), JoinType.INNER)).setGranularity(Granularities.ALL).setInterval(querySegmentSpec(Filtration.eternity())).setDimFilter(selector("dim1", "def", null)).setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0"))).setVirtualColumns(expressionVirtualColumn("v0", "'def'", ColumnType.STRING)).build()).setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))).setGranularity(Granularities.ALL).setInterval(querySegmentSpec(Filtration.eternity())).build()), ImmutableList.of(new Object[] { 1L }));
}
Also used : QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Parameters(junitparams.Parameters) Test(org.junit.Test)

Aggregations

QueryDataSource (org.apache.druid.query.QueryDataSource): 95 usages
Test (org.junit.Test): 81 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 51 usages
TableDataSource (org.apache.druid.query.TableDataSource): 44 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 41 usages
Parameters (junitparams.Parameters): 30 usages
GlobalTableDataSource (org.apache.druid.query.GlobalTableDataSource): 29 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 23 usages
FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory): 14 usages
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 14 usages
LookupDataSource (org.apache.druid.query.LookupDataSource): 13 usages
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter): 11 usages
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 11 usages
ArithmeticPostAggregator (org.apache.druid.query.aggregation.post.ArithmeticPostAggregator): 10 usages
FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator): 10 usages
DataSource (org.apache.druid.query.DataSource): 8 usages
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 8 usages
LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory): 8 usages
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 8 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 8 usages