use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteQueryTest, method testExactCountDistinctWithFilter:
@Test
public void testExactCountDistinctWithFilter() throws Exception {
  final String sqlQuery =
      "SELECT COUNT(DISTINCT foo.dim1) FILTER(WHERE foo.cnt = 1), SUM(foo.cnt) FROM druid.foo";
  // With grouping sets disabled, planning for this query fails due
  // to a bug in Calcite's rule (AggregateExpandDistinctAggregatesRule).
  try {
    testQuery(
        // PLANNER_CONFIG_NO_HLL enables exact count distinct
        PLANNER_CONFIG_NO_HLL.withOverrides(
            ImmutableMap.of(PlannerConfig.CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT, "false")),
        sqlQuery,
        CalciteTests.REGULAR_USER_AUTH_RESULT,
        ImmutableList.of(),
        ImmutableList.of()
    );
    Assert.fail("query execution should fail");
  } catch (RuntimeException e) {
    Assert.assertTrue(e.getMessage().contains("Error while applying rule AggregateExpandDistinctAggregatesRule"));
  }

  requireMergeBuffers(3);
  testQuery(
      PLANNER_CONFIG_NO_HLL.withOverrides(
          ImmutableMap.of(PlannerConfig.CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT, "true")),
      sqlQuery,
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(
          GroupByQuery.builder()
              .setDataSource(
                  new QueryDataSource(
                      GroupByQuery.builder()
                          .setDataSource(CalciteTests.DATASOURCE1)
                          .setInterval(querySegmentSpec(Filtration.eternity()))
                          .setGranularity(Granularities.ALL)
                          .setVirtualColumns(
                              expressionVirtualColumn(
                                  "v0",
                                  NullHandling.replaceWithDefault() ? "(\"cnt\" == 1)" : "((\"cnt\" == 1) > 0)",
                                  ColumnType.LONG))
                          .setDimensions(dimensions(
                              new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING),
                              new DefaultDimensionSpec("v0", "d1", ColumnType.LONG)))
                          .setAggregatorSpecs(aggregators(
                              new LongSumAggregatorFactory("a0", "cnt"),
                              new GroupingAggregatorFactory("a1", Arrays.asList("dim1", "v0"))))
                          .setSubtotalsSpec(ImmutableList.of(
                              ImmutableList.of("d0", "d1"),
                              ImmutableList.of()))
                          .build()))
              .setInterval(querySegmentSpec(Filtration.eternity()))
              .setGranularity(Granularities.ALL)
              .setAggregatorSpecs(aggregators(
                  new FilteredAggregatorFactory(
                      new CountAggregatorFactory("_a0"),
                      and(not(selector("d0", null, null)), selector("a1", "0", null))),
                  new FilteredAggregatorFactory(
                      new LongMinAggregatorFactory("_a1", "a0"),
                      selector("a1", "3", null))))
              .setContext(QUERY_CONTEXT_DEFAULT)
              .build()),
      ImmutableList.of(new Object[]{NullHandling.replaceWithDefault() ? 5L : 6L, 6L})
  );
}
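Stripped of the test harness, the essential pattern above is a nested GroupByQuery wrapped in a QueryDataSource: the inner query groups on the distinct-target column, and the outer query aggregates over those groups. Below is a minimal sketch of that shape, assuming druid-processing on the classpath; the datasource name "foo" and the column names are illustrative, not taken from the test fixtures.

import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.spec.QuerySegmentSpec;

public class ExactDistinctShape {
  private static final QuerySegmentSpec ETERNITY =
      new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY));

  public static GroupByQuery exactCountDistinct() {
    // Inner query: one output row per distinct value of dim1 (hypothetical column).
    GroupByQuery inner = GroupByQuery.builder()
        .setDataSource(new TableDataSource("foo")) // hypothetical datasource
        .setInterval(ETERNITY)
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("dim1", "d0"))
        .build();
    // Outer query: counting the inner rows yields the exact distinct count.
    return GroupByQuery.builder()
        .setDataSource(new QueryDataSource(inner))
        .setInterval(ETERNITY)
        .setGranularity(Granularities.ALL)
        .setAggregatorSpecs(new CountAggregatorFactory("a0"))
        .build();
  }
}

The double grouping is what makes COUNT(DISTINCT) exact without HLL sketches: each distinct inner value becomes exactly one row, so the outer count cannot over- or under-count.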
use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class TestClusterQuerySegmentWalker, method getQueryRunnerForSegments:
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
  final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
  if (factory == null) {
    throw new ISE("Unknown query type[%s].", query.getClass());
  }

  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
  if (!analysis.isConcreteTableBased()) {
    throw new ISE("Cannot handle datasource: %s", query.getDataSource());
  }

  final String dataSourceName = ((TableDataSource) analysis.getBaseDataSource()).getName();
  final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();

  // Make sure this query type can handle the subquery, if present.
  if (analysis.isQuery()
      && !toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery())) {
    throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
  }

  final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
      analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
      analysis.getPreJoinableClauses(),
      new AtomicLong(),
      analysis.getBaseQuery().orElse(query)
  );

  final QueryRunner<T> baseRunner = new FinalizeResultsQueryRunner<>(
      toolChest.postMergeQueryDecoration(
          toolChest.mergeResults(
              toolChest.preMergeQueryDecoration(
                  makeTableRunner(toolChest, factory, getSegmentsForTable(dataSourceName, specs), segmentMapFn)))),
      toolChest
  );
  // Rewrite the query to reference the specific segments, so that the runner below is able
  // to actually serve the queries.
  return (theQuery, responseContext) -> {
    responseContext.initializeRemainingResponses();
    responseContext.addRemainingResponse(theQuery.getQuery().getMostSpecificId(), 0);
    if (scheduler != null) {
      Set<SegmentServerSelector> segments = new HashSet<>();
      specs.forEach(spec -> segments.add(new SegmentServerSelector(spec)));
      return scheduler.run(
          scheduler.prioritizeAndLaneQuery(theQuery, segments),
          new LazySequence<>(() -> baseRunner.run(
              theQuery.withQuery(Queries.withSpecificSegments(theQuery.getQuery(), ImmutableList.copyOf(specs))),
              responseContext)));
    } else {
      return baseRunner.run(
          theQuery.withQuery(Queries.withSpecificSegments(theQuery.getQuery(), ImmutableList.copyOf(specs))),
          responseContext);
    }
  };
}
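As a hedged usage sketch, here is how a caller might drive a QuerySegmentWalker like this one against a single, explicitly named segment. The walker and query are passed in; the interval, version string, and partition number of the descriptor are placeholders.

import java.util.List;

import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.query.context.ResponseContext;

public class WalkerUsageSketch {
  // Runs the query over one explicitly named segment and materializes the results.
  public static <T> List<T> runOnSegment(QuerySegmentWalker walker, Query<T> query) {
    SegmentDescriptor spec = new SegmentDescriptor(Intervals.ETERNITY, "v1", 0); // placeholder version/partition
    QueryRunner<T> runner = walker.getQueryRunnerForSegments(query, ImmutableList.of(spec));
    Sequence<T> results = runner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
    return results.toList();
  }
}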
use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteJoinQueryTest, method testExactTopNOnInnerJoinWithLimit:
@Test
public void testExactTopNOnInnerJoinWithLimit() throws Exception {
  // Adjust topN threshold, so that the topN engine keeps only 1 slot for aggregates, which should be enough
  // to compute the query with limit 1.
  minTopNThreshold = 1;
  Map<String, Object> context = new HashMap<>(QUERY_CONTEXT_DEFAULT);
  context.put(PlannerConfig.CTX_KEY_USE_APPROXIMATE_TOPN, false);

  testQuery(
      "select f1.\"dim4\", sum(\"m1\") from numfoo f1 inner join (\n"
      + " select \"dim4\" from numfoo where dim4 <> 'a' group by 1\n"
      + ") f2 on f1.\"dim4\" = f2.\"dim4\" group by 1 limit 1",
      // turn on exact topN
      context,
      ImmutableList.of(
          new TopNQueryBuilder()
              .intervals(querySegmentSpec(Filtration.eternity()))
              .granularity(Granularities.ALL)
              .dimension(new DefaultDimensionSpec("dim4", "_d0"))
              .aggregators(new DoubleSumAggregatorFactory("a0", "m1"))
              .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))
              .threshold(1)
              .dataSource(JoinDataSource.create(
                  new TableDataSource("numfoo"),
                  new QueryDataSource(
                      GroupByQuery.builder()
                          .setInterval(querySegmentSpec(Filtration.eternity()))
                          .setGranularity(Granularities.ALL)
                          .setDimFilter(new NotDimFilter(new SelectorDimFilter("dim4", "a", null)))
                          .setDataSource(new TableDataSource("numfoo"))
                          .setDimensions(new DefaultDimensionSpec("dim4", "_d0"))
                          .setContext(context)
                          .build()),
                  "j0.",
                  "(\"dim4\" == \"j0._d0\")",
                  JoinType.INNER,
                  null,
                  ExprMacroTable.nil()))
              .context(context)
              .build()),
      ImmutableList.of(new Object[]{"b", 15.0})
  );
}
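The core structure here is a join whose right-hand side is a QueryDataSource over a grouped subquery. Reduced to a skeleton (mirroring the same seven-argument JoinDataSource.create() call shape used above; the dim filter and TopN layers are omitted, and the interval helper from the test base is replaced with an explicit spec):

import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.JoinDataSource;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.join.JoinType;

public class JoinShapeSketch {
  public static DataSource innerJoinOnSubquery() {
    // Right side: a grouped subquery producing the distinct dim4 values.
    GroupByQuery right = GroupByQuery.builder()
        .setDataSource(new TableDataSource("numfoo"))
        .setInterval(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("dim4", "_d0"))
        .build();
    // Left side is the raw table; the subquery's columns are exposed under the "j0." prefix.
    return JoinDataSource.create(
        new TableDataSource("numfoo"),
        new QueryDataSource(right),
        "j0.",
        "(\"dim4\" == \"j0._d0\")",
        JoinType.INNER,
        null, // no base-table filter
        ExprMacroTable.nil()
    );
  }
}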
use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteJoinQueryTest, method testGroupByJoinAsNativeQueryWithUnoptimizedFilter:
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testGroupByJoinAsNativeQueryWithUnoptimizedFilter(Map<String, Object> queryContext) {
  // The query below is the same as the inner groupBy on a join datasource from the test
  // testNestedGroupByOnInlineDataSourceWithFilter, except that the selector filter
  // dim1=def has been rewritten into an unoptimized filter, dim1 IN (def).
  //
  // The unoptimized filter will be optimized into dim1=def by the query toolchests in their
  // pre-merge decoration function, when they call DimFilter.optimize().
  //
  // This test's goal is to ensure that the join filter rewrites function correctly when there are
  // unoptimized filters in the join query. The rewrite logic must apply to the optimized form of the filters,
  // as this is what will be passed to HashJoinSegmentAdapter.makeCursors(), where the result of the join
  // filter pre-analysis is used.
  //
  // A native query is used because the filter types where we support optimization are the AND/OR/NOT and
  // IN filters. However, when expressed in a SQL query, our SQL planning layer is smart enough to already apply
  // these optimizations in the native query it generates, making it impossible to test the unoptimized filter
  // forms using SQL queries.
  //
  // The test method is placed here for convenience, as this class provides the necessary setup.
  Query query = GroupByQuery.builder()
      .setDataSource(join(
          new QueryDataSource(
              newScanQueryBuilder()
                  .dataSource(CalciteTests.DATASOURCE1)
                  .intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
                  .columns("dim1")
                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                  .context(queryContext)
                  .build()),
          new QueryDataSource(
              newScanQueryBuilder()
                  .dataSource(CalciteTests.DATASOURCE1)
                  .intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
                  .columns("dim1", "m2")
                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                  .context(queryContext)
                  .build()),
          "j0.",
          equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.dim1")),
          JoinType.INNER))
      .setGranularity(Granularities.ALL)
      .setInterval(querySegmentSpec(Filtration.eternity()))
      // provide an unoptimized IN filter
      .setDimFilter(in("dim1", Collections.singletonList("def"), null))
      .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0")))
      .setVirtualColumns(expressionVirtualColumn("v0", "'def'", ColumnType.STRING))
      .build();
  QueryLifecycleFactory qlf = CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate);
  QueryLifecycle ql = qlf.factorize();
  Sequence seq = ql.runSimple(query, CalciteTests.SUPER_USER_AUTH_RESULT, Access.OK);
  List<Object> results = seq.toList();
  Assert.assertEquals(ImmutableList.of(ResultRow.of("def")), results);
}
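To make "unoptimized filter" concrete: a single-value IN filter is semantically identical to an equality filter, and DimFilter.optimize() performs that collapse. A small sketch of the relationship the comment above describes, assuming the three-argument InDimFilter constructor that the test's in() helper corresponds to:

import java.util.Collections;

import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.InDimFilter;

public class FilterOptimizeSketch {
  public static void main(String[] args) {
    // The "unoptimized" form used by the test: dim1 IN ('def').
    DimFilter unoptimized = new InDimFilter("dim1", Collections.singletonList("def"), null);
    // optimize() collapses a single-value IN into an equality (selector) filter,
    // which is the form the toolchest's pre-merge decoration hands to the join machinery.
    DimFilter optimized = unoptimized.optimize();
    System.out.println(optimized); // expected: a selector-style filter equivalent to dim1 = 'def'
  }
}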
use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class CalciteJoinQueryTest, method testNestedGroupByOnInlineDataSourceWithFilter:
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testNestedGroupByOnInlineDataSourceWithFilter(Map<String, Object> queryContext) throws Exception {
  // Cannot vectorize due to virtual columns.
  cannotVectorize();
  testQuery(
      "with abc as"
      + "("
      + " SELECT dim1, m2 from druid.foo where \"__time\" >= '2001-01-02'"
      + ")"
      + ", def as"
      + "("
      + " SELECT t1.dim1, SUM(t2.m2) as \"metricSum\" "
      + " from abc as t1 inner join abc as t2 on t1.dim1 = t2.dim1"
      + " where t1.dim1='def'"
      + " group by 1"
      + ")"
      + "SELECT count(*) from def",
      queryContext,
      ImmutableList.of(
          GroupByQuery.builder()
              .setDataSource(
                  GroupByQuery.builder()
                      .setDataSource(join(
                          new QueryDataSource(
                              newScanQueryBuilder()
                                  .dataSource(CalciteTests.DATASOURCE1)
                                  .intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
                                  .columns("dim1")
                                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                                  .context(queryContext)
                                  .build()),
                          new QueryDataSource(
                              newScanQueryBuilder()
                                  .dataSource(CalciteTests.DATASOURCE1)
                                  .intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
                                  .columns("dim1", "m2")
                                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                                  .context(queryContext)
                                  .build()),
                          "j0.",
                          equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.dim1")),
                          JoinType.INNER))
                      .setGranularity(Granularities.ALL)
                      .setInterval(querySegmentSpec(Filtration.eternity()))
                      .setDimFilter(selector("dim1", "def", null))
                      .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0")))
                      .setVirtualColumns(expressionVirtualColumn("v0", "'def'", ColumnType.STRING))
                      .build())
              .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
              .setGranularity(Granularities.ALL)
              .setInterval(querySegmentSpec(Filtration.eternity()))
              .build()),
      ImmutableList.of(new Object[]{1L})
  );
}
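Each join input above follows the same recipe: build a Scan query, then wrap it in a QueryDataSource so it can act as one side of the join. A reduced sketch of a single such input, with a hypothetical datasource name and a shortened interval; the test-base helpers are replaced with the public builder API:

import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.Druids;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;

public class ScanInputSketch {
  public static QueryDataSource scanJoinInput() {
    ScanQuery scan = Druids.newScanQueryBuilder()
        .dataSource("foo") // hypothetical table
        .intervals(new MultipleIntervalSegmentSpec(
            Collections.singletonList(Intervals.of("2001-01-02/P1D"))))
        .columns("dim1", "m2")
        .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
        .build();
    // Wrapping the scan in a QueryDataSource lets the join read its rows like a table.
    return new QueryDataSource(scan);
  }
}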