Use of org.apache.druid.query.DataSource in project druid by druid-io.
Class ClientQuerySegmentWalker, method generateSubqueryIds.
/**
* This method returns the datasource with every nested {@link QueryDataSource} populated with a subquery id
* derived from its nesting level and its sibling order among the subqueries that are present.
* It also plumbs the parent query's id and SQL query id down to subqueries that don't have them set.
*
* @param rootDataSource Datasource whose subqueries need to be populated
* @param parentQueryId Parent query's id; may be null if it does not need to be set on the subqueries
* @param parentSqlQueryId Parent query's SQL query id; may be null if it does not need to be set on the subqueries
* @return DataSource populated with the subqueries
*/
private DataSource generateSubqueryIds(
    DataSource rootDataSource,
    @Nullable final String parentQueryId,
    @Nullable final String parentSqlQueryId
)
{
  Queue<DataSource> queue = new ArrayDeque<>();
  queue.add(rootDataSource);

  // Performs BFS on the datasource tree to find the nesting level, and the sibling order, of each query datasource
  Map<QueryDataSource, Pair<Integer, Integer>> queryDataSourceToSubqueryIds = new HashMap<>();
  int level = 1;
  while (!queue.isEmpty()) {
    int size = queue.size();
    int siblingOrder = 1;
    for (int i = 0; i < size; ++i) {
      DataSource currentDataSource = queue.poll();
      if (currentDataSource == null) {
        // Shouldn't be encountered
        continue;
      }
      if (currentDataSource instanceof QueryDataSource) {
        queryDataSourceToSubqueryIds.put((QueryDataSource) currentDataSource, new Pair<>(level, siblingOrder));
        ++siblingOrder;
      }
      queue.addAll(currentDataSource.getChildren());
    }
    ++level;
  }

  /*
  Returns the datasource by populating all the subqueries with the ids generated in the map above.
  Implemented in a separate function since the methods on datasource and queries return a new datasource/query.
   */
  return insertSubqueryIds(rootDataSource, queryDataSourceToSubqueryIds, parentQueryId, parentSqlQueryId);
}
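To make the numbering concrete, here is a minimal, self-contained sketch of the same BFS on a toy tree. The Node class, the tree shape, and the "level.siblingOrder" rendering are hypothetical stand-ins for Druid's DataSource/QueryDataSource handling, not Druid API:

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Queue;

class SubqueryIdSketch
{
  // Hypothetical stand-in for DataSource; isQuery models "is a QueryDataSource".
  static class Node
  {
    final String name;
    final boolean isQuery;
    final List<Node> children = new ArrayList<>();

    Node(String name, boolean isQuery)
    {
      this.name = name;
      this.isQuery = isQuery;
    }
  }

  public static void main(String[] args)
  {
    // join(subqueryA(subqueryC), subqueryB): the root join is level 1, its query children level 2, and so on.
    Node root = new Node("join", false);
    Node a = new Node("subqueryA", true);
    Node b = new Node("subqueryB", true);
    Node c = new Node("subqueryC", true);
    a.children.add(c);
    root.children.add(a);
    root.children.add(b);

    Map<Node, String> ids = new HashMap<>();
    Queue<Node> queue = new ArrayDeque<>();
    queue.add(root);
    int level = 1;
    while (!queue.isEmpty()) {
      int size = queue.size();
      int siblingOrder = 1;
      for (int i = 0; i < size; ++i) {
        Node current = queue.poll();
        if (current.isQuery) {
          ids.put(current, level + "." + siblingOrder);
          ++siblingOrder;
        }
        queue.addAll(current.children);
      }
      ++level;
    }
    // Prints (in map order): subqueryA -> 2.1, subqueryB -> 2.2, subqueryC -> 3.1
    ids.forEach((node, id) -> System.out.println(node.name + " -> " + id));
  }
}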
Use of org.apache.druid.query.DataSource in project druid by druid-io.
Class ClientQuerySegmentWalker, method inlineIfNecessary.
/**
* Replace QueryDataSources with InlineDataSources when necessary and possible. "Necessary" is defined as:
*
* 1) For outermost subqueries: inlining is necessary if the toolchest cannot handle it.
* 2) For all other subqueries (e.g. those nested under a join): inlining is always necessary.
*
* @param dataSource datasource to process.
* @param toolChestIfOutermost if provided, and if the provided datasource is a {@link QueryDataSource}, this method
* will consider whether the toolchest can handle a subquery on the datasource using
* {@link QueryToolChest#canPerformSubquery}. If the toolchest can handle it, then it will
* not be inlined. See {@link org.apache.druid.query.groupby.GroupByQueryQueryToolChest}
* for an example of a toolchest that can handle subqueries.
* @param subqueryRowLimitAccumulator an accumulator for tracking the number of accumulated rows in all subqueries
* for a particular master query
* @param maxSubqueryRows Max rows that all the subqueries generated by a master query can have, combined
* @param dryRun if true, does not actually execute any subqueries, but will inline empty result sets.
*/
// Subquery, toolchest, runner handling all use raw types
@SuppressWarnings({ "rawtypes", "unchecked" })
private DataSource inlineIfNecessary(
    final DataSource dataSource,
    @Nullable final QueryToolChest toolChestIfOutermost,
    final AtomicInteger subqueryRowLimitAccumulator,
    final int maxSubqueryRows,
    final boolean dryRun
)
{
  if (dataSource instanceof QueryDataSource) {
    // This datasource is a subquery.
    final Query subQuery = ((QueryDataSource) dataSource).getQuery();
    final QueryToolChest toolChest = warehouse.getToolChest(subQuery);

    if (toolChestIfOutermost != null && toolChestIfOutermost.canPerformSubquery(subQuery)) {
      // Strip outer queries that are handleable by the toolchest, and inline subqueries that may be underneath
      // them (e.g. subqueries nested under a join).
      final Stack<DataSource> stack = new Stack<>();
      DataSource current = dataSource;

      while (current instanceof QueryDataSource) {
        stack.push(current);
        current = Iterables.getOnlyElement(current.getChildren());
      }

      assert !(current instanceof QueryDataSource); // lgtm [java/contradictory-type-checks]
      current = inlineIfNecessary(current, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);

      while (!stack.isEmpty()) {
        current = stack.pop().withChildren(Collections.singletonList(current));
      }

      assert current instanceof QueryDataSource;

      if (toolChest.canPerformSubquery(((QueryDataSource) current).getQuery())) {
        return current;
      } else {
        // We need to consider inlining it.
        return inlineIfNecessary(current, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
      }
    } else if (canRunQueryUsingLocalWalker(subQuery) || canRunQueryUsingClusterWalker(subQuery)) {
      // Subquery needs to be inlined. Assign it a subquery id and run it.
      final Sequence<?> queryResults;

      if (dryRun) {
        queryResults = Sequences.empty();
      } else {
        final QueryRunner subqueryRunner = subQuery.getRunner(this);
        queryResults = subqueryRunner.run(QueryPlus.wrap(subQuery), DirectDruidClient.makeResponseContextForQuery());
      }

      return toInlineDataSource(
          subQuery,
          queryResults,
          warehouse.getToolChest(subQuery),
          subqueryRowLimitAccumulator,
          maxSubqueryRows
      );
    } else {
      // Cannot inline subquery. Attempt to inline one level deeper, and then try again.
      return inlineIfNecessary(
          dataSource.withChildren(
              Collections.singletonList(
                  inlineIfNecessary(
                      Iterables.getOnlyElement(dataSource.getChildren()),
                      null,
                      subqueryRowLimitAccumulator,
                      maxSubqueryRows,
                      dryRun
                  )
              )
          ),
          toolChestIfOutermost,
          subqueryRowLimitAccumulator,
          maxSubqueryRows,
          dryRun
      );
    }
  } else {
    // Not a query datasource. Walk children and see if there's anything to inline.
    return dataSource.withChildren(
        dataSource.getChildren()
                  .stream()
                  .map(child -> inlineIfNecessary(child, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun))
                  .collect(Collectors.toList())
    );
  }
}
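The strip-and-rebuild step above (push wrappers onto a stack, transform the innermost datasource, then rewrap in order) is easy to get backwards. Here is a minimal sketch of the same pattern under assumed names: Wrapper, Leaf, and transformLeaf are hypothetical, not Druid classes, and the sketch needs Java 16+ for records and pattern matching:

import java.util.ArrayDeque;
import java.util.Deque;

class StripAndRebuildSketch
{
  interface Tree { }
  record Leaf(String value) implements Tree { }
  record Wrapper(String tag, Tree child) implements Tree { }

  // Stand-in for the recursive inlineIfNecessary(current, null, ...) call on the innermost node.
  static Tree transformLeaf(Tree leaf)
  {
    return new Leaf(((Leaf) leaf).value() + "-inlined");
  }

  static Tree rebuild(Tree root)
  {
    Deque<Wrapper> stack = new ArrayDeque<>();
    Tree current = root;
    // Peel off the chain of single-child wrappers, outermost first.
    while (current instanceof Wrapper w) {
      stack.push(w);
      current = w.child();
    }
    current = transformLeaf(current);
    // Rewrap in reverse order of removal, restoring the original nesting.
    while (!stack.isEmpty()) {
      current = new Wrapper(stack.pop().tag(), current);
    }
    return current;
  }

  public static void main(String[] args)
  {
    Tree t = new Wrapper("outer", new Wrapper("inner", new Leaf("base")));
    // Wrapper[tag=outer, child=Wrapper[tag=inner, child=Leaf[value=base-inlined]]]
    System.out.println(rebuild(t));
  }
}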
Use of org.apache.druid.query.DataSource in project druid by druid-io.
Class ClientQuerySegmentWalker, method getQueryRunnerForIntervals.
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals)
{
  final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);

  // Populate the subquery ids of the subqueries present in the main query.
  Query<T> newQuery = query.withDataSource(
      generateSubqueryIds(query.getDataSource(), query.getId(), query.getSqlQueryId())
  );

  // Transform TableDataSource to GlobalTableDataSource when eligible, before further transformation to
  // potentially inline.
  final DataSource freeTradeDataSource = globalizeIfPossible(newQuery.getDataSource());

  // Do an inlining dry run to see if any inlining is necessary, without actually running the queries.
  final int maxSubqueryRows = QueryContexts.getMaxSubqueryRows(query, serverConfig.getMaxSubqueryRows());
  final DataSource inlineDryRun = inlineIfNecessary(freeTradeDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, true);

  if (!canRunQueryUsingClusterWalker(query.withDataSource(inlineDryRun))
      && !canRunQueryUsingLocalWalker(query.withDataSource(inlineDryRun))) {
    // Dry run didn't go well.
    throw new ISE("Cannot handle subquery structure for dataSource: %s", query.getDataSource());
  }

  // Now that we know the structure is workable, actually do the inlining (if necessary).
  newQuery = newQuery.withDataSource(
      inlineIfNecessary(freeTradeDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, false)
  );

  if (canRunQueryUsingLocalWalker(newQuery)) {
    // No need to decorate since LocalQuerySegmentWalker does its own.
    return new QuerySwappingQueryRunner<>(localClient.getQueryRunnerForIntervals(newQuery, intervals), query, newQuery);
  } else if (canRunQueryUsingClusterWalker(newQuery)) {
    // See https://github.com/apache/druid/issues/9229 for details.
    return new QuerySwappingQueryRunner<>(
        decorateClusterRunner(newQuery, clusterClient.getQueryRunnerForIntervals(newQuery, intervals)),
        query,
        newQuery
    );
  } else {
    // We don't expect to get here, because the checks above should have rejected any query that can't be run
    // with either the local or cluster walkers. If this message ever shows up it is a bug.
    throw new ISE("Inlined query could not be run");
  }
}
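Note the two-pass shape: the first inlineIfNecessary call is a dry run that builds the final datasource structure without executing any subqueries, so an unworkable structure is rejected before any work is done; only then does the real pass execute. Below is a minimal sketch of that pattern in isolation; plan and isRunnable are hypothetical names, not Druid API. As in the original, each pass gets a fresh row accumulator so the dry run doesn't count against the real run's limit:

import java.util.concurrent.atomic.AtomicInteger;

class DryRunSketch
{
  // Hypothetical planner: produces the structure the validity check needs; it only touches
  // the accumulator (i.e. does real work) when dryRun is false.
  static String plan(String input, AtomicInteger rowAccumulator, boolean dryRun)
  {
    if (!dryRun) {
      rowAccumulator.addAndGet(100); // pretend a subquery materialized 100 rows
    }
    return "inline(" + input + ")";
  }

  static boolean isRunnable(String planned)
  {
    return planned.startsWith("inline(");
  }

  public static void main(String[] args)
  {
    // Pass 1: validate the resulting shape without running anything.
    if (!isRunnable(plan("scan", new AtomicInteger(), true))) {
      throw new IllegalStateException("Cannot handle subquery structure");
    }
    // Pass 2: actually execute, with a fresh accumulator.
    AtomicInteger rows = new AtomicInteger();
    System.out.println(plan("scan", rows, false) + ", rows=" + rows.get());
  }
}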
Use of org.apache.druid.query.DataSource in project druid by druid-io.
Class ClientQuerySegmentWalker, method globalizeIfPossible.
private DataSource globalizeIfPossible(final DataSource dataSource)
{
  if (dataSource instanceof TableDataSource) {
    GlobalTableDataSource maybeGlobal = new GlobalTableDataSource(((TableDataSource) dataSource).getName());
    if (joinableFactory.isDirectlyJoinable(maybeGlobal)) {
      return maybeGlobal;
    }
    return dataSource;
  } else {
    List<DataSource> currentChildren = dataSource.getChildren();
    List<DataSource> newChildren = new ArrayList<>(currentChildren.size());
    for (DataSource child : currentChildren) {
      newChildren.add(globalizeIfPossible(child));
    }
    return dataSource.withChildren(newChildren);
  }
}
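globalizeIfPossible is a standard recursive rewrite over an immutable tree: eligible leaves are replaced, and interior nodes are rebuilt via withChildren rather than mutated in place. A minimal sketch of the same shape follows, with hypothetical Node and directlyJoinable stand-ins (not Druid classes):

import java.util.List;
import java.util.stream.Collectors;

class GlobalizeSketch
{
  record Node(String name, boolean global, List<Node> children) { }

  // Hypothetical eligibility test, standing in for joinableFactory.isDirectlyJoinable().
  static boolean directlyJoinable(String name)
  {
    return name.startsWith("lookup_");
  }

  static Node globalize(Node node)
  {
    if (node.children().isEmpty()) {
      // Leaf: promote when eligible, otherwise keep as-is.
      return directlyJoinable(node.name()) ? new Node(node.name(), true, List.of()) : node;
    }
    // Interior node: rebuild with rewritten children; nothing is mutated in place.
    List<Node> newChildren = node.children().stream().map(GlobalizeSketch::globalize).collect(Collectors.toList());
    return new Node(node.name(), node.global(), newChildren);
  }

  public static void main(String[] args)
  {
    Node tree = new Node(
        "join",
        false,
        List.of(new Node("lookup_country", false, List.of()), new Node("events", false, List.of()))
    );
    // lookup_country is promoted to global=true; events is left alone.
    System.out.println(globalize(tree));
  }
}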
Use of org.apache.druid.query.DataSource in project druid by druid-io.
Class GroupByQueryQueryToolChest, method mergeGroupByResultsWithoutPushDown.
private Sequence<ResultRow> mergeGroupByResultsWithoutPushDown(
    GroupByStrategy groupByStrategy,
    GroupByQuery query,
    GroupByQueryResource resource,
    QueryRunner<ResultRow> runner,
    ResponseContext context
)
{
  // If there's a subquery, merge subquery results and then apply the aggregator
  final DataSource dataSource = query.getDataSource();

  if (dataSource instanceof QueryDataSource) {
    final GroupByQuery subquery;
    try {
      // Inject outer query context keys into subquery if they don't already exist in the subquery context.
      // Unlike withOverriddenContext's normal behavior, we want keys present in the subquery to win.
      final Map<String, Object> subqueryContext = new TreeMap<>();
      if (query.getContext() != null) {
        for (Map.Entry<String, Object> entry : query.getContext().entrySet()) {
          if (entry.getValue() != null) {
            subqueryContext.put(entry.getKey(), entry.getValue());
          }
        }
      }
      if (((QueryDataSource) dataSource).getQuery().getContext() != null) {
        subqueryContext.putAll(((QueryDataSource) dataSource).getQuery().getContext());
      }
      subqueryContext.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
      subquery = (GroupByQuery) ((QueryDataSource) dataSource).getQuery().withOverriddenContext(subqueryContext);
    } catch (ClassCastException e) {
      throw new UnsupportedOperationException("Subqueries must be of type 'group by'");
    }

    final Sequence<ResultRow> subqueryResult = mergeGroupByResults(
        groupByStrategy,
        subquery.withOverriddenContext(
            ImmutableMap.of(
                // Setting sort to false avoids unnecessary sorting while merging results; we only need to sort
                // in the end when returning results to the user. (Note this is only respected by groupBy v1.)
                GroupByQueryHelper.CTX_KEY_SORT_RESULTS, false
            )
        ),
        resource,
        runner,
        context
    );
    final Sequence<ResultRow> finalizingResults = finalizeSubqueryResults(subqueryResult, subquery);

    if (query.getSubtotalsSpec() != null) {
      return groupByStrategy.processSubtotalsSpec(
          query,
          resource,
          groupByStrategy.processSubqueryResult(subquery, query, resource, finalizingResults, false)
      );
    } else {
      return groupByStrategy.applyPostProcessing(
          groupByStrategy.processSubqueryResult(subquery, query, resource, finalizingResults, false),
          query
      );
    }
  } else {
    if (query.getSubtotalsSpec() != null) {
      return groupByStrategy.processSubtotalsSpec(
          query,
          resource,
          groupByStrategy.mergeResults(runner, query.withSubtotalsSpec(null), context)
      );
    } else {
      return groupByStrategy.applyPostProcessing(groupByStrategy.mergeResults(runner, query, context), query);
    }
  }
}
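The context merge above inverts withOverriddenContext's usual precedence simply by ordering the puts: outer-query keys go in first, then the subquery's own keys overwrite them on conflict, and finally one key is forced. A minimal illustration of just that precedence; the timeout/useCache values are examples, and "sortByDimsFirst" here merely models the forced CTX_KEY_SORT_BY_DIMS_FIRST flag:

import java.util.Map;
import java.util.TreeMap;

class ContextMergeSketch
{
  public static void main(String[] args)
  {
    // Hypothetical context values for illustration.
    Map<String, Object> outerContext = Map.of("timeout", 30000, "useCache", true);
    Map<String, Object> subqueryContext = Map.of("timeout", 5000);

    Map<String, Object> merged = new TreeMap<>();
    merged.putAll(outerContext);          // outer keys first...
    merged.putAll(subqueryContext);       // ...then subquery keys win on conflict
    merged.put("sortByDimsFirst", false); // forced key, modeling CTX_KEY_SORT_BY_DIMS_FIRST

    // {sortByDimsFirst=false, timeout=5000, useCache=true}
    System.out.println(merged);
  }
}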