Search in sources :

Example 16 with DataSource

use of org.apache.druid.query.DataSource in project druid by druid-io.

In class ClientQuerySegmentWalker, method generateSubqueryIds:

/**
 * Walks the datasource tree rooted at {@code rootDataSource} level by level, records a
 * (nesting level, sibling order) pair for every {@link QueryDataSource} found, and returns the datasource with
 * those identifiers applied to its subqueries.
 *
 * The nesting level starts at 1 for the root level and grows by one per tree level. The sibling order restarts at 1
 * on every level and only advances when a {@link QueryDataSource} is encountered. The parent query's id and sql id
 * are also plumbed through for subqueries that don't have them set by default.
 *
 * @param rootDataSource   Datasource whose subqueries need to be populated
 * @param parentQueryId    Parent Query's ID, can be null if it does not need to be updated in the subqueries
 * @param parentSqlQueryId Parent Query's SQL Query ID, can be null if it does not need to be updated in the
 *                         subqueries
 * @return DataSource populated with the subqueries
 */
private DataSource generateSubqueryIds(DataSource rootDataSource, @Nullable final String parentQueryId, @Nullable final String parentSqlQueryId) {
    final Queue<DataSource> pending = new ArrayDeque<>();
    pending.add(rootDataSource);
    // Maps each query datasource to its (nesting level, sibling order) pair, filled in by the BFS below.
    final Map<QueryDataSource, Pair<Integer, Integer>> subqueryPositions = new HashMap<>();
    for (int depth = 1; !pending.isEmpty(); ++depth) {
        // Everything queued right now belongs to the current level; children queued meanwhile form the next one.
        int nextSiblingOrder = 1;
        for (int remaining = pending.size(); remaining > 0; --remaining) {
            final DataSource visited = pending.poll();
            // A null here shouldn't be encountered; it is skipped defensively.
            if (visited != null) {
                if (visited instanceof QueryDataSource) {
                    subqueryPositions.put((QueryDataSource) visited, new Pair<>(depth, nextSiblingOrder));
                    nextSiblingOrder++;
                }
                pending.addAll(visited.getChildren());
            }
        }
    }
    // The ids are applied in a separate function since the methods on datasources and queries return a new
    // datasource/query instead of modifying the existing one.
    return insertSubqueryIds(rootDataSource, subqueryPositions, parentQueryId, parentSqlQueryId);
}
Also used : QueryDataSource(org.apache.druid.query.QueryDataSource) HashMap(java.util.HashMap) ArrayDeque(java.util.ArrayDeque) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) InlineDataSource(org.apache.druid.query.InlineDataSource) TableDataSource(org.apache.druid.query.TableDataSource) Pair(org.apache.druid.java.util.common.Pair)

Example 17 with DataSource

use of org.apache.druid.query.DataSource in project druid by druid-io.

In class ClientQuerySegmentWalker, method inlineIfNecessary:

/**
 * Swaps QueryDataSources for InlineDataSources wherever that is both necessary and possible. Inlining counts as
 * "necessary" in two situations:
 *
 * 1) Outermost subqueries: when the toolchest cannot handle the subquery itself.
 * 2) Every other subquery (e.g. those nested under a join): always.
 *
 * @param dataSource           datasource to process.
 * @param toolChestIfOutermost if provided, and if the provided datasource is a {@link QueryDataSource}, this method
 *                             will consider whether the toolchest can handle a subquery on the datasource using
 *                             {@link QueryToolChest#canPerformSubquery}. If the toolchest can handle it, then it will
 *                             not be inlined. See {@link org.apache.druid.query.groupby.GroupByQueryQueryToolChest}
 *                             for an example of a toolchest that can handle subqueries.
 * @param subqueryRowLimitAccumulator an accumulator for tracking the number of accumulated rows in all subqueries
 *                                    for a particular master query
 * @param maxSubqueryRows      Max rows that all the subqueries generated by a master query can have, combined
 * @param dryRun               if true, does not actually execute any subqueries, but will inline empty result sets.
 * @return the datasource with the necessary subqueries replaced by their inlined form
 */
// Subquery, toolchest, runner handling all use raw types
@SuppressWarnings({ "rawtypes", "unchecked" })
private DataSource inlineIfNecessary(final DataSource dataSource, @Nullable final QueryToolChest toolChestIfOutermost, final AtomicInteger subqueryRowLimitAccumulator, final int maxSubqueryRows, final boolean dryRun) {
    if (!(dataSource instanceof QueryDataSource)) {
        // Not a query datasource. Recurse into each child (none of them is outermost) and rebuild this datasource.
        return dataSource.withChildren(
            dataSource.getChildren()
                      .stream()
                      .map(child -> inlineIfNecessary(child, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun))
                      .collect(Collectors.toList())
        );
    }

    // From here on the datasource is known to be a subquery.
    final Query subQuery = ((QueryDataSource) dataSource).getQuery();
    final QueryToolChest toolChest = warehouse.getToolChest(subQuery);

    if (toolChestIfOutermost != null && toolChestIfOutermost.canPerformSubquery(subQuery)) {
        // Peel off the chain of outer queries that the toolchest can handle, remembering them so they can be
        // re-applied, and inline whatever subqueries sit underneath (e.g. subqueries nested under a join).
        final Stack<DataSource> peeledQueries = new Stack<>();
        DataSource cursor = dataSource;
        while (cursor instanceof QueryDataSource) {
            peeledQueries.push(cursor);
            cursor = Iterables.getOnlyElement(cursor.getChildren());
        }
        // lgtm [java/contradictory-type-checks]
        assert !(cursor instanceof QueryDataSource);
        cursor = inlineIfNecessary(cursor, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
        // Re-wrap the processed innermost datasource with the peeled queries, innermost first.
        while (!peeledQueries.isEmpty()) {
            cursor = peeledQueries.pop().withChildren(Collections.singletonList(cursor));
        }
        assert cursor instanceof QueryDataSource;
        final Query rebuiltQuery = ((QueryDataSource) cursor).getQuery();
        if (!toolChest.canPerformSubquery(rebuiltQuery)) {
            // The subquery's own toolchest cannot handle what lies beneath it, so consider inlining it.
            return inlineIfNecessary(cursor, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
        }
        return cursor;
    }

    if (canRunQueryUsingLocalWalker(subQuery) || canRunQueryUsingClusterWalker(subQuery)) {
        // Subquery needs to be inlined: run it, or substitute an empty result set on a dry run.
        final Sequence<?> queryResults;
        if (!dryRun) {
            final QueryRunner subqueryRunner = subQuery.getRunner(this);
            queryResults = subqueryRunner.run(QueryPlus.wrap(subQuery), DirectDruidClient.makeResponseContextForQuery());
        } else {
            queryResults = Sequences.empty();
        }
        return toInlineDataSource(subQuery, queryResults, warehouse.getToolChest(subQuery), subqueryRowLimitAccumulator, maxSubqueryRows);
    }

    // Cannot inline the subquery as it stands. Inline one level deeper first, then try again on the result.
    final DataSource inlinedChild = inlineIfNecessary(Iterables.getOnlyElement(dataSource.getChildren()), null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
    final DataSource withInlinedChild = dataSource.withChildren(Collections.singletonList(inlinedChild));
    return inlineIfNecessary(withInlinedChild, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
}
Also used : QueryDataSource(org.apache.druid.query.QueryDataSource) Query(org.apache.druid.query.Query) QueryToolChest(org.apache.druid.query.QueryToolChest) Sequence(org.apache.druid.java.util.common.guava.Sequence) ResultLevelCachingQueryRunner(org.apache.druid.query.ResultLevelCachingQueryRunner) RetryQueryRunner(org.apache.druid.query.RetryQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) InlineDataSource(org.apache.druid.query.InlineDataSource) TableDataSource(org.apache.druid.query.TableDataSource) Stack(java.util.Stack)

Example 18 with DataSource

use of org.apache.druid.query.DataSource in project druid by druid-io.

In class ClientQuerySegmentWalker, method getQueryRunnerForIntervals:

/**
 * Builds a runner for {@code query} over {@code intervals}: subquery ids are generated, table datasources are
 * globalized where possible, subqueries are inlined where necessary, and the resulting query is handed to either
 * the local or the cluster walker.
 *
 * @param query     the query to run
 * @param intervals the intervals to run the query over
 * @return a runner that executes the rewritten query in place of {@code query}
 * @throws ISE if the subquery structure cannot be handled, or if the inlined query cannot be run by either walker
 */
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
    final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);

    // Populate the subquery ids of the subqueries present in the main query.
    final Query<T> queryWithSubqueryIds = query.withDataSource(generateSubqueryIds(query.getDataSource(), query.getId(), query.getSqlQueryId()));

    // Transform TableDataSource to GlobalTableDataSource when eligible, before any further transformation
    // that could potentially inline.
    final DataSource globalizedDataSource = globalizeIfPossible(queryWithSubqueryIds.getDataSource());

    // Do an inlining dry run to see if any inlining is necessary, without actually running the queries.
    final int maxSubqueryRows = QueryContexts.getMaxSubqueryRows(query, serverConfig.getMaxSubqueryRows());
    final DataSource dryRunDataSource = inlineIfNecessary(globalizedDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, true);
    final boolean dryRunIsRunnable = canRunQueryUsingClusterWalker(query.withDataSource(dryRunDataSource))
                                     || canRunQueryUsingLocalWalker(query.withDataSource(dryRunDataSource));
    if (!dryRunIsRunnable) {
        // Dry run didn't go well.
        throw new ISE("Cannot handle subquery structure for dataSource: %s", query.getDataSource());
    }

    // Now that we know the structure is workable, actually do the inlining (if necessary).
    final Query<T> inlinedQuery = queryWithSubqueryIds.withDataSource(inlineIfNecessary(globalizedDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, false));

    if (canRunQueryUsingLocalWalker(inlinedQuery)) {
        // No need to decorate since LocalQuerySegmentWalker does its own.
        return new QuerySwappingQueryRunner<>(localClient.getQueryRunnerForIntervals(inlinedQuery, intervals), query, inlinedQuery);
    }
    if (canRunQueryUsingClusterWalker(inlinedQuery)) {
        // The cluster runner is decorated before being returned.
        // See https://github.com/apache/druid/issues/9229 for details.
        return new QuerySwappingQueryRunner<>(decorateClusterRunner(inlinedQuery, clusterClient.getQueryRunnerForIntervals(inlinedQuery, intervals)), query, inlinedQuery);
    }
    // The inlined query can't be run with either the local or cluster walkers, even though the dry run accepted
    // its structure. If this message ever shows up it is a bug.
    throw new ISE("Inlined query could not be run");
}
Also used : Query(org.apache.druid.query.Query) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ISE(org.apache.druid.java.util.common.ISE) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) InlineDataSource(org.apache.druid.query.InlineDataSource) TableDataSource(org.apache.druid.query.TableDataSource)

Example 19 with DataSource

use of org.apache.druid.query.DataSource in project druid by druid-io.

In class ClientQuerySegmentWalker, method globalizeIfPossible:

/**
 * Returns a version of {@code dataSource} in which every {@link TableDataSource} is replaced by a
 * {@link GlobalTableDataSource} of the same name, provided {@code joinableFactory} reports that global table as
 * directly joinable. Table datasources that do not qualify are returned unchanged; any other datasource is rebuilt
 * from its recursively processed children.
 *
 * @param dataSource the datasource to process
 * @return the (possibly) globalized datasource
 */
private DataSource globalizeIfPossible(final DataSource dataSource) {
    if (!(dataSource instanceof TableDataSource)) {
        // Not a table: process each child in order and rebuild this datasource around the results.
        final List<DataSource> originalChildren = dataSource.getChildren();
        final List<DataSource> globalizedChildren = new ArrayList<>(originalChildren.size());
        originalChildren.forEach(child -> globalizedChildren.add(globalizeIfPossible(child)));
        return dataSource.withChildren(globalizedChildren);
    }

    // A table: use its global counterpart only when that counterpart is directly joinable.
    final String tableName = ((TableDataSource) dataSource).getName();
    final GlobalTableDataSource globalCandidate = new GlobalTableDataSource(tableName);
    return joinableFactory.isDirectlyJoinable(globalCandidate) ? globalCandidate : dataSource;
}
Also used : GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) TableDataSource(org.apache.druid.query.TableDataSource) ArrayList(java.util.ArrayList) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) InlineDataSource(org.apache.druid.query.InlineDataSource) TableDataSource(org.apache.druid.query.TableDataSource)

Example 20 with DataSource

use of org.apache.druid.query.DataSource in project druid by druid-io.

In class GroupByQueryQueryToolChest, method mergeGroupByResultsWithoutPushDown:

/**
 * Merges groupBy results for {@code query} without pushing the outer query down.
 *
 * When the query's datasource is a {@link QueryDataSource}, the subquery is merged first (with the outer query's
 * context keys injected wherever the subquery does not define them itself), its results are finalized, and the outer
 * query is then applied on top of them. Otherwise the runner's results are merged directly. In both cases the
 * subtotals spec is processed when present, and regular post-processing is applied when it is absent.
 *
 * @param groupByStrategy strategy used to merge, process and post-process the results
 * @param query           the (outer) groupBy query
 * @param resource        resource passed through to the strategy
 * @param runner          runner producing the rows to merge
 * @param context         response context passed through to the merge
 * @return the merged result rows
 * @throws UnsupportedOperationException if a {@link ClassCastException} is raised while building the subquery,
 *                                       i.e. the subquery is not a groupBy query
 */
private Sequence<ResultRow> mergeGroupByResultsWithoutPushDown(GroupByStrategy groupByStrategy, GroupByQuery query, GroupByQueryResource resource, QueryRunner<ResultRow> runner, ResponseContext context) {
    // If there's a subquery, merge subquery results and then apply the aggregator
    final DataSource dataSource = query.getDataSource();
    if (dataSource instanceof QueryDataSource) {
        final QueryDataSource queryDataSource = (QueryDataSource) dataSource;
        final GroupByQuery subquery;
        try {
            // Inject outer query context keys into subquery if they don't already exist in the subquery context.
            // Unlike withOverriddenContext's normal behavior, we want keys present in the subquery to win.
            final Map<String, Object> subqueryContext = new TreeMap<>();
            if (query.getContext() != null) {
                for (Map.Entry<String, Object> entry : query.getContext().entrySet()) {
                    // Outer entries with a null value are not propagated to the subquery.
                    if (entry.getValue() != null) {
                        subqueryContext.put(entry.getKey(), entry.getValue());
                    }
                }
            }
            // Applied after the outer keys so that the subquery's own values overwrite them.
            if (queryDataSource.getQuery().getContext() != null) {
                subqueryContext.putAll(queryDataSource.getQuery().getContext());
            }
            subqueryContext.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
            subquery = (GroupByQuery) queryDataSource.getQuery().withOverriddenContext(subqueryContext);
        } catch (ClassCastException e) {
            // Keep the original exception as the cause so the failing cast remains visible in the stack trace.
            throw new UnsupportedOperationException("Subqueries must be of type 'group by'", e);
        }
        // Sorting is switched off while merging the subquery results; results only need to be sorted
        // in the end when returning results to user. (note this is only respected by groupBy v1)
        final Sequence<ResultRow> subqueryResult = mergeGroupByResults(groupByStrategy, subquery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, false)), resource, runner, context);
        final Sequence<ResultRow> finalizingResults = finalizeSubqueryResults(subqueryResult, subquery);
        if (query.getSubtotalsSpec() != null) {
            return groupByStrategy.processSubtotalsSpec(query, resource, groupByStrategy.processSubqueryResult(subquery, query, resource, finalizingResults, false));
        } else {
            return groupByStrategy.applyPostProcessing(groupByStrategy.processSubqueryResult(subquery, query, resource, finalizingResults, false), query);
        }
    } else {
        if (query.getSubtotalsSpec() != null) {
            // The base results are merged with the subtotals spec cleared; the spec is then processed on top.
            return groupByStrategy.processSubtotalsSpec(query, resource, groupByStrategy.mergeResults(runner, query.withSubtotalsSpec(null), context));
        } else {
            return groupByStrategy.applyPostProcessing(groupByStrategy.mergeResults(runner, query, context), query);
        }
    }
}
Also used : TreeMap(java.util.TreeMap) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) TreeMap(java.util.TreeMap)

Aggregations

DataSource (org.apache.druid.query.DataSource) 36, TableDataSource (org.apache.druid.query.TableDataSource) 23, Test (org.junit.Test) 18, JoinDataSource (org.apache.druid.query.JoinDataSource) 17, QueryDataSource (org.apache.druid.query.QueryDataSource) 16, GlobalTableDataSource (org.apache.druid.query.GlobalTableDataSource) 14, Filtration (org.apache.druid.sql.calcite.filtration.Filtration) 12, ArrayList (java.util.ArrayList) 10, InlineDataSource (org.apache.druid.query.InlineDataSource) 7, HashMap (java.util.HashMap) 6, Optional (java.util.Optional) 6, LookupDataSource (org.apache.druid.query.LookupDataSource) 6, UnionDataSource (org.apache.druid.query.UnionDataSource) 6, GroupByQuery (org.apache.druid.query.groupby.GroupByQuery) 6, List (java.util.List) 5, Nullable (javax.annotation.Nullable) 5, DimFilter (org.apache.druid.query.filter.DimFilter) 5, ImmutableMap (com.google.common.collect.ImmutableMap) 4, IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList) 4, ISE (org.apache.druid.java.util.common.ISE) 4