
Example 41 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class ClientQuerySegmentWalker method canRunQueryUsingClusterWalker.

/**
 * Checks if a query can be handled wholly by {@link #clusterClient}. Assumes that it is a
 * {@link CachingClusteredClient} or something that behaves similarly.
 */
private <T> boolean canRunQueryUsingClusterWalker(Query<T> query) {
    final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
    final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
    // 1) Must be based on a concrete table (the only shape the Druid cluster can handle).
    // 2) If there is an outer query, it must be handleable by the query toolchest (the cluster walker does not
    //    handle subqueries on its own).
    return analysis.isConcreteTableBased()
            && (!analysis.isQuery() || toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery()));
}
Also used : Query(org.apache.druid.query.Query) QueryDataSource(org.apache.druid.query.QueryDataSource) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis)
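The following minimal stand-alone sketch (not taken from the Druid source) shows the two conditions this method combines. The table name, interval, dimension, aggregator name, and class name are made-up illustration values; only the Druid classes themselves are real.

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.planning.DataSourceAnalysis;

public class SubqueryAnalysisSketch {
    public static void main(String[] args) {
        // Hypothetical inner groupBy over a made-up "wikipedia" table; only its shape matters here.
        GroupByQuery inner = GroupByQuery.builder()
                .setDataSource(new TableDataSource("wikipedia"))
                .setInterval("2020-01-01/2020-02-01")
                .setGranularity(Granularities.ALL)
                .setDimensions(new DefaultDimensionSpec("page", "page"))
                .setAggregatorSpecs(new CountAggregatorFactory("rows"))
                .build();

        DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(new QueryDataSource(inner));

        // First check: the datasource tree bottoms out in a concrete table, so the cluster can scan it.
        System.out.println(analysis.isConcreteTableBased()); // true
        // Second check only matters when the outermost datasource is itself a query; in that case the
        // toolchest must also report canPerformSubquery(...) for the wrapped query.
        System.out.println(analysis.isQuery()); // true
    }
}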

Example 42 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class ClientQuerySegmentWalker method generateSubqueryIds.

/**
 * Returns the datasource after populating all of its {@link QueryDataSource} children with the correct nesting
 * level and sibling order of the subqueries that are present.
 * It also plumbs the parent query's id and sql query id into the subqueries when they are not already set.
 *
 * @param rootDataSource   Datasource whose subqueries need to be populated
 * @param parentQueryId    Parent query's ID; may be null if it does not need to be set on the subqueries
 * @param parentSqlQueryId Parent query's SQL query ID; may be null if it does not need to be set on the subqueries
 * @return DataSource populated with the subqueries
 */
private DataSource generateSubqueryIds(DataSource rootDataSource, @Nullable final String parentQueryId, @Nullable final String parentSqlQueryId) {
    Queue<DataSource> queue = new ArrayDeque<>();
    queue.add(rootDataSource);
    // Performs BFS on the datasource tree to find the nesting level, and the sibling order of the query datasource
    Map<QueryDataSource, Pair<Integer, Integer>> queryDataSourceToSubqueryIds = new HashMap<>();
    int level = 1;
    while (!queue.isEmpty()) {
        int size = queue.size();
        int siblingOrder = 1;
        for (int i = 0; i < size; ++i) {
            DataSource currentDataSource = queue.poll();
            if (currentDataSource == null) {
                // Shouldn't be encountered
                continue;
            }
            if (currentDataSource instanceof QueryDataSource) {
                queryDataSourceToSubqueryIds.put((QueryDataSource) currentDataSource, new Pair<>(level, siblingOrder));
                ++siblingOrder;
            }
            queue.addAll(currentDataSource.getChildren());
        }
        ++level;
    }
    /*
    Returns the datasource by populating all the subqueries with the id generated in the map above.
    Implemented in a separate function since the methods on datasource and queries return a new datasource/query
     */
    return insertSubqueryIds(rootDataSource, queryDataSourceToSubqueryIds, parentQueryId, parentSqlQueryId);
}
Also used : QueryDataSource(org.apache.druid.query.QueryDataSource) HashMap(java.util.HashMap) ArrayDeque(java.util.ArrayDeque) DataSource(org.apache.druid.query.DataSource) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) InlineDataSource(org.apache.druid.query.InlineDataSource) TableDataSource(org.apache.druid.query.TableDataSource) Pair(org.apache.druid.java.util.common.Pair)
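The BFS above hands every QueryDataSource a (nesting level, sibling order) pair keyed off the queue's per-level size. The following self-contained sketch reproduces that numbering on a plain tree; the Node class and the names used in main are hypothetical stand-ins, not Druid types.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Queue;

public class SubqueryIdSketch {
    // Hypothetical stand-in for a datasource tree node; not a Druid class.
    static final class Node {
        final String name;
        final boolean isSubquery;
        final List<Node> children = new ArrayList<>();
        Node(String name, boolean isSubquery) { this.name = name; this.isSubquery = isSubquery; }
    }

    // Same traversal shape as generateSubqueryIds: a global level counter plus a per-level sibling counter.
    static Map<Node, String> assignIds(Node root) {
        Map<Node, String> ids = new LinkedHashMap<>();
        Queue<Node> queue = new ArrayDeque<>();
        queue.add(root);
        int level = 1;
        while (!queue.isEmpty()) {
            int size = queue.size();
            int siblingOrder = 1;
            for (int i = 0; i < size; i++) {
                Node current = queue.poll();
                if (current.isSubquery) {
                    ids.put(current, level + "." + siblingOrder);
                    siblingOrder++;
                }
                queue.addAll(current.children);
            }
            level++;
        }
        return ids;
    }

    public static void main(String[] args) {
        Node join = new Node("join", false);
        Node left = new Node("leftSubquery", true);
        Node right = new Node("rightSubquery", true);
        join.children.add(left);
        join.children.add(right);
        // The root sits at level 1; its two subquery children are numbered 2.1 and 2.2.
        assignIds(join).forEach((node, id) -> System.out.println(node.name + " -> " + id));
    }
}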

Example 43 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class ClientQuerySegmentWalker method inlineIfNecessary.

/**
 * Replace QueryDataSources with InlineDataSources when necessary and possible. "Necessary" is defined as:
 *
 * 1) For outermost subqueries: inlining is necessary if the toolchest cannot handle it.
 * 2) For all other subqueries (e.g. those nested under a join): inlining is always necessary.
 *
 * @param dataSource           datasource to process.
 * @param toolChestIfOutermost if provided, and if the provided datasource is a {@link QueryDataSource}, this method
 *                             will consider whether the toolchest can handle a subquery on the datasource using
 *                             {@link QueryToolChest#canPerformSubquery}. If the toolchest can handle it, then it will
 *                             not be inlined. See {@link org.apache.druid.query.groupby.GroupByQueryQueryToolChest}
 *                             for an example of a toolchest that can handle subqueries.
 * @param subqueryRowLimitAccumulator an accumulator for tracking the number of accumulated rows in all subqueries
 *                                    for a particular master query
 * @param maxSubqueryRows      Max rows that all the subqueries generated by a master query can have, combined
 * @param dryRun               if true, does not actually execute any subqueries, but will inline empty result sets.
 */
// Subquery, toolchest, runner handling all use raw types
@SuppressWarnings({ "rawtypes", "unchecked" })
private DataSource inlineIfNecessary(final DataSource dataSource, @Nullable final QueryToolChest toolChestIfOutermost, final AtomicInteger subqueryRowLimitAccumulator, final int maxSubqueryRows, final boolean dryRun) {
    if (dataSource instanceof QueryDataSource) {
        // This datasource is a subquery.
        final Query subQuery = ((QueryDataSource) dataSource).getQuery();
        final QueryToolChest toolChest = warehouse.getToolChest(subQuery);
        if (toolChestIfOutermost != null && toolChestIfOutermost.canPerformSubquery(subQuery)) {
            // Strip outer queries that are handleable by the toolchest, and inline subqueries that may be underneath
            // them (e.g. subqueries nested under a join).
            final Stack<DataSource> stack = new Stack<>();
            DataSource current = dataSource;
            while (current instanceof QueryDataSource) {
                stack.push(current);
                current = Iterables.getOnlyElement(current.getChildren());
            }
            // lgtm [java/contradictory-type-checks]
            assert !(current instanceof QueryDataSource);
            current = inlineIfNecessary(current, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
            while (!stack.isEmpty()) {
                current = stack.pop().withChildren(Collections.singletonList(current));
            }
            assert current instanceof QueryDataSource;
            if (toolChest.canPerformSubquery(((QueryDataSource) current).getQuery())) {
                return current;
            } else {
                // We need to consider inlining it.
                return inlineIfNecessary(current, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
            }
        } else if (canRunQueryUsingLocalWalker(subQuery) || canRunQueryUsingClusterWalker(subQuery)) {
            // Subquery needs to be inlined. Assign it a subquery id and run it.
            final Sequence<?> queryResults;
            if (dryRun) {
                queryResults = Sequences.empty();
            } else {
                final QueryRunner subqueryRunner = subQuery.getRunner(this);
                queryResults = subqueryRunner.run(QueryPlus.wrap(subQuery), DirectDruidClient.makeResponseContextForQuery());
            }
            return toInlineDataSource(subQuery, queryResults, warehouse.getToolChest(subQuery), subqueryRowLimitAccumulator, maxSubqueryRows);
        } else {
            // Cannot inline subquery. Attempt to inline one level deeper, and then try again.
        return inlineIfNecessary(
                dataSource.withChildren(
                        Collections.singletonList(
                                inlineIfNecessary(Iterables.getOnlyElement(dataSource.getChildren()), null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun)
                        )
                ),
                toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
        }
    } else {
        // Not a query datasource. Walk children and see if there's anything to inline.
        return dataSource.withChildren(
                dataSource.getChildren()
                          .stream()
                          .map(child -> inlineIfNecessary(child, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun))
                          .collect(Collectors.toList()));
    }
}
Also used : QueryDataSource(org.apache.druid.query.QueryDataSource) Query(org.apache.druid.query.Query) QueryToolChest(org.apache.druid.query.QueryToolChest) Sequence(org.apache.druid.java.util.common.guava.Sequence) ResultLevelCachingQueryRunner(org.apache.druid.query.ResultLevelCachingQueryRunner) RetryQueryRunner(org.apache.druid.query.RetryQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) DataSource(org.apache.druid.query.DataSource) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) InlineDataSource(org.apache.druid.query.InlineDataSource) TableDataSource(org.apache.druid.query.TableDataSource) Stack(java.util.Stack)
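The stack handling in the "strip outer queries" branch follows a general unwrap, transform, rewrap pattern. Here is a self-contained sketch of that pattern on a hypothetical Source tree (not a Druid type); in the real method the wrapper test is instanceof QueryDataSource and the rewrite is the recursive inlineIfNecessary call.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Stack;
import java.util.function.UnaryOperator;

public class UnwrapRewrapSketch {
    // Hypothetical stand-in for Druid's DataSource tree; not a Druid class.
    static final class Source {
        final String name;
        final List<Source> children;
        Source(String name, List<Source> children) { this.name = name; this.children = children; }
        Source withChildren(List<Source> newChildren) { return new Source(name, newChildren); }
        boolean isWrapper() { return children.size() == 1; }  // stand-in for "instanceof QueryDataSource"
        @Override public String toString() { return children.isEmpty() ? name : name + children; }
    }

    // Descend through single-child wrappers, transform whatever sits at the bottom, then rebuild the
    // chain from the inside out so the outer wrappers survive around the rewritten leaf.
    static Source rewriteInnermost(Source root, UnaryOperator<Source> rewrite) {
        Stack<Source> stack = new Stack<>();
        Source current = root;
        while (current.isWrapper()) {
            stack.push(current);
            current = current.children.get(0);
        }
        current = rewrite.apply(current);  // e.g. swap the leaf for an inline datasource
        while (!stack.isEmpty()) {
            current = stack.pop().withChildren(Collections.singletonList(current));
        }
        return current;
    }

    public static void main(String[] args) {
        Source leaf = new Source("table", Collections.emptyList());
        Source nested = new Source("outerQuery", Arrays.asList(new Source("innerQuery", Arrays.asList(leaf))));
        // Prints outerQuery[innerQuery[inlined]]: the wrappers are preserved, only the leaf changed.
        System.out.println(rewriteInnermost(nested, s -> new Source("inlined", Collections.emptyList())));
    }
}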

Example 44 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class GroupByQueryQueryToolChest method mergeGroupByResultsWithoutPushDown.

private Sequence<ResultRow> mergeGroupByResultsWithoutPushDown(GroupByStrategy groupByStrategy, GroupByQuery query, GroupByQueryResource resource, QueryRunner<ResultRow> runner, ResponseContext context) {
    // If there's a subquery, merge subquery results and then apply the aggregator
    final DataSource dataSource = query.getDataSource();
    if (dataSource instanceof QueryDataSource) {
        final GroupByQuery subquery;
        try {
            // Inject outer query context keys into subquery if they don't already exist in the subquery context.
            // Unlike withOverriddenContext's normal behavior, we want keys present in the subquery to win.
            final Map<String, Object> subqueryContext = new TreeMap<>();
            if (query.getContext() != null) {
                for (Map.Entry<String, Object> entry : query.getContext().entrySet()) {
                    if (entry.getValue() != null) {
                        subqueryContext.put(entry.getKey(), entry.getValue());
                    }
                }
            }
            if (((QueryDataSource) dataSource).getQuery().getContext() != null) {
                subqueryContext.putAll(((QueryDataSource) dataSource).getQuery().getContext());
            }
            subqueryContext.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
            subquery = (GroupByQuery) ((QueryDataSource) dataSource).getQuery().withOverriddenContext(subqueryContext);
        } catch (ClassCastException e) {
            throw new UnsupportedOperationException("Subqueries must be of type 'group by'");
        }
        final Sequence<ResultRow> subqueryResult = mergeGroupByResults(
                groupByStrategy,
                subquery.withOverriddenContext(ImmutableMap.of(
                        // Setting sort to false avoids unnecessary sorting while merging results; we only need to sort
                        // in the end when returning results to user. (note this is only respected by groupBy v1)
                        GroupByQueryHelper.CTX_KEY_SORT_RESULTS, false)),
                resource, runner, context);
        final Sequence<ResultRow> finalizingResults = finalizeSubqueryResults(subqueryResult, subquery);
        if (query.getSubtotalsSpec() != null) {
            return groupByStrategy.processSubtotalsSpec(query, resource, groupByStrategy.processSubqueryResult(subquery, query, resource, finalizingResults, false));
        } else {
            return groupByStrategy.applyPostProcessing(groupByStrategy.processSubqueryResult(subquery, query, resource, finalizingResults, false), query);
        }
    } else {
        if (query.getSubtotalsSpec() != null) {
            return groupByStrategy.processSubtotalsSpec(query, resource, groupByStrategy.mergeResults(runner, query.withSubtotalsSpec(null), context));
        } else {
            return groupByStrategy.applyPostProcessing(groupByStrategy.mergeResults(runner, query, context), query);
        }
    }
}
Also used : TreeMap(java.util.TreeMap) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)
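The try-block above deliberately inverts withOverriddenContext's usual precedence: the outer query's context entries are copied first and the subquery's entries second, so the subquery wins any conflict while still inheriting keys it does not set. A tiny stand-alone sketch of that merge order, with illustrative keys and values:

import java.util.Map;
import java.util.TreeMap;

public class SubqueryContextMergeSketch {
    public static void main(String[] args) {
        // Illustrative contexts; the specific keys and values are chosen only for the example.
        Map<String, Object> outerContext = Map.of("timeout", 30000, "useCache", true);
        Map<String, Object> subqueryContext = Map.of("timeout", 5000);

        Map<String, Object> merged = new TreeMap<>();
        merged.putAll(outerContext);     // outer query's entries go in first...
        merged.putAll(subqueryContext);  // ...then the subquery's entries, so the subquery wins conflicts

        // Prints {timeout=5000, useCache=true}: the subquery kept its timeout and inherited useCache.
        System.out.println(merged);
    }
}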

Example 45 with QueryDataSource

use of org.apache.druid.query.QueryDataSource in project druid by druid-io.

the class MovingAverageQueryRunner method run.

@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext) {
    MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
    List<Interval> intervals;
    final Period period;
    // Get the largest bucket from the list of averagers
    Optional<Integer> opt = maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
    int buckets = opt.orElse(0);
    // Extend the interval beginning by specified bucket - 1
    if (maq.getGranularity() instanceof PeriodGranularity) {
        period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
        int offset = buckets <= 0 ? 0 : (1 - buckets);
        intervals = maq.getIntervals().stream().map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd())).collect(Collectors.toList());
    } else {
        throw new ISE("Only PeriodGranularity is supported for movingAverage queries");
    }
    Sequence<Row> resultsSeq;
    DataSource dataSource = maq.getDataSource();
    if (maq.getDimensions() != null && !maq.getDimensions().isEmpty() && (dataSource instanceof TableDataSource || dataSource instanceof UnionDataSource || dataSource instanceof QueryDataSource)) {
        // build groupBy query from movingAverage query
        GroupByQuery.Builder builder = GroupByQuery.builder().setDataSource(dataSource).setInterval(intervals).setDimFilter(maq.getFilter()).setGranularity(maq.getGranularity()).setDimensions(maq.getDimensions()).setAggregatorSpecs(maq.getAggregatorSpecs()).setPostAggregatorSpecs(maq.getPostAggregatorSpecs()).setContext(maq.getContext());
        GroupByQuery gbq = builder.build();
        ResponseContext gbqResponseContext = ResponseContext.createEmpty();
        gbqResponseContext.merge(responseContext);
        gbqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(gbq));
        Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(gbq, DateTimes.nowUtc(), "127.0.0.1", new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
    } else {
        // no dimensions, so optimize this as a TimeSeries
        TimeseriesQuery tsq = new TimeseriesQuery(dataSource, new MultipleIntervalSegmentSpec(intervals), false, null, maq.getFilter(), maq.getGranularity(), maq.getAggregatorSpecs(), maq.getPostAggregatorSpecs(), 0, maq.getContext());
        ResponseContext tsqResponseContext = ResponseContext.createEmpty();
        tsqResponseContext.merge(responseContext);
        tsqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(tsq));
        Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(tsq, DateTimes.nowUtc(), "127.0.0.1", new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
    }
    // Process into period buckets
    Sequence<RowBucket> bucketedMovingAvgResults = Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));
    // Apply the windows analysis functions
    Sequence<Row> movingAvgResults = Sequences.simple(new MovingAverageIterable(bucketedMovingAvgResults, maq.getDimensions(), maq.getAveragerSpecs(), maq.getPostAggregatorSpecs(), maq.getAggregatorSpecs()));
    // Apply any postAveragers
    Sequence<Row> movingAvgResultsWithPostAveragers = Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));
    // remove rows outside the reporting window
    List<Interval> reportingIntervals = maq.getIntervals();
    movingAvgResults = Sequences.filter(movingAvgResultsWithPostAveragers, row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp())));
    // Apply any having, sorting, and limits
    movingAvgResults = maq.applyLimit(movingAvgResults);
    return movingAvgResults;
}
Also used : QueryPlus(org.apache.druid.query.QueryPlus) MapBasedRow(org.apache.druid.data.input.MapBasedRow) AveragerFactory(org.apache.druid.query.movingaverage.averagers.AveragerFactory) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) Row(org.apache.druid.data.input.Row) QueryStats(org.apache.druid.server.QueryStats) Interval(org.joda.time.Interval) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Period(org.joda.time.Period) Function(com.google.common.base.Function) ImmutableMap(com.google.common.collect.ImmutableMap) ResponseContext(org.apache.druid.query.context.ResponseContext) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) Throwables(com.google.common.base.Throwables) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) RequestLogger(org.apache.druid.server.log.RequestLogger) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) Result(org.apache.druid.query.Result) List(java.util.List) UnionDataSource(org.apache.druid.query.UnionDataSource) RequestLogLine(org.apache.druid.server.RequestLogLine) Optional(java.util.Optional)
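A small stand-alone sketch of the interval-widening step near the top of run(): with a hypothetical seven-bucket averager and a one-day period granularity, the start of each interval is pulled back six periods so the first window has enough history; the extra rows are filtered out again at the end against the original reporting intervals. The dates and bucket count below are made up for illustration.

import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;
import org.joda.time.Period;

public class MovingAverageIntervalSketch {
    public static void main(String[] args) {
        Period period = Period.days(1);                 // period of the (hypothetical) granularity
        Interval reporting = Intervals.of("2020-01-10/2020-01-20");
        int buckets = 7;                                // largest averager window, e.g. a 7-day average
        int offset = buckets <= 0 ? 0 : (1 - buckets);  // -6: pull the start back six periods

        Interval fetch = new Interval(reporting.getStart().withPeriodAdded(period, offset), reporting.getEnd());
        // Prints 2020-01-04T.../2020-01-20T...: six extra days are fetched, then trimmed after averaging.
        System.out.println(fetch);
    }
}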

Aggregations

QueryDataSource (org.apache.druid.query.QueryDataSource) 95
Test (org.junit.Test) 81
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec) 51
TableDataSource (org.apache.druid.query.TableDataSource) 44
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory) 41
Parameters (junitparams.Parameters) 30
GlobalTableDataSource (org.apache.druid.query.GlobalTableDataSource) 29
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory) 23
FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory) 14
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec) 14
LookupDataSource (org.apache.druid.query.LookupDataSource) 13
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter) 11
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery) 11
ArithmeticPostAggregator (org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) 10
FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) 10
DataSource (org.apache.druid.query.DataSource) 8
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) 8
LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory) 8
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn) 8
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 8