use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
the class ClientQuerySegmentWalker method canRunQueryUsingClusterWalker.
/**
* Checks if a query can be handled wholly by {@link #clusterClient}. Assumes that it is a
* {@link CachingClusteredClient} or something that behaves similarly.
*/
private <T> boolean canRunQueryUsingClusterWalker(Query<T> query) {
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
  final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
  // The query must be based on a concrete table, and if its outermost datasource is itself a query,
  // the toolchest must be able to handle that subquery (the cluster walker cannot execute
  // subqueries on its own).
  return analysis.isConcreteTableBased()
         && (!analysis.isQuery()
             || toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery()));
}
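The return expression is just a two-part predicate. The toy class below is not Druid code; the class and method names are invented, and plain booleans stand in for the DataSourceAnalysis and QueryToolChest results, to spell out the same truth table:

// Illustrative only: booleans stand in for DataSourceAnalysis / QueryToolChest answers.
public class ClusterWalkerCheckSketch {
  static boolean canRunOnCluster(boolean concreteTableBased, boolean outermostIsQuery, boolean toolChestHandlesSubquery) {
    // 1) the datasource must bottom out on a concrete table, and
    // 2) if the outermost datasource is itself a query, the toolchest must be able to run it as a subquery
    return concreteTableBased && (!outermostIsQuery || toolChestHandlesSubquery);
  }

  public static void main(String[] args) {
    System.out.println(canRunOnCluster(true, false, false));  // true: plain table scan
    System.out.println(canRunOnCluster(true, true, true));    // true: nested query the toolchest can merge itself
    System.out.println(canRunOnCluster(true, true, false));   // false: the subquery must be inlined first
    System.out.println(canRunOnCluster(false, false, false)); // false: not based on a concrete table
  }
}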
use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
the class ClientQuerySegmentWalker method generateSubqueryIds.
/**
* This method returns the datasource with every {@link QueryDataSource} in it populated with the correct nesting
* level and sibling order of the subqueries that are present.
* It also plumbs the parent query's id and sql id into the subqueries, in case they don't already have them set.
*
* @param rootDataSource Datasource whose subqueries need to be populated
* @param parentQueryId Parent query's ID; can be null if it does not need to be set on the subqueries
* @param parentSqlQueryId Parent query's SQL query ID; can be null if it does not need to be set on the subqueries
* @return DataSource populated with the subqueries
*/
private DataSource generateSubqueryIds(
    DataSource rootDataSource,
    @Nullable final String parentQueryId,
    @Nullable final String parentSqlQueryId
) {
  Queue<DataSource> queue = new ArrayDeque<>();
  queue.add(rootDataSource);

  // Performs BFS on the datasource tree to find the nesting level, and the sibling order of the query datasource
  Map<QueryDataSource, Pair<Integer, Integer>> queryDataSourceToSubqueryIds = new HashMap<>();
  int level = 1;
  while (!queue.isEmpty()) {
    int size = queue.size();
    int siblingOrder = 1;
    for (int i = 0; i < size; ++i) {
      DataSource currentDataSource = queue.poll();
      if (currentDataSource == null) {
        // Shouldn't be encountered
        continue;
      }
      if (currentDataSource instanceof QueryDataSource) {
        queryDataSourceToSubqueryIds.put((QueryDataSource) currentDataSource, new Pair<>(level, siblingOrder));
        ++siblingOrder;
      }
      queue.addAll(currentDataSource.getChildren());
    }
    ++level;
  }

  /*
   Returns the datasource by populating all the subqueries with the ids generated in the map above.
   Implemented in a separate function since the methods on datasources and queries return a new datasource/query.
  */
  return insertSubqueryIds(rootDataSource, queryDataSourceToSubqueryIds, parentQueryId, parentSqlQueryId);
}
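To make the (level, siblingOrder) numbering concrete, here is a small self-contained sketch that runs the same breadth-first walk over a toy tree. The Node class and the "level.siblingOrder" string format are stand-ins invented for this example; the real method walks DataSource children and keeps a Pair<Integer, Integer> per QueryDataSource:

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Queue;

public class SubqueryIdSketch {
  static class Node {
    final String name;
    final boolean isQuery;          // stands in for "instanceof QueryDataSource"
    final List<Node> children;

    Node(String name, boolean isQuery, Node... children) {
      this.name = name;
      this.isQuery = isQuery;
      this.children = Arrays.asList(children);
    }
  }

  static Map<String, String> numberSubqueries(Node root) {
    Map<String, String> ids = new LinkedHashMap<>();
    Queue<Node> queue = new ArrayDeque<>();
    queue.add(root);
    int level = 1;
    while (!queue.isEmpty()) {
      int size = queue.size();
      int siblingOrder = 1;
      for (int i = 0; i < size; ++i) {
        Node current = queue.poll();
        if (current.isQuery) {
          ids.put(current.name, level + "." + siblingOrder);
          ++siblingOrder;
        }
        queue.addAll(current.children);
      }
      ++level;
    }
    return ids;
  }

  public static void main(String[] args) {
    // join(queryA, queryB(nestedQuery)): the root join sits at level 1, so the two query children
    // are numbered at level 2, and the query nested inside queryB at level 3.
    Node nested = new Node("nestedQuery", true);
    Node queryB = new Node("queryB", true, nested);
    Node queryA = new Node("queryA", true);
    Node root = new Node("join", false, queryA, queryB);
    System.out.println(numberSubqueries(root)); // {queryA=2.1, queryB=2.2, nestedQuery=3.1}
  }
}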
use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
the class ClientQuerySegmentWalker method inlineIfNecessary.
/**
* Replace QueryDataSources with InlineDataSources when necessary and possible. "Necessary" is defined as:
*
* 1) For outermost subqueries: inlining is necessary if the toolchest cannot handle it.
* 2) For all other subqueries (e.g. those nested under a join): inlining is always necessary.
*
* @param dataSource datasource to process.
* @param toolChestIfOutermost if provided, and if the provided datasource is a {@link QueryDataSource}, this method
* will consider whether the toolchest can handle a subquery on the datasource using
* {@link QueryToolChest#canPerformSubquery}. If the toolchest can handle it, then it will
* not be inlined. See {@link org.apache.druid.query.groupby.GroupByQueryQueryToolChest}
* for an example of a toolchest that can handle subqueries.
* @param subqueryRowLimitAccumulator an accumulator for tracking the number of accumulated rows in all subqueries
* for a particular master query
* @param maxSubqueryRows Max rows that all the subqueries generated by a master query can have, combined
* @param dryRun if true, does not actually execute any subqueries, but will inline empty result sets.
*/
// Subquery, toolchest, runner handling all use raw types
@SuppressWarnings({ "rawtypes", "unchecked" })
private DataSource inlineIfNecessary(
    final DataSource dataSource,
    @Nullable final QueryToolChest toolChestIfOutermost,
    final AtomicInteger subqueryRowLimitAccumulator,
    final int maxSubqueryRows,
    final boolean dryRun
) {
  if (dataSource instanceof QueryDataSource) {
    // This datasource is a subquery.
    final Query subQuery = ((QueryDataSource) dataSource).getQuery();
    final QueryToolChest toolChest = warehouse.getToolChest(subQuery);

    if (toolChestIfOutermost != null && toolChestIfOutermost.canPerformSubquery(subQuery)) {
      // Strip outer queries that are handleable by the toolchest, and inline subqueries that may be underneath
      // them (e.g. subqueries nested under a join).
      final Stack<DataSource> stack = new Stack<>();
      DataSource current = dataSource;
      while (current instanceof QueryDataSource) {
        stack.push(current);
        current = Iterables.getOnlyElement(current.getChildren());
      }

      // lgtm [java/contradictory-type-checks]
      assert !(current instanceof QueryDataSource);
      current = inlineIfNecessary(current, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);

      while (!stack.isEmpty()) {
        current = stack.pop().withChildren(Collections.singletonList(current));
      }
      assert current instanceof QueryDataSource;

      if (toolChest.canPerformSubquery(((QueryDataSource) current).getQuery())) {
        return current;
      } else {
        // We need to consider inlining it.
        return inlineIfNecessary(current, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
      }
    } else if (canRunQueryUsingLocalWalker(subQuery) || canRunQueryUsingClusterWalker(subQuery)) {
      // Subquery needs to be inlined. Assign it a subquery id and run it.
      final Sequence<?> queryResults;
      if (dryRun) {
        queryResults = Sequences.empty();
      } else {
        final QueryRunner subqueryRunner = subQuery.getRunner(this);
        queryResults = subqueryRunner.run(QueryPlus.wrap(subQuery), DirectDruidClient.makeResponseContextForQuery());
      }
      return toInlineDataSource(
          subQuery,
          queryResults,
          warehouse.getToolChest(subQuery),
          subqueryRowLimitAccumulator,
          maxSubqueryRows
      );
    } else {
      // Cannot inline subquery. Attempt to inline one level deeper, and then try again.
      return inlineIfNecessary(
          dataSource.withChildren(
              Collections.singletonList(
                  inlineIfNecessary(
                      Iterables.getOnlyElement(dataSource.getChildren()),
                      null,
                      subqueryRowLimitAccumulator,
                      maxSubqueryRows,
                      dryRun
                  )
              )
          ),
          toolChestIfOutermost,
          subqueryRowLimitAccumulator,
          maxSubqueryRows,
          dryRun
      );
    }
  } else {
    // Not a query datasource. Walk children and see if there's anything to inline.
    return dataSource.withChildren(
        dataSource.getChildren()
                  .stream()
                  .map(child -> inlineIfNecessary(child, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun))
                  .collect(Collectors.toList())
    );
  }
}
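One thing the signature implies but the snippet does not show is how subqueryRowLimitAccumulator and maxSubqueryRows interact: per the javadoc, every subquery that gets materialized adds its rows to one shared counter, so the limit applies to all subqueries of a master query combined. Below is a minimal, self-contained sketch of that bookkeeping; the class, method, exception type, and message are illustrative inventions, not the actual toInlineDataSource:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

public class SubqueryRowLimitSketch {
  static List<String> materializeWithLimit(Iterable<String> rows, AtomicInteger accumulator, int maxSubqueryRows) {
    final List<String> out = new ArrayList<>();
    for (String row : rows) {
      // The counter is shared across every subquery of one master query, so the limit is global, not per-subquery.
      if (accumulator.incrementAndGet() > maxSubqueryRows) {
        throw new IllegalStateException("Subquery generated results beyond maximum[" + maxSubqueryRows + "]");
      }
      out.add(row);
    }
    return out;
  }

  public static void main(String[] args) {
    final AtomicInteger shared = new AtomicInteger();
    materializeWithLimit(Arrays.asList("x", "y"), shared, 3); // ok: 2 rows accumulated so far
    materializeWithLimit(Arrays.asList("z", "w"), shared, 3); // throws: 4 combined rows exceed the limit of 3
  }
}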
use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
the class GroupByQueryQueryToolChest method mergeGroupByResultsWithoutPushDown.
private Sequence<ResultRow> mergeGroupByResultsWithoutPushDown(
    GroupByStrategy groupByStrategy,
    GroupByQuery query,
    GroupByQueryResource resource,
    QueryRunner<ResultRow> runner,
    ResponseContext context
) {
  // If there's a subquery, merge subquery results and then apply the aggregator
  final DataSource dataSource = query.getDataSource();

  if (dataSource instanceof QueryDataSource) {
    final GroupByQuery subquery;
    try {
      // Inject outer query context keys into subquery if they don't already exist in the subquery context.
      // Unlike withOverriddenContext's normal behavior, we want keys present in the subquery to win.
      final Map<String, Object> subqueryContext = new TreeMap<>();
      if (query.getContext() != null) {
        for (Map.Entry<String, Object> entry : query.getContext().entrySet()) {
          if (entry.getValue() != null) {
            subqueryContext.put(entry.getKey(), entry.getValue());
          }
        }
      }
      if (((QueryDataSource) dataSource).getQuery().getContext() != null) {
        subqueryContext.putAll(((QueryDataSource) dataSource).getQuery().getContext());
      }
      subqueryContext.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
      subquery = (GroupByQuery) ((QueryDataSource) dataSource).getQuery().withOverriddenContext(subqueryContext);
    } catch (ClassCastException e) {
      throw new UnsupportedOperationException("Subqueries must be of type 'group by'");
    }
    final Sequence<ResultRow> subqueryResult = mergeGroupByResults(
        groupByStrategy,
        subquery.withOverriddenContext(
            ImmutableMap.of(
                // Setting sort to false avoids unnecessary sorting while merging results; we only need to sort
                // in the end when returning results to user. (note this is only respected by groupBy v1)
                GroupByQueryHelper.CTX_KEY_SORT_RESULTS, false
            )
        ),
        resource,
        runner,
        context
    );
    final Sequence<ResultRow> finalizingResults = finalizeSubqueryResults(subqueryResult, subquery);

    if (query.getSubtotalsSpec() != null) {
      return groupByStrategy.processSubtotalsSpec(
          query,
          resource,
          groupByStrategy.processSubqueryResult(subquery, query, resource, finalizingResults, false)
      );
    } else {
      return groupByStrategy.applyPostProcessing(
          groupByStrategy.processSubqueryResult(subquery, query, resource, finalizingResults, false),
          query
      );
    }
  } else {
    if (query.getSubtotalsSpec() != null) {
      return groupByStrategy.processSubtotalsSpec(
          query,
          resource,
          groupByStrategy.mergeResults(runner, query.withSubtotalsSpec(null), context)
      );
    } else {
      return groupByStrategy.applyPostProcessing(groupByStrategy.mergeResults(runner, query, context), query);
    }
  }
}
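The context handling in the try block is easy to get backwards, so here is a small, self-contained sketch of just that merge rule: outer-query keys are copied first (skipping null values), the subquery's own context is then putAll'd on top so its keys win, and sortByDimsFirst is forced to false. Plain maps stand in for Druid query contexts, and the class and method names are invented for the example:

import java.util.Map;
import java.util.TreeMap;

public class SubqueryContextMergeSketch {
  static Map<String, Object> mergeContexts(Map<String, Object> outer, Map<String, Object> subquery) {
    final Map<String, Object> merged = new TreeMap<>();
    if (outer != null) {
      for (Map.Entry<String, Object> entry : outer.entrySet()) {
        if (entry.getValue() != null) {
          merged.put(entry.getKey(), entry.getValue()); // null-valued outer keys are skipped
        }
      }
    }
    if (subquery != null) {
      merged.putAll(subquery); // subquery values override outer values for the same key
    }
    merged.put("sortByDimsFirst", false); // stands in for GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST
    return merged;
  }

  public static void main(String[] args) {
    final Map<String, Object> outer = new TreeMap<>();
    outer.put("timeout", 30000);
    outer.put("useCache", true);
    final Map<String, Object> sub = new TreeMap<>();
    sub.put("timeout", 5000);
    // Prints {sortByDimsFirst=false, timeout=5000, useCache=true}: the subquery's timeout wins.
    System.out.println(mergeContexts(outer, sub));
  }
}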
use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
the class MovingAverageQueryRunner method run.
@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext) {
  MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
  List<Interval> intervals;
  final Period period;

  // Get the largest bucket from the list of averagers
  Optional<Integer> opt =
      maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
  int buckets = opt.orElse(0);

  // Extend the beginning of each interval by (buckets - 1) granularity periods, so the first output
  // bucket has a full averaging window of input rows.
  if (maq.getGranularity() instanceof PeriodGranularity) {
    period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
    int offset = buckets <= 0 ? 0 : (1 - buckets);
    intervals = maq.getIntervals()
                   .stream()
                   .map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd()))
                   .collect(Collectors.toList());
  } else {
    throw new ISE("Only PeriodGranularity is supported for movingAverage queries");
  }
  Sequence<Row> resultsSeq;
  DataSource dataSource = maq.getDataSource();

  if (maq.getDimensions() != null && !maq.getDimensions().isEmpty()
      && (dataSource instanceof TableDataSource || dataSource instanceof UnionDataSource
          || dataSource instanceof QueryDataSource)) {
    // build groupBy query from movingAverage query
    GroupByQuery.Builder builder = GroupByQuery.builder()
                                               .setDataSource(dataSource)
                                               .setInterval(intervals)
                                               .setDimFilter(maq.getFilter())
                                               .setGranularity(maq.getGranularity())
                                               .setDimensions(maq.getDimensions())
                                               .setAggregatorSpecs(maq.getAggregatorSpecs())
                                               .setPostAggregatorSpecs(maq.getPostAggregatorSpecs())
                                               .setContext(maq.getContext());
    GroupByQuery gbq = builder.build();

    ResponseContext gbqResponseContext = ResponseContext.createEmpty();
    gbqResponseContext.merge(responseContext);
    gbqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(gbq));

    Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
    try {
      // use localhost for remote address
      requestLogger.logNativeQuery(RequestLogLine.forNative(
          gbq,
          DateTimes.nowUtc(),
          "127.0.0.1",
          new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))
      ));
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
    resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
  } else {
    // no dimensions, so optimize this as a TimeSeries
    TimeseriesQuery tsq = new TimeseriesQuery(
        dataSource,
        new MultipleIntervalSegmentSpec(intervals),
        false,
        null,
        maq.getFilter(),
        maq.getGranularity(),
        maq.getAggregatorSpecs(),
        maq.getPostAggregatorSpecs(),
        0,
        maq.getContext()
    );

    ResponseContext tsqResponseContext = ResponseContext.createEmpty();
    tsqResponseContext.merge(responseContext);
    tsqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(tsq));

    Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
    try {
      // use localhost for remote address
      requestLogger.logNativeQuery(RequestLogLine.forNative(
          tsq,
          DateTimes.nowUtc(),
          "127.0.0.1",
          new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))
      ));
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
    resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
  }
  // Process into period buckets
  Sequence<RowBucket> bucketedMovingAvgResults =
      Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));

  // Apply the window analysis (averager) functions
  Sequence<Row> movingAvgResults = Sequences.simple(new MovingAverageIterable(
      bucketedMovingAvgResults,
      maq.getDimensions(),
      maq.getAveragerSpecs(),
      maq.getPostAggregatorSpecs(),
      maq.getAggregatorSpecs()
  ));

  // Apply any postAveragers
  Sequence<Row> movingAvgResultsWithPostAveragers =
      Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));

  // Remove rows outside the reporting window (the original, unextended intervals)
  List<Interval> reportingIntervals = maq.getIntervals();
  movingAvgResults = Sequences.filter(
      movingAvgResultsWithPostAveragers,
      row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp()))
  );

  // Apply any having, sorting, and limits
  movingAvgResults = maq.applyLimit(movingAvgResults);
  return movingAvgResults;
}