Use of org.apache.druid.query.QueryToolChest in project druid by druid-io.
From the class ClientQuerySegmentWalker, method inlineIfNecessary.
/**
* Replace QueryDataSources with InlineDataSources when necessary and possible. "Necessary" is defined as:
*
* 1) For outermost subqueries: inlining is necessary if the toolchest cannot handle it.
* 2) For all other subqueries (e.g. those nested under a join): inlining is always necessary.
*
* @param dataSource datasource to process.
* @param toolChestIfOutermost if provided, and if the provided datasource is a {@link QueryDataSource}, this method
* will consider whether the toolchest can handle a subquery on the datasource using
* {@link QueryToolChest#canPerformSubquery}. If the toolchest can handle it, then it will
* not be inlined. See {@link org.apache.druid.query.groupby.GroupByQueryQueryToolChest}
* for an example of a toolchest that can handle subqueries.
* @param subqueryRowLimitAccumulator an accumulator for tracking the number of accumulated rows in all subqueries
* for a particular master query
* @param maxSubqueryRows Max rows that all the subqueries generated by a master query can have, combined
* @param dryRun if true, does not actually execute any subqueries, but will inline empty result sets.
*/
// Subquery, toolchest, runner handling all use raw types
@SuppressWarnings({ "rawtypes", "unchecked" })
private DataSource inlineIfNecessary(
    final DataSource dataSource,
    @Nullable final QueryToolChest toolChestIfOutermost,
    final AtomicInteger subqueryRowLimitAccumulator,
    final int maxSubqueryRows,
    final boolean dryRun
) {
if (dataSource instanceof QueryDataSource) {
// This datasource is a subquery.
final Query subQuery = ((QueryDataSource) dataSource).getQuery();
final QueryToolChest toolChest = warehouse.getToolChest(subQuery);
if (toolChestIfOutermost != null && toolChestIfOutermost.canPerformSubquery(subQuery)) {
// Strip outer queries that are handleable by the toolchest, and inline subqueries that may be underneath
// them (e.g. subqueries nested under a join).
final Stack<DataSource> stack = new Stack<>();
DataSource current = dataSource;
while (current instanceof QueryDataSource) {
stack.push(current);
current = Iterables.getOnlyElement(current.getChildren());
}
// lgtm [java/contradictory-type-checks]
assert !(current instanceof QueryDataSource);
current = inlineIfNecessary(current, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
while (!stack.isEmpty()) {
current = stack.pop().withChildren(Collections.singletonList(current));
}
assert current instanceof QueryDataSource;
if (toolChest.canPerformSubquery(((QueryDataSource) current).getQuery())) {
return current;
} else {
// We need to consider inlining it.
return inlineIfNecessary(current, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
}
} else if (canRunQueryUsingLocalWalker(subQuery) || canRunQueryUsingClusterWalker(subQuery)) {
// Subquery needs to be inlined. Assign it a subquery id and run it.
final Sequence<?> queryResults;
if (dryRun) {
queryResults = Sequences.empty();
} else {
final QueryRunner subqueryRunner = subQuery.getRunner(this);
queryResults = subqueryRunner.run(QueryPlus.wrap(subQuery), DirectDruidClient.makeResponseContextForQuery());
}
return toInlineDataSource(
    subQuery,
    queryResults,
    warehouse.getToolChest(subQuery),
    subqueryRowLimitAccumulator,
    maxSubqueryRows
);
} else {
// Cannot inline subquery. Attempt to inline one level deeper, and then try again.
return inlineIfNecessary(
    dataSource.withChildren(
        Collections.singletonList(
            inlineIfNecessary(
                Iterables.getOnlyElement(dataSource.getChildren()),
                null,
                subqueryRowLimitAccumulator,
                maxSubqueryRows,
                dryRun
            )
        )
    ),
    toolChestIfOutermost,
    subqueryRowLimitAccumulator,
    maxSubqueryRows,
    dryRun
);
}
} else {
// Not a query datasource. Walk children and see if there's anything to inline.
return dataSource.withChildren(
    dataSource.getChildren()
              .stream()
              .map(child -> inlineIfNecessary(child, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun))
              .collect(Collectors.toList())
);
}
}
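The heart of the first branch is the stack-based unwrap/rewrap: peel QueryDataSource layers off until a non-query datasource is reached, inline that leaf, then rebuild the layers in their original order. Below is a self-contained sketch of that pattern; Wrapper and Leaf are hypothetical stand-ins for illustration, not Druid classes.

import java.util.ArrayDeque;
import java.util.Deque;

public class UnwrapRewrapSketch {
  // Hypothetical stand-ins: Wrapper plays the role of QueryDataSource,
  // Leaf the role of a concrete (table-like) datasource.
  interface Node {}

  static class Leaf implements Node {
    final String name;
    Leaf(String name) { this.name = name; }
    @Override public String toString() { return "Leaf(" + name + ")"; }
  }

  static class Wrapper implements Node {
    final String label;
    final Node child;
    Wrapper(String label, Node child) { this.label = label; this.child = child; }
    // Mirrors DataSource#withChildren: returns a new instance with a replaced child.
    Wrapper withChild(Node newChild) { return new Wrapper(label, newChild); }
    @Override public String toString() { return label + "(" + child + ")"; }
  }

  static Node rewriteLeaf(Node root) {
    final Deque<Node> stack = new ArrayDeque<>();
    Node current = root;
    // Peel off wrapper layers until we reach the leaf, remembering each layer.
    while (current instanceof Wrapper) {
      stack.push(current);
      current = ((Wrapper) current).child;
    }
    // Rewrite the leaf; in inlineIfNecessary this is the recursive call that
    // inlines the innermost non-query datasource.
    current = new Leaf(((Leaf) current).name + "-inlined");
    // Rebuild the chain in its original order, innermost layer first.
    while (!stack.isEmpty()) {
      current = ((Wrapper) stack.pop()).withChild(current);
    }
    return current;
  }

  public static void main(String[] args) {
    System.out.println(rewriteLeaf(new Wrapper("outer", new Wrapper("inner", new Leaf("base")))));
    // Prints: outer(inner(Leaf(base-inlined)))
  }
}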
Use of org.apache.druid.query.QueryToolChest in project druid by druid-io.
From the class ServerManager, method buildAndDecorateQueryRunner.
private <T> QueryRunner<T> buildAndDecorateQueryRunner(
    final QueryRunnerFactory<T, Query<T>> factory,
    final QueryToolChest<T, Query<T>> toolChest,
    final SegmentReference segment,
    final Optional<byte[]> cacheKeyPrefix,
    final SegmentDescriptor segmentDescriptor,
    final AtomicLong cpuTimeAccumulator
) {
final SpecificSegmentSpec segmentSpec = new SpecificSegmentSpec(segmentDescriptor);
final SegmentId segmentId = segment.getId();
final Interval segmentInterval = segment.getDataInterval();
// If the segment is closed after this line, ReferenceCountingSegmentQueryRunner will handle and do the right thing.
if (segmentId == null || segmentInterval == null) {
return new ReportTimelineMissingSegmentQueryRunner<>(segmentDescriptor);
}
String segmentIdString = segmentId.toString();
// Innermost runner: holds a reference to the segment while running and emits per-segment query time.
MetricsEmittingQueryRunner<T> metricsEmittingQueryRunnerInner = new MetricsEmittingQueryRunner<>(
    emitter,
    toolChest,
    new ReferenceCountingSegmentQueryRunner<>(factory, segment, segmentDescriptor),
    QueryMetrics::reportSegmentTime,
    queryMetrics -> queryMetrics.segment(segmentIdString)
);
StorageAdapter storageAdapter = segment.asStorageAdapter();
long segmentMaxTime = storageAdapter.getMaxTime().getMillis();
long segmentMinTime = storageAdapter.getMinTime().getMillis();
Interval actualDataInterval = Intervals.utc(segmentMinTime, segmentMaxTime + 1);
// Serves results from and populates the segment-level cache.
CachingQueryRunner<T> cachingQueryRunner = new CachingQueryRunner<>(
    segmentIdString,
    cacheKeyPrefix,
    segmentDescriptor,
    actualDataInterval,
    objectMapper,
    cache,
    toolChest,
    metricsEmittingQueryRunnerInner,
    cachePopulator,
    cacheConfig
);
// Wraps results per segment when the "bySegment" query context flag is set.
BySegmentQueryRunner<T> bySegmentQueryRunner = new BySegmentQueryRunner<>(
    segmentId,
    segmentInterval.getStart(),
    cachingQueryRunner
);
// Outer metrics runner: reports combined segment-and-cache time, measuring wait from this point.
MetricsEmittingQueryRunner<T> metricsEmittingQueryRunnerOuter = new MetricsEmittingQueryRunner<>(
    emitter,
    toolChest,
    bySegmentQueryRunner,
    QueryMetrics::reportSegmentAndCacheTime,
    queryMetrics -> queryMetrics.segment(segmentIdString)
).withWaitMeasuredFromNow();
// Pins the query to this specific segment and reports the segment if it turns out to be missing.
SpecificSegmentQueryRunner<T> specificSegmentQueryRunner = new SpecificSegmentQueryRunner<>(
    metricsEmittingQueryRunnerOuter,
    segmentSpec
);
// Gives the query a chance to optimize itself for this particular segment.
PerSegmentOptimizingQueryRunner<T> perSegmentOptimizingQueryRunner = new PerSegmentOptimizingQueryRunner<>(
    specificSegmentQueryRunner,
    new PerSegmentQueryOptimizationContext(segmentDescriptor)
);
// Outermost wrappers: apply server-level context defaults and accumulate CPU time.
return new SetAndVerifyContextQueryRunner<>(
    serverConfig,
    CPUTimeMetricQueryRunner.safeBuild(perSegmentOptimizingQueryRunner, toolChest, emitter, cpuTimeAccumulator, false)
);
}
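buildAndDecorateQueryRunner is a textbook decorator chain: each QueryRunner wraps the one before it and adds exactly one concern (reference counting, metrics, caching, by-segment handling, and so on). Here is a minimal, self-contained sketch of the same idiom, using a hypothetical Runner interface rather than Druid's QueryRunner:

import java.util.List;

public class RunnerDecoratorSketch {
  // Hypothetical stand-in for QueryRunner<T>.
  interface Runner {
    List<String> run(String query);
  }

  // A decorator that times its delegate, loosely analogous to MetricsEmittingQueryRunner.
  static Runner timed(Runner delegate) {
    return query -> {
      long start = System.nanoTime();
      try {
        return delegate.run(query);
      } finally {
        System.out.printf("query took %d ns%n", System.nanoTime() - start);
      }
    };
  }

  public static void main(String[] args) {
    Runner base = query -> List.of("row1", "row2");
    // Compose decorators innermost-first, as buildAndDecorateQueryRunner does.
    Runner decorated = timed(base);
    System.out.println(decorated.run("select *"));
  }
}

Keeping each wrapper single-purpose makes every concern testable in isolation, and the wrapping order determines what each layer observes: here the timing wrapper sees the full cost of everything inside it.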
Use of org.apache.druid.query.QueryToolChest in project druid by druid-io.
From the class MaterializedViewQueryQueryToolChestTest, method testDecorateObjectMapper.
@Test
public void testDecorateObjectMapper() throws IOException {
GroupByQuery realQuery = GroupByQuery.builder()
    .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
    .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
    .setDimensions(new DefaultDimensionSpec("quality", "alias"))
    .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
    .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
    .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_ARRAY_RESULT_ROWS, false))
    .build();
QueryToolChest queryToolChest = new MaterializedViewQueryQueryToolChest(
    new MapQueryToolChestWarehouse(
        ImmutableMap.<Class<? extends Query>, QueryToolChest>builder()
            .put(GroupByQuery.class, new GroupByQueryQueryToolChest(null))
            .build()
    )
);
ObjectMapper objectMapper = queryToolChest.decorateObjectMapper(JSON_MAPPER, realQuery);
List<ResultRow> results = Arrays.asList(
    GroupByQueryRunnerTestHelper.createExpectedRow(realQuery, "2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
    GroupByQueryRunnerTestHelper.createExpectedRow(realQuery, "2011-04-01", "alias", "business", "rows", 1L, "idx", 118L)
);
List<MapBasedRow> expectedResults = results.stream().map(resultRow -> resultRow.toMapBasedRow(realQuery)).collect(Collectors.toList());
Assert.assertEquals("decorate-object-mapper", JSON_MAPPER.writerFor(new TypeReference<List<MapBasedRow>>() {
}).writeValueAsString(expectedResults), objectMapper.writeValueAsString(results));
}
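The test relies on decorateObjectMapper returning a copy of the mapper whose serialization of ResultRow matches the query's result shape. The general Jackson mechanism behind that is registering a custom serializer on a copied mapper; here is a minimal sketch of that mechanism, independent of Druid's actual serializer:

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.module.SimpleModule;
import java.io.IOException;

public class DecorateMapperSketch {
  static class Row {
    public String dim = "automotive";
    public long metric = 135L;
  }

  public static void main(String[] args) throws IOException {
    ObjectMapper base = new ObjectMapper();
    // Copy the base mapper and register a custom serializer, analogous to how
    // decorateObjectMapper returns a query-aware copy rather than mutating the original.
    ObjectMapper decorated = base.copy();
    SimpleModule module = new SimpleModule();
    module.addSerializer(Row.class, new JsonSerializer<Row>() {
      @Override
      public void serialize(Row row, JsonGenerator gen, SerializerProvider serializers) throws IOException {
        // Write the row in a map-like shape, echoing ResultRow -> MapBasedRow.
        gen.writeStartObject();
        gen.writeStringField("dim", row.dim);
        gen.writeNumberField("metric", row.metric);
        gen.writeEndObject();
      }
    });
    decorated.registerModule(module);

    Row row = new Row();
    System.out.println(base.writeValueAsString(row));      // default field serialization
    System.out.println(decorated.writeValueAsString(row)); // custom map-style output
  }
}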
Use of org.apache.druid.query.QueryToolChest in project druid by druid-io.
From the class MaterializedViewQueryQueryToolChestTest, method testMakePostComputeManipulatorFn.
@Test
public void testMakePostComputeManipulatorFn() {
TimeseriesQuery realQuery = Druids.newTimeseriesQueryBuilder()
    .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
    .granularity(QueryRunnerTestHelper.DAY_GRAN)
    .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
    .aggregators(QueryRunnerTestHelper.ROWS_COUNT)
    .descending(true)
    .build();
MaterializedViewQuery materializedViewQuery = new MaterializedViewQuery(realQuery, null);
QueryToolChest materializedViewQueryQueryToolChest = new MaterializedViewQueryQueryToolChest(
    new MapQueryToolChestWarehouse(
        ImmutableMap.<Class<? extends Query>, QueryToolChest>builder()
            .put(TimeseriesQuery.class, new TimeseriesQueryQueryToolChest())
            .build()
    )
);
Function postFn = materializedViewQueryQueryToolChest.makePostComputeManipulatorFn(
    materializedViewQuery,
    new MetricManipulationFn() {
      @Override
      public Object manipulate(AggregatorFactory factory, Object object) {
        return "metricvalue1";
      }
    }
);
Result<TimeseriesResultValue> result = new Result<>(DateTimes.nowUtc(), new TimeseriesResultValue(ImmutableMap.of("dim1", "dimvalue1")));
Result<TimeseriesResultValue> postResult = (Result<TimeseriesResultValue>) postFn.apply(result);
Map<String, Object> postResultMap = postResult.getValue().getBaseObject();
Assert.assertEquals(postResult.getTimestamp(), result.getTimestamp());
Assert.assertEquals(postResultMap.size(), 2);
Assert.assertEquals(postResultMap.get(QueryRunnerTestHelper.ROWS_COUNT.getName()), "metricvalue1");
Assert.assertEquals(postResultMap.get("dim1"), "dimvalue1");
}
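Conceptually, the post-compute manipulator rewrites only the metric entries of each result row and leaves dimensions untouched, which is exactly what the assertions above verify. A small sketch of that idea using plain maps; manipulate and the names below are illustrative, not Druid API:

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;

public class MetricManipulationSketch {
  // Hypothetical stand-in for MetricManipulationFn: (metricName, rawValue) -> finalized value.
  static Map<String, Object> manipulate(
      Map<String, Object> row,
      List<String> metricNames,
      BiFunction<String, Object, Object> fn
  ) {
    Map<String, Object> out = new HashMap<>(row);
    // Only metric entries are rewritten; dimension entries pass through untouched.
    for (String metric : metricNames) {
      out.computeIfPresent(metric, fn);
    }
    return out;
  }

  public static void main(String[] args) {
    Map<String, Object> row = Map.of("dim1", "dimvalue1", "rows", 26L);
    // Prints a map with "rows" replaced and "dim1" preserved, mirroring the test.
    System.out.println(manipulate(row, List.of("rows"), (name, value) -> "metricvalue1"));
  }
}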
Use of org.apache.druid.query.QueryToolChest in project druid by druid-io.
From the class TimeseriesBenchmark, method runQuery.
private static <T> List<T> runQuery(QueryRunnerFactory factory, QueryRunner runner, Query<T> query) {
QueryToolChest toolChest = factory.getToolchest();
QueryRunner<T> theRunner = new FinalizeResultsQueryRunner<>(
    toolChest.mergeResults(toolChest.preMergeQueryDecoration(runner)),
    toolChest
);
Sequence<T> queryResult = theRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
return queryResult.toList();
}
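The wrapping order in runQuery is the canonical toolchest pipeline: pre-merge decoration innermost, then mergeResults, then FinalizeResultsQueryRunner outermost. A toy sketch making that innermost-first ordering explicit, with hypothetical stages standing in for the real decorators:

import java.util.List;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;

public class PipelineOrderSketch {
  public static void main(String[] args) {
    // Hypothetical stages standing in for preMergeQueryDecoration, mergeResults,
    // and FinalizeResultsQueryRunner; each transforms the result stream.
    UnaryOperator<List<String>> preMerge = rows ->
        rows.stream().map(String::trim).collect(Collectors.toList());
    UnaryOperator<List<String>> merge = rows ->
        rows.stream().sorted().collect(Collectors.toList());
    UnaryOperator<List<String>> finalizeStage = rows ->
        rows.stream().map(String::toUpperCase).collect(Collectors.toList());

    List<String> raw = List.of(" b", "a ");
    // Innermost stage runs first, mirroring mergeResults(preMergeQueryDecoration(runner)).
    List<String> out = finalizeStage.apply(merge.apply(preMerge.apply(raw)));
    System.out.println(out); // [A, B]
  }
}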