Search in sources :

Example 16 with QueryToolChest

use of org.apache.druid.query.QueryToolChest in project druid by druid-io.

the class ClientQuerySegmentWalker method inlineIfNecessary.

/**
 * Replace QueryDataSources with InlineDataSources when necessary and possible. "Necessary" is defined as:
 *
 * 1) For outermost subqueries: inlining is necessary if the toolchest cannot handle it.
 * 2) For all other subqueries (e.g. those nested under a join): inlining is always necessary.
 *
 * @param dataSource           datasource to process.
 * @param toolChestIfOutermost if provided, and if the provided datasource is a {@link QueryDataSource}, this method
 *                             will consider whether the toolchest can handle a subquery on the datasource using
 *                             {@link QueryToolChest#canPerformSubquery}. If the toolchest can handle it, then it will
 *                             not be inlined. See {@link org.apache.druid.query.groupby.GroupByQueryQueryToolChest}
 *                             for an example of a toolchest that can handle subqueries.
 * @param subqueryRowLimitAccumulator an accumulator for tracking the number of accumulated rows in all subqueries
 *                                    for a particular master query
 * @param maxSubqueryRows      Max rows that all the subqueries generated by a master query can have, combined
 * @param dryRun               if true, does not actually execute any subqueries, but will inline empty result sets.
 */
// Subquery, toolchest, runner handling all use raw types
@SuppressWarnings({ "rawtypes", "unchecked" })
private DataSource inlineIfNecessary(final DataSource dataSource, @Nullable final QueryToolChest toolChestIfOutermost, final AtomicInteger subqueryRowLimitAccumulator, final int maxSubqueryRows, final boolean dryRun) {
    if (dataSource instanceof QueryDataSource) {
        // This datasource is a subquery.
        final Query subQuery = ((QueryDataSource) dataSource).getQuery();
        final QueryToolChest toolChest = warehouse.getToolChest(subQuery);
        if (toolChestIfOutermost != null && toolChestIfOutermost.canPerformSubquery(subQuery)) {
            // Strip outer queries that are handleable by the toolchest, and inline subqueries that may be underneath
            // them (e.g. subqueries nested under a join).
            final Stack<DataSource> stack = new Stack<>();
            DataSource current = dataSource;
            while (current instanceof QueryDataSource) {
                stack.push(current);
                current = Iterables.getOnlyElement(current.getChildren());
            }
            // lgtm [java/contradictory-type-checks]
            assert !(current instanceof QueryDataSource);
            current = inlineIfNecessary(current, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
            while (!stack.isEmpty()) {
                current = stack.pop().withChildren(Collections.singletonList(current));
            }
            assert current instanceof QueryDataSource;
            if (toolChest.canPerformSubquery(((QueryDataSource) current).getQuery())) {
                return current;
            } else {
                // We need to consider inlining it.
                return inlineIfNecessary(current, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
            }
        } else if (canRunQueryUsingLocalWalker(subQuery) || canRunQueryUsingClusterWalker(subQuery)) {
            // Subquery needs to be inlined. Assign it a subquery id and run it.
            final Sequence<?> queryResults;
            if (dryRun) {
                queryResults = Sequences.empty();
            } else {
                final QueryRunner subqueryRunner = subQuery.getRunner(this);
                queryResults = subqueryRunner.run(QueryPlus.wrap(subQuery), DirectDruidClient.makeResponseContextForQuery());
            }
            return toInlineDataSource(subQuery, queryResults, warehouse.getToolChest(subQuery), subqueryRowLimitAccumulator, maxSubqueryRows);
        } else {
            // Cannot inline subquery. Attempt to inline one level deeper, and then try again.
            return inlineIfNecessary(dataSource.withChildren(Collections.singletonList(inlineIfNecessary(Iterables.getOnlyElement(dataSource.getChildren()), null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun))), toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
        }
    } else {
        // Not a query datasource. Walk children and see if there's anything to inline.
        return dataSource.withChildren(dataSource.getChildren().stream().map(child -> inlineIfNecessary(child, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun)).collect(Collectors.toList()));
    }
}
Also used : QueryDataSource(org.apache.druid.query.QueryDataSource) Query(org.apache.druid.query.Query) QueryToolChest(org.apache.druid.query.QueryToolChest) Sequence(org.apache.druid.java.util.common.guava.Sequence) ResultLevelCachingQueryRunner(org.apache.druid.query.ResultLevelCachingQueryRunner) RetryQueryRunner(org.apache.druid.query.RetryQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) InlineDataSource(org.apache.druid.query.InlineDataSource) TableDataSource(org.apache.druid.query.TableDataSource) Stack(java.util.Stack)

Example 17 with QueryToolChest

use of org.apache.druid.query.QueryToolChest in project druid by druid-io.

the class ServerManager method buildAndDecorateQueryRunner.

private <T> QueryRunner<T> buildAndDecorateQueryRunner(final QueryRunnerFactory<T, Query<T>> factory, final QueryToolChest<T, Query<T>> toolChest, final SegmentReference segment, final Optional<byte[]> cacheKeyPrefix, final SegmentDescriptor segmentDescriptor, final AtomicLong cpuTimeAccumulator) {
    final SpecificSegmentSpec segmentSpec = new SpecificSegmentSpec(segmentDescriptor);
    final SegmentId segmentId = segment.getId();
    final Interval segmentInterval = segment.getDataInterval();
    // If the segment is closed after this line, ReferenceCountingSegmentQueryRunner will handle and do the right thing.
    if (segmentId == null || segmentInterval == null) {
        return new ReportTimelineMissingSegmentQueryRunner<>(segmentDescriptor);
    }
    String segmentIdString = segmentId.toString();
    MetricsEmittingQueryRunner<T> metricsEmittingQueryRunnerInner = new MetricsEmittingQueryRunner<>(emitter, toolChest, new ReferenceCountingSegmentQueryRunner<>(factory, segment, segmentDescriptor), QueryMetrics::reportSegmentTime, queryMetrics -> queryMetrics.segment(segmentIdString));
    StorageAdapter storageAdapter = segment.asStorageAdapter();
    long segmentMaxTime = storageAdapter.getMaxTime().getMillis();
    long segmentMinTime = storageAdapter.getMinTime().getMillis();
    Interval actualDataInterval = Intervals.utc(segmentMinTime, segmentMaxTime + 1);
    CachingQueryRunner<T> cachingQueryRunner = new CachingQueryRunner<>(segmentIdString, cacheKeyPrefix, segmentDescriptor, actualDataInterval, objectMapper, cache, toolChest, metricsEmittingQueryRunnerInner, cachePopulator, cacheConfig);
    BySegmentQueryRunner<T> bySegmentQueryRunner = new BySegmentQueryRunner<>(segmentId, segmentInterval.getStart(), cachingQueryRunner);
    MetricsEmittingQueryRunner<T> metricsEmittingQueryRunnerOuter = new MetricsEmittingQueryRunner<>(emitter, toolChest, bySegmentQueryRunner, QueryMetrics::reportSegmentAndCacheTime, queryMetrics -> queryMetrics.segment(segmentIdString)).withWaitMeasuredFromNow();
    SpecificSegmentQueryRunner<T> specificSegmentQueryRunner = new SpecificSegmentQueryRunner<>(metricsEmittingQueryRunnerOuter, segmentSpec);
    PerSegmentOptimizingQueryRunner<T> perSegmentOptimizingQueryRunner = new PerSegmentOptimizingQueryRunner<>(specificSegmentQueryRunner, new PerSegmentQueryOptimizationContext(segmentDescriptor));
    return new SetAndVerifyContextQueryRunner<>(serverConfig, CPUTimeMetricQueryRunner.safeBuild(perSegmentOptimizingQueryRunner, toolChest, emitter, cpuTimeAccumulator, false));
}
Also used : SegmentManager(org.apache.druid.server.SegmentManager) Inject(com.google.inject.Inject) Smile(org.apache.druid.guice.annotations.Smile) QueryProcessingPool(org.apache.druid.query.QueryProcessingPool) StorageAdapter(org.apache.druid.segment.StorageAdapter) NoopQueryRunner(org.apache.druid.query.NoopQueryRunner) SegmentReference(org.apache.druid.segment.SegmentReference) SpecificSegmentQueryRunner(org.apache.druid.query.spec.SpecificSegmentQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) ReportTimelineMissingSegmentQueryRunner(org.apache.druid.query.ReportTimelineMissingSegmentQueryRunner) CacheConfig(org.apache.druid.client.cache.CacheConfig) StringUtils(org.apache.druid.java.util.common.StringUtils) JoinableFactoryWrapper(org.apache.druid.segment.join.JoinableFactoryWrapper) ISE(org.apache.druid.java.util.common.ISE) SpecificSegmentSpec(org.apache.druid.query.spec.SpecificSegmentSpec) BySegmentQueryRunner(org.apache.druid.query.BySegmentQueryRunner) SetAndVerifyContextQueryRunner(org.apache.druid.server.SetAndVerifyContextQueryRunner) QueryDataSource(org.apache.druid.query.QueryDataSource) PerSegmentQueryOptimizationContext(org.apache.druid.query.PerSegmentQueryOptimizationContext) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) Optional(java.util.Optional) PerSegmentOptimizingQueryRunner(org.apache.druid.query.PerSegmentOptimizingQueryRunner) FunctionalIterable(org.apache.druid.java.util.common.guava.FunctionalIterable) SegmentId(org.apache.druid.timeline.SegmentId) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Intervals(org.apache.druid.java.util.common.Intervals) QueryMetrics(org.apache.druid.query.QueryMetrics) CachingQueryRunner(org.apache.druid.client.CachingQueryRunner) JoinableFactory(org.apache.druid.segment.join.JoinableFactory) Function(java.util.function.Function) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) MetricsEmittingQueryRunner(org.apache.druid.query.MetricsEmittingQueryRunner) Query(org.apache.druid.query.Query) CachePopulator(org.apache.druid.client.cache.CachePopulator) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) ServerConfig(org.apache.druid.server.initialization.ServerConfig) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) QueryToolChest(org.apache.druid.query.QueryToolChest) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) AtomicLong(java.util.concurrent.atomic.AtomicLong) ReferenceCountingSegmentQueryRunner(org.apache.druid.query.ReferenceCountingSegmentQueryRunner) QueryRunnerFactory(org.apache.druid.query.QueryRunnerFactory) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Cache(org.apache.druid.client.cache.Cache) Filters(org.apache.druid.segment.filter.Filters) Collections(java.util.Collections) CPUTimeMetricQueryRunner(org.apache.druid.query.CPUTimeMetricQueryRunner) QueryUnsupportedException(org.apache.druid.query.QueryUnsupportedException) SegmentId(org.apache.druid.timeline.SegmentId) StorageAdapter(org.apache.druid.segment.StorageAdapter) BySegmentQueryRunner(org.apache.druid.query.BySegmentQueryRunner) MetricsEmittingQueryRunner(org.apache.druid.query.MetricsEmittingQueryRunner) PerSegmentQueryOptimizationContext(org.apache.druid.query.PerSegmentQueryOptimizationContext) ReportTimelineMissingSegmentQueryRunner(org.apache.druid.query.ReportTimelineMissingSegmentQueryRunner) SpecificSegmentSpec(org.apache.druid.query.spec.SpecificSegmentSpec) SpecificSegmentQueryRunner(org.apache.druid.query.spec.SpecificSegmentQueryRunner) CachingQueryRunner(org.apache.druid.client.CachingQueryRunner) SetAndVerifyContextQueryRunner(org.apache.druid.server.SetAndVerifyContextQueryRunner) QueryMetrics(org.apache.druid.query.QueryMetrics) PerSegmentOptimizingQueryRunner(org.apache.druid.query.PerSegmentOptimizingQueryRunner) Interval(org.joda.time.Interval)

Example 18 with QueryToolChest

use of org.apache.druid.query.QueryToolChest in project druid by druid-io.

the class MaterializedViewQueryQueryToolChestTest method testDecorateObjectMapper.

@Test
public void testDecorateObjectMapper() throws IOException {
    GroupByQuery realQuery = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD).setDimensions(new DefaultDimensionSpec("quality", "alias")).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index")).setGranularity(QueryRunnerTestHelper.DAY_GRAN).setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_ARRAY_RESULT_ROWS, false)).build();
    QueryToolChest queryToolChest = new MaterializedViewQueryQueryToolChest(new MapQueryToolChestWarehouse(ImmutableMap.<Class<? extends Query>, QueryToolChest>builder().put(GroupByQuery.class, new GroupByQueryQueryToolChest(null)).build()));
    ObjectMapper objectMapper = queryToolChest.decorateObjectMapper(JSON_MAPPER, realQuery);
    List<ResultRow> results = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow(realQuery, "2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), GroupByQueryRunnerTestHelper.createExpectedRow(realQuery, "2011-04-01", "alias", "business", "rows", 1L, "idx", 118L));
    List<MapBasedRow> expectedResults = results.stream().map(resultRow -> resultRow.toMapBasedRow(realQuery)).collect(Collectors.toList());
    Assert.assertEquals("decorate-object-mapper", JSON_MAPPER.writerFor(new TypeReference<List<MapBasedRow>>() {
    }).writeValueAsString(expectedResults), objectMapper.writeValueAsString(results));
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) Arrays(java.util.Arrays) MapBasedRow(org.apache.druid.data.input.MapBasedRow) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Druids(org.apache.druid.query.Druids) Query(org.apache.druid.query.Query) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) Map(java.util.Map) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TypeReference(com.fasterxml.jackson.core.type.TypeReference) MetricManipulationFn(org.apache.druid.query.aggregation.MetricManipulationFn) DateTimes(org.apache.druid.java.util.common.DateTimes) Function(com.google.common.base.Function) ImmutableMap(com.google.common.collect.ImmutableMap) ResultRow(org.apache.druid.query.groupby.ResultRow) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) MapQueryToolChestWarehouse(org.apache.druid.query.MapQueryToolChestWarehouse) QueryToolChest(org.apache.druid.query.QueryToolChest) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Test(org.junit.Test) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) GroupByQueryRunnerTestHelper(org.apache.druid.query.groupby.GroupByQueryRunnerTestHelper) Result(org.apache.druid.query.Result) List(java.util.List) QueryRunnerTestHelper(org.apache.druid.query.QueryRunnerTestHelper) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) Assert(org.junit.Assert) Query(org.apache.druid.query.Query) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) MapBasedRow(org.apache.druid.data.input.MapBasedRow) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) MapQueryToolChestWarehouse(org.apache.druid.query.MapQueryToolChestWarehouse) List(java.util.List) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Test(org.junit.Test)

Example 19 with QueryToolChest

use of org.apache.druid.query.QueryToolChest in project druid by druid-io.

the class MaterializedViewQueryQueryToolChestTest method testMakePostComputeManipulatorFn.

@Test
public void testMakePostComputeManipulatorFn() {
    TimeseriesQuery realQuery = Druids.newTimeseriesQueryBuilder().dataSource(QueryRunnerTestHelper.DATA_SOURCE).granularity(QueryRunnerTestHelper.DAY_GRAN).intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).aggregators(QueryRunnerTestHelper.ROWS_COUNT).descending(true).build();
    MaterializedViewQuery materializedViewQuery = new MaterializedViewQuery(realQuery, null);
    QueryToolChest materializedViewQueryQueryToolChest = new MaterializedViewQueryQueryToolChest(new MapQueryToolChestWarehouse(ImmutableMap.<Class<? extends Query>, QueryToolChest>builder().put(TimeseriesQuery.class, new TimeseriesQueryQueryToolChest()).build()));
    Function postFn = materializedViewQueryQueryToolChest.makePostComputeManipulatorFn(materializedViewQuery, new MetricManipulationFn() {

        @Override
        public Object manipulate(AggregatorFactory factory, Object object) {
            return "metricvalue1";
        }
    });
    Result<TimeseriesResultValue> result = new Result<>(DateTimes.nowUtc(), new TimeseriesResultValue(ImmutableMap.of("dim1", "dimvalue1")));
    Result<TimeseriesResultValue> postResult = (Result<TimeseriesResultValue>) postFn.apply(result);
    Map<String, Object> postResultMap = postResult.getValue().getBaseObject();
    Assert.assertEquals(postResult.getTimestamp(), result.getTimestamp());
    Assert.assertEquals(postResultMap.size(), 2);
    Assert.assertEquals(postResultMap.get(QueryRunnerTestHelper.ROWS_COUNT.getName()), "metricvalue1");
    Assert.assertEquals(postResultMap.get("dim1"), "dimvalue1");
}
Also used : TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Query(org.apache.druid.query.Query) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Result(org.apache.druid.query.Result) Function(com.google.common.base.Function) MapQueryToolChestWarehouse(org.apache.druid.query.MapQueryToolChestWarehouse) MetricManipulationFn(org.apache.druid.query.aggregation.MetricManipulationFn) Test(org.junit.Test)

Example 20 with QueryToolChest

use of org.apache.druid.query.QueryToolChest in project druid by druid-io.

the class TimeseriesBenchmark method runQuery.

private static <T> List<T> runQuery(QueryRunnerFactory factory, QueryRunner runner, Query<T> query) {
    QueryToolChest toolChest = factory.getToolchest();
    QueryRunner<T> theRunner = new FinalizeResultsQueryRunner<>(toolChest.mergeResults(toolChest.preMergeQueryDecoration(runner)), toolChest);
    Sequence<T> queryResult = theRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
    return queryResult.toList();
}
Also used : FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest)

Aggregations

QueryToolChest (org.apache.druid.query.QueryToolChest)47 FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner)33 QueryRunner (org.apache.druid.query.QueryRunner)28 Test (org.junit.Test)23 Result (org.apache.druid.query.Result)18 Query (org.apache.druid.query.Query)17 ExecutorService (java.util.concurrent.ExecutorService)14 TimeseriesQueryQueryToolChest (org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest)14 ArrayList (java.util.ArrayList)13 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)12 TopNQueryQueryToolChest (org.apache.druid.query.topn.TopNQueryQueryToolChest)10 ColumnAnalysis (org.apache.druid.query.metadata.metadata.ColumnAnalysis)8 ListColumnIncluderator (org.apache.druid.query.metadata.metadata.ListColumnIncluderator)8 SegmentAnalysis (org.apache.druid.query.metadata.metadata.SegmentAnalysis)8 SegmentMetadataQuery (org.apache.druid.query.metadata.metadata.SegmentMetadataQuery)8 TimeseriesQuery (org.apache.druid.query.timeseries.TimeseriesQuery)8 TimeseriesResultValue (org.apache.druid.query.timeseries.TimeseriesResultValue)7 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)6 QueryRunnerFactory (org.apache.druid.query.QueryRunnerFactory)6 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)6