Search in sources :

Example 31 with ResponseContext

use of org.apache.druid.query.context.ResponseContext in project druid by druid-io.

the class GroupByStrategyV2 method processSubqueryResult.

@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
    // Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
    // while creating the sequence.
    GroupByRowProcessor.ResultSupplier resultSupplier = null;
    try {
        final GroupByQuery queryToRun;
        if (wasQueryPushedDown) {
            // If the query was pushed down, filters would have been applied downstream, so skip it here.
            queryToRun = query.withDimFilter(null).withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY));
        } else {
            queryToRun = query;
        }
        resultSupplier = GroupByRowProcessor.process(queryToRun, wasQueryPushedDown ? queryToRun : subquery, subqueryResult, configSupplier.get(), resource, spillMapper, processingConfig.getTmpDir(), processingConfig.intermediateComputeSizeBytes());
        final GroupByRowProcessor.ResultSupplier finalResultSupplier = resultSupplier;
        return Sequences.withBaggage(mergeResults((queryPlus, responseContext) -> finalResultSupplier.results(null), query, null), finalResultSupplier);
    } catch (Throwable e) {
        throw CloseableUtils.closeAndWrapInCatch(e, resultSupplier);
    }
}
Also used : QueryPlus(org.apache.druid.query.QueryPlus) GroupByQueryEngineV2(org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2) Inject(com.google.inject.Inject) Smile(org.apache.druid.guice.annotations.Smile) Merging(org.apache.druid.guice.annotations.Merging) QueryProcessingPool(org.apache.druid.query.QueryProcessingPool) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) GroupByBinaryFnV2(org.apache.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) QueryWatcher(org.apache.druid.query.QueryWatcher) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) GroupByMergingQueryRunnerV2(org.apache.druid.query.groupby.epinephelinae.GroupByMergingQueryRunnerV2) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) BinaryOperator(java.util.function.BinaryOperator) BlockingPool(org.apache.druid.collections.BlockingPool) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) NonBlockingPool(org.apache.druid.collections.NonBlockingPool) Intervals(org.apache.druid.java.util.common.Intervals) Supplier(com.google.common.base.Supplier) GroupByQueryResource(org.apache.druid.query.groupby.resource.GroupByQueryResource) Utils(org.apache.druid.java.util.common.collect.Utils) ArrayList(java.util.ArrayList) QueryCapacityExceededException(org.apache.druid.query.QueryCapacityExceededException) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Suppliers(com.google.common.base.Suppliers) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) VirtualColumns(org.apache.druid.segment.VirtualColumns) ResponseContext(org.apache.druid.query.context.ResponseContext) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) Global(org.apache.druid.guice.annotations.Global) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) CloseableUtils(org.apache.druid.utils.CloseableUtils) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec)

Example 32 with ResponseContext

use of org.apache.druid.query.context.ResponseContext in project druid by druid-io.

the class MovingAverageQueryRunner method run.

@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext) {
    MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
    List<Interval> intervals;
    final Period period;
    // Get the largest bucket from the list of averagers
    Optional<Integer> opt = maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
    int buckets = opt.orElse(0);
    // Extend the interval beginning by specified bucket - 1
    if (maq.getGranularity() instanceof PeriodGranularity) {
        period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
        int offset = buckets <= 0 ? 0 : (1 - buckets);
        intervals = maq.getIntervals().stream().map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd())).collect(Collectors.toList());
    } else {
        throw new ISE("Only PeriodGranulaity is supported for movingAverage queries");
    }
    Sequence<Row> resultsSeq;
    DataSource dataSource = maq.getDataSource();
    if (maq.getDimensions() != null && !maq.getDimensions().isEmpty() && (dataSource instanceof TableDataSource || dataSource instanceof UnionDataSource || dataSource instanceof QueryDataSource)) {
        // build groupBy query from movingAverage query
        GroupByQuery.Builder builder = GroupByQuery.builder().setDataSource(dataSource).setInterval(intervals).setDimFilter(maq.getFilter()).setGranularity(maq.getGranularity()).setDimensions(maq.getDimensions()).setAggregatorSpecs(maq.getAggregatorSpecs()).setPostAggregatorSpecs(maq.getPostAggregatorSpecs()).setContext(maq.getContext());
        GroupByQuery gbq = builder.build();
        ResponseContext gbqResponseContext = ResponseContext.createEmpty();
        gbqResponseContext.merge(responseContext);
        gbqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(gbq));
        Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(gbq, DateTimes.nowUtc(), "127.0.0.1", new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
    } else {
        // no dimensions, so optimize this as a TimeSeries
        TimeseriesQuery tsq = new TimeseriesQuery(dataSource, new MultipleIntervalSegmentSpec(intervals), false, null, maq.getFilter(), maq.getGranularity(), maq.getAggregatorSpecs(), maq.getPostAggregatorSpecs(), 0, maq.getContext());
        ResponseContext tsqResponseContext = ResponseContext.createEmpty();
        tsqResponseContext.merge(responseContext);
        tsqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(tsq));
        Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(tsq, DateTimes.nowUtc(), "127.0.0.1", new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
    }
    // Process into period buckets
    Sequence<RowBucket> bucketedMovingAvgResults = Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));
    // Apply the windows analysis functions
    Sequence<Row> movingAvgResults = Sequences.simple(new MovingAverageIterable(bucketedMovingAvgResults, maq.getDimensions(), maq.getAveragerSpecs(), maq.getPostAggregatorSpecs(), maq.getAggregatorSpecs()));
    // Apply any postAveragers
    Sequence<Row> movingAvgResultsWithPostAveragers = Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));
    // remove rows outside the reporting window
    List<Interval> reportingIntervals = maq.getIntervals();
    movingAvgResults = Sequences.filter(movingAvgResultsWithPostAveragers, row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp())));
    // Apply any having, sorting, and limits
    movingAvgResults = maq.applyLimit(movingAvgResults);
    return movingAvgResults;
}
Also used : QueryPlus(org.apache.druid.query.QueryPlus) MapBasedRow(org.apache.druid.data.input.MapBasedRow) AveragerFactory(org.apache.druid.query.movingaverage.averagers.AveragerFactory) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) Row(org.apache.druid.data.input.Row) QueryStats(org.apache.druid.server.QueryStats) Interval(org.joda.time.Interval) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Period(org.joda.time.Period) Function(com.google.common.base.Function) ImmutableMap(com.google.common.collect.ImmutableMap) ResponseContext(org.apache.druid.query.context.ResponseContext) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) Throwables(com.google.common.base.Throwables) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) RequestLogger(org.apache.druid.server.log.RequestLogger) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) Result(org.apache.druid.query.Result) List(java.util.List) UnionDataSource(org.apache.druid.query.UnionDataSource) RequestLogLine(org.apache.druid.server.RequestLogLine) Optional(java.util.Optional) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Result(org.apache.druid.query.Result) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) ResponseContext(org.apache.druid.query.context.ResponseContext) ISE(org.apache.druid.java.util.common.ISE) ResultRow(org.apache.druid.query.groupby.ResultRow) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Period(org.joda.time.Period) UnionDataSource(org.apache.druid.query.UnionDataSource) DataSource(org.apache.druid.query.DataSource) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) UnionDataSource(org.apache.druid.query.UnionDataSource) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) QueryStats(org.apache.druid.server.QueryStats) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Row(org.apache.druid.data.input.Row) ResultRow(org.apache.druid.query.groupby.ResultRow) Interval(org.joda.time.Interval)

Example 33 with ResponseContext

use of org.apache.druid.query.context.ResponseContext in project druid by druid-io.

the class MaterializedViewQueryQueryToolChest method preMergeQueryDecoration.

@Override
public QueryRunner preMergeQueryDecoration(final QueryRunner runner) {
    return new QueryRunner() {

        @Override
        public Sequence run(QueryPlus queryPlus, ResponseContext responseContext) {
            Query realQuery = getRealQuery(queryPlus.getQuery());
            QueryToolChest realQueryToolChest = warehouse.getToolChest(realQuery);
            QueryRunner realQueryRunner = realQueryToolChest.preMergeQueryDecoration(new MaterializedViewQueryRunner(runner, optimizer));
            return realQueryRunner.run(queryPlus.withQuery(realQuery), responseContext);
        }
    };
}
Also used : Query(org.apache.druid.query.Query) ResponseContext(org.apache.druid.query.context.ResponseContext) QueryToolChest(org.apache.druid.query.QueryToolChest) QueryRunner(org.apache.druid.query.QueryRunner) QueryPlus(org.apache.druid.query.QueryPlus)

Example 34 with ResponseContext

use of org.apache.druid.query.context.ResponseContext in project druid by druid-io.

the class GroupByStrategyV2 method mergeResults.

@Override
public Sequence<ResultRow> mergeResults(final QueryRunner<ResultRow> baseRunner, final GroupByQuery query, final ResponseContext responseContext) {
    // Merge streams using ResultMergeQueryRunner, then apply postaggregators, then apply limit (which may
    // involve materialization)
    final ResultMergeQueryRunner<ResultRow> mergingQueryRunner = new ResultMergeQueryRunner<>(baseRunner, this::createResultComparator, this::createMergeFn);
    // Set up downstream context.
    final ImmutableMap.Builder<String, Object> context = ImmutableMap.builder();
    context.put("finalize", false);
    context.put(GroupByQueryConfig.CTX_KEY_STRATEGY, GroupByStrategySelector.STRATEGY_V2);
    context.put(CTX_KEY_OUTERMOST, false);
    Granularity granularity = query.getGranularity();
    List<DimensionSpec> dimensionSpecs = query.getDimensions();
    // the CTX_TIMESTAMP_RESULT_FIELD is set in DruidQuery.java
    final String timestampResultField = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD);
    final boolean hasTimestampResultField = (timestampResultField != null && !timestampResultField.isEmpty()) && query.getContextBoolean(CTX_KEY_OUTERMOST, true) && !query.isApplyLimitPushDown();
    int timestampResultFieldIndex = 0;
    if (hasTimestampResultField) {
        // sql like "group by city_id,time_floor(__time to day)",
        // the original translated query is granularity=all and dimensions:[d0, d1]
        // the better plan is granularity=day and dimensions:[d0]
        // but the ResultRow structure is changed from [d0, d1] to [__time, d0]
        // this structure should be fixed as [d0, d1] (actually it is [d0, __time]) before postAggs are called.
        // 
        // the above is the general idea of this optimization.
        // but from coding perspective, the granularity=all and "d0" dimension are referenced by many places,
        // eg: subtotals, having, grouping set, post agg,
        // there would be many many places need to be fixed if "d0" dimension is removed from query.dimensions
        // and the same to the granularity change.
        // so from easier coding perspective, this optimization is coded as groupby engine-level inner process change.
        // the most part of codes are in GroupByStrategyV2 about the process change between broker and compute node.
        // the basic logic like nested queries and subtotals are kept unchanged,
        // they will still see the granularity=all and the "d0" dimension.
        // 
        // the tradeoff is that GroupByStrategyV2 behaviors differently according to the query contexts set in DruidQuery
        // in another word,
        // the query generated by "explain plan for select ..." doesn't match to the native query ACTUALLY being executed,
        // the granularity and dimensions are slightly different.
        // now, part of the query plan logic is handled in GroupByStrategyV2, not only in DruidQuery.toGroupByQuery()
        final Granularity timestampResultFieldGranularity = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_GRANULARITY);
        dimensionSpecs = query.getDimensions().stream().filter(dimensionSpec -> !dimensionSpec.getOutputName().equals(timestampResultField)).collect(Collectors.toList());
        granularity = timestampResultFieldGranularity;
        // when timestampResultField is the last dimension, should set sortByDimsFirst=true,
        // otherwise the downstream is sorted by row's timestamp first which makes the final ordering not as expected
        timestampResultFieldIndex = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX);
        if (!query.getContextSortByDimsFirst() && timestampResultFieldIndex == query.getDimensions().size() - 1) {
            context.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, true);
        }
        // it is actually equals to sortByDimsFirst=false
        if (query.getContextSortByDimsFirst() && timestampResultFieldIndex == 0) {
            context.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
        }
    // when hasTimestampResultField=true and timestampResultField is neither first nor last dimension,
    // the DefaultLimitSpec will always do the reordering
    }
    final int timestampResultFieldIndexInOriginalDimensions = timestampResultFieldIndex;
    if (query.getUniversalTimestamp() != null && !hasTimestampResultField) {
        // universalTimestamp works only when granularity is all
        // hasTimestampResultField works only when granularity is all
        // fudgeTimestamp should not be used when hasTimestampResultField=true due to the row's actual timestamp is used
        context.put(CTX_KEY_FUDGE_TIMESTAMP, String.valueOf(query.getUniversalTimestamp().getMillis()));
    }
    // The having spec shouldn't be passed down, so we need to convey the existing limit push down status
    context.put(GroupByQueryConfig.CTX_KEY_APPLY_LIMIT_PUSH_DOWN, query.isApplyLimitPushDown());
    // Always request array result rows when passing the query downstream.
    context.put(GroupByQueryConfig.CTX_KEY_ARRAY_RESULT_ROWS, true);
    final GroupByQuery newQuery = new GroupByQuery(query.getDataSource(), query.getQuerySegmentSpec(), query.getVirtualColumns(), query.getDimFilter(), granularity, dimensionSpecs, query.getAggregatorSpecs(), // Don't apply postaggregators on compute nodes
    ImmutableList.of(), // Don't do "having" clause until the end of this method.
    null, // higher-up).
    query.isApplyLimitPushDown() ? ((DefaultLimitSpec) query.getLimitSpec()).withOffsetToLimit() : null, query.getSubtotalsSpec(), query.getContext()).withOverriddenContext(context.build());
    final Sequence<ResultRow> mergedResults = mergingQueryRunner.run(QueryPlus.wrap(newQuery), responseContext);
    if (!query.getContextBoolean(CTX_KEY_OUTERMOST, true) || query.getContextBoolean(GroupByQueryConfig.CTX_KEY_EXECUTING_NESTED_QUERY, false)) {
        return mergedResults;
    } else if (query.getPostAggregatorSpecs().isEmpty()) {
        if (!hasTimestampResultField) {
            return mergedResults;
        }
        return Sequences.map(mergedResults, row -> {
            final ResultRow resultRow = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());
            moveOrReplicateTimestampInRow(query, timestampResultFieldIndexInOriginalDimensions, row, resultRow);
            return resultRow;
        });
    } else {
        return Sequences.map(mergedResults, row -> {
            // This function's purpose is to apply PostAggregators.
            final ResultRow rowWithPostAggregations = ResultRow.create(query.getResultRowSizeWithPostAggregators());
            // Copy everything that comes before the postaggregations.
            if (hasTimestampResultField) {
                moveOrReplicateTimestampInRow(query, timestampResultFieldIndexInOriginalDimensions, row, rowWithPostAggregations);
            } else {
                for (int i = 0; i < query.getResultRowPostAggregatorStart(); i++) {
                    rowWithPostAggregations.set(i, row.get(i));
                }
            }
            // Compute postaggregations. We need to do this with a result-row map because PostAggregator.compute
            // expects a map. Some further design adjustment may eliminate the need for it, and speed up this function.
            final Map<String, Object> mapForPostAggregationComputation = rowWithPostAggregations.toMap(query);
            for (int i = 0; i < query.getPostAggregatorSpecs().size(); i++) {
                final PostAggregator postAggregator = query.getPostAggregatorSpecs().get(i);
                final Object value = postAggregator.compute(mapForPostAggregationComputation);
                rowWithPostAggregations.set(query.getResultRowPostAggregatorStart() + i, value);
                mapForPostAggregationComputation.put(postAggregator.getName(), value);
            }
            return rowWithPostAggregations;
        });
    }
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) QueryPlus(org.apache.druid.query.QueryPlus) GroupByQueryEngineV2(org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2) Inject(com.google.inject.Inject) Smile(org.apache.druid.guice.annotations.Smile) Merging(org.apache.druid.guice.annotations.Merging) QueryProcessingPool(org.apache.druid.query.QueryProcessingPool) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) GroupByBinaryFnV2(org.apache.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) QueryWatcher(org.apache.druid.query.QueryWatcher) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) GroupByMergingQueryRunnerV2(org.apache.druid.query.groupby.epinephelinae.GroupByMergingQueryRunnerV2) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) BinaryOperator(java.util.function.BinaryOperator) BlockingPool(org.apache.druid.collections.BlockingPool) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) NonBlockingPool(org.apache.druid.collections.NonBlockingPool) Intervals(org.apache.druid.java.util.common.Intervals) Supplier(com.google.common.base.Supplier) GroupByQueryResource(org.apache.druid.query.groupby.resource.GroupByQueryResource) Utils(org.apache.druid.java.util.common.collect.Utils) ArrayList(java.util.ArrayList) QueryCapacityExceededException(org.apache.druid.query.QueryCapacityExceededException) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Suppliers(com.google.common.base.Suppliers) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) VirtualColumns(org.apache.druid.segment.VirtualColumns) ResponseContext(org.apache.druid.query.context.ResponseContext) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) Global(org.apache.druid.guice.annotations.Global) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) CloseableUtils(org.apache.druid.utils.CloseableUtils) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Granularity(org.apache.druid.java.util.common.granularity.Granularity) ImmutableMap(com.google.common.collect.ImmutableMap) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 35 with ResponseContext

use of org.apache.druid.query.context.ResponseContext in project druid by druid-io.

the class ScanQueryEngine method process.

public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final ResponseContext responseContext) {
    // "legacy" should be non-null due to toolChest.mergeResults
    final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "Expected non-null 'legacy' parameter");
    final Long numScannedRows = responseContext.getRowScanCount();
    if (numScannedRows != null && numScannedRows >= query.getScanRowsLimit() && query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
        return Sequences.empty();
    }
    final boolean hasTimeout = QueryContexts.hasTimeout(query);
    final Long timeoutAt = responseContext.getTimeoutTime();
    final long start = System.currentTimeMillis();
    final StorageAdapter adapter = segment.asStorageAdapter();
    if (adapter == null) {
        throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    final List<String> allColumns = new ArrayList<>();
    if (query.getColumns() != null && !query.getColumns().isEmpty()) {
        if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
            allColumns.add(LEGACY_TIMESTAMP_KEY);
        }
        // Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
        // the compactedList form easier to use.
        allColumns.addAll(query.getColumns());
    } else {
        final Set<String> availableColumns = Sets.newLinkedHashSet(Iterables.concat(Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : ColumnHolder.TIME_COLUMN_NAME), Iterables.transform(Arrays.asList(query.getVirtualColumns().getVirtualColumns()), VirtualColumn::getOutputName), adapter.getAvailableDimensions(), adapter.getAvailableMetrics()));
        allColumns.addAll(availableColumns);
        if (legacy) {
            allColumns.remove(ColumnHolder.TIME_COLUMN_NAME);
        }
    }
    final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
    Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
    final SegmentId segmentId = segment.getId();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));
    // If the row count is not set, set it to 0, else do nothing.
    responseContext.addRowScanCount(0);
    final long limit = calculateRemainingScanRowsLimit(query, responseContext);
    return Sequences.concat(adapter.makeCursors(filter, intervals.get(0), query.getVirtualColumns(), Granularities.ALL, query.getTimeOrder().equals(ScanQuery.Order.DESCENDING) || (query.getTimeOrder().equals(ScanQuery.Order.NONE) && query.isDescending()), null).map(cursor -> new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {

        @Override
        public Iterator<ScanResultValue> make() {
            final List<BaseObjectColumnValueSelector> columnSelectors = new ArrayList<>(allColumns.size());
            for (String column : allColumns) {
                final BaseObjectColumnValueSelector selector;
                if (legacy && LEGACY_TIMESTAMP_KEY.equals(column)) {
                    selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
                } else {
                    selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
                }
                columnSelectors.add(selector);
            }
            final int batchSize = query.getBatchSize();
            return new Iterator<ScanResultValue>() {

                private long offset = 0;

                @Override
                public boolean hasNext() {
                    return !cursor.isDone() && offset < limit;
                }

                @Override
                public ScanResultValue next() {
                    if (!hasNext()) {
                        throw new NoSuchElementException();
                    }
                    if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
                        throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
                    }
                    final long lastOffset = offset;
                    final Object events;
                    final ScanQuery.ResultFormat resultFormat = query.getResultFormat();
                    if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                        events = rowsToCompactedList();
                    } else if (ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) {
                        events = rowsToList();
                    } else {
                        throw new UOE("resultFormat[%s] is not supported", resultFormat.toString());
                    }
                    responseContext.addRowScanCount(offset - lastOffset);
                    if (hasTimeout) {
                        responseContext.putTimeoutTime(timeoutAt - (System.currentTimeMillis() - start));
                    }
                    return new ScanResultValue(segmentId.toString(), allColumns, events);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }

                private List<List<Object>> rowsToCompactedList() {
                    final List<List<Object>> events = new ArrayList<>(batchSize);
                    final long iterLimit = Math.min(limit, offset + batchSize);
                    for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final List<Object> theEvent = new ArrayList<>(allColumns.size());
                        for (int j = 0; j < allColumns.size(); j++) {
                            theEvent.add(getColumnValue(j));
                        }
                        events.add(theEvent);
                    }
                    return events;
                }

                private List<Map<String, Object>> rowsToList() {
                    List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                    final long iterLimit = Math.min(limit, offset + batchSize);
                    for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final Map<String, Object> theEvent = new LinkedHashMap<>();
                        for (int j = 0; j < allColumns.size(); j++) {
                            theEvent.put(allColumns.get(j), getColumnValue(j));
                        }
                        events.add(theEvent);
                    }
                    return events;
                }

                private Object getColumnValue(int i) {
                    final BaseObjectColumnValueSelector selector = columnSelectors.get(i);
                    final Object value;
                    if (legacy && allColumns.get(i).equals(LEGACY_TIMESTAMP_KEY)) {
                        value = DateTimes.utc((long) selector.getObject());
                    } else {
                        value = selector == null ? null : selector.getObject();
                    }
                    return value;
                }
            };
        }

        @Override
        public void cleanup(Iterator<ScanResultValue> iterFromMake) {
        }
    })));
}
Also used : Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) StorageAdapter(org.apache.druid.segment.StorageAdapter) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) Map(java.util.Map) UOE(org.apache.druid.java.util.common.UOE) NoSuchElementException(java.util.NoSuchElementException) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) Sequences(org.apache.druid.java.util.common.guava.Sequences) Segment(org.apache.druid.segment.Segment) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Iterator(java.util.Iterator) ResponseContext(org.apache.druid.query.context.ResponseContext) VirtualColumn(org.apache.druid.segment.VirtualColumn) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Sets(com.google.common.collect.Sets) QueryContexts(org.apache.druid.query.QueryContexts) Granularities(org.apache.druid.java.util.common.granularity.Granularities) List(java.util.List) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) Preconditions(com.google.common.base.Preconditions) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) SegmentId(org.apache.druid.timeline.SegmentId) Filters(org.apache.druid.segment.filter.Filters) Collections(java.util.Collections) Filter(org.apache.druid.query.filter.Filter) ArrayList(java.util.ArrayList) StorageAdapter(org.apache.druid.segment.StorageAdapter) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) Iterator(java.util.Iterator) ISE(org.apache.druid.java.util.common.ISE) ArrayList(java.util.ArrayList) List(java.util.List) SegmentId(org.apache.druid.timeline.SegmentId) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) UOE(org.apache.druid.java.util.common.UOE) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) Filter(org.apache.druid.query.filter.Filter) VirtualColumn(org.apache.druid.segment.VirtualColumn) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) NoSuchElementException(java.util.NoSuchElementException) Interval(org.joda.time.Interval)

Aggregations

ResponseContext (org.apache.druid.query.context.ResponseContext)65 Test (org.junit.Test)44 QueryRunner (org.apache.druid.query.QueryRunner)39 QueryPlus (org.apache.druid.query.QueryPlus)35 Sequence (org.apache.druid.java.util.common.guava.Sequence)30 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)26 FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner)23 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)22 Interval (org.joda.time.Interval)19 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)16 ArrayList (java.util.ArrayList)14 MergeSequence (org.apache.druid.java.util.common.guava.MergeSequence)14 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)14 Result (org.apache.druid.query.Result)12 DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec)12 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)11 List (java.util.List)10 ChainedExecutionQueryRunner (org.apache.druid.query.ChainedExecutionQueryRunner)10 SegmentDescriptor (org.apache.druid.query.SegmentDescriptor)10 OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec)10