
Example 96 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class GroupByStrategyV2 method mergeResults.

@Override
public Sequence<ResultRow> mergeResults(final QueryRunner<ResultRow> baseRunner, final GroupByQuery query, final ResponseContext responseContext) {
    // Merge streams using ResultMergeQueryRunner, then apply postaggregators, then apply limit (which may
    // involve materialization)
    final ResultMergeQueryRunner<ResultRow> mergingQueryRunner = new ResultMergeQueryRunner<>(baseRunner, this::createResultComparator, this::createMergeFn);
    // Set up downstream context.
    final ImmutableMap.Builder<String, Object> context = ImmutableMap.builder();
    context.put("finalize", false);
    context.put(GroupByQueryConfig.CTX_KEY_STRATEGY, GroupByStrategySelector.STRATEGY_V2);
    context.put(CTX_KEY_OUTERMOST, false);
    Granularity granularity = query.getGranularity();
    List<DimensionSpec> dimensionSpecs = query.getDimensions();
    // the CTX_TIMESTAMP_RESULT_FIELD is set in DruidQuery.java
    final String timestampResultField = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD);
    final boolean hasTimestampResultField = (timestampResultField != null && !timestampResultField.isEmpty()) && query.getContextBoolean(CTX_KEY_OUTERMOST, true) && !query.isApplyLimitPushDown();
    int timestampResultFieldIndex = 0;
    if (hasTimestampResultField) {
        // For SQL like "GROUP BY city_id, TIME_FLOOR(__time TO DAY)",
        // the straightforward translation is granularity=all with dimensions [d0, d1].
        // The better plan is granularity=day with dimensions [d0],
        // but that changes the ResultRow structure from [d0, d1] to [__time, d0],
        // so the structure has to be restored to [d0, d1] (actually [d0, __time]) before postAggs are applied.
        //
        // That is the general idea of this optimization.
        // From a coding perspective, though, granularity=all and the "d0" dimension are referenced in many places,
        // e.g. subtotals, having, grouping sets, post aggs,
        // so a great many places would need fixing if the "d0" dimension were removed from query.dimensions,
        // and the same goes for the granularity change.
        // To keep the code simpler, this optimization is therefore implemented as an engine-level internal change
        // to groupBy processing. Most of the code is in GroupByStrategyV2 and concerns the processing split
        // between the broker and the compute nodes.
        // The basic logic, such as nested queries and subtotals, is kept unchanged;
        // it still sees granularity=all and the "d0" dimension.
        //
        // The tradeoff is that GroupByStrategyV2 behaves differently depending on the query context set in DruidQuery.
        // In other words, the query produced by "EXPLAIN PLAN FOR SELECT ..." does not match the native query that is
        // ACTUALLY executed; the granularity and dimensions are slightly different.
        // Part of the query planning logic is now handled in GroupByStrategyV2, not only in DruidQuery.toGroupByQuery().
        final Granularity timestampResultFieldGranularity = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_GRANULARITY);
        dimensionSpecs = query.getDimensions().stream().filter(dimensionSpec -> !dimensionSpec.getOutputName().equals(timestampResultField)).collect(Collectors.toList());
        granularity = timestampResultFieldGranularity;
        // When timestampResultField is the last dimension, sortByDimsFirst should be set to true;
        // otherwise the downstream sorts rows by timestamp first, which makes the final ordering not as expected.
        timestampResultFieldIndex = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX);
        if (!query.getContextSortByDimsFirst() && timestampResultFieldIndex == query.getDimensions().size() - 1) {
            context.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, true);
        }
        // When timestampResultField is the first dimension and sortByDimsFirst=true,
        // this is effectively equivalent to sortByDimsFirst=false.
        if (query.getContextSortByDimsFirst() && timestampResultFieldIndex == 0) {
            context.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
        }
    // when hasTimestampResultField=true and timestampResultField is neither first nor last dimension,
    // the DefaultLimitSpec will always do the reordering
    }
    final int timestampResultFieldIndexInOriginalDimensions = timestampResultFieldIndex;
    if (query.getUniversalTimestamp() != null && !hasTimestampResultField) {
        // universalTimestamp works only when granularity is all
        // hasTimestampResultField works only when granularity is all
        // fudgeTimestamp should not be used when hasTimestampResultField=true, because the row's actual timestamp is used
        context.put(CTX_KEY_FUDGE_TIMESTAMP, String.valueOf(query.getUniversalTimestamp().getMillis()));
    }
    // The having spec shouldn't be passed down, so we need to convey the existing limit push down status
    context.put(GroupByQueryConfig.CTX_KEY_APPLY_LIMIT_PUSH_DOWN, query.isApplyLimitPushDown());
    // Always request array result rows when passing the query downstream.
    context.put(GroupByQueryConfig.CTX_KEY_ARRAY_RESULT_ROWS, true);
    final GroupByQuery newQuery = new GroupByQuery(
        query.getDataSource(),
        query.getQuerySegmentSpec(),
        query.getVirtualColumns(),
        query.getDimFilter(),
        granularity,
        dimensionSpecs,
        query.getAggregatorSpecs(),
        // Don't apply postaggregators on compute nodes
        ImmutableList.of(),
        // Don't do "having" clause until the end of this method.
        null,
        // Apply limit push-down here if it is enabled; otherwise leave limiting to the outer query (i.e. higher-up).
        query.isApplyLimitPushDown() ? ((DefaultLimitSpec) query.getLimitSpec()).withOffsetToLimit() : null,
        query.getSubtotalsSpec(),
        query.getContext()
    ).withOverriddenContext(context.build());
    final Sequence<ResultRow> mergedResults = mergingQueryRunner.run(QueryPlus.wrap(newQuery), responseContext);
    if (!query.getContextBoolean(CTX_KEY_OUTERMOST, true) || query.getContextBoolean(GroupByQueryConfig.CTX_KEY_EXECUTING_NESTED_QUERY, false)) {
        return mergedResults;
    } else if (query.getPostAggregatorSpecs().isEmpty()) {
        if (!hasTimestampResultField) {
            return mergedResults;
        }
        return Sequences.map(mergedResults, row -> {
            final ResultRow resultRow = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());
            moveOrReplicateTimestampInRow(query, timestampResultFieldIndexInOriginalDimensions, row, resultRow);
            return resultRow;
        });
    } else {
        return Sequences.map(mergedResults, row -> {
            // This function's purpose is to apply PostAggregators.
            final ResultRow rowWithPostAggregations = ResultRow.create(query.getResultRowSizeWithPostAggregators());
            // Copy everything that comes before the postaggregations.
            if (hasTimestampResultField) {
                moveOrReplicateTimestampInRow(query, timestampResultFieldIndexInOriginalDimensions, row, rowWithPostAggregations);
            } else {
                for (int i = 0; i < query.getResultRowPostAggregatorStart(); i++) {
                    rowWithPostAggregations.set(i, row.get(i));
                }
            }
            // Compute postaggregations. We need to do this with a result-row map because PostAggregator.compute
            // expects a map. Some further design adjustment may eliminate the need for it, and speed up this function.
            final Map<String, Object> mapForPostAggregationComputation = rowWithPostAggregations.toMap(query);
            for (int i = 0; i < query.getPostAggregatorSpecs().size(); i++) {
                final PostAggregator postAggregator = query.getPostAggregatorSpecs().get(i);
                final Object value = postAggregator.compute(mapForPostAggregationComputation);
                rowWithPostAggregations.set(query.getResultRowPostAggregatorStart() + i, value);
                mapForPostAggregationComputation.put(postAggregator.getName(), value);
            }
            return rowWithPostAggregations;
        });
    }
}
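
The loop at the end of mergeResults converts each ResultRow into a map because PostAggregator.compute expects a map of previously computed values, and each newly computed post-aggregation is fed back into that map so later post-aggregators can reference it. Below is a minimal, self-contained sketch of that pattern, not part of GroupByStrategyV2; the "sum" and "count" entries and the "average" post-aggregator are made up for illustration.

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

import java.util.HashMap;
import java.util.Map;

public class PostAggregationSketch
{
    public static void main(String[] args)
    {
        // Stand-ins for aggregator outputs that would already be present in the ResultRow-as-map.
        final Map<String, Object> mapForPostAggregationComputation = new HashMap<>();
        mapForPostAggregationComputation.put("sum", 10.0);
        mapForPostAggregationComputation.put("count", 4.0);

        // Hypothetical post-aggregator: average = sum / count.
        final PostAggregator average = new ArithmeticPostAggregator(
            "average",
            "quotient",
            ImmutableList.of(
                new FieldAccessPostAggregator("sum", "sum"),
                new FieldAccessPostAggregator("count", "count")
            )
        );

        // Same pattern as the loop above: compute from the map, then put the result back
        // under the post-aggregator's name so later post-aggregators can reference it.
        final Object value = average.compute(mapForPostAggregationComputation);
        mapForPostAggregationComputation.put(average.getName(), value);

        System.out.println(value); // 2.5
    }
}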
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) QueryPlus(org.apache.druid.query.QueryPlus) GroupByQueryEngineV2(org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2) Inject(com.google.inject.Inject) Smile(org.apache.druid.guice.annotations.Smile) Merging(org.apache.druid.guice.annotations.Merging) QueryProcessingPool(org.apache.druid.query.QueryProcessingPool) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) GroupByBinaryFnV2(org.apache.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) QueryWatcher(org.apache.druid.query.QueryWatcher) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) GroupByMergingQueryRunnerV2(org.apache.druid.query.groupby.epinephelinae.GroupByMergingQueryRunnerV2) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) BinaryOperator(java.util.function.BinaryOperator) BlockingPool(org.apache.druid.collections.BlockingPool) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) NonBlockingPool(org.apache.druid.collections.NonBlockingPool) Intervals(org.apache.druid.java.util.common.Intervals) Supplier(com.google.common.base.Supplier) GroupByQueryResource(org.apache.druid.query.groupby.resource.GroupByQueryResource) Utils(org.apache.druid.java.util.common.collect.Utils) ArrayList(java.util.ArrayList) QueryCapacityExceededException(org.apache.druid.query.QueryCapacityExceededException) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Suppliers(com.google.common.base.Suppliers) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) VirtualColumns(org.apache.druid.segment.VirtualColumns) ResponseContext(org.apache.druid.query.context.ResponseContext) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) Global(org.apache.druid.guice.annotations.Global) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) VisibleForTesting(com.google.common.annotations.VisibleForTesting) 
Comparator(java.util.Comparator) CloseableUtils(org.apache.druid.utils.CloseableUtils) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Granularity(org.apache.druid.java.util.common.granularity.Granularity) ImmutableMap(com.google.common.collect.ImmutableMap) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 97 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class GroupByStrategyV2 method processSubtotalsSpec.

@Override
public Sequence<ResultRow> processSubtotalsSpec(GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> queryResult) {
    // How it works:
    // First, we accumulate the result of the top-level base query (the queryResult arg) inside a resultSupplierOne object.
    // Then, for each subtotalSpec:
    // If subtotalSpec is a prefix of the top-level dims, we iterate over the rows in resultSupplierOne, which are still
    // sorted by subtotalSpec, stream-merge them and return.
    //
    // If subtotalSpec is not a prefix of the top-level dims, we create a resultSupplierTwo object filled with rows from
    // resultSupplierOne restricted to the dims from subtotalSpec. Then we iterate over the rows in resultSupplierTwo,
    // which are of course sorted by subtotalSpec, stream-merge them and return.
    //
    // Keep a reference to resultSupplierOne outside the "try" so we can close it if something goes wrong
    // while creating the sequence.
    GroupByRowProcessor.ResultSupplier resultSupplierOne = null;
    try {
        // baseSubtotalQuery is the original query with dimensions and aggregators rewritten to apply to the *results*
        // rather than *inputs* of that query. It has its virtual columns and dim filter removed, because those only
        // make sense when applied to inputs. Finally, it has subtotalsSpec removed, since we'll be computing them
        // one-by-one soon enough.
        GroupByQuery baseSubtotalQuery = query
            .withDimensionSpecs(
                query.getDimensions()
                     .stream()
                     .map(dimSpec -> new DefaultDimensionSpec(dimSpec.getOutputName(), dimSpec.getOutputName(), dimSpec.getOutputType()))
                     .collect(Collectors.toList())
            )
            .withAggregatorSpecs(query.getAggregatorSpecs().stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()))
            .withVirtualColumns(VirtualColumns.EMPTY)
            .withDimFilter(null)
            .withSubtotalsSpec(null)
            .withOverriddenContext(ImmutableMap.of(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, ""));
        resultSupplierOne = GroupByRowProcessor.process(
            baseSubtotalQuery,
            baseSubtotalQuery,
            queryResult,
            configSupplier.get(),
            resource,
            spillMapper,
            processingConfig.getTmpDir(),
            processingConfig.intermediateComputeSizeBytes()
        );
        List<String> queryDimNames = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName).collect(Collectors.toList());
        // Only needed for the LimitSpec.filterColumns(..) call later, in case the base query has a non-default LimitSpec.
        Set<String> aggsAndPostAggs = null;
        if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
            aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery);
        }
        List<List<String>> subtotals = query.getSubtotalsSpec();
        List<Sequence<ResultRow>> subtotalsResults = new ArrayList<>(subtotals.size());
        // Iterate through each subtotalSpec, build results for it and add to subtotalsResults
        for (List<String> subtotalSpec : subtotals) {
            final ImmutableSet<String> dimsInSubtotalSpec = ImmutableSet.copyOf(subtotalSpec);
            // Dimension spec including dimension name and output name
            final List<DimensionSpec> subTotalDimensionSpec = new ArrayList<>(dimsInSubtotalSpec.size());
            final List<DimensionSpec> dimensions = query.getDimensions();
            for (DimensionSpec dimensionSpec : dimensions) {
                if (dimsInSubtotalSpec.contains(dimensionSpec.getOutputName())) {
                    subTotalDimensionSpec.add(dimensionSpec);
                }
            }
            // Create appropriate LimitSpec for subtotal query
            LimitSpec subtotalQueryLimitSpec = NoopLimitSpec.instance();
            if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
                Set<String> columns = new HashSet<>(aggsAndPostAggs);
                columns.addAll(subtotalSpec);
                subtotalQueryLimitSpec = baseSubtotalQuery.getLimitSpec().filterColumns(columns);
            }
            GroupByQuery subtotalQuery = baseSubtotalQuery.withLimitSpec(subtotalQueryLimitSpec);
            final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne;
            if (Utils.isPrefix(subtotalSpec, queryDimNames)) {
                // Since subtotalSpec is a prefix of the base query dimensions, results from the base query are already
                // sorted by subtotalSpec, as required for stream merging.
                subtotalsResults.add(processSubtotalsResultAndOptionallyClose(() -> resultSupplierOneFinal, subTotalDimensionSpec, subtotalQuery, false));
            } else {
                // Since subtotalSpec is not a prefix of the base query dimensions, results from the base query are not
                // sorted by subtotalSpec. So we first feed the base query results into another resultSupplier, which
                // sorts them by subtotalSpec, and then stream-merge them.
                // Also note, we can't create the ResultSupplier eagerly here because we don't want to eagerly allocate
                // merge buffers for processing subtotals.
                Supplier<GroupByRowProcessor.ResultSupplier> resultSupplierTwo = () -> GroupByRowProcessor.process(
                    baseSubtotalQuery,
                    subtotalQuery,
                    resultSupplierOneFinal.results(subTotalDimensionSpec),
                    configSupplier.get(),
                    resource,
                    spillMapper,
                    processingConfig.getTmpDir(),
                    processingConfig.intermediateComputeSizeBytes()
                );
                subtotalsResults.add(processSubtotalsResultAndOptionallyClose(resultSupplierTwo, subTotalDimensionSpec, subtotalQuery, true));
            }
        }
        return Sequences.withBaggage(
            query.postProcess(Sequences.concat(subtotalsResults)),
            // this will close resources allocated by resultSupplierOne after the sequence is read
            resultSupplierOne
        );
    } catch (Throwable e) {
        throw CloseableUtils.closeAndWrapInCatch(e, resultSupplierOne);
    }
}
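
The Utils.isPrefix check above is what decides whether the base results can be stream-merged directly or must be re-sorted through a second ResultSupplier. A minimal standalone sketch of that idea follows, with a hypothetical isPrefix helper standing in for Utils.isPrefix.

import java.util.Arrays;
import java.util.List;

public class SubtotalPrefixSketch
{
    // Hypothetical stand-in for Utils.isPrefix: true when "prefix" matches the leading elements of "list".
    static boolean isPrefix(List<String> prefix, List<String> list)
    {
        return prefix.size() <= list.size() && prefix.equals(list.subList(0, prefix.size()));
    }

    public static void main(String[] args)
    {
        // Suppose the base query groups by [d0, d1]; its results are sorted by d0, then d1.
        final List<String> queryDimNames = Arrays.asList("d0", "d1");

        // [d0] is a prefix, so the base results are already sorted by it: stream-merge directly.
        System.out.println(isPrefix(Arrays.asList("d0"), queryDimNames)); // true

        // [d1] is not a prefix, so the rows must be re-processed (resultSupplierTwo) to sort them by d1 first.
        System.out.println(isPrefix(Arrays.asList("d1"), queryDimNames)); // false
    }
}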
Also used : QueryPlus(org.apache.druid.query.QueryPlus) GroupByQueryEngineV2(org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2) Inject(com.google.inject.Inject) Smile(org.apache.druid.guice.annotations.Smile) Merging(org.apache.druid.guice.annotations.Merging) QueryProcessingPool(org.apache.druid.query.QueryProcessingPool) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) GroupByBinaryFnV2(org.apache.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) QueryWatcher(org.apache.druid.query.QueryWatcher) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) GroupByMergingQueryRunnerV2(org.apache.druid.query.groupby.epinephelinae.GroupByMergingQueryRunnerV2) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) BinaryOperator(java.util.function.BinaryOperator) BlockingPool(org.apache.druid.collections.BlockingPool) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) NonBlockingPool(org.apache.druid.collections.NonBlockingPool) Intervals(org.apache.druid.java.util.common.Intervals) Supplier(com.google.common.base.Supplier) GroupByQueryResource(org.apache.druid.query.groupby.resource.GroupByQueryResource) Utils(org.apache.druid.java.util.common.collect.Utils) ArrayList(java.util.ArrayList) QueryCapacityExceededException(org.apache.druid.query.QueryCapacityExceededException) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Suppliers(com.google.common.base.Suppliers) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) VirtualColumns(org.apache.druid.segment.VirtualColumns) ResponseContext(org.apache.druid.query.context.ResponseContext) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) Global(org.apache.druid.guice.annotations.Global) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) CloseableUtils(org.apache.druid.utils.CloseableUtils) 
ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) ArrayList(java.util.ArrayList) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) HashSet(java.util.HashSet)

Example 98 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class MultiValuedDimensionTest method testGroupByExpressionFoldArrayToString.

@Test
public void testGroupByExpressionFoldArrayToString() {
    if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
        expectedException.expect(RuntimeException.class);
        expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
    }
    GroupByQuery query = GroupByQuery
        .builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("tt", "tt"))
        .setVirtualColumns(new ExpressionVirtualColumn("tt", "fold((tag, acc) -> concat(acc, tag), tags, '')", ColumnType.STRING, TestExprMacroTable.INSTANCE))
        .setAggregatorSpecs(new CountAggregatorFactory("count"))
        .setContext(context)
        .build();
    Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(ImmutableList.of(new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))), query);
    List<ResultRow> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", "tt", NullHandling.replaceWithDefault() ? null : "", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t1t2t3", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t3t4t5", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t5t6t7", "count", 2L));
    TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-arrayfn-auto");
}
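
For intuition, the virtual column expression "fold((tag, acc) -> concat(acc, tag), tags, '')" concatenates the values of the multi-value "tags" column into a single string per row, which is why rows whose tags are ["t1", "t2", "t3"] group under "t1t2t3" in the expected results above. Below is a toy Java re-implementation of that fold for one row, purely illustrative and not using Druid's expression engine.

import java.util.Arrays;
import java.util.List;

public class FoldExpressionSketch
{
    public static void main(String[] args)
    {
        // One input row whose multi-value "tags" column is ["t1", "t2", "t3"].
        final List<String> tags = Arrays.asList("t1", "t2", "t3");

        // fold((tag, acc) -> concat(acc, tag), tags, ''): start from '' and append each tag in order.
        String acc = "";
        for (String tag : tags) {
            acc = acc + tag;
        }

        System.out.println(acc); // "t1t2t3", matching one of the expected rows in the test above
    }
}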
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) LegacySegmentSpec(org.apache.druid.query.spec.LegacySegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 99 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class MultiValuedDimensionTest method testGroupByExpressionFoldArrayToStringWithConcats.

@Test
public void testGroupByExpressionFoldArrayToStringWithConcats() {
    if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
        expectedException.expect(RuntimeException.class);
        expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
    }
    GroupByQuery query = GroupByQuery
        .builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("tt", "tt"))
        .setVirtualColumns(new ExpressionVirtualColumn("tt", "fold((tag, acc) -> concat(concat(acc, case_searched(acc == '', '', ', '), concat('foo', tag))), tags, '')", ColumnType.STRING, TestExprMacroTable.INSTANCE))
        .setAggregatorSpecs(new CountAggregatorFactory("count"))
        .setContext(context)
        .build();
    Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(ImmutableList.of(new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))), query);
    List<ResultRow> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", NullHandling.replaceWithDefault() ? null : "foo", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot1, foot2, foot3", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot3, foot4, foot5", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot5, foot6, foot7", "count", 2L));
    TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-arrayfn-auto");
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) LegacySegmentSpec(org.apache.druid.query.spec.LegacySegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 100 with ResultRow

use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.

the class MultiValuedDimensionTest method testGroupByExpressionAutoArrayFnArg.

@Test
public void testGroupByExpressionAutoArrayFnArg() {
    if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
        expectedException.expect(RuntimeException.class);
        expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
    }
    GroupByQuery query = GroupByQuery
        .builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("tt", "tt"))
        .setVirtualColumns(new ExpressionVirtualColumn("tt", "array_to_string(concat('foo', tags), ', ')", ColumnType.STRING, TestExprMacroTable.INSTANCE))
        .setAggregatorSpecs(new CountAggregatorFactory("count"))
        .setContext(context)
        .build();
    Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(ImmutableList.of(new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))), query);
    List<ResultRow> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foo", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot1, foot2, foot3", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot3, foot4, foot5", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot5, foot6, foot7", "count", 2L));
    TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-arrayfn-auto");
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) LegacySegmentSpec(org.apache.druid.query.spec.LegacySegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

ResultRow (org.apache.druid.query.groupby.ResultRow): 129
Test (org.junit.Test): 81
GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 65
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 59
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 58
File (java.io.File): 39
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 37
QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment): 34
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 24
Benchmark (org.openjdk.jmh.annotations.Benchmark): 21
BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode): 21
OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit): 21
IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 20
LegacySegmentSpec (org.apache.druid.query.spec.LegacySegmentSpec): 18
List (java.util.List): 17
DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 15
ArrayList (java.util.ArrayList): 14
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 14
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 13
ByteBuffer (java.nio.ByteBuffer): 12