Examples with GroupingAggregatorFactory - org.apache.druid.query.aggregation.GroupingAggregatorFactory

Example 1 with GroupingAggregatorFactory

Use of org.apache.druid.query.aggregation.GroupingAggregatorFactory in project druid by druid-io.

From class CalciteQueryTest, method testGroupingAggregatorDifferentOrder.

/**
 * Plans a GROUPING SETS query whose {@code GROUPING(gran, dim2)} call lists its arguments in the
 * opposite order from the GROUP BY columns, and checks both the native query and the result rows.
 *
 * <p>The expected native query carries a {@code GroupingAggregatorFactory} named "a1" over the
 * virtual columns ("v1", "v0"), i.e. in the order the SQL call names them.
 */
@Test
public void testGroupingAggregatorDifferentOrder() throws Exception {
    requireMergeBuffers(3);
    // Cannot vectorize due to virtual columns.
    cannotVectorize();

    final String sql = "SELECT dim2, gran, SUM(cnt), GROUPING(gran, dim2)\n"
        + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n"
        + "GROUP BY GROUPING SETS ( (dim2, gran), (dim2), (gran), () )";

    testQuery(
        sql,
        // Expected native query: one groupBy with four subtotal specs.
        ImmutableList.of(
            GroupByQuery.builder()
                .setDataSource(CalciteTests.DATASOURCE1)
                .setInterval(querySegmentSpec(Filtration.eternity()))
                .setGranularity(Granularities.ALL)
                .setVirtualColumns(
                    expressionVirtualColumn("v0", "case_searched(notnull(\"dim2\"),\"dim2\",'')", ColumnType.STRING),
                    expressionVirtualColumn("v1", "timestamp_floor(\"__time\",'P1M',null,'UTC')", ColumnType.LONG)
                )
                .setDimensions(
                    dimensions(
                        new DefaultDimensionSpec("v0", "d0"),
                        new DefaultDimensionSpec("v1", "d1", ColumnType.LONG)
                    )
                )
                .setAggregatorSpecs(
                    aggregators(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new GroupingAggregatorFactory("a1", Arrays.asList("v1", "v0"))
                    )
                )
                .setSubtotalsSpec(
                    ImmutableList.of(
                        ImmutableList.of("d0", "d1"),
                        ImmutableList.of("d0"),
                        ImmutableList.of("d1"),
                        ImmutableList.of()
                    )
                )
                .setContext(withTimestampResultContext(QUERY_CONTEXT_DEFAULT, "d1", 1, Granularities.MONTH))
                .build()
        ),
        // Expected rows: dim2, gran, SUM(cnt), GROUPING(gran, dim2).
        ImmutableList.of(
            new Object[] { "", timestamp("2000-01-01"), 2L, 0L },
            new Object[] { "", timestamp("2001-01-01"), 1L, 0L },
            new Object[] { "a", timestamp("2000-01-01"), 1L, 0L },
            new Object[] { "a", timestamp("2001-01-01"), 1L, 0L },
            new Object[] { "abc", timestamp("2001-01-01"), 1L, 0L },
            new Object[] { "", null, 3L, 2L },
            new Object[] { "a", null, 2L, 2L },
            new Object[] { "abc", null, 1L, 2L },
            new Object[] { NULL_STRING, timestamp("2000-01-01"), 3L, 1L },
            new Object[] { NULL_STRING, timestamp("2001-01-01"), 3L, 1L },
            new Object[] { NULL_STRING, null, 6L, 3L }
        )
    );
}

Also used : GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)

Example 2 with GroupingAggregatorFactory

Use of org.apache.druid.query.aggregation.GroupingAggregatorFactory in project druid by druid-io.

From class CalciteQueryTest, method testGroupingSets.

/**
 * Plans a GROUPING SETS query with {@code GROUPING(dim2, gran)} and checks both the native query
 * and the result rows.
 *
 * <p>The expected native query carries a {@code GroupingAggregatorFactory} named "a1" over the
 * virtual columns ("v0", "v1") and a subtotals spec with one entry per grouping set.
 */
@Test
public void testGroupingSets() throws Exception {
    // Cannot vectorize due to virtual columns.
    cannotVectorize();

    final String sql = "SELECT dim2, gran, SUM(cnt), GROUPING(dim2, gran)\n"
        + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n"
        + "GROUP BY GROUPING SETS ( (dim2, gran), (dim2), (gran), () )";

    testQuery(
        sql,
        // Expected native query: one groupBy with four subtotal specs.
        ImmutableList.of(
            GroupByQuery.builder()
                .setDataSource(CalciteTests.DATASOURCE1)
                .setInterval(querySegmentSpec(Filtration.eternity()))
                .setGranularity(Granularities.ALL)
                .setVirtualColumns(
                    expressionVirtualColumn("v0", "case_searched(notnull(\"dim2\"),\"dim2\",'')", ColumnType.STRING),
                    expressionVirtualColumn("v1", "timestamp_floor(\"__time\",'P1M',null,'UTC')", ColumnType.LONG)
                )
                .setDimensions(
                    dimensions(
                        new DefaultDimensionSpec("v0", "d0"),
                        new DefaultDimensionSpec("v1", "d1", ColumnType.LONG)
                    )
                )
                .setAggregatorSpecs(
                    aggregators(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new GroupingAggregatorFactory("a1", Arrays.asList("v0", "v1"))
                    )
                )
                .setSubtotalsSpec(
                    ImmutableList.of(
                        ImmutableList.of("d0", "d1"),
                        ImmutableList.of("d0"),
                        ImmutableList.of("d1"),
                        ImmutableList.of()
                    )
                )
                .setContext(withTimestampResultContext(QUERY_CONTEXT_DEFAULT, "d1", 1, Granularities.MONTH))
                .build()
        ),
        // Expected rows: dim2, gran, SUM(cnt), GROUPING(dim2, gran).
        ImmutableList.of(
            new Object[] { "", timestamp("2000-01-01"), 2L, 0L },
            new Object[] { "", timestamp("2001-01-01"), 1L, 0L },
            new Object[] { "a", timestamp("2000-01-01"), 1L, 0L },
            new Object[] { "a", timestamp("2001-01-01"), 1L, 0L },
            new Object[] { "abc", timestamp("2001-01-01"), 1L, 0L },
            new Object[] { "", null, 3L, 1L },
            new Object[] { "a", null, 2L, 1L },
            new Object[] { "abc", null, 1L, 1L },
            new Object[] { NULL_STRING, timestamp("2000-01-01"), 3L, 2L },
            new Object[] { NULL_STRING, timestamp("2001-01-01"), 3L, 2L },
            new Object[] { NULL_STRING, null, 6L, 3L }
        )
    );
}

Example 3 with GroupingAggregatorFactory

Use of org.apache.druid.query.aggregation.GroupingAggregatorFactory in project druid by druid-io.

From class CalciteQueryTest, method testQueryWithSelectProjectAndIdentityProjectDoesNotRename.

/**
 * Plans a ratio of a conditional SUM to a conditional COUNT(DISTINCT ...) using
 * {@code PLANNER_CONFIG_NO_HLL} with {@code CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT}
 * overridden to "true".
 *
 * <p>The expected plan is a nested groupBy: the inner query groups on virtual column "v0" with a
 * {@code GroupingAggregatorFactory} "a1" and two subtotal specs; the outer query filters its
 * aggregators on the value of "a1" and divides them in a post-aggregator.
 */
@Test
public void testQueryWithSelectProjectAndIdentityProjectDoesNotRename() throws Exception {
    cannotVectorize();
    requireMergeBuffers(3);

    final String sql = "SELECT\n"
        + "(SUM(CASE WHEN (TIMESTAMP '2000-01-04 17:00:00'<=__time AND __time<TIMESTAMP '2022-01-05 17:00:00') THEN 1 ELSE 0 END)*1.0/COUNT(DISTINCT CASE WHEN (TIMESTAMP '2000-01-04 17:00:00'<=__time AND __time<TIMESTAMP '2022-01-05 17:00:00') THEN dim1 END))\n"
        + "FROM druid.foo\n"
        + "GROUP BY ()";

    testQuery(
        PLANNER_CONFIG_NO_HLL.withOverrides(
            ImmutableMap.of(PlannerConfig.CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT, "true")
        ),
        sql,
        CalciteTests.REGULAR_USER_AUTH_RESULT,
        ImmutableList.of(
            GroupByQuery.builder()
                .setDataSource(
                    new QueryDataSource(
                        // Inner query: groups on "v0" and emits the grouping id as "a1".
                        GroupByQuery.builder()
                            .setDataSource(CalciteTests.DATASOURCE1)
                            .setInterval(querySegmentSpec(Filtration.eternity()))
                            .setGranularity(Granularities.ALL)
                            .setVirtualColumns(
                                expressionVirtualColumn(
                                    "v0",
                                    "case_searched(((947005200000 <= \"__time\") && (\"__time\" < 1641402000000)),\"dim1\",null)",
                                    ColumnType.STRING
                                )
                            )
                            .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)))
                            .setAggregatorSpecs(
                                aggregators(
                                    new LongSumAggregatorFactory(
                                        "a0",
                                        null,
                                        "case_searched(((947005200000 <= \"__time\") && (\"__time\" < 1641402000000)),1,0)",
                                        ExprMacroTable.nil()
                                    ),
                                    new GroupingAggregatorFactory("a1", ImmutableList.of("v0"))
                                )
                            )
                            .setSubtotalsSpec(ImmutableList.of(ImmutableList.of("d0"), ImmutableList.of()))
                            .setContext(QUERY_CONTEXT_DEFAULT)
                            .build()
                    )
                )
                .setInterval(querySegmentSpec(Filtration.eternity()))
                .setGranularity(Granularities.ALL)
                .setAggregatorSpecs(
                    aggregators(
                        new FilteredAggregatorFactory(
                            new LongMinAggregatorFactory("_a0", "a0"),
                            selector("a1", "1", null)
                        ),
                        new FilteredAggregatorFactory(
                            new CountAggregatorFactory("_a1"),
                            and(not(selector("d0", null, null)), selector("a1", "0", null))
                        )
                    )
                )
                .setPostAggregatorSpecs(
                    Collections.singletonList(
                        new ExpressionPostAggregator("p0", "((\"_a0\" * 1.0) / \"_a1\")", null, ExprMacroTable.nil())
                    )
                )
                .setContext(QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        ImmutableList.of(new Object[] { 1.0d })
    );
}

Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) ExpressionPostAggregator(org.apache.druid.query.aggregation.post.ExpressionPostAggregator) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) LongMinAggregatorFactory(org.apache.druid.query.aggregation.LongMinAggregatorFactory) Test(org.junit.Test)

Example 4 with GroupingAggregatorFactory

Use of org.apache.druid.query.aggregation.GroupingAggregatorFactory in project druid by druid-io.

From class GroupingSqlAggregator, method toDruidAggregation.

/**
 * Builds an {@link Aggregation} wrapping a {@link GroupingAggregatorFactory} for the given
 * aggregate call.
 *
 * <p>Each argument index of the call is resolved to a column name through {@code getColumnName}.
 * If an existing aggregation already holds a {@link GroupingAggregatorFactory} with the same
 * groupings and the same name, that factory is reused; otherwise a new one is created.
 * {@code rexBuilder} and {@code finalizeAggregations} are not used by this method.
 *
 * @param name                 output name for the aggregator
 * @param aggregateCall        call whose argument list supplies the grouping columns
 * @param existingAggregations aggregations searched for a reusable grouping factory
 * @return the aggregation, or {@code null} when at least one argument did not resolve to a
 *         column name
 */
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
    // Resolve every argument; unresolved ones come back as null and are dropped here.
    final List<String> arguments = aggregateCall.getArgList()
        .stream()
        .map(argIndex -> getColumnName(plannerContext, rowSignature, project, virtualColumnRegistry, argIndex))
        .filter(Objects::nonNull)
        .collect(Collectors.toList());

    // Fewer names than arguments means something was dropped above.
    final boolean everyArgumentResolved = arguments.size() >= aggregateCall.getArgList().size();
    if (!everyArgumentResolved) {
        return null;
    }

    // Reuse an equivalent grouping factory if one has already been planned.
    for (Aggregation candidate : existingAggregations) {
        for (AggregatorFactory existingFactory : candidate.getAggregatorFactories()) {
            if (existingFactory instanceof GroupingAggregatorFactory) {
                final GroupingAggregatorFactory groupingFactory = (GroupingAggregatorFactory) existingFactory;
                final boolean sameGroupings = groupingFactory.getGroupings().equals(arguments);
                if (sameGroupings && groupingFactory.getName().equals(name)) {
                    return Aggregation.create(groupingFactory);
                }
            }
        }
    }

    final AggregatorFactory freshFactory = new GroupingAggregatorFactory(name, arguments);
    return Aggregation.create(freshFactory);
}

Also used : Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) Nullable(javax.annotation.Nullable)

Example 5 with GroupingAggregatorFactory

Use of org.apache.druid.query.aggregation.GroupingAggregatorFactory in project druid by druid-io.

From class RowBasedGrouperHelper, method makeGrouperIterator.

/**
 * Wraps {@code grouper.iterator(true)} in a {@link CloseableGrouperIterator} that converts each
 * grouper entry into a {@link ResultRow}.
 *
 * <p>When {@code dimsToInclude} is non-null, only dimensions whose output names are found in the
 * query's result row signature are copied into the row, and every
 * {@link GroupingAggregatorFactory} value is replaced by the value computed for the included
 * dimensions. When it is null, all dimensions and aggregator values are copied as-is.
 *
 * @param grouper       source of the grouped entries
 * @param query         query supplying the row layout, granularity and aggregator specs
 * @param dimsToInclude dimensions to keep in each row, or {@code null} to keep all of them
 * @param closeable     passed through to the returned iterator
 * @return an iterator producing one result row per grouper entry
 */
public static CloseableGrouperIterator<RowBasedKey, ResultRow> makeGrouperIterator(final Grouper<RowBasedKey> grouper, final GroupByQuery query, @Nullable final List<DimensionSpec> dimsToInclude, final Closeable closeable) {
    final boolean includeTimestamp = query.getResultRowHasTimestamp();
    final BitSet dimsToIncludeBitSet = new BitSet(query.getDimensions().size());
    final int resultRowDimensionStart = query.getResultRowDimensionStart();
    final int aggregatorCount = query.getAggregatorSpecs().size();
    final BitSet groupingAggregatorsBitSet = new BitSet(aggregatorCount);
    final Object[] groupingAggregatorValues = new Long[aggregatorCount];

    if (dimsToInclude != null) {
        // Mark, relative to the first dimension slot, which dimensions stay in the result row.
        for (DimensionSpec includedSpec : dimsToInclude) {
            final String outputName = includedSpec.getOutputName();
            final int signatureIndex = query.getResultRowSignature().indexOf(outputName);
            if (signatureIndex < 0) {
                continue;
            }
            dimsToIncludeBitSet.set(signatureIndex - resultRowDimensionStart);
        }

        // Aggregators only know the input column names of dimensions, not their output names, so
        // the key dimension names are collected from getDimension().
        // Because some dimensions are excluded from the result row, the value of every grouping
        // aggregator has to change to reflect the dimensions actually being grouped on. Record
        // which aggregator positions are grouping aggregators, and the replacement value for each.
        final Set<String> keyDimensionNames = dimsToInclude.stream()
            .map(DimensionSpec::getDimension)
            .collect(Collectors.toSet());
        for (int aggIndex = 0; aggIndex < aggregatorCount; aggIndex++) {
            final AggregatorFactory spec = query.getAggregatorSpecs().get(aggIndex);
            if (!(spec instanceof GroupingAggregatorFactory)) {
                continue;
            }
            groupingAggregatorsBitSet.set(aggIndex);
            groupingAggregatorValues[aggIndex] = ((GroupingAggregatorFactory) spec).withKeyDimensions(keyDimensionNames).getValue();
        }
    }

    return new CloseableGrouperIterator<>(grouper.iterator(true), entry -> {
        final ResultRow row = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());
        final Object[] key = entry.getKey().getKey();

        // Timestamp, when present, lives in key slot 0 and is bucketed by the query granularity.
        if (includeTimestamp) {
            row.set(0, query.getGranularity().toDateTime((long) key[0]).getMillis());
        }

        // Dimensions: skip the ones that were not asked for.
        for (int pos = resultRowDimensionStart; pos < key.length; pos++) {
            if (dimsToInclude != null && !dimsToIncludeBitSet.get(pos - resultRowDimensionStart)) {
                continue;
            }
            final Object dimVal = key[pos];
            if (dimVal instanceof String) {
                row.set(pos, NullHandling.emptyToNullIfNeeded((String) dimVal));
            } else {
                row.set(pos, dimVal);
            }
        }

        // Aggregations: grouping aggregators get the precomputed value for the included dimensions.
        final int resultRowAggregatorStart = query.getResultRowAggregatorStart();
        for (int aggIndex = 0; aggIndex < entry.getValues().length; aggIndex++) {
            final boolean overridden = dimsToInclude != null && groupingAggregatorsBitSet.get(aggIndex);
            row.set(
                resultRowAggregatorStart + aggIndex,
                overridden ? groupingAggregatorValues[aggIndex] : entry.getValues()[aggIndex]
            );
        }
        return row;
    }, closeable);
}

Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) BitSet(java.util.BitSet) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime)

Aggregations

GroupingAggregatorFactory (org.apache.druid.query.aggregation.GroupingAggregatorFactory): 8; DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 6; Test (org.junit.Test): 6; LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 5; QueryDataSource (org.apache.druid.query.QueryDataSource): 3; CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 3; FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory): 3; AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 2; LongMinAggregatorFactory (org.apache.druid.query.aggregation.LongMinAggregatorFactory): 2; ExpressionPostAggregator (org.apache.druid.query.aggregation.post.ExpressionPostAggregator): 2; BitSet (java.util.BitSet): 1; Nullable (javax.annotation.Nullable): 1; DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 1; ResultRow (org.apache.druid.query.groupby.ResultRow): 1; Aggregation (org.apache.druid.sql.calcite.aggregation.Aggregation): 1; DateTime (org.joda.time.DateTime): 1