Example 56 with OrderByColumnSpec

Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

From class MultiValuedDimensionTest, method testGroupByExpressionMultiMultiAutoAutoDupeIdentifier:

@Test
public void testGroupByExpressionMultiMultiAutoAutoDupeIdentifier() {
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
        .setVirtualColumns(
            new ExpressionVirtualColumn("texpr", "concat(tags, tags)", ColumnType.STRING, TestExprMacroTable.INSTANCE)
        )
        .setLimitSpec(
            new DefaultLimitSpec(
                ImmutableList.of(new OrderByColumnSpec("count", OrderByColumnSpec.Direction.DESCENDING)),
                5
            )
        )
        .setAggregatorSpecs(new CountAggregatorFactory("count"))
        .setContext(context)
        .build();
    Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(
        ImmutableList.of(
            new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
            new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
        ),
        query
    );
    List<ResultRow> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t3t3", "count", 4L),
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t5t5", "count", 4L),
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", NullHandling.emptyToNullIfNeeded(""), "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1t1", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2t2", "count", 2L)
    );
    TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi-auto-auto-self");
}
Also used : OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ResultRow(org.apache.druid.query.groupby.ResultRow) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) LegacySegmentSpec(org.apache.druid.query.spec.LegacySegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
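
The recurring pattern across these examples is wrapping one or more OrderByColumnSpec entries in a DefaultLimitSpec, which supplies both the ordering and the row limit for the group-by. A minimal sketch using only the constructors that appear on this page (class and variable names are illustrative):

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.ordering.StringComparators;

public class OrderBySketch {
    public static void main(String[] args) {
        // Two-argument form, as in the test above: column name plus direction.
        // The DefaultLimitSpec carries the ordering and caps output at 5 rows.
        DefaultLimitSpec limitSpec = new DefaultLimitSpec(
            ImmutableList.of(new OrderByColumnSpec("count", OrderByColumnSpec.Direction.DESCENDING)),
            5
        );
        // Three-argument form, as in Example 59: also selects the comparator,
        // e.g. StringComparators.NUMERIC when ordering on a numeric metric.
        OrderByColumnSpec numericOrder = new OrderByColumnSpec(
            "sumLongSequential", OrderByColumnSpec.Direction.DESCENDING, StringComparators.NUMERIC);
        System.out.println(limitSpec + " / " + numericOrder);
    }
}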

Example 57 with OrderByColumnSpec

Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

From class GroupByQuery, method getRowOrderingForPushDown:

/**
 * When limit push down is applied, the partial results are sorted by the ordering specified by the
 * limit/order spec (unlike the non-push-down case, where results always use the default natural
 * ascending order), so the merge of these partial result streams must use the same ordering to
 * produce correct results.
 */
private Ordering<ResultRow> getRowOrderingForPushDown(final boolean granular, final DefaultLimitSpec limitSpec) {
    final boolean sortByDimsFirst = getContextSortByDimsFirst();
    final IntList orderedFieldNumbers = new IntArrayList();
    final Set<Integer> dimsInOrderBy = new HashSet<>();
    final List<Boolean> needsReverseList = new ArrayList<>();
    final List<ColumnType> dimensionTypes = new ArrayList<>();
    final List<StringComparator> comparators = new ArrayList<>();
    for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
        boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
        int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
        if (dimIndex >= 0) {
            DimensionSpec dim = dimensions.get(dimIndex);
            orderedFieldNumbers.add(resultRowSignature.indexOf(dim.getOutputName()));
            dimsInOrderBy.add(dimIndex);
            needsReverseList.add(needsReverse);
            final ColumnType type = dimensions.get(dimIndex).getOutputType();
            dimensionTypes.add(type);
            comparators.add(orderSpec.getDimensionComparator());
        }
    }
    for (int i = 0; i < dimensions.size(); i++) {
        if (!dimsInOrderBy.contains(i)) {
            orderedFieldNumbers.add(resultRowSignature.indexOf(dimensions.get(i).getOutputName()));
            needsReverseList.add(false);
            final ColumnType type = dimensions.get(i).getOutputType();
            dimensionTypes.add(type);
            comparators.add(StringComparators.LEXICOGRAPHIC);
        }
    }
    final Comparator<ResultRow> timeComparator = getTimeComparator(granular);
    if (timeComparator == null) {
        return Ordering.from((lhs, rhs) -> compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs));
    } else if (sortByDimsFirst) {
        return Ordering.from((lhs, rhs) -> {
            final int cmp = compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
            if (cmp != 0) {
                return cmp;
            }
            return timeComparator.compare(lhs, rhs);
        });
    } else {
        return Ordering.from((lhs, rhs) -> {
            final int timeCompare = timeComparator.compare(lhs, rhs);
            if (timeCompare != 0) {
                return timeCompare;
            }
            return compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
        });
    }
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Longs(com.google.common.primitives.Longs) Function(com.google.common.base.Function) ImmutableMap(com.google.common.collect.ImmutableMap) DataSource(org.apache.druid.query.DataSource) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) Objects(java.util.Objects) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) DimFilter(org.apache.druid.query.filter.DimFilter) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) HavingSpec(org.apache.druid.query.groupby.having.HavingSpec) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) ComparableList(org.apache.druid.segment.data.ComparableList) BaseQuery(org.apache.druid.query.BaseQuery) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) Functions(com.google.common.base.Functions) VirtualColumns(org.apache.druid.segment.VirtualColumns) StringComparator(org.apache.druid.query.ordering.StringComparator) VirtualColumn(org.apache.druid.segment.VirtualColumn) DateTime(org.joda.time.DateTime) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) TableDataSource(org.apache.druid.query.TableDataSource) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Queries(org.apache.druid.query.Queries) IntList(it.unimi.dsi.fastutil.ints.IntList) Ordering(com.google.common.collect.Ordering) LegacySegmentSpec(org.apache.druid.query.spec.LegacySegmentSpec) RowSignature(org.apache.druid.segment.column.RowSignature) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) Comparator(java.util.Comparator) Collections(java.util.Collections)
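
Note how getRowOrderingForPushDown reverses a descending column: it does not negate the comparator result but swaps the operands (the needsReverse flags above), and the buffer comparator in the next example uses the same idiom. A standalone, Druid-free sketch of that idiom (class and method names hypothetical):

import java.util.Comparator;

public class SwapReverseSketch {
    // A descending field reuses the ascending comparator with lhs and rhs
    // exchanged, rather than negating the result (negation would misbehave
    // for a comparator that returns Integer.MIN_VALUE).
    static <T> int compareField(Comparator<T> cmp, boolean needsReverse, T lhs, T rhs) {
        return needsReverse ? cmp.compare(rhs, lhs) : cmp.compare(lhs, rhs);
    }

    public static void main(String[] args) {
        Comparator<String> lex = Comparator.naturalOrder();
        System.out.println(compareField(lex, false, "a", "b")); // < 0: ascending
        System.out.println(compareField(lex, true, "a", "b"));  // > 0: descending
    }
}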

Example 58 with OrderByColumnSpec

Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

From class GrouperBufferComparatorUtils, method bufferComparatorWithAggregators:

public static Grouper.BufferComparator bufferComparatorWithAggregators(
    AggregatorFactory[] aggregatorFactories,
    int[] aggregatorOffsets,
    DefaultLimitSpec limitSpec,
    List<DimensionSpec> dimensions,
    Grouper.BufferComparator[] dimComparators,
    boolean includeTimestamp,
    boolean sortByDimsFirst,
    int keySize
) {
    int dimCount = dimensions.size();
    final List<Boolean> needsReverses = new ArrayList<>();
    List<Grouper.BufferComparator> comparators = new ArrayList<>();
    Set<Integer> orderByIndices = new HashSet<>();
    int aggCount = 0;
    boolean needsReverse;
    for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
        needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
        int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
        if (dimIndex >= 0) {
            comparators.add(dimComparators[dimIndex]);
            orderByIndices.add(dimIndex);
            needsReverses.add(needsReverse);
        } else {
            int aggIndex = OrderByColumnSpec.getAggIndexForOrderBy(orderSpec, Arrays.asList(aggregatorFactories));
            if (aggIndex >= 0) {
                final StringComparator stringComparator = orderSpec.getDimensionComparator();
                final ColumnType valueType = aggregatorFactories[aggIndex].getIntermediateType();
                // Aggregators start after dimensions
                final int aggOffset = keySize + aggregatorOffsets[aggIndex];
                aggCount++;
                if (!valueType.isNumeric()) {
                    throw new IAE("Cannot order by a non-numeric aggregator[%s]", orderSpec);
                }
                comparators.add(makeNullHandlingBufferComparatorForNumericData(aggOffset, makeNumericBufferComparator(valueType, aggOffset, true, stringComparator)));
                needsReverses.add(needsReverse);
            }
        }
    }
    for (int i = 0; i < dimCount; i++) {
        if (!orderByIndices.contains(i)) {
            comparators.add(dimComparators[i]);
                // Default to ascending order if the dimension is not in the order-by spec
            needsReverses.add(false);
        }
    }
    final Grouper.BufferComparator[] adjustedSerdeHelperComparators = comparators.toArray(new Grouper.BufferComparator[0]);
    final int fieldCount = dimCount + aggCount;
    if (includeTimestamp) {
        if (sortByDimsFirst) {
            return new Grouper.BufferComparator() {

                @Override
                public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
                    final int cmp = compareDimsInBuffersForNullFudgeTimestampForPushDown(adjustedSerdeHelperComparators, needsReverses, fieldCount, lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
                    if (cmp != 0) {
                        return cmp;
                    }
                    return Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
                }
            };
        } else {
            return new Grouper.BufferComparator() {

                @Override
                public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
                    final int timeCompare = Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
                    if (timeCompare != 0) {
                        return timeCompare;
                    }
                    return compareDimsInBuffersForNullFudgeTimestampForPushDown(adjustedSerdeHelperComparators, needsReverses, fieldCount, lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
                }
            };
        }
    } else {
        return new Grouper.BufferComparator() {

            @Override
            public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
                for (int i = 0; i < fieldCount; i++) {
                    final int cmp;
                    if (needsReverses.get(i)) {
                        cmp = adjustedSerdeHelperComparators[i].compare(rhsBuffer, lhsBuffer, rhsPosition, lhsPosition);
                    } else {
                        cmp = adjustedSerdeHelperComparators[i].compare(lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
                    }
                    if (cmp != 0) {
                        return cmp;
                    }
                }
                return 0;
            }
        };
    }
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) StringComparator(org.apache.druid.query.ordering.StringComparator) ByteBuffer(java.nio.ByteBuffer) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) HashSet(java.util.HashSet)
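
The comparators above read values at absolute offsets (lhsPosition and rhsPosition, plus a per-field offset such as aggOffset), so one shared ByteBuffer can back many grouping rows. A minimal, self-contained sketch of this access pattern, assuming only standard JDK classes (names hypothetical):

import java.nio.ByteBuffer;

public class BufferCompareSketch {
    public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(16);
        buf.putLong(0, 1000L); // timestamp of the "row" at offset 0
        buf.putLong(8, 2000L); // timestamp of the "row" at offset 8
        // getLong(int) is an absolute read: it does not move the buffer's
        // position, mirroring Longs.compare(lhsBuffer.getLong(lhsPosition), ...)
        System.out.println(Long.compare(buf.getLong(0), buf.getLong(8))); // -1
    }
}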

Example 59 with OrderByColumnSpec

Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

From class GroupByBenchmark, method setupQueries:

private void setupQueries() {
    // queries for the basic schema
    Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new CountAggregatorFactory("cnt"));
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        basicQueries.put("A", queryA);
    }
    {
        // basic.sorted
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setLimitSpec(
                new DefaultLimitSpec(
                    Collections.singletonList(
                        new OrderByColumnSpec("sumLongSequential", OrderByColumnSpec.Direction.DESCENDING, StringComparators.NUMERIC)
                    ),
                    100
                )
            )
            .build();
        basicQueries.put("sorted", queryA);
    }
    {
        // basic.nested
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery subqueryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularities.DAY)
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource(subqueryA)
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularities.WEEK)
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        basicQueries.put("nested", queryA);
    }
    {
        // basic.filter
        final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        // Use multiple aggregators to see how the number of aggregators impacts query performance
        List<AggregatorFactory> queryAggs = ImmutableList.of(
            new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
            new LongSumAggregatorFactory("rows", "rows"),
            new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
            new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf")
        );
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimUniform", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setDimFilter(new BoundDimFilter("dimUniform", "0", "100", true, true, null, null, null))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        basicQueries.put("filter", queryA);
    }
    {
        // basic.singleZipf
        final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        // Use multiple aggregators to see how the number of aggregators impacts query performance
        List<AggregatorFactory> queryAggs = ImmutableList.of(
            new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
            new LongSumAggregatorFactory("rows", "rows"),
            new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
            new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf")
        );
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        basicQueries.put("singleZipf", queryA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
    // simple one-column schema, for testing the performance difference between querying numeric
    // values as strings and directly as longs
    Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo simpleSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simple");
    {
        // simple.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.STRING))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        simpleQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simple", simpleQueries);
    Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo simpleLongSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleLong");
    {
        // simpleLong.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleLongSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.LONG))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        simpleLongQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);
    Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo simpleFloatSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleFloat");
    {
        // simpleFloat.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleFloatSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.FLOAT))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        simpleFloatQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
    // schema with null values, for testing grouping on a column that contains nulls
    Map<String, GroupByQuery> nullQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo nullSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("nulls");
    {
        // simple-null
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(nullSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new DoubleSumAggregatorFactory("doubleSum", "doubleZipf"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("stringZipf", "stringZipf", ColumnType.STRING))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        nullQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("nulls", nullQueries);
}
Also used : BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) LinkedHashMap(java.util.LinkedHashMap) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList)
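
Note the third argument StringComparators.NUMERIC in the "sorted" benchmark: with a lexicographic comparator, string ordering would mis-sort numeric values. A Druid-free sketch of the difference (hypothetical class name):

public class NumericVsLexSketch {
    public static void main(String[] args) {
        System.out.println("9".compareTo("100") > 0);   // true: lexicographically, "9" sorts after "100"
        System.out.println(Long.compare(9L, 100L) < 0); // true: numerically, 9 sorts before 100
    }
}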

Example 60 with OrderByColumnSpec

Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

From class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg:

@Test
public void testGroupByWithDistinctCountAgg() throws Exception {
    IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(
            new IncrementalIndexSchema.Builder()
                .withQueryGranularity(Granularities.SECOND)
                .withMetrics(new CountAggregatorFactory("cnt"))
                .build()
        )
        .setConcurrentEventAdd(true)
        .setMaxRowCount(1000)
        .build();
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    long timestamp = DateTimes.of("2010-01-01").getMillis();
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp + 1, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp + 2, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "2", client_type, "android")));
    GroupByQuery query = new GroupByQuery.Builder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
        .setDimensions(new DefaultDimensionSpec(client_type, client_type))
        .setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
        .setLimitSpec(
            new DefaultLimitSpec(
                Collections.singletonList(new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)),
                10
            )
        )
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new DistinctCountAggregatorFactory("UV", visitor_id, null))
        .build();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, factory.createRunner(incrementalIndexSegment), query);
    List<ResultRow> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L)
    );
    TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) Segment(org.apache.druid.segment.Segment) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
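
A plain-Java sketch (hypothetical class, not the Druid aggregator) of the grouping this test asserts: per client_type, the distinct visitor_id count. It reproduces the UV values in expectedResults from the three ingested rows:

import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

public class DistinctCountSketch {
    public static void main(String[] args) {
        // {visitor_id, client_type} rows, as added to the incremental index above.
        String[][] rows = { {"0", "iphone"}, {"1", "iphone"}, {"2", "android"} };
        Map<String, Set<String>> uvPerClient = new LinkedHashMap<>();
        for (String[] row : rows) {
            uvPerClient.computeIfAbsent(row[1], k -> new HashSet<>()).add(row[0]);
        }
        uvPerClient.forEach((client, visitors) ->
            System.out.println(client + " UV=" + visitors.size()));
        // Prints iphone UV=2 and android UV=1, matching expectedResults above.
    }
}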

Aggregations

OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec): 111 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 105 usages
Test (org.junit.Test): 104 usages
DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 99 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 79 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 46 usages
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 15 usages
FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner): 11 usages
QueryPlus (org.apache.druid.query.QueryPlus): 10 usages
QueryRunner (org.apache.druid.query.QueryRunner): 10 usages
ResponseContext (org.apache.druid.query.context.ResponseContext): 10 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 9 usages
GreaterThanHavingSpec (org.apache.druid.query.groupby.having.GreaterThanHavingSpec): 8 usages
QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec): 8 usages
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 7 usages
ListFilteredVirtualColumn (org.apache.druid.segment.virtual.ListFilteredVirtualColumn): 7 usages
ArrayList (java.util.ArrayList): 6 usages
Sequence (org.apache.druid.java.util.common.guava.Sequence): 6 usages
QueryDataSource (org.apache.druid.query.QueryDataSource): 6 usages
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 6 usages