Example 16 with ColumnType

Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

The class GroupByQuery, method compareDimsForLimitPushDown:

private static int compareDimsForLimitPushDown(
        final IntList fields,
        final List<Boolean> needsReverseList,
        final List<ColumnType> dimensionTypes,
        final List<StringComparator> comparators,
        final ResultRow lhs,
        final ResultRow rhs) {
    for (int i = 0; i < fields.size(); i++) {
        final int fieldNumber = fields.getInt(i);
        final StringComparator comparator = comparators.get(i);
        final ColumnType dimensionType = dimensionTypes.get(i);
        final int dimCompare;
        final Object lhsObj = lhs.get(fieldNumber);
        final Object rhsObj = rhs.get(fieldNumber);
        if (dimensionType.isNumeric()) {
            if (comparator.equals(StringComparators.NUMERIC)) {
                dimCompare = DimensionHandlerUtils.compareObjectsAsType(lhsObj, rhsObj, dimensionType);
            } else {
                dimCompare = comparator.compare(String.valueOf(lhsObj), String.valueOf(rhsObj));
            }
        } else if (dimensionType.equals(ColumnType.STRING_ARRAY)) {
            final ComparableStringArray lhsArr = DimensionHandlerUtils.convertToComparableStringArray(lhsObj);
            final ComparableStringArray rhsArr = DimensionHandlerUtils.convertToComparableStringArray(rhsObj);
            dimCompare = Comparators.<Comparable>naturalNullsFirst().compare(lhsArr, rhsArr);
        } else if (dimensionType.equals(ColumnType.LONG_ARRAY) || dimensionType.equals(ColumnType.DOUBLE_ARRAY)) {
            final ComparableList lhsArr = DimensionHandlerUtils.convertToList(lhsObj);
            final ComparableList rhsArr = DimensionHandlerUtils.convertToList(rhsObj);
            dimCompare = Comparators.<Comparable>naturalNullsFirst().compare(lhsArr, rhsArr);
        } else {
            dimCompare = comparator.compare((String) lhsObj, (String) rhsObj);
        }
        if (dimCompare != 0) {
            return needsReverseList.get(i) ? -dimCompare : dimCompare;
        }
    }
    return 0;
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ComparableList(org.apache.druid.segment.data.ComparableList) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) StringComparator(org.apache.druid.query.ordering.StringComparator)
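
The dispatch above is worth a second look: numeric dimensions compare by value, while the string fallback compares rendered strings, which gives a different order (for example, "10" sorts before "9" lexicographically). Below is a minimal, self-contained sketch of that idea in plain Java; it uses no Druid classes, and every name in it is a hypothetical stand-in, not the project's API.

// Hypothetical stand-in for the numeric-vs-string dispatch in
// compareDimsForLimitPushDown. Not Druid code.
public class DimCompareSketch {

    // Compare two values either by numeric value or as rendered strings.
    static int compareValue(Object lhs, Object rhs, boolean numeric) {
        if (numeric) {
            // Numeric path: 9 sorts before 10.
            return Double.compare(((Number) lhs).doubleValue(), ((Number) rhs).doubleValue());
        }
        // String fallback: "10" sorts before "9" lexicographically.
        return String.valueOf(lhs).compareTo(String.valueOf(rhs));
    }

    public static void main(String[] args) {
        System.out.println(compareValue(9L, 10L, true));   // negative: 9 < 10
        System.out.println(compareValue(9L, 10L, false));  // positive: "9" > "10"
    }
}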

Example 17 with ColumnType

Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

The class GroupByQuery, method getRowOrderingForPushDown:

/**
 * When limit push down is applied, the partial results are sorted by the ordering specified by the
 * limit/order spec (unlike the non-push-down case, where the results always use the default natural
 * ascending order), so when merging these partial result streams, the merge needs to use the same
 * ordering to get correct results.
 */
private Ordering<ResultRow> getRowOrderingForPushDown(final boolean granular, final DefaultLimitSpec limitSpec) {
    final boolean sortByDimsFirst = getContextSortByDimsFirst();
    final IntList orderedFieldNumbers = new IntArrayList();
    final Set<Integer> dimsInOrderBy = new HashSet<>();
    final List<Boolean> needsReverseList = new ArrayList<>();
    final List<ColumnType> dimensionTypes = new ArrayList<>();
    final List<StringComparator> comparators = new ArrayList<>();
    for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
        boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
        int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
        if (dimIndex >= 0) {
            DimensionSpec dim = dimensions.get(dimIndex);
            orderedFieldNumbers.add(resultRowSignature.indexOf(dim.getOutputName()));
            dimsInOrderBy.add(dimIndex);
            needsReverseList.add(needsReverse);
            final ColumnType type = dimensions.get(dimIndex).getOutputType();
            dimensionTypes.add(type);
            comparators.add(orderSpec.getDimensionComparator());
        }
    }
    for (int i = 0; i < dimensions.size(); i++) {
        if (!dimsInOrderBy.contains(i)) {
            orderedFieldNumbers.add(resultRowSignature.indexOf(dimensions.get(i).getOutputName()));
            needsReverseList.add(false);
            final ColumnType type = dimensions.get(i).getOutputType();
            dimensionTypes.add(type);
            comparators.add(StringComparators.LEXICOGRAPHIC);
        }
    }
    final Comparator<ResultRow> timeComparator = getTimeComparator(granular);
    if (timeComparator == null) {
        return Ordering.from((lhs, rhs) -> compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs));
    } else if (sortByDimsFirst) {
        return Ordering.from((lhs, rhs) -> {
            final int cmp = compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
            if (cmp != 0) {
                return cmp;
            }
            return timeComparator.compare(lhs, rhs);
        });
    } else {
        return Ordering.from((lhs, rhs) -> {
            final int timeCompare = timeComparator.compare(lhs, rhs);
            if (timeCompare != 0) {
                return timeCompare;
            }
            return compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
        });
    }
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) ArrayList(java.util.ArrayList) StringComparator(org.apache.druid.query.ordering.StringComparator) IntList(it.unimi.dsi.fastutil.ints.IntList) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) HashSet(java.util.HashSet)
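
The three return branches at the end of getRowOrderingForPushDown compose the dimension ordering with an optional time ordering. The following sketch mirrors that composition using plain java.util.Comparator instead of Guava's Ordering; the Row record and its fields are hypothetical stand-ins for ResultRow, and mapping the granular flag directly to a time comparator is a simplifying assumption.

import java.util.Comparator;

// Hypothetical sketch of the three ordering branches; not Druid code.
public class PushDownOrderingSketch {

    record Row(long timestamp, String dim) {}

    static Comparator<Row> ordering(boolean granular, boolean sortByDimsFirst) {
        final Comparator<Row> dimComparator = Comparator.comparing(Row::dim);
        // Assumption: no granularity means no time comparator at all.
        final Comparator<Row> timeComparator = granular ? Comparator.comparingLong(Row::timestamp) : null;
        if (timeComparator == null) {
            return dimComparator;
        } else if (sortByDimsFirst) {
            return dimComparator.thenComparing(timeComparator);
        } else {
            return timeComparator.thenComparing(dimComparator);
        }
    }

    public static void main(String[] args) {
        final Comparator<Row> dimsFirst = ordering(true, true);
        // Equal dims, so the time comparator breaks the tie: positive result.
        System.out.println(dimsFirst.compare(new Row(2L, "a"), new Row(1L, "a")));
    }
}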

Example 18 with ColumnType

Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

The class RowBasedGrouperHelper, method createGrouperAccumulatorPair:

/**
 * Create a {@link Grouper} that groups according to the dimensions and aggregators in "query", along with
 * an {@link Accumulator} that accepts ResultRows and forwards them to the grouper.
 *
 * The pair will operate in one of two modes:
 *
 * 1) Combining mode (used if "subquery" is null). In this mode, filters from the "query" are ignored, and
 * its aggregators are converted into combining form. The input ResultRows are assumed to be partially-grouped
 * results originating from the provided "query".
 *
 * 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters from the "query" (both intervals
 * and dim filters) are respected, and its aggregators are used in standard (not combining) form. The input
 * ResultRows are assumed to be results originating from the provided "subquery".
 *
 * @param query               query that we are grouping for
 * @param subquery            optional subquery that we are receiving results from (see combining vs. subquery
 *                            mode above)
 * @param config              groupBy query config
 * @param bufferSupplier      supplier of merge buffers
 * @param combineBufferHolder holder of combine buffers. Unused if concurrencyHint = -1, and may be null in that case
 * @param concurrencyHint     -1 for single-threaded Grouper, >=1 for concurrent Grouper
 * @param temporaryStorage    temporary storage used for spilling from the Grouper
 * @param spillMapper         object mapper used for spilling from the Grouper
 * @param grouperSorter       executor service used for parallel combining. Unused if concurrencyHint = -1, and may
 *                            be null in that case
 * @param priority            query priority
 * @param hasQueryTimeout     whether or not this query has a timeout
 * @param queryTimeoutAt      when this query times out, in milliseconds since the epoch
 * @param mergeBufferSize     size of the merge buffers from "bufferSupplier"
 */
public static Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> createGrouperAccumulatorPair(
        final GroupByQuery query,
        @Nullable final GroupByQuery subquery,
        final GroupByQueryConfig config,
        final Supplier<ByteBuffer> bufferSupplier,
        @Nullable final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder,
        final int concurrencyHint,
        final LimitedTemporaryStorage temporaryStorage,
        final ObjectMapper spillMapper,
        @Nullable final ListeningExecutorService grouperSorter,
        final int priority,
        final boolean hasQueryTimeout,
        final long queryTimeoutAt,
        final int mergeBufferSize) {
    // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
    Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");
    if (concurrencyHint >= 1) {
        Preconditions.checkNotNull(grouperSorter, "grouperSorter executor must be provided");
    }
    // See method-level javadoc; we go into combining mode if there is no subquery.
    final boolean combining = subquery == null;
    final List<ColumnType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final boolean includeTimestamp = query.getResultRowHasTimestamp();
    final ThreadLocal<ResultRow> columnSelectorRow = new ThreadLocal<>();
    ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory(combining ? query : subquery, columnSelectorRow::get, RowSignature.Finalization.UNKNOWN);
    // Apply virtual columns if we are in subquery (non-combining) mode.
    if (!combining) {
        columnSelectorFactory = query.getVirtualColumns().wrap(columnSelectorFactory);
    }
    final boolean willApplyLimitPushDown = query.isApplyLimitPushDown();
    final DefaultLimitSpec limitSpec = willApplyLimitPushDown ? (DefaultLimitSpec) query.getLimitSpec() : null;
    boolean sortHasNonGroupingFields = false;
    if (willApplyLimitPushDown) {
        sortHasNonGroupingFields = DefaultLimitSpec.sortingOrderHasNonGroupingFields(limitSpec, query.getDimensions());
    }
    final AggregatorFactory[] aggregatorFactories;
    if (combining) {
        aggregatorFactories = query.getAggregatorSpecs().stream().map(AggregatorFactory::getCombiningFactory).toArray(AggregatorFactory[]::new);
    } else {
        aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
    }
    final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(includeTimestamp, query.getContextSortByDimsFirst(), query.getDimensions(), querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint), valueTypes, aggregatorFactories, limitSpec);
    final Grouper<RowBasedKey> grouper;
    if (concurrencyHint == -1) {
        grouper = new SpillingGrouper<>(bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories, querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, true, limitSpec, sortHasNonGroupingFields, mergeBufferSize);
    } else {
        final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(
                includeTimestamp,
                query.getContextSortByDimsFirst(),
                query.getDimensions(),
                // use entire dictionary space for combining key serde
                querySpecificConfig.getMaxMergingDictionarySize(),
                valueTypes,
                aggregatorFactories,
                limitSpec
        );
        grouper = new ConcurrentGrouper<>(querySpecificConfig, bufferSupplier, combineBufferHolder, keySerdeFactory, combineKeySerdeFactory, columnSelectorFactory, aggregatorFactories, temporaryStorage, spillMapper, concurrencyHint, limitSpec, sortHasNonGroupingFields, grouperSorter, priority, hasQueryTimeout, queryTimeoutAt);
    }
    final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
    final ValueExtractFunction valueExtractFn = makeValueExtractFunction(query, combining, includeTimestamp, columnSelectorFactory, valueTypes);
    final Predicate<ResultRow> rowPredicate;
    if (combining) {
        // Filters are not applied in combining mode.
        rowPredicate = row -> true;
    } else {
        rowPredicate = getResultRowPredicate(query, subquery);
    }
    final Accumulator<AggregateResult, ResultRow> accumulator = (priorResult, row) -> {
        BaseQuery.checkInterrupted();
        if (priorResult != null && !priorResult.isOk()) {
            // Pass-through error returns without doing more work.
            return priorResult;
        }
        if (!grouper.isInitialized()) {
            grouper.init();
        }
        if (!rowPredicate.test(row)) {
            return AggregateResult.ok();
        }
        columnSelectorRow.set(row);
        final Comparable[] key = new Comparable[keySize];
        valueExtractFn.apply(row, key);
        final AggregateResult aggregateResult = grouper.aggregate(new RowBasedKey(key));
        columnSelectorRow.set(null);
        return aggregateResult;
    };
    return new Pair<>(grouper, accumulator);
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) Pair(org.apache.druid.java.util.common.Pair) ResultRow(org.apache.druid.query.groupby.ResultRow) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)
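
The method-level javadoc distinguishes combining mode from subquery mode. Here is a hedged, Druid-free sketch of what "converted into combining form" means for a simple sum: partially-grouped results from earlier passes are merged by summing partial values per key, rather than re-aggregating raw rows. All names below are hypothetical.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical sketch of combining mode for a sum aggregator; not Druid code.
public class CombiningModeSketch {

    static Map<String, Long> combine(List<Map.Entry<String, Long>> partialResults) {
        final Map<String, Long> combined = new HashMap<>();
        for (Map.Entry<String, Long> partial : partialResults) {
            // The combining "aggregator" for a sum is itself a sum of partials.
            combined.merge(partial.getKey(), partial.getValue(), Long::sum);
        }
        return combined;
    }

    public static void main(String[] args) {
        // Two partial sums for key "a" from different nodes merge to 30.
        System.out.println(combine(List.of(Map.entry("a", 10L), Map.entry("a", 20L), Map.entry("b", 5L))));
    }
}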

Example 19 with ColumnType

Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

The class GrouperBufferComparatorUtils, method bufferComparatorWithAggregators:

public static Grouper.BufferComparator bufferComparatorWithAggregators(
        AggregatorFactory[] aggregatorFactories,
        int[] aggregatorOffsets,
        DefaultLimitSpec limitSpec,
        List<DimensionSpec> dimensions,
        Grouper.BufferComparator[] dimComparators,
        boolean includeTimestamp,
        boolean sortByDimsFirst,
        int keySize) {
    int dimCount = dimensions.size();
    final List<Boolean> needsReverses = new ArrayList<>();
    List<Grouper.BufferComparator> comparators = new ArrayList<>();
    Set<Integer> orderByIndices = new HashSet<>();
    int aggCount = 0;
    boolean needsReverse;
    for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
        needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
        int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
        if (dimIndex >= 0) {
            comparators.add(dimComparators[dimIndex]);
            orderByIndices.add(dimIndex);
            needsReverses.add(needsReverse);
        } else {
            int aggIndex = OrderByColumnSpec.getAggIndexForOrderBy(orderSpec, Arrays.asList(aggregatorFactories));
            if (aggIndex >= 0) {
                final StringComparator stringComparator = orderSpec.getDimensionComparator();
                final ColumnType valueType = aggregatorFactories[aggIndex].getIntermediateType();
                // Aggregators start after dimensions
                final int aggOffset = keySize + aggregatorOffsets[aggIndex];
                aggCount++;
                if (!valueType.isNumeric()) {
                    throw new IAE("Cannot order by a non-numeric aggregator[%s]", orderSpec);
                }
                comparators.add(makeNullHandlingBufferComparatorForNumericData(aggOffset, makeNumericBufferComparator(valueType, aggOffset, true, stringComparator)));
                needsReverses.add(needsReverse);
            }
        }
    }
    for (int i = 0; i < dimCount; i++) {
        if (!orderByIndices.contains(i)) {
            comparators.add(dimComparators[i]);
            // default to Ascending order if dim is not in an orderby spec
            needsReverses.add(false);
        }
    }
    final Grouper.BufferComparator[] adjustedSerdeHelperComparators = comparators.toArray(new Grouper.BufferComparator[0]);
    final int fieldCount = dimCount + aggCount;
    if (includeTimestamp) {
        if (sortByDimsFirst) {
            return new Grouper.BufferComparator() {

                @Override
                public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
                    final int cmp = compareDimsInBuffersForNullFudgeTimestampForPushDown(adjustedSerdeHelperComparators, needsReverses, fieldCount, lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
                    if (cmp != 0) {
                        return cmp;
                    }
                    return Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
                }
            };
        } else {
            return new Grouper.BufferComparator() {

                @Override
                public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
                    final int timeCompare = Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
                    if (timeCompare != 0) {
                        return timeCompare;
                    }
                    return compareDimsInBuffersForNullFudgeTimestampForPushDown(adjustedSerdeHelperComparators, needsReverses, fieldCount, lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
                }
            };
        }
    } else {
        return new Grouper.BufferComparator() {

            @Override
            public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
                for (int i = 0; i < fieldCount; i++) {
                    final int cmp;
                    if (needsReverses.get(i)) {
                        cmp = adjustedSerdeHelperComparators[i].compare(rhsBuffer, lhsBuffer, rhsPosition, lhsPosition);
                    } else {
                        cmp = adjustedSerdeHelperComparators[i].compare(lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
                    }
                    if (cmp != 0) {
                        return cmp;
                    }
                }
                return 0;
            }
        };
    }
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) StringComparator(org.apache.druid.query.ordering.StringComparator) ByteBuffer(java.nio.ByteBuffer) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) HashSet(java.util.HashSet)
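
All of the comparators built above implement Grouper.BufferComparator, which compares rows in place at byte offsets inside shared ByteBuffers instead of deserializing them. Below is a minimal sketch of that pattern using only java.nio, mirroring the Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition)) calls above; the class and method names are illustrative.

import java.nio.ByteBuffer;

// Hypothetical sketch of a positional buffer comparator; not Druid code.
public class BufferComparatorSketch {

    // Compare the 8-byte timestamp stored at the start of each row's position.
    static int compareTimestamps(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
        return Long.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
    }

    public static void main(String[] args) {
        final ByteBuffer buf = ByteBuffer.allocate(16);
        buf.putLong(0, 2L);   // row at offset 0: timestamp 2
        buf.putLong(8, 1L);   // row at offset 8: timestamp 1
        System.out.println(compareTimestamps(buf, buf, 0, 8)); // positive: 2 > 1
    }
}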

Example 20 with ColumnType

Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

The class DefaultLimitSpec, method build:

@Override
public Function<Sequence<ResultRow>, Sequence<ResultRow>> build(final GroupByQuery query) {
    final List<DimensionSpec> dimensions = query.getDimensions();
    // Can avoid re-sorting if the natural ordering is good enough.
    boolean sortingNeeded = dimensions.size() < columns.size();
    final Set<String> aggAndPostAggNames = new HashSet<>();
    for (AggregatorFactory agg : query.getAggregatorSpecs()) {
        aggAndPostAggNames.add(agg.getName());
    }
    for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
        aggAndPostAggNames.add(postAgg.getName());
    }
    if (!sortingNeeded) {
        for (int i = 0; i < columns.size(); i++) {
            final OrderByColumnSpec columnSpec = columns.get(i);
            if (aggAndPostAggNames.contains(columnSpec.getDimension())) {
                sortingNeeded = true;
                break;
            }
            final ColumnType columnType = getOrderByType(columnSpec, dimensions);
            final StringComparator naturalComparator;
            if (columnType.is(ValueType.STRING)) {
                naturalComparator = StringComparators.LEXICOGRAPHIC;
            } else if (columnType.isNumeric()) {
                naturalComparator = StringComparators.NUMERIC;
            } else if (columnType.isArray()) {
                if (columnType.getElementType().isNumeric()) {
                    naturalComparator = StringComparators.NUMERIC;
                } else {
                    naturalComparator = StringComparators.LEXICOGRAPHIC;
                }
            } else {
                sortingNeeded = true;
                break;
            }
            if (columnSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING || !columnSpec.getDimensionComparator().equals(naturalComparator) || !columnSpec.getDimension().equals(dimensions.get(i).getOutputName())) {
                sortingNeeded = true;
                break;
            }
        }
    }
    if (!sortingNeeded) {
        // If granularity is ALL, sortByDimsFirst doesn't change the sorting order.
        sortingNeeded = !query.getGranularity().equals(Granularities.ALL) && query.getContextSortByDimsFirst();
    }
    if (!sortingNeeded) {
        String timestampField = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD);
        if (timestampField != null && !timestampField.isEmpty()) {
            int timestampResultFieldIndex = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX);
            sortingNeeded = query.getContextSortByDimsFirst() ? timestampResultFieldIndex != query.getDimensions().size() - 1 : timestampResultFieldIndex != 0;
        }
    }
    final Function<Sequence<ResultRow>, Sequence<ResultRow>> sortAndLimitFn;
    if (sortingNeeded) {
        // Materialize the Comparator first for fast-fail error checking.
        final Ordering<ResultRow> ordering = makeComparator(query.getResultRowSignature(), query.getResultRowHasTimestamp(), query.getDimensions(), query.getAggregatorSpecs(), query.getPostAggregatorSpecs(), query.getContextSortByDimsFirst());
        // Both branches use a stable sort; important so consistent results are returned from query to query if the
        // underlying data isn't changing. (Useful for query reproducibility and offset-based pagination.)
        if (isLimited()) {
            sortAndLimitFn = results -> new TopNSequence<>(results, ordering, limit + offset);
        } else {
            sortAndLimitFn = results -> Sequences.sort(results, ordering).limit(limit + offset);
        }
    } else {
        if (isLimited()) {
            sortAndLimitFn = results -> results.limit(limit + offset);
        } else {
            sortAndLimitFn = Functions.identity();
        }
    }
    // Finally, apply offset after sorting and limiting.
    if (isOffset()) {
        return results -> sortAndLimitFn.apply(results).skip(offset);
    } else {
        return sortAndLimitFn;
    }
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Sequence(org.apache.druid.java.util.common.guava.Sequence) TopNSequence(org.apache.druid.java.util.common.guava.TopNSequence) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringComparator(org.apache.druid.query.ordering.StringComparator) HashSet(java.util.HashSet)
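
The order of operations in sortAndLimitFn matters: sort first, keep the first limit + offset rows, then skip the offset, so an offset query sees the same sorted prefix a non-offset query would produce. A small stream-based sketch of that composition follows, with hypothetical names and plain Java streams standing in for Druid's Sequence and TopNSequence.

import java.util.Comparator;
import java.util.List;

// Hypothetical sketch of the sort -> limit(limit + offset) -> skip(offset)
// pipeline in build(); not Druid code.
public class SortAndLimitSketch {

    static List<Integer> apply(List<Integer> results, int limit, int offset) {
        return results.stream()
                .sorted(Comparator.naturalOrder())
                .limit((long) limit + offset)  // keep enough rows for the offset
                .skip(offset)                  // apply offset after sorting and limiting
                .toList();
    }

    public static void main(String[] args) {
        // limit 2, offset 1 over [5, 3, 1, 4, 2]: sorted prefix [1, 2, 3], skip 1.
        System.out.println(apply(List.of(5, 3, 1, 4, 2), 2, 1)); // [2, 3]
    }
}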

Aggregations

ColumnType (org.apache.druid.segment.column.ColumnType): 43
Nullable (javax.annotation.Nullable): 16
ISE (org.apache.druid.java.util.common.ISE): 15
RowSignature (org.apache.druid.segment.column.RowSignature): 14
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 13
RexNode (org.apache.calcite.rex.RexNode): 12
DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression): 12
List (java.util.List): 11
IAE (org.apache.druid.java.util.common.IAE): 11
RelDataType (org.apache.calcite.rel.type.RelDataType): 9
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 9
Collectors (java.util.stream.Collectors): 8
ArrayList (java.util.ArrayList): 7
SqlAggFunction (org.apache.calcite.sql.SqlAggFunction): 5
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 5
StringComparator (org.apache.druid.query.ordering.StringComparator): 5
Aggregation (org.apache.druid.sql.calcite.aggregation.Aggregation): 5
JsonCreator (com.fasterxml.jackson.annotation.JsonCreator): 4
Preconditions (com.google.common.base.Preconditions): 4
Collections (java.util.Collections): 4