Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class GroupByQuery, method compareDimsForLimitPushDown:
private static int compareDimsForLimitPushDown(
    final IntList fields,
    final List<Boolean> needsReverseList,
    final List<ColumnType> dimensionTypes,
    final List<StringComparator> comparators,
    final ResultRow lhs,
    final ResultRow rhs
)
{
  for (int i = 0; i < fields.size(); i++) {
    final int fieldNumber = fields.getInt(i);
    final StringComparator comparator = comparators.get(i);
    final ColumnType dimensionType = dimensionTypes.get(i);

    final int dimCompare;
    final Object lhsObj = lhs.get(fieldNumber);
    final Object rhsObj = rhs.get(fieldNumber);

    if (dimensionType.isNumeric()) {
      if (comparator.equals(StringComparators.NUMERIC)) {
        dimCompare = DimensionHandlerUtils.compareObjectsAsType(lhsObj, rhsObj, dimensionType);
      } else {
        dimCompare = comparator.compare(String.valueOf(lhsObj), String.valueOf(rhsObj));
      }
    } else if (dimensionType.equals(ColumnType.STRING_ARRAY)) {
      final ComparableStringArray lhsArr = DimensionHandlerUtils.convertToComparableStringArray(lhsObj);
      final ComparableStringArray rhsArr = DimensionHandlerUtils.convertToComparableStringArray(rhsObj);
      dimCompare = Comparators.<Comparable>naturalNullsFirst().compare(lhsArr, rhsArr);
    } else if (dimensionType.equals(ColumnType.LONG_ARRAY) || dimensionType.equals(ColumnType.DOUBLE_ARRAY)) {
      final ComparableList lhsArr = DimensionHandlerUtils.convertToList(lhsObj);
      final ComparableList rhsArr = DimensionHandlerUtils.convertToList(rhsObj);
      dimCompare = Comparators.<Comparable>naturalNullsFirst().compare(lhsArr, rhsArr);
    } else {
      dimCompare = comparator.compare((String) lhsObj, (String) rhsObj);
    }

    if (dimCompare != 0) {
      return needsReverseList.get(i) ? -dimCompare : dimCompare;
    }
  }
  return 0;
}
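The sign flip in the final return is what implements descending order: each field is compared in its natural direction, and the result is negated when that field's needsReverse flag is set. A minimal standalone sketch of the same pattern, with plain JDK types standing in for Druid's (the class and method names here are illustrative only):

import java.util.Comparator;
import java.util.List;

public class ReversibleFieldComparison
{
  // Compare two rows field by field; negate any field marked as descending.
  // Mirrors the loop structure of compareDimsForLimitPushDown without the Druid-specific types.
  static int compareRows(
      final List<Comparator<Object>> comparators,
      final List<Boolean> needsReverse,
      final Object[] lhs,
      final Object[] rhs
  )
  {
    for (int i = 0; i < comparators.size(); i++) {
      final int cmp = comparators.get(i).compare(lhs[i], rhs[i]);
      if (cmp != 0) {
        return needsReverse.get(i) ? -cmp : cmp;
      }
    }
    return 0; // all fields equal
  }
}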
Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class GroupByQuery, method getRowOrderingForPushDown:
/**
 * When limit push down is applied, the partial results are sorted by the ordering specified by the
 * limit/order spec (unlike the non-push-down case, where the results always use the default natural
 * ascending order), so the merge of these partial result streams needs to use the same ordering to
 * produce correct results.
 */
private Ordering<ResultRow> getRowOrderingForPushDown(final boolean granular, final DefaultLimitSpec limitSpec)
{
  final boolean sortByDimsFirst = getContextSortByDimsFirst();

  final IntList orderedFieldNumbers = new IntArrayList();
  final Set<Integer> dimsInOrderBy = new HashSet<>();
  final List<Boolean> needsReverseList = new ArrayList<>();
  final List<ColumnType> dimensionTypes = new ArrayList<>();
  final List<StringComparator> comparators = new ArrayList<>();

  for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
    boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
    int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
    if (dimIndex >= 0) {
      DimensionSpec dim = dimensions.get(dimIndex);
      orderedFieldNumbers.add(resultRowSignature.indexOf(dim.getOutputName()));
      dimsInOrderBy.add(dimIndex);
      needsReverseList.add(needsReverse);
      final ColumnType type = dimensions.get(dimIndex).getOutputType();
      dimensionTypes.add(type);
      comparators.add(orderSpec.getDimensionComparator());
    }
  }

  for (int i = 0; i < dimensions.size(); i++) {
    if (!dimsInOrderBy.contains(i)) {
      orderedFieldNumbers.add(resultRowSignature.indexOf(dimensions.get(i).getOutputName()));
      needsReverseList.add(false);
      final ColumnType type = dimensions.get(i).getOutputType();
      dimensionTypes.add(type);
      comparators.add(StringComparators.LEXICOGRAPHIC);
    }
  }

  final Comparator<ResultRow> timeComparator = getTimeComparator(granular);

  if (timeComparator == null) {
    return Ordering.from(
        (lhs, rhs) -> compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs)
    );
  } else if (sortByDimsFirst) {
    return Ordering.from(
        (lhs, rhs) -> {
          final int cmp = compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
          if (cmp != 0) {
            return cmp;
          }
          return timeComparator.compare(lhs, rhs);
        }
    );
  } else {
    return Ordering.from(
        (lhs, rhs) -> {
          final int timeCompare = timeComparator.compare(lhs, rhs);
          if (timeCompare != 0) {
            return timeCompare;
          }
          return compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
        }
    );
  }
}
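The Ordering built here must match the sort order the data nodes used when producing their limit-pushed-down partial results: a k-way merge only yields a correctly ordered stream when it compares with the same ordering its inputs were sorted by. A small self-contained illustration of that point using Guava's Iterables.mergeSorted (the integer data is made up; this is not Druid code):

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import java.util.Arrays;
import java.util.List;

public class MergeOrderingDemo
{
  public static void main(String[] args)
  {
    // Two partial result streams, each already sorted descending (as limit push down would have sorted them).
    final Ordering<Integer> ordering = Ordering.<Integer>natural().reverse();
    final List<Integer> partial1 = Arrays.asList(9, 5, 1);
    final List<Integer> partial2 = Arrays.asList(8, 4, 2);

    // Merging with the same ordering the partials were sorted by yields a correctly sorted stream.
    final Iterable<Integer> merged = Iterables.mergeSorted(Arrays.asList(partial1, partial2), ordering);
    System.out.println(Lists.newArrayList(merged)); // [9, 8, 5, 4, 2, 1]
  }
}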
Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class RowBasedGrouperHelper, method createGrouperAccumulatorPair:
/**
* Create a {@link Grouper} that groups according to the dimensions and aggregators in "query", along with
* an {@link Accumulator} that accepts ResultRows and forwards them to the grouper.
*
* The pair will operate in one of two modes:
*
* 1) Combining mode (used if "subquery" is null). In this mode, filters from the "query" are ignored, and
* its aggregators are converted into combining form. The input ResultRows are assumed to be partially-grouped
* results originating from the provided "query".
*
* 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters from the "query" (both intervals
* and dim filters) are respected, and its aggregators are used in standard (not combining) form. The input
* ResultRows are assumed to be results originating from the provided "subquery".
*
* @param query query that we are grouping for
* @param subquery optional subquery that we are receiving results from (see combining vs. subquery
* mode above)
* @param config groupBy query config
* @param bufferSupplier supplier of merge buffers
* @param combineBufferHolder holder of combine buffers. Unused if concurrencyHint = -1, and may be null in that case
* @param concurrencyHint -1 for single-threaded Grouper, >=1 for concurrent Grouper
* @param temporaryStorage temporary storage used for spilling from the Grouper
* @param spillMapper object mapper used for spilling from the Grouper
* @param grouperSorter executor service used for parallel combining. Unused if concurrencyHint = -1, and may
* be null in that case
* @param priority query priority
* @param hasQueryTimeout whether or not this query has a timeout
* @param queryTimeoutAt when this query times out, in milliseconds since the epoch
* @param mergeBufferSize size of the merge buffers from "bufferSupplier"
*/
public static Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> createGrouperAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier,
    @Nullable final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder,
    final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage,
    final ObjectMapper spillMapper,
    @Nullable final ListeningExecutorService grouperSorter,
    final int priority,
    final boolean hasQueryTimeout,
    final long queryTimeoutAt,
    final int mergeBufferSize
)
{
  // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
  Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");

  if (concurrencyHint >= 1) {
    Preconditions.checkNotNull(grouperSorter, "grouperSorter executor must be provided");
  }

  // See method-level javadoc; we go into combining mode if there is no subquery.
  final boolean combining = subquery == null;

  final List<ColumnType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());

  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final boolean includeTimestamp = query.getResultRowHasTimestamp();

  final ThreadLocal<ResultRow> columnSelectorRow = new ThreadLocal<>();

  ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory(
      combining ? query : subquery,
      columnSelectorRow::get,
      RowSignature.Finalization.UNKNOWN
  );

  // Apply virtual columns if we are in subquery (non-combining) mode.
  if (!combining) {
    columnSelectorFactory = query.getVirtualColumns().wrap(columnSelectorFactory);
  }

  final boolean willApplyLimitPushDown = query.isApplyLimitPushDown();
  final DefaultLimitSpec limitSpec = willApplyLimitPushDown ? (DefaultLimitSpec) query.getLimitSpec() : null;

  boolean sortHasNonGroupingFields = false;
  if (willApplyLimitPushDown) {
    sortHasNonGroupingFields = DefaultLimitSpec.sortingOrderHasNonGroupingFields(limitSpec, query.getDimensions());
  }

  final AggregatorFactory[] aggregatorFactories;
  if (combining) {
    aggregatorFactories = query.getAggregatorSpecs()
                               .stream()
                               .map(AggregatorFactory::getCombiningFactory)
                               .toArray(AggregatorFactory[]::new);
  } else {
    aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
  }

  final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(
      includeTimestamp,
      query.getContextSortByDimsFirst(),
      query.getDimensions(),
      querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint),
      valueTypes,
      aggregatorFactories,
      limitSpec
  );

  final Grouper<RowBasedKey> grouper;
  if (concurrencyHint == -1) {
    grouper = new SpillingGrouper<>(
        bufferSupplier,
        keySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(),
        querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(),
        temporaryStorage,
        spillMapper,
        true,
        limitSpec,
        sortHasNonGroupingFields,
        mergeBufferSize
    );
  } else {
    final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(
        includeTimestamp,
        query.getContextSortByDimsFirst(),
        query.getDimensions(),
        querySpecificConfig.getMaxMergingDictionarySize(), // use entire dictionary space for combining key serde
        valueTypes,
        aggregatorFactories,
        limitSpec
    );

    grouper = new ConcurrentGrouper<>(
        querySpecificConfig,
        bufferSupplier,
        combineBufferHolder,
        keySerdeFactory,
        combineKeySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        temporaryStorage,
        spillMapper,
        concurrencyHint,
        limitSpec,
        sortHasNonGroupingFields,
        grouperSorter,
        priority,
        hasQueryTimeout,
        queryTimeoutAt
    );
  }

  final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
  final ValueExtractFunction valueExtractFn = makeValueExtractFunction(query, combining, includeTimestamp, columnSelectorFactory, valueTypes);

  final Predicate<ResultRow> rowPredicate;
  if (combining) {
    // Filters are not applied in combining mode.
    rowPredicate = row -> true;
  } else {
    rowPredicate = getResultRowPredicate(query, subquery);
  }

  final Accumulator<AggregateResult, ResultRow> accumulator = (priorResult, row) -> {
    BaseQuery.checkInterrupted();

    if (priorResult != null && !priorResult.isOk()) {
      // Pass-through error returns without doing more work.
      return priorResult;
    }

    if (!grouper.isInitialized()) {
      grouper.init();
    }

    if (!rowPredicate.test(row)) {
      return AggregateResult.ok();
    }

    columnSelectorRow.set(row);

    final Comparable[] key = new Comparable[keySize];
    valueExtractFn.apply(row, key);

    final AggregateResult aggregateResult = grouper.aggregate(new RowBasedKey(key));
    columnSelectorRow.set(null);

    return aggregateResult;
  };

  return new Pair<>(grouper, accumulator);
}
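In broad strokes, a caller drives a Sequence of ResultRows through the accumulator half of the returned pair and then reads grouped entries back out of the grouper half. A hedged sketch of that call pattern; the helper name and the surrounding error handling are assumptions, not code from Druid:

import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.guava.Accumulator;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.ResourceLimitExceededException;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.epinephelinae.AggregateResult;
import org.apache.druid.query.groupby.epinephelinae.Grouper;
import org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey;

public class GrouperUsageSketch
{
  // Hypothetical helper: feed every row through the accumulator, then hand back the grouper for reading.
  static Grouper<RowBasedKey> accumulateAll(
      final Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair,
      final Sequence<ResultRow> rows
  )
  {
    final AggregateResult result = rows.accumulate(AggregateResult.ok(), pair.rhs);
    if (!result.isOk()) {
      // Grouping hit a resource limit (e.g. merge buffer space); release the grouper and fail.
      pair.lhs.close();
      throw new ResourceLimitExceededException(result.getReason());
    }
    return pair.lhs; // grouped entries can then be read via pair.lhs.iterator(true)
  }
}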
Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class GrouperBufferComparatorUtils, method bufferComparatorWithAggregators:
public static Grouper.BufferComparator bufferComparatorWithAggregators(
    AggregatorFactory[] aggregatorFactories,
    int[] aggregatorOffsets,
    DefaultLimitSpec limitSpec,
    List<DimensionSpec> dimensions,
    Grouper.BufferComparator[] dimComparators,
    boolean includeTimestamp,
    boolean sortByDimsFirst,
    int keySize
)
{
  int dimCount = dimensions.size();
  final List<Boolean> needsReverses = new ArrayList<>();
  List<Grouper.BufferComparator> comparators = new ArrayList<>();
  Set<Integer> orderByIndices = new HashSet<>();
  int aggCount = 0;
  boolean needsReverse;

  for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
    needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
    int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
    if (dimIndex >= 0) {
      comparators.add(dimComparators[dimIndex]);
      orderByIndices.add(dimIndex);
      needsReverses.add(needsReverse);
    } else {
      int aggIndex = OrderByColumnSpec.getAggIndexForOrderBy(orderSpec, Arrays.asList(aggregatorFactories));
      if (aggIndex >= 0) {
        final StringComparator stringComparator = orderSpec.getDimensionComparator();
        final ColumnType valueType = aggregatorFactories[aggIndex].getIntermediateType();
        // Aggregators start after dimensions
        final int aggOffset = keySize + aggregatorOffsets[aggIndex];
        aggCount++;
        if (!valueType.isNumeric()) {
          throw new IAE("Cannot order by a non-numeric aggregator[%s]", orderSpec);
        }
        comparators.add(
            makeNullHandlingBufferComparatorForNumericData(
                aggOffset,
                makeNumericBufferComparator(valueType, aggOffset, true, stringComparator)
            )
        );
        needsReverses.add(needsReverse);
      }
    }
  }

  for (int i = 0; i < dimCount; i++) {
    if (!orderByIndices.contains(i)) {
      comparators.add(dimComparators[i]);
      // default to Ascending order if dim is not in an orderby spec
      needsReverses.add(false);
    }
  }

  final Grouper.BufferComparator[] adjustedSerdeHelperComparators = comparators.toArray(new Grouper.BufferComparator[0]);
  final int fieldCount = dimCount + aggCount;

  if (includeTimestamp) {
    if (sortByDimsFirst) {
      return new Grouper.BufferComparator()
      {
        @Override
        public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition)
        {
          final int cmp = compareDimsInBuffersForNullFudgeTimestampForPushDown(adjustedSerdeHelperComparators, needsReverses, fieldCount, lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
          if (cmp != 0) {
            return cmp;
          }
          return Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
        }
      };
    } else {
      return new Grouper.BufferComparator()
      {
        @Override
        public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition)
        {
          final int timeCompare = Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
          if (timeCompare != 0) {
            return timeCompare;
          }
          return compareDimsInBuffersForNullFudgeTimestampForPushDown(adjustedSerdeHelperComparators, needsReverses, fieldCount, lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
        }
      };
    }
  } else {
    return new Grouper.BufferComparator()
    {
      @Override
      public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition)
      {
        for (int i = 0; i < fieldCount; i++) {
          final int cmp;
          if (needsReverses.get(i)) {
            cmp = adjustedSerdeHelperComparators[i].compare(rhsBuffer, lhsBuffer, rhsPosition, lhsPosition);
          } else {
            cmp = adjustedSerdeHelperComparators[i].compare(lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
          }
          if (cmp != 0) {
            return cmp;
          }
        }
        return 0;
      }
    };
  }
}
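A Grouper.BufferComparator never deserializes the grouping key; it compares fixed-width fields in place at known offsets inside the two ByteBuffers. As a minimal illustration of that idea (a sketch only, not Druid's actual numeric comparator, which also handles nulls), here is a comparator for a single 8-byte long field:

import com.google.common.primitives.Longs;
import org.apache.druid.query.groupby.epinephelinae.Grouper;

public class LongFieldBufferComparator
{
  // Compare an 8-byte long stored "fieldOffset" bytes into each serialized key, without deserializing the row.
  static Grouper.BufferComparator forLongField(final int fieldOffset)
  {
    return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) ->
        Longs.compare(lhsBuffer.getLong(lhsPosition + fieldOffset), rhsBuffer.getLong(rhsPosition + fieldOffset));
  }
}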
Use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.
The class DefaultLimitSpec, method build:
@Override
public Function<Sequence<ResultRow>, Sequence<ResultRow>> build(final GroupByQuery query)
{
  final List<DimensionSpec> dimensions = query.getDimensions();

  // Can avoid re-sorting if the natural ordering is good enough.
  boolean sortingNeeded = dimensions.size() < columns.size();

  final Set<String> aggAndPostAggNames = new HashSet<>();
  for (AggregatorFactory agg : query.getAggregatorSpecs()) {
    aggAndPostAggNames.add(agg.getName());
  }
  for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
    aggAndPostAggNames.add(postAgg.getName());
  }

  if (!sortingNeeded) {
    for (int i = 0; i < columns.size(); i++) {
      final OrderByColumnSpec columnSpec = columns.get(i);
      if (aggAndPostAggNames.contains(columnSpec.getDimension())) {
        sortingNeeded = true;
        break;
      }

      final ColumnType columnType = getOrderByType(columnSpec, dimensions);
      final StringComparator naturalComparator;
      if (columnType.is(ValueType.STRING)) {
        naturalComparator = StringComparators.LEXICOGRAPHIC;
      } else if (columnType.isNumeric()) {
        naturalComparator = StringComparators.NUMERIC;
      } else if (columnType.isArray()) {
        if (columnType.getElementType().isNumeric()) {
          naturalComparator = StringComparators.NUMERIC;
        } else {
          naturalComparator = StringComparators.LEXICOGRAPHIC;
        }
      } else {
        sortingNeeded = true;
        break;
      }

      if (columnSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING
          || !columnSpec.getDimensionComparator().equals(naturalComparator)
          || !columnSpec.getDimension().equals(dimensions.get(i).getOutputName())) {
        sortingNeeded = true;
        break;
      }
    }
  }

  if (!sortingNeeded) {
    // If granularity is ALL, sortByDimsFirst doesn't change the sorting order.
    sortingNeeded = !query.getGranularity().equals(Granularities.ALL) && query.getContextSortByDimsFirst();
  }

  if (!sortingNeeded) {
    String timestampField = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD);
    if (timestampField != null && !timestampField.isEmpty()) {
      int timestampResultFieldIndex = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX);
      sortingNeeded = query.getContextSortByDimsFirst()
                      ? timestampResultFieldIndex != query.getDimensions().size() - 1
                      : timestampResultFieldIndex != 0;
    }
  }

  final Function<Sequence<ResultRow>, Sequence<ResultRow>> sortAndLimitFn;

  if (sortingNeeded) {
    // Materialize the Comparator first for fast-fail error checking.
    final Ordering<ResultRow> ordering = makeComparator(
        query.getResultRowSignature(),
        query.getResultRowHasTimestamp(),
        query.getDimensions(),
        query.getAggregatorSpecs(),
        query.getPostAggregatorSpecs(),
        query.getContextSortByDimsFirst()
    );

    // Both branches use a stable sort, so consecutive runs of the same query return rows in the same
    // order as long as the underlying data isn't changing. (Useful for query reproducibility and
    // offset-based pagination.)
    if (isLimited()) {
      sortAndLimitFn = results -> new TopNSequence<>(results, ordering, limit + offset);
    } else {
      sortAndLimitFn = results -> Sequences.sort(results, ordering).limit(limit + offset);
    }
  } else {
    if (isLimited()) {
      sortAndLimitFn = results -> results.limit(limit + offset);
    } else {
      sortAndLimitFn = Functions.identity();
    }
  }

  // Finally, apply offset after sorting and limiting.
  if (isOffset()) {
    return results -> sortAndLimitFn.apply(results).skip(offset);
  } else {
    return sortAndLimitFn;
  }
}
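Note the order of operations at the end of build: the sequence is first truncated to limit + offset rows, and only afterwards are the first offset rows skipped, which is what makes LIMIT with OFFSET return the intended page. The same arithmetic in a plain java.util.stream sketch (illustrative data, not Druid code):

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class LimitOffsetDemo
{
  public static void main(String[] args)
  {
    final int limit = 2;
    final int offset = 1;

    // Keep limit + offset rows of the sorted stream, then drop the first "offset" of them.
    final List<Integer> page = Stream.of(5, 3, 9, 1, 7)
                                     .sorted()
                                     .limit(limit + offset)
                                     .skip(offset)
                                     .collect(Collectors.toList());
    System.out.println(page); // [3, 5]
  }
}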