use of org.apache.druid.query.groupby.orderby.LimitSpec in project druid by druid-io.
the class GroupByQuery method getRowOrderingForPushDown.
/**
* When limit push down is applied, the partial results would be sorted by the ordering specified by the
* limit/order spec (unlike non-push down case where the results always use the default natural ascending order),
* so when merging these partial result streams, the merge needs to use the same ordering to get correct results.
*/
private Ordering<ResultRow> getRowOrderingForPushDown(final boolean granular, final DefaultLimitSpec limitSpec) {
final boolean sortByDimsFirst = getContextSortByDimsFirst();
final IntList orderedFieldNumbers = new IntArrayList();
final Set<Integer> dimsInOrderBy = new HashSet<>();
final List<Boolean> needsReverseList = new ArrayList<>();
final List<ColumnType> dimensionTypes = new ArrayList<>();
final List<StringComparator> comparators = new ArrayList<>();
for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
if (dimIndex >= 0) {
DimensionSpec dim = dimensions.get(dimIndex);
orderedFieldNumbers.add(resultRowSignature.indexOf(dim.getOutputName()));
dimsInOrderBy.add(dimIndex);
needsReverseList.add(needsReverse);
final ColumnType type = dimensions.get(dimIndex).getOutputType();
dimensionTypes.add(type);
comparators.add(orderSpec.getDimensionComparator());
}
}
for (int i = 0; i < dimensions.size(); i++) {
if (!dimsInOrderBy.contains(i)) {
orderedFieldNumbers.add(resultRowSignature.indexOf(dimensions.get(i).getOutputName()));
needsReverseList.add(false);
final ColumnType type = dimensions.get(i).getOutputType();
dimensionTypes.add(type);
comparators.add(StringComparators.LEXICOGRAPHIC);
}
}
final Comparator<ResultRow> timeComparator = getTimeComparator(granular);
if (timeComparator == null) {
return Ordering.from((lhs, rhs) -> compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs));
} else if (sortByDimsFirst) {
return Ordering.from((lhs, rhs) -> {
final int cmp = compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
if (cmp != 0) {
return cmp;
}
return timeComparator.compare(lhs, rhs);
});
} else {
return Ordering.from((lhs, rhs) -> {
final int timeCompare = timeComparator.compare(lhs, rhs);
if (timeCompare != 0) {
return timeCompare;
}
return compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
});
}
}
use of org.apache.druid.query.groupby.orderby.LimitSpec in project druid by druid-io.
the class GroupByStrategyV2 method processSubtotalsSpec.
@Override
public Sequence<ResultRow> processSubtotalsSpec(GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> queryResult) {
// How it works?
// First we accumulate the result of top level base query aka queryResult arg inside a resultSupplierOne object.
// Next for each subtotalSpec
// If subtotalSpec is a prefix of top level dims then we iterate on rows in resultSupplierOne object which are still
// sorted by subtotalSpec, stream merge them and return.
//
// If subtotalSpec is not a prefix of top level dims then we create a resultSupplierTwo object filled with rows from
// resultSupplierOne object with only dims from subtotalSpec. Then we iterate on rows in resultSupplierTwo object which are
// of course sorted by subtotalSpec, stream merge them and return.
// Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
// while creating the sequence.
GroupByRowProcessor.ResultSupplier resultSupplierOne = null;
try {
// baseSubtotalQuery is the original query with dimensions and aggregators rewritten to apply to the *results*
// rather than *inputs* of that query. It has its virtual columns and dim filter removed, because those only
// make sense when applied to inputs. Finally, it has subtotalsSpec removed, since we'll be computing them
// one-by-one soon enough.
GroupByQuery baseSubtotalQuery = query.withDimensionSpecs(query.getDimensions().stream().map(dimSpec -> new DefaultDimensionSpec(dimSpec.getOutputName(), dimSpec.getOutputName(), dimSpec.getOutputType())).collect(Collectors.toList())).withAggregatorSpecs(query.getAggregatorSpecs().stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList())).withVirtualColumns(VirtualColumns.EMPTY).withDimFilter(null).withSubtotalsSpec(null).withOverriddenContext(ImmutableMap.of(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, ""));
resultSupplierOne = GroupByRowProcessor.process(baseSubtotalQuery, baseSubtotalQuery, queryResult, configSupplier.get(), resource, spillMapper, processingConfig.getTmpDir(), processingConfig.intermediateComputeSizeBytes());
List<String> queryDimNames = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName).collect(Collectors.toList());
// Only needed to make LimitSpec.filterColumns(..) call later in case base query has a non default LimitSpec.
Set<String> aggsAndPostAggs = null;
if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery);
}
List<List<String>> subtotals = query.getSubtotalsSpec();
List<Sequence<ResultRow>> subtotalsResults = new ArrayList<>(subtotals.size());
// Iterate through each subtotalSpec, build results for it and add to subtotalsResults
for (List<String> subtotalSpec : subtotals) {
final ImmutableSet<String> dimsInSubtotalSpec = ImmutableSet.copyOf(subtotalSpec);
// Dimension spec including dimension name and output name
final List<DimensionSpec> subTotalDimensionSpec = new ArrayList<>(dimsInSubtotalSpec.size());
final List<DimensionSpec> dimensions = query.getDimensions();
for (DimensionSpec dimensionSpec : dimensions) {
if (dimsInSubtotalSpec.contains(dimensionSpec.getOutputName())) {
subTotalDimensionSpec.add(dimensionSpec);
}
}
// Create appropriate LimitSpec for subtotal query
LimitSpec subtotalQueryLimitSpec = NoopLimitSpec.instance();
if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
Set<String> columns = new HashSet<>(aggsAndPostAggs);
columns.addAll(subtotalSpec);
subtotalQueryLimitSpec = baseSubtotalQuery.getLimitSpec().filterColumns(columns);
}
GroupByQuery subtotalQuery = baseSubtotalQuery.withLimitSpec(subtotalQueryLimitSpec);
final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne;
if (Utils.isPrefix(subtotalSpec, queryDimNames)) {
// Since subtotalSpec is a prefix of base query dimensions, so results from base query are also sorted
// by subtotalSpec as needed by stream merging.
subtotalsResults.add(processSubtotalsResultAndOptionallyClose(() -> resultSupplierOneFinal, subTotalDimensionSpec, subtotalQuery, false));
} else {
// Since subtotalSpec is not a prefix of base query dimensions, so results from base query are not sorted
// by subtotalSpec. So we first add the result of base query into another resultSupplier which are sorted
// by subtotalSpec and then stream merge them.
// Also note, we can't create the ResultSupplier eagerly here or as we don't want to eagerly allocate
// merge buffers for processing subtotal.
Supplier<GroupByRowProcessor.ResultSupplier> resultSupplierTwo = () -> GroupByRowProcessor.process(baseSubtotalQuery, subtotalQuery, resultSupplierOneFinal.results(subTotalDimensionSpec), configSupplier.get(), resource, spillMapper, processingConfig.getTmpDir(), processingConfig.intermediateComputeSizeBytes());
subtotalsResults.add(processSubtotalsResultAndOptionallyClose(resultSupplierTwo, subTotalDimensionSpec, subtotalQuery, true));
}
}
return Sequences.withBaggage(query.postProcess(Sequences.concat(subtotalsResults)), // this will close resources allocated by resultSupplierOne after sequence read
resultSupplierOne);
} catch (Throwable e) {
throw CloseableUtils.closeAndWrapInCatch(e, resultSupplierOne);
}
}
use of org.apache.druid.query.groupby.orderby.LimitSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testMergeResultsWithOrderBy.
@Test
public void testMergeResultsWithOrderBy() {
LimitSpec[] orderBySpecs = new LimitSpec[] { new DefaultLimitSpec(OrderByColumnSpec.ascending("idx"), null), new DefaultLimitSpec(OrderByColumnSpec.ascending("rows", "idx"), null), new DefaultLimitSpec(OrderByColumnSpec.descending("idx"), null), new DefaultLimitSpec(OrderByColumnSpec.descending("rows", "idx"), null) };
GroupByQuery baseQuery = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setInterval("2011-04-02/2011-04-04").setDimensions(new DefaultDimensionSpec("quality", "alias")).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index")).setGranularity(new PeriodGranularity(new Period("P1M"), null, null)).build();
List<ResultRow> allResults = Arrays.asList(makeRow(baseQuery, "2011-04-01", "alias", "automotive", "rows", 2L, "idx", 269L), makeRow(baseQuery, "2011-04-01", "alias", "business", "rows", 2L, "idx", 217L), makeRow(baseQuery, "2011-04-01", "alias", "entertainment", "rows", 2L, "idx", 319L), makeRow(baseQuery, "2011-04-01", "alias", "health", "rows", 2L, "idx", 216L), makeRow(baseQuery, "2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L), makeRow(baseQuery, "2011-04-01", "alias", "news", "rows", 2L, "idx", 221L), makeRow(baseQuery, "2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L), makeRow(baseQuery, "2011-04-01", "alias", "technology", "rows", 2L, "idx", 177L), makeRow(baseQuery, "2011-04-01", "alias", "travel", "rows", 2L, "idx", 243L));
final int idxPosition = baseQuery.getResultRowSignature().indexOf("idx");
final int rowsPosition = baseQuery.getResultRowSignature().indexOf("rows");
Comparator<ResultRow> idxComparator = Comparator.comparing(row -> ((Number) row.get(idxPosition)).floatValue());
Comparator<ResultRow> rowsComparator = Comparator.comparing(row -> ((Number) row.get(rowsPosition)).floatValue());
Comparator<ResultRow> rowsIdxComparator = Ordering.from(rowsComparator).thenComparing(idxComparator);
List<List<ResultRow>> expectedResults = Lists.newArrayList(Ordering.from(idxComparator).sortedCopy(allResults), Ordering.from(rowsIdxComparator).sortedCopy(allResults), Ordering.from(idxComparator).reverse().sortedCopy(allResults), Ordering.from(rowsIdxComparator).reverse().sortedCopy(allResults));
for (int i = 0; i < orderBySpecs.length; ++i) {
doTestMergeResultsWithOrderBy(baseQuery, orderBySpecs[i], expectedResults.get(i));
}
}
Aggregations