Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.
From class MultiValuedDimensionTest, method testGroupByExpressionMultiMultiAutoAutoDupeIdentifier.
@Test
public void testGroupByExpressionMultiMultiAutoAutoDupeIdentifier() {
  GroupByQuery query = GroupByQuery.builder()
      .setDataSource("xx")
      .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
      .setGranularity(Granularities.ALL)
      .setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
      .setVirtualColumns(new ExpressionVirtualColumn("texpr", "concat(tags, tags)", ColumnType.STRING, TestExprMacroTable.INSTANCE))
      .setLimitSpec(new DefaultLimitSpec(ImmutableList.of(new OrderByColumnSpec("count", OrderByColumnSpec.Direction.DESCENDING)), 5))
      .setAggregatorSpecs(new CountAggregatorFactory("count"))
      .setContext(context)
      .build();
  Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(
      ImmutableList.of(
          new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
          new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
      ),
      query
  );
  List<ResultRow> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t3t3", "count", 4L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t5t5", "count", 4L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", NullHandling.emptyToNullIfNeeded(""), "count", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1t1", "count", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2t2", "count", 2L)
  );
  TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi-auto-auto-self");
}
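The part of this test that exercises OrderByColumnSpec is the limit spec: order the groups by the "count" aggregator descending and keep the top 5. A minimal sketch isolating just that construction, assuming the Druid query module is on the classpath (the class name LimitSpecSketch is illustrative; the three-argument constructor mirrors the benchmark snippet further below):

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.ordering.StringComparators;

public class LimitSpecSketch {
  public static void main(String[] args) {
    // Top 5 groups by "count", descending, using the default comparator.
    DefaultLimitSpec topFiveByCount = new DefaultLimitSpec(
        ImmutableList.of(new OrderByColumnSpec("count", OrderByColumnSpec.Direction.DESCENDING)),
        5
    );
    // The three-argument constructor also chooses how values are compared,
    // e.g. numerically rather than lexicographically.
    DefaultLimitSpec numericOrder = new DefaultLimitSpec(
        ImmutableList.of(new OrderByColumnSpec("count", OrderByColumnSpec.Direction.DESCENDING, StringComparators.NUMERIC)),
        5
    );
    System.out.println(topFiveByCount);
    System.out.println(numericOrder);
  }
}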
Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.
From class GroupByQuery, method getRowOrderingForPushDown.
/**
 * When limit push down is applied, the partial results are sorted by the ordering specified by the
 * limit/order spec (unlike the non-push-down case, where results always use the default natural ascending
 * order), so the merge of these partial result streams must use the same ordering to produce correct results.
 */
private Ordering<ResultRow> getRowOrderingForPushDown(final boolean granular, final DefaultLimitSpec limitSpec) {
  final boolean sortByDimsFirst = getContextSortByDimsFirst();
  final IntList orderedFieldNumbers = new IntArrayList();
  final Set<Integer> dimsInOrderBy = new HashSet<>();
  final List<Boolean> needsReverseList = new ArrayList<>();
  final List<ColumnType> dimensionTypes = new ArrayList<>();
  final List<StringComparator> comparators = new ArrayList<>();
  for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
    boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
    int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
    if (dimIndex >= 0) {
      DimensionSpec dim = dimensions.get(dimIndex);
      orderedFieldNumbers.add(resultRowSignature.indexOf(dim.getOutputName()));
      dimsInOrderBy.add(dimIndex);
      needsReverseList.add(needsReverse);
      dimensionTypes.add(dim.getOutputType());
      comparators.add(orderSpec.getDimensionComparator());
    }
  }
  for (int i = 0; i < dimensions.size(); i++) {
    if (!dimsInOrderBy.contains(i)) {
      orderedFieldNumbers.add(resultRowSignature.indexOf(dimensions.get(i).getOutputName()));
      needsReverseList.add(false);
      dimensionTypes.add(dimensions.get(i).getOutputType());
      comparators.add(StringComparators.LEXICOGRAPHIC);
    }
  }
  final Comparator<ResultRow> timeComparator = getTimeComparator(granular);
  if (timeComparator == null) {
    return Ordering.from(
        (lhs, rhs) -> compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs)
    );
  } else if (sortByDimsFirst) {
    return Ordering.from((lhs, rhs) -> {
      final int cmp = compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
      if (cmp != 0) {
        return cmp;
      }
      return timeComparator.compare(lhs, rhs);
    });
  } else {
    return Ordering.from((lhs, rhs) -> {
      final int timeCompare = timeComparator.compare(lhs, rhs);
      if (timeCompare != 0) {
        return timeCompare;
      }
      return compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
    });
  }
}
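The javadoc above states the key invariant: a k-way merge yields a globally sorted stream only when every input stream is sorted by the same comparator the merge uses. A minimal, self-contained sketch of that invariant using Guava, with plain integers standing in for ResultRows (MergeOrderingSketch is a hypothetical class name):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Ordering;

import java.util.List;

public class MergeOrderingSketch {
  public static void main(String[] args) {
    // Two partial result streams, each already sorted descending, as a
    // DESCENDING OrderByColumnSpec would leave them after limit push down.
    Ordering<Integer> pushDownOrdering = Ordering.<Integer>natural().reverse();
    List<Integer> partial1 = ImmutableList.of(9, 5, 1);
    List<Integer> partial2 = ImmutableList.of(8, 4, 2);
    // Merging with the same ordering preserves global sortedness.
    Iterable<Integer> merged = Iterables.mergeSorted(ImmutableList.of(partial1, partial2), pushDownOrdering);
    System.out.println(ImmutableList.copyOf(merged)); // [9, 8, 5, 4, 2, 1]
    // Merging with the default ascending ordering instead would interleave the
    // streams incorrectly, which is what getRowOrderingForPushDown prevents.
  }
}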
Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.
From class GrouperBufferComparatorUtils, method bufferComparatorWithAggregators.
public static Grouper.BufferComparator bufferComparatorWithAggregators(
    AggregatorFactory[] aggregatorFactories,
    int[] aggregatorOffsets,
    DefaultLimitSpec limitSpec,
    List<DimensionSpec> dimensions,
    Grouper.BufferComparator[] dimComparators,
    boolean includeTimestamp,
    boolean sortByDimsFirst,
    int keySize
) {
  int dimCount = dimensions.size();
  final List<Boolean> needsReverses = new ArrayList<>();
  List<Grouper.BufferComparator> comparators = new ArrayList<>();
  Set<Integer> orderByIndices = new HashSet<>();
  int aggCount = 0;
  boolean needsReverse;
  for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
    needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
    int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
    if (dimIndex >= 0) {
      comparators.add(dimComparators[dimIndex]);
      orderByIndices.add(dimIndex);
      needsReverses.add(needsReverse);
    } else {
      int aggIndex = OrderByColumnSpec.getAggIndexForOrderBy(orderSpec, Arrays.asList(aggregatorFactories));
      if (aggIndex >= 0) {
        final StringComparator stringComparator = orderSpec.getDimensionComparator();
        final ColumnType valueType = aggregatorFactories[aggIndex].getIntermediateType();
        // Aggregators start after dimensions.
        final int aggOffset = keySize + aggregatorOffsets[aggIndex];
        aggCount++;
        if (!valueType.isNumeric()) {
          throw new IAE("Cannot order by a non-numeric aggregator[%s]", orderSpec);
        }
        comparators.add(
            makeNullHandlingBufferComparatorForNumericData(
                aggOffset,
                makeNumericBufferComparator(valueType, aggOffset, true, stringComparator)
            )
        );
        needsReverses.add(needsReverse);
      }
    }
  }
  for (int i = 0; i < dimCount; i++) {
    if (!orderByIndices.contains(i)) {
      comparators.add(dimComparators[i]);
      // Default to ascending order if the dimension is not in an order-by spec.
      needsReverses.add(false);
    }
  }
  final Grouper.BufferComparator[] adjustedSerdeHelperComparators = comparators.toArray(new Grouper.BufferComparator[0]);
  final int fieldCount = dimCount + aggCount;
  if (includeTimestamp) {
    if (sortByDimsFirst) {
      return new Grouper.BufferComparator() {
        @Override
        public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
          final int cmp = compareDimsInBuffersForNullFudgeTimestampForPushDown(
              adjustedSerdeHelperComparators, needsReverses, fieldCount, lhsBuffer, rhsBuffer, lhsPosition, rhsPosition
          );
          if (cmp != 0) {
            return cmp;
          }
          return Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
        }
      };
    } else {
      return new Grouper.BufferComparator() {
        @Override
        public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
          final int timeCompare = Longs.compare(lhsBuffer.getLong(lhsPosition), rhsBuffer.getLong(rhsPosition));
          if (timeCompare != 0) {
            return timeCompare;
          }
          return compareDimsInBuffersForNullFudgeTimestampForPushDown(
              adjustedSerdeHelperComparators, needsReverses, fieldCount, lhsBuffer, rhsBuffer, lhsPosition, rhsPosition
          );
        }
      };
    }
  } else {
    return new Grouper.BufferComparator() {
      @Override
      public int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition) {
        for (int i = 0; i < fieldCount; i++) {
          // Reversed fields compare with the arguments swapped to get descending order.
          final int cmp = needsReverses.get(i)
                          ? adjustedSerdeHelperComparators[i].compare(rhsBuffer, lhsBuffer, rhsPosition, lhsPosition)
                          : adjustedSerdeHelperComparators[i].compare(lhsBuffer, rhsBuffer, lhsPosition, rhsPosition);
          if (cmp != 0) {
            return cmp;
          }
        }
        return 0;
      }
    };
  }
}
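The reversal trick in the last comparator (swapping lhs and rhs for descending fields) is easy to see in isolation. A minimal, self-contained sketch with two fixed-width long fields per row, the first descending and the second ascending (BufferComparatorSketch and its nested interface are hypothetical stand-ins for Druid's Grouper.BufferComparator):

import java.nio.ByteBuffer;

public class BufferComparatorSketch {
  interface BufferComparator {
    int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition);
  }

  public static void main(String[] args) {
    // Each "row" is two longs: [fieldA][fieldB], 16 bytes total.
    BufferComparator fieldA = (l, r, lp, rp) -> Long.compare(l.getLong(lp), r.getLong(rp));
    BufferComparator fieldB = (l, r, lp, rp) -> Long.compare(l.getLong(lp + 8), r.getLong(rp + 8));
    BufferComparator[] comparators = {fieldA, fieldB};
    boolean[] needsReverses = {true, false}; // fieldA DESC, fieldB ASC

    ByteBuffer row1 = ByteBuffer.allocate(16).putLong(0, 10L).putLong(8, 2L);
    ByteBuffer row2 = ByteBuffer.allocate(16).putLong(0, 10L).putLong(8, 5L);

    int result = 0;
    for (int i = 0; i < comparators.length && result == 0; i++) {
      result = needsReverses[i]
               ? comparators[i].compare(row2, row1, 0, 0) // swapped arguments reverse the order
               : comparators[i].compare(row1, row2, 0, 0);
    }
    System.out.println(result); // negative: the rows tie on fieldA, then row1 < row2 on fieldB
  }
}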
Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.
From class GroupByBenchmark, method setupQueries.
private void setupQueries() {
  // queries for the basic schema
  Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
  {
    // basic.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new CountAggregatorFactory("cnt"));
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicQueries.put("A", queryA);
  }
  {
    // basic.sorted
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setLimitSpec(
            new DefaultLimitSpec(
                Collections.singletonList(
                    new OrderByColumnSpec("sumLongSequential", OrderByColumnSpec.Direction.DESCENDING, StringComparators.NUMERIC)
                ),
                100
            )
        )
        .build();
    basicQueries.put("sorted", queryA);
  }
  {
    // basic.nested
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery subqueryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularities.DAY)
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource(subqueryA)
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularities.WEEK)
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicQueries.put("nested", queryA);
  }
  {
    // basic.filter
    final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    // Use multiple aggregators to see how the number of aggregators impacts query performance.
    List<AggregatorFactory> queryAggs = ImmutableList.of(
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
        new LongSumAggregatorFactory("rows", "rows"),
        new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
        new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf")
    );
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimUniform", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setDimFilter(new BoundDimFilter("dimUniform", "0", "100", true, true, null, null, null))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicQueries.put("filter", queryA);
  }
  {
    // basic.singleZipf
    final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    // Use multiple aggregators to see how the number of aggregators impacts query performance.
    List<AggregatorFactory> queryAggs = ImmutableList.of(
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
        new LongSumAggregatorFactory("rows", "rows"),
        new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
        new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf")
    );
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicQueries.put("singleZipf", queryA);
  }
  SCHEMA_QUERY_MAP.put("basic", basicQueries);
  // simple one-column schema, for testing the performance difference between querying
  // numeric values as Strings and directly as longs
  Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo simpleSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simple");
  {
    // simple.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.STRING))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    simpleQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simple", simpleQueries);
  Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo simpleLongSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleLong");
  {
    // simpleLong.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleLongSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.LONG))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    simpleLongQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);
  Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo simpleFloatSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleFloat");
  {
    // simpleFloat.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleFloatSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.FLOAT))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    simpleFloatQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
  // schema with nulls, for testing grouping on a string column that contains null values
  Map<String, GroupByQuery> nullQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo nullSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("nulls");
  {
    // simple-null
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(nullSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new DoubleSumAggregatorFactory("doubleSum", "doubleZipf"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("stringZipf", "stringZipf", ColumnType.STRING))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    nullQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("nulls", nullQueries);
}
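The basic.sorted query above passes StringComparators.NUMERIC to its OrderByColumnSpec because the default lexicographic comparator misorders numeric strings. A two-line sketch of the difference, assuming the druid-processing module is on the classpath (ComparatorSketch is an illustrative name):

import org.apache.druid.query.ordering.StringComparators;

public class ComparatorSketch {
  public static void main(String[] args) {
    System.out.println(StringComparators.LEXICOGRAPHIC.compare("9", "10") > 0); // true: "9" sorts after "10"
    System.out.println(StringComparators.NUMERIC.compare("9", "10") < 0);       // true: 9 sorts before 10
  }
}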
Use of org.apache.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.
From class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg.
@Test
public void testGroupByWithDistinctCountAgg() throws Exception {
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(
          new IncrementalIndexSchema.Builder()
              .withQueryGranularity(Granularities.SECOND)
              .withMetrics(new CountAggregatorFactory("cnt"))
              .build()
      )
      .setConcurrentEventAdd(true)
      .setMaxRowCount(1000)
      .build();
  String visitor_id = "visitor_id";
  String client_type = "client_type";
  long timestamp = DateTimes.of("2010-01-01").getMillis();
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "0", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp + 1, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "1", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp + 2, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "2", client_type, "android")));
  GroupByQuery query = new GroupByQuery.Builder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
      .setDimensions(new DefaultDimensionSpec(client_type, client_type))
      .setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .setLimitSpec(
          new DefaultLimitSpec(
              Collections.singletonList(new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)),
              10
          )
      )
      .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new DistinctCountAggregatorFactory("UV", visitor_id, null))
      .build();
  final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, factory.createRunner(incrementalIndexSegment), query);
  List<ResultRow> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L)
  );
  TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}