Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.
From the class MultiValuedDimensionTest, method testGroupByExpressionMultiConflicting.
@Test
public void testGroupByExpressionMultiConflicting() {
  expectedException.expect(RuntimeException.class);
  expectedException.expectMessage(
      "Invalid expression: (concat [(map ([x] -> (concat [x, othertags])), [tags]), tags]); [tags] used as both scalar and array variables"
  );
  GroupByQuery query = GroupByQuery
      .builder()
      .setDataSource("xx")
      .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
      .setGranularity(Granularities.ALL)
      .setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
      .setVirtualColumns(
          new ExpressionVirtualColumn(
              "texpr",
              "concat(map((x) -> concat(x, othertags), tags), tags)",
              ColumnType.STRING,
              TestExprMacroTable.INSTANCE
          )
      )
      .setLimit(5)
      .setAggregatorSpecs(new CountAggregatorFactory("count"))
      .setContext(context)
      .build();
  helper.runQueryOnSegmentsObjs(
      ImmutableList.of(
          new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
          new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
      ),
      query
  ).toList();
}
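The failure above comes from referencing tags both as the array input to map and as a scalar argument to the outer concat. For contrast, here is a minimal sketch of a variant that uses tags only as an array and should group successfully, assuming the same test fixtures (helper, queryableIndex, incrementalIndex, context). This variant is illustrative, not taken from the source:

// Illustrative only: "tags" appears solely as the array input to map,
// so the expression no longer mixes scalar and array usage.
GroupByQuery validQuery = GroupByQuery
    .builder()
    .setDataSource("xx")
    .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
    .setGranularity(Granularities.ALL)
    .setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
    .setVirtualColumns(
        new ExpressionVirtualColumn(
            "texpr",
            "map((x) -> concat(x, 'foo'), tags)",
            ColumnType.STRING,
            TestExprMacroTable.INSTANCE
        )
    )
    .setLimit(5)
    .setAggregatorSpecs(new CountAggregatorFactory("count"))
    .setContext(context)
    .build();
helper.runQueryOnSegmentsObjs(
    ImmutableList.of(
        new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
        new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
    ),
    validQuery
).toList();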
Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.
From the class VarianceGroupByQueryTest, method testGroupByVarianceOnly.
@Test
public void testGroupByVarianceOnly() {
  GroupByQuery query = queryBuilder
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .setDimensions(new DefaultDimensionSpec("quality", "alias"))
      .setAggregatorSpecs(VarianceTestHelper.INDEX_VARIANCE_AGGR)
      .setPostAggregatorSpecs(Collections.singletonList(VarianceTestHelper.STD_DEV_OF_INDEX_POST_AGGR))
      .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
      .build();
  VarianceTestHelper.RowBuilder builder =
      new VarianceTestHelper.RowBuilder(new String[] { "alias", "index_stddev", "index_var" });
  List<ResultRow> expectedResults = builder
      .add("2011-04-01", "automotive", 0d, 0d)
      .add("2011-04-01", "business", 0d, 0d)
      .add("2011-04-01", "entertainment", 0d, 0d)
      .add("2011-04-01", "health", 0d, 0d)
      .add("2011-04-01", "mezzanine", 737.0179286322613d, 543195.4271253889d)
      .add("2011-04-01", "news", 0d, 0d)
      .add("2011-04-01", "premium", 726.6322593583996d, 527994.4403402924d)
      .add("2011-04-01", "technology", 0d, 0d)
      .add("2011-04-01", "travel", 0d, 0d)
      .add("2011-04-02", "automotive", 0d, 0d)
      .add("2011-04-02", "business", 0d, 0d)
      .add("2011-04-02", "entertainment", 0d, 0d)
      .add("2011-04-02", "health", 0d, 0d)
      .add("2011-04-02", "mezzanine", 611.3420766546617d, 373739.13468843425d)
      .add("2011-04-02", "news", 0d, 0d)
      .add("2011-04-02", "premium", 621.3898134843073d, 386125.30030206224d)
      .add("2011-04-02", "technology", 0d, 0d)
      .add("2011-04-02", "travel", 0d, 0d)
      .build(query);
  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, results, "variance");
}
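The index_stddev post-aggregator is the square root of the index_var aggregate, so the nonzero expected rows can be sanity-checked directly. A standalone sketch of that check for the 2011-04-01 mezzanine row, using JUnit's Assert as in the surrounding tests (not part of the test itself):

// Variance and stddev from the expected mezzanine row above.
double variance = 543195.4271253889d;
double stddev = 737.0179286322613d;
// The relationship stddev == sqrt(variance) should hold to double precision.
Assert.assertEquals(stddev, Math.sqrt(variance), 1e-9);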
Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.
From the class VarianceGroupByQueryTest, method testGroupBy.
@Test
public void testGroupBy() {
  GroupByQuery query = queryBuilder
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .setDimensions(new DefaultDimensionSpec("quality", "alias"))
      .setAggregatorSpecs(
          QueryRunnerTestHelper.ROWS_COUNT,
          VarianceTestHelper.INDEX_VARIANCE_AGGR,
          new LongSumAggregatorFactory("idx", "index")
      )
      .setPostAggregatorSpecs(Collections.singletonList(VarianceTestHelper.STD_DEV_OF_INDEX_POST_AGGR))
      .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
      .build();
  VarianceTestHelper.RowBuilder builder =
      new VarianceTestHelper.RowBuilder(new String[] { "alias", "rows", "idx", "index_stddev", "index_var" });
  List<ResultRow> expectedResults = builder
      .add("2011-04-01", "automotive", 1L, 135L, 0d, 0d)
      .add("2011-04-01", "business", 1L, 118L, 0d, 0d)
      .add("2011-04-01", "entertainment", 1L, 158L, 0d, 0d)
      .add("2011-04-01", "health", 1L, 120L, 0d, 0d)
      .add("2011-04-01", "mezzanine", 3L, 2870L, 737.0179286322613d, 543195.4271253889d)
      .add("2011-04-01", "news", 1L, 121L, 0d, 0d)
      .add("2011-04-01", "premium", 3L, 2900L, 726.6322593583996d, 527994.4403402924d)
      .add("2011-04-01", "technology", 1L, 78L, 0d, 0d)
      .add("2011-04-01", "travel", 1L, 119L, 0d, 0d)
      .add("2011-04-02", "automotive", 1L, 147L, 0d, 0d)
      .add("2011-04-02", "business", 1L, 112L, 0d, 0d)
      .add("2011-04-02", "entertainment", 1L, 166L, 0d, 0d)
      .add("2011-04-02", "health", 1L, 113L, 0d, 0d)
      .add("2011-04-02", "mezzanine", 3L, 2447L, 611.3420766546617d, 373739.13468843425d)
      .add("2011-04-02", "news", 1L, 114L, 0d, 0d)
      .add("2011-04-02", "premium", 3L, 2505L, 621.3898134843073d, 386125.30030206224d)
      .add("2011-04-02", "technology", 1L, 97L, 0d, 0d)
      .add("2011-04-02", "travel", 1L, 126L, 0d, 0d)
      .build(query);
  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, results, "groupBy");
}
Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.
From the class VarianceGroupByQueryTest, method testGroupByTestPvalueZscorePostAgg.
@Test
public void testGroupByTestPvalueZscorePostAgg() {
  // test postaggs from 'teststats' package in here since we've already gone to the trouble of setting up the test
  GroupByQuery query = queryBuilder
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .setDimensions(new DefaultDimensionSpec("quality", "alias"))
      .setAggregatorSpecs(
          QueryRunnerTestHelper.ROWS_COUNT,
          VarianceTestHelper.INDEX_VARIANCE_AGGR,
          new LongSumAggregatorFactory("idx", "index")
      )
      .setPostAggregatorSpecs(
          ImmutableList.of(
              VarianceTestHelper.STD_DEV_OF_INDEX_POST_AGGR,
              // nonsensical inputs
              new PvaluefromZscorePostAggregator("pvalueZscore", new FieldAccessPostAggregator("f1", "index_stddev"))
          )
      )
      .setLimitSpec(new DefaultLimitSpec(OrderByColumnSpec.descending("pvalueZscore"), 1))
      .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
      .build();
  VarianceTestHelper.RowBuilder builder = new VarianceTestHelper.RowBuilder(
      new String[] { "alias", "rows", "idx", "index_stddev", "index_var", "pvalueZscore" }
  );
  List<ResultRow> expectedResults = builder
      .add("2011-04-01", "automotive", 1L, 135.0, 0.0, 0.0, 1.0)
      .build(query);
  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, results, "groupBy");
}
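The expected pvalueZscore of 1.0 follows from the two-sided p-value formula p = 2 * (1 - Phi(|z|)): the post-aggregator is fed automotive's index_stddev of 0.0 as the z-score, and Phi(0) = 0.5. A standalone sketch of that arithmetic, using Apache Commons Math for the standard normal CDF (an assumption for illustration; the actual post-aggregator's internals may differ):

import org.apache.commons.math3.distribution.NormalDistribution;

// Two-sided p-value from a z-score: p = 2 * (1 - Phi(|z|)).
double z = 0.0; // automotive's index_stddev, used as the (nonsensical) z-score input
double p = 2 * (1 - new NormalDistribution().cumulativeProbability(Math.abs(z)));
// Phi(0) = 0.5, so p = 2 * 0.5 = 1.0, matching the expected pvalueZscore column.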
Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.
From the class RowBasedGrouperHelper, method createGrouperAccumulatorPair.
/**
* Create a {@link Grouper} that groups according to the dimensions and aggregators in "query", along with
* an {@link Accumulator} that accepts ResultRows and forwards them to the grouper.
*
* The pair will operate in one of two modes:
*
* 1) Combining mode (used if "subquery" is null). In this mode, filters from the "query" are ignored, and
* its aggregators are converted into combining form. The input ResultRows are assumed to be partially-grouped
* results originating from the provided "query".
*
* 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters from the "query" (both intervals
* and dim filters) are respected, and its aggregators are used in standard (not combining) form. The input
* ResultRows are assumed to be results originating from the provided "subquery".
*
 * @param query               query that we are grouping for
 * @param subquery            optional subquery that we are receiving results from (see combining vs. subquery
 *                            mode above)
 * @param config              groupBy query config
 * @param bufferSupplier      supplier of merge buffers
 * @param combineBufferHolder holder of combine buffers. Unused if concurrencyHint = -1, and may be null in
 *                            that case
 * @param concurrencyHint     -1 for single-threaded Grouper, >=1 for concurrent Grouper
 * @param temporaryStorage    temporary storage used for spilling from the Grouper
 * @param spillMapper         object mapper used for spilling from the Grouper
 * @param grouperSorter       executor service used for parallel combining. Unused if concurrencyHint = -1,
 *                            and may be null in that case
 * @param priority            query priority
 * @param hasQueryTimeout     whether this query has a timeout
 * @param queryTimeoutAt      when this query times out, in milliseconds since the epoch
 * @param mergeBufferSize     size of the merge buffers from "bufferSupplier"
 */
public static Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> createGrouperAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier,
    @Nullable final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder,
    final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage,
    final ObjectMapper spillMapper,
    @Nullable final ListeningExecutorService grouperSorter,
    final int priority,
    final boolean hasQueryTimeout,
    final long queryTimeoutAt,
    final int mergeBufferSize
) {
  // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
  Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");
  if (concurrencyHint >= 1) {
    Preconditions.checkNotNull(grouperSorter, "grouperSorter executor must be provided");
  }
  // See method-level javadoc; we go into combining mode if there is no subquery.
  final boolean combining = subquery == null;
  final List<ColumnType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final boolean includeTimestamp = query.getResultRowHasTimestamp();
  final ThreadLocal<ResultRow> columnSelectorRow = new ThreadLocal<>();
  ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory(
      combining ? query : subquery,
      columnSelectorRow::get,
      RowSignature.Finalization.UNKNOWN
  );
  // Apply virtual columns if we are in subquery (non-combining) mode.
  if (!combining) {
    columnSelectorFactory = query.getVirtualColumns().wrap(columnSelectorFactory);
  }
  final boolean willApplyLimitPushDown = query.isApplyLimitPushDown();
  final DefaultLimitSpec limitSpec = willApplyLimitPushDown ? (DefaultLimitSpec) query.getLimitSpec() : null;
  boolean sortHasNonGroupingFields = false;
  if (willApplyLimitPushDown) {
    sortHasNonGroupingFields = DefaultLimitSpec.sortingOrderHasNonGroupingFields(limitSpec, query.getDimensions());
  }
  final AggregatorFactory[] aggregatorFactories;
  if (combining) {
    aggregatorFactories = query.getAggregatorSpecs()
                               .stream()
                               .map(AggregatorFactory::getCombiningFactory)
                               .toArray(AggregatorFactory[]::new);
  } else {
    aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
  }
  final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(
      includeTimestamp,
      query.getContextSortByDimsFirst(),
      query.getDimensions(),
      querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint),
      valueTypes,
      aggregatorFactories,
      limitSpec
  );
  final Grouper<RowBasedKey> grouper;
  if (concurrencyHint == -1) {
    grouper = new SpillingGrouper<>(
        bufferSupplier,
        keySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(),
        querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(),
        temporaryStorage,
        spillMapper,
        true,
        limitSpec,
        sortHasNonGroupingFields,
        mergeBufferSize
    );
  } else {
    final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(
        includeTimestamp,
        query.getContextSortByDimsFirst(),
        query.getDimensions(),
        // use entire dictionary space for combining key serde
        querySpecificConfig.getMaxMergingDictionarySize(),
        valueTypes,
        aggregatorFactories,
        limitSpec
    );
    grouper = new ConcurrentGrouper<>(
        querySpecificConfig,
        bufferSupplier,
        combineBufferHolder,
        keySerdeFactory,
        combineKeySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        temporaryStorage,
        spillMapper,
        concurrencyHint,
        limitSpec,
        sortHasNonGroupingFields,
        grouperSorter,
        priority,
        hasQueryTimeout,
        queryTimeoutAt
    );
  }
  final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
  final ValueExtractFunction valueExtractFn = makeValueExtractFunction(
      query,
      combining,
      includeTimestamp,
      columnSelectorFactory,
      valueTypes
  );
  final Predicate<ResultRow> rowPredicate;
  if (combining) {
    // Filters are not applied in combining mode.
    rowPredicate = row -> true;
  } else {
    rowPredicate = getResultRowPredicate(query, subquery);
  }
  final Accumulator<AggregateResult, ResultRow> accumulator = (priorResult, row) -> {
    BaseQuery.checkInterrupted();
    if (priorResult != null && !priorResult.isOk()) {
      // Pass-through error returns without doing more work.
      return priorResult;
    }
    if (!grouper.isInitialized()) {
      grouper.init();
    }
    if (!rowPredicate.test(row)) {
      return AggregateResult.ok();
    }
    columnSelectorRow.set(row);
    final Comparable[] key = new Comparable[keySize];
    valueExtractFn.apply(row, key);
    final AggregateResult aggregateResult = grouper.aggregate(new RowBasedKey(key));
    columnSelectorRow.set(null);
    return aggregateResult;
  };
  return new Pair<>(grouper, accumulator);
}
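In combining mode, each aggregator is replaced by its combining form, which folds partially aggregated values from the aggregator's own output column instead of reading the raw input column. A minimal illustration with the LongSum aggregator from the tests above (a standalone sketch, not from the source):

// Sums the raw "index" column during the initial aggregation pass.
AggregatorFactory sum = new LongSumAggregatorFactory("idx", "index");
// The combining form sums the already-aggregated "idx" column instead,
// which is what createGrouperAccumulatorPair uses when subquery == null.
AggregatorFactory combining = sum.getCombiningFactory();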