use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class IncrementalIndexStorageAdapterTest method testSanity.
@Test
public void testSanity() throws Exception
{
  IncrementalIndex index = indexCreator.createIndex();
  index.add(
      new MapBasedInputRow(
          System.currentTimeMillis() - 1,
          Collections.singletonList("billy"),
          ImmutableMap.of("billy", "hi")
      )
  );
  index.add(
      new MapBasedInputRow(
          System.currentTimeMillis() - 1,
          Collections.singletonList("sally"),
          ImmutableMap.of("sally", "bo")
      )
  );

  try (
      CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>(
          "GroupByQueryEngine-bufferPool",
          () -> ByteBuffer.allocate(50000)
      )
  ) {
    final GroupByQueryEngine engine = new GroupByQueryEngine(
        Suppliers.ofInstance(
            new GroupByQueryConfig()
            {
              @Override
              public int getMaxIntermediateRows()
              {
                return 5;
              }
            }
        ),
        pool
    );

    final Sequence<Row> rows = engine.process(
        GroupByQuery.builder()
                    .setDataSource("test")
                    .setGranularity(Granularities.ALL)
                    .setInterval(new Interval(DateTimes.EPOCH, DateTimes.nowUtc()))
                    .addDimension("billy")
                    .addDimension("sally")
                    .addAggregator(new LongSumAggregatorFactory("cnt", "cnt"))
                    .build(),
        new IncrementalIndexStorageAdapter(index)
    );

    final List<Row> results = rows.toList();

    Assert.assertEquals(2, results.size());

    MapBasedRow row = (MapBasedRow) results.get(0);
    Assert.assertEquals(ImmutableMap.of("sally", "bo", "cnt", 1L), row.getEvent());

    row = (MapBasedRow) results.get(1);
    Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L), row.getEvent());
  }
}
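The anonymous GroupByQueryConfig subclass above caps intermediate rows at 5 to exercise the engine's row-limit path. As a hedged sketch, the same limit can typically be lowered per query through the query context instead of subclassing; the "maxIntermediateRows" context key and its can-only-lower capping behavior are assumptions based on GroupByQueryConfig.withOverrides, not shown in this snippet.

// Sketch: per-query override of the intermediate-row limit via query context.
// Assumes GroupByQueryConfig.withOverrides honors the "maxIntermediateRows" key
// (context overrides are capped at the configured value, so they can only lower it).
final GroupByQuery queryWithLimit = GroupByQuery.builder()
    .setDataSource("test")
    .setGranularity(Granularities.ALL)
    .setInterval(new Interval(DateTimes.EPOCH, DateTimes.nowUtc()))
    .addDimension("billy")
    .addAggregator(new LongSumAggregatorFactory("cnt", "cnt"))
    .setContext(ImmutableMap.of("maxIntermediateRows", 5))
    .build();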
use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class VectorizedVirtualColumnTest method setup.
@Before
public void setup()
{
  groupByTestHelper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
      Collections.emptyList(),
      new GroupByQueryConfig(),
      tmpFolder
  );
  timeseriesTestHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper(
      Collections.emptyList(),
      tmpFolder
  );
  QueryableIndexSegment queryableIndexSegment = new QueryableIndexSegment(
      TestIndex.getMMappedTestIndex(),
      SegmentId.dummy(QueryRunnerTestHelper.DATA_SOURCE)
  );
  segments = Lists.newArrayList(queryableIndexSegment, queryableIndexSegment);
}
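A test method then feeds the duplicated segment list through one of the helpers. A hedged sketch follows; the runQueryOnSegmentsObjs call, the "market" dimension, and the QueryRunnerTestHelper constants are assumptions about the standard Druid test fixtures rather than part of this snippet.

// Sketch: run a groupBy query over both copies of the test segment and
// materialize the results. Assumes AggregationTestHelper.runQueryOnSegmentsObjs
// and the standard test-index schema.
GroupByQuery query = GroupByQuery.builder()
    .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
    .setGranularity(Granularities.ALL)
    .setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
    .addDimension("market")                          // a dimension of the test index (assumed)
    .addAggregator(QueryRunnerTestHelper.ROWS_COUNT) // count aggregator from the test helpers
    .build();
Sequence<ResultRow> results = groupByTestHelper.runQueryOnSegmentsObjs(segments, query);
List<ResultRow> resultList = results.toList();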
use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class QueryStackTests method createQueryRunnerFactoryConglomerate.
public static QueryRunnerFactoryConglomerate createQueryRunnerFactoryConglomerate(
    final Closer closer,
    final DruidProcessingConfig processingConfig,
    final Supplier<Integer> minTopNThresholdSupplier
)
{
  final CloseableStupidPool<ByteBuffer> stupidPool = new CloseableStupidPool<>(
      "TopNQueryRunnerFactory-bufferPool",
      () -> ByteBuffer.allocate(COMPUTE_BUFFER_SIZE)
  );
  closer.register(stupidPool);

  final Pair<GroupByQueryRunnerFactory, Closer> factoryCloserPair = GroupByQueryRunnerTest.makeQueryRunnerFactory(
      GroupByQueryRunnerTest.DEFAULT_MAPPER,
      new GroupByQueryConfig()
      {
        @Override
        public String getDefaultStrategy()
        {
          return GroupByStrategySelector.STRATEGY_V2;
        }
      },
      processingConfig
  );
  final GroupByQueryRunnerFactory groupByQueryRunnerFactory = factoryCloserPair.lhs;
  closer.register(factoryCloserPair.rhs);

  final QueryRunnerFactoryConglomerate conglomerate = new DefaultQueryRunnerFactoryConglomerate(
      ImmutableMap.<Class<? extends Query>, QueryRunnerFactory>builder()
          .put(
              SegmentMetadataQuery.class,
              new SegmentMetadataQueryRunnerFactory(
                  new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig("P1W")),
                  QueryRunnerTestHelper.NOOP_QUERYWATCHER
              )
          )
          .put(
              ScanQuery.class,
              new ScanQueryRunnerFactory(
                  new ScanQueryQueryToolChest(new ScanQueryConfig(), new DefaultGenericQueryMetricsFactory()),
                  new ScanQueryEngine(),
                  new ScanQueryConfig()
              )
          )
          .put(
              TimeseriesQuery.class,
              new TimeseriesQueryRunnerFactory(
                  new TimeseriesQueryQueryToolChest(),
                  new TimeseriesQueryEngine(),
                  QueryRunnerTestHelper.NOOP_QUERYWATCHER
              )
          )
          .put(
              TopNQuery.class,
              new TopNQueryRunnerFactory(
                  stupidPool,
                  new TopNQueryQueryToolChest(
                      new TopNQueryConfig()
                      {
                        @Override
                        public int getMinTopNThreshold()
                        {
                          return minTopNThresholdSupplier.get();
                        }
                      }
                  ),
                  QueryRunnerTestHelper.NOOP_QUERYWATCHER
              )
          )
          .put(GroupByQuery.class, groupByQueryRunnerFactory)
          .build()
  );

  return conglomerate;
}
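Since every pool and factory is registered on the caller's Closer, the caller owns the shutdown. A hedged sketch of typical use in a test (the processingConfig value and the TopNQueryConfig.DEFAULT_MIN_TOPN_THRESHOLD constant are assumed; the enclosing test method is assumed to declare throws IOException for closer.close()):

// Sketch: typical lifecycle in a test harness. The Closer owns the compute
// buffer pool and the groupBy factory resources registered inside the method.
final Closer closer = Closer.create();
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(
    closer,
    processingConfig,                                  // whatever DruidProcessingConfig the test supplies
    () -> TopNQueryConfig.DEFAULT_MIN_TOPN_THRESHOLD   // assumed default threshold constant
);
// ... resolve a factory per query type with conglomerate.findFactory(query) and run queries ...
closer.close(); // frees the compute/merge buffers when the test stack shuts down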
use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class RowBasedGrouperHelper method createGrouperAccumulatorPair.
/**
* Create a {@link Grouper} that groups according to the dimensions and aggregators in "query", along with
* an {@link Accumulator} that accepts ResultRows and forwards them to the grouper.
*
* The pair will operate in one of two modes:
*
* 1) Combining mode (used if "subquery" is null). In this mode, filters from the "query" are ignored, and
* its aggregators are converted into combining form. The input ResultRows are assumed to be partially-grouped
* results originating from the provided "query".
*
* 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters from the "query" (both intervals
* and dim filters) are respected, and its aggregators are used in standard (not combining) form. The input
* ResultRows are assumed to be results originating from the provided "subquery".
*
* @param query               query that we are grouping for
* @param subquery            optional subquery that we are receiving results from (see combining vs. subquery
*                            mode above)
* @param config              groupBy query config
* @param bufferSupplier      supplier of merge buffers
* @param combineBufferHolder holder of combine buffers. Unused if concurrencyHint = -1, and may be null in that case
* @param concurrencyHint     -1 for single-threaded Grouper, >= 1 for concurrent Grouper
* @param temporaryStorage    temporary storage used for spilling from the Grouper
* @param spillMapper         object mapper used for spilling from the Grouper
* @param grouperSorter       executor service used for parallel combining. Unused if concurrencyHint = -1, and may
*                            be null in that case
* @param priority            query priority
* @param hasQueryTimeout     whether or not this query has a timeout
* @param queryTimeoutAt      when this query times out, in milliseconds since the epoch
* @param mergeBufferSize     size of the merge buffers from "bufferSupplier"
*/
public static Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> createGrouperAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier,
    @Nullable final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder,
    final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage,
    final ObjectMapper spillMapper,
    @Nullable final ListeningExecutorService grouperSorter,
    final int priority,
    final boolean hasQueryTimeout,
    final long queryTimeoutAt,
    final int mergeBufferSize
)
{
  // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
  Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");

  if (concurrencyHint >= 1) {
    Preconditions.checkNotNull(grouperSorter, "grouperSorter executor must be provided");
  }

  // See method-level javadoc; we go into combining mode if there is no subquery.
  final boolean combining = subquery == null;

  final List<ColumnType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final boolean includeTimestamp = query.getResultRowHasTimestamp();

  final ThreadLocal<ResultRow> columnSelectorRow = new ThreadLocal<>();

  ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory(
      combining ? query : subquery,
      columnSelectorRow::get,
      RowSignature.Finalization.UNKNOWN
  );

  // Apply virtual columns if we are in subquery (non-combining) mode.
  if (!combining) {
    columnSelectorFactory = query.getVirtualColumns().wrap(columnSelectorFactory);
  }

  final boolean willApplyLimitPushDown = query.isApplyLimitPushDown();
  final DefaultLimitSpec limitSpec = willApplyLimitPushDown ? (DefaultLimitSpec) query.getLimitSpec() : null;
  boolean sortHasNonGroupingFields = false;
  if (willApplyLimitPushDown) {
    sortHasNonGroupingFields = DefaultLimitSpec.sortingOrderHasNonGroupingFields(limitSpec, query.getDimensions());
  }

  final AggregatorFactory[] aggregatorFactories;
  if (combining) {
    aggregatorFactories = query.getAggregatorSpecs()
                               .stream()
                               .map(AggregatorFactory::getCombiningFactory)
                               .toArray(AggregatorFactory[]::new);
  } else {
    aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
  }

  final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(
      includeTimestamp,
      query.getContextSortByDimsFirst(),
      query.getDimensions(),
      querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint),
      valueTypes,
      aggregatorFactories,
      limitSpec
  );

  final Grouper<RowBasedKey> grouper;
  if (concurrencyHint == -1) {
    grouper = new SpillingGrouper<>(
        bufferSupplier,
        keySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(),
        querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(),
        temporaryStorage,
        spillMapper,
        true,
        limitSpec,
        sortHasNonGroupingFields,
        mergeBufferSize
    );
  } else {
    final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(
        includeTimestamp,
        query.getContextSortByDimsFirst(),
        query.getDimensions(),
        querySpecificConfig.getMaxMergingDictionarySize(), // use entire dictionary space for combining key serde
        valueTypes,
        aggregatorFactories,
        limitSpec
    );
    grouper = new ConcurrentGrouper<>(
        querySpecificConfig,
        bufferSupplier,
        combineBufferHolder,
        keySerdeFactory,
        combineKeySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        temporaryStorage,
        spillMapper,
        concurrencyHint,
        limitSpec,
        sortHasNonGroupingFields,
        grouperSorter,
        priority,
        hasQueryTimeout,
        queryTimeoutAt
    );
  }

  final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
  final ValueExtractFunction valueExtractFn = makeValueExtractFunction(
      query,
      combining,
      includeTimestamp,
      columnSelectorFactory,
      valueTypes
  );

  final Predicate<ResultRow> rowPredicate;
  if (combining) {
    // Filters are not applied in combining mode.
    rowPredicate = row -> true;
  } else {
    rowPredicate = getResultRowPredicate(query, subquery);
  }

  final Accumulator<AggregateResult, ResultRow> accumulator = (priorResult, row) -> {
    BaseQuery.checkInterrupted();

    if (priorResult != null && !priorResult.isOk()) {
      // Pass-through error returns without doing more work.
      return priorResult;
    }

    if (!grouper.isInitialized()) {
      grouper.init();
    }

    if (!rowPredicate.test(row)) {
      return AggregateResult.ok();
    }

    columnSelectorRow.set(row);

    final Comparable[] key = new Comparable[keySize];
    valueExtractFn.apply(row, key);

    final AggregateResult aggregateResult = grouper.aggregate(new RowBasedKey(key));
    columnSelectorRow.set(null);

    return aggregateResult;
  };

  return new Pair<>(grouper, accumulator);
}
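To make the combining mode concrete, here is a hedged sketch of a single-threaded, combining-mode call; names like mergeBuffer, temporaryStorage, spillMapper, priority, hasTimeout, timeoutAt, and mergeBufferSize are placeholders for real resources (GroupByMergingQueryRunnerV2.run below is an actual call site).

// Sketch: combining mode, single-threaded. subquery == null selects combining
// mode; concurrencyHint == -1 selects a SpillingGrouper, so per the javadoc the
// combine buffer holder and grouperSorter may be null. Other arguments are
// placeholders assumed to be in scope.
Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair =
    RowBasedGrouperHelper.createGrouperAccumulatorPair(
        query,
        null,             // no subquery -> combining mode
        config,
        Suppliers.ofInstance(mergeBuffer),
        null,             // combine buffer unused when concurrencyHint == -1
        -1,               // single-threaded
        temporaryStorage,
        spillMapper,
        null,             // grouperSorter unused when concurrencyHint == -1
        priority,
        hasTimeout,
        timeoutAt,
        mergeBufferSize
    );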
use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class GroupByMergingQueryRunnerV2 method run.
@Override
public Sequence<ResultRow> run(final QueryPlus<ResultRow> queryPlus, final ResponseContext responseContext)
{
  final GroupByQuery query = (GroupByQuery) queryPlus.getQuery();
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);

  // CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
  // (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
  // merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
  // will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
  // I'm not sure of a better way to do this without tweaking how realtime servers do queries.
  final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
  final QueryPlus<ResultRow> queryPlusForRunners = queryPlus
      .withQuery(query.withOverriddenContext(ImmutableMap.of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true)))
      .withoutThreadUnsafeState();

  if (QueryContexts.isBySegment(query) || forceChainedExecution) {
    ChainedExecutionQueryRunner<ResultRow> runner =
        new ChainedExecutionQueryRunner<>(queryProcessingPool, queryWatcher, queryables);
    return runner.run(queryPlusForRunners, responseContext);
  }

  final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();

  final File temporaryStorageDirectory = new File(
      processingTmpDir,
      StringUtils.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId())
  );

  final int priority = QueryContexts.getPriority(query);

  // Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
  // query processing together.
  final long queryTimeout = QueryContexts.getTimeout(query);
  final boolean hasTimeout = QueryContexts.hasTimeout(query);
  final long timeoutAt = System.currentTimeMillis() + queryTimeout;

  return new BaseSequence<>(
      new BaseSequence.IteratorMaker<ResultRow, CloseableGrouperIterator<RowBasedKey, ResultRow>>()
      {
        @Override
        public CloseableGrouperIterator<RowBasedKey, ResultRow> make()
        {
          final Closer resources = Closer.create();

          try {
            final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(
                temporaryStorageDirectory,
                querySpecificConfig.getMaxOnDiskStorage()
            );
            final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder =
                ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
            resources.register(temporaryStorageHolder);

            // If parallelCombine is enabled, we need two merge buffers for parallel aggregating and parallel combining
            final int numMergeBuffers = querySpecificConfig.getNumParallelCombineThreads() > 1 ? 2 : 1;

            final List<ReferenceCountingResourceHolder<ByteBuffer>> mergeBufferHolders =
                getMergeBuffersHolder(numMergeBuffers, hasTimeout, timeoutAt);
            resources.registerAll(mergeBufferHolders);

            final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder = mergeBufferHolders.get(0);
            final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder =
                numMergeBuffers == 2 ? mergeBufferHolders.get(1) : null;

            Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair =
                RowBasedGrouperHelper.createGrouperAccumulatorPair(
                    query,
                    null,
                    config,
                    Suppliers.ofInstance(mergeBufferHolder.get()),
                    combineBufferHolder,
                    concurrencyHint,
                    temporaryStorage,
                    spillMapper,
                    queryProcessingPool, // Passed as executor service
                    priority,
                    hasTimeout,
                    timeoutAt,
                    mergeBufferSize
                );
            final Grouper<RowBasedKey> grouper = pair.lhs;
            final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;
            grouper.init();

            final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder =
                ReferenceCountingResourceHolder.fromCloseable(grouper);
            resources.register(grouperHolder);

            List<ListenableFuture<AggregateResult>> futures = Lists.newArrayList(
                Iterables.transform(
                    queryables,
                    new Function<QueryRunner<ResultRow>, ListenableFuture<AggregateResult>>()
                    {
                      @Override
                      public ListenableFuture<AggregateResult> apply(final QueryRunner<ResultRow> input)
                      {
                        if (input == null) {
                          throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                        }

                        ListenableFuture<AggregateResult> future = queryProcessingPool.submitRunnerTask(
                            new AbstractPrioritizedQueryRunnerCallable<AggregateResult, ResultRow>(priority, input)
                            {
                              @Override
                              public AggregateResult call()
                              {
                                try (
                                    // These variables are used to close releasers automatically.
                                    @SuppressWarnings("unused")
                                    Releaser bufferReleaser = mergeBufferHolder.increment();
                                    @SuppressWarnings("unused")
                                    Releaser grouperReleaser = grouperHolder.increment()
                                ) {
                                  // Return true if OK, false if resources were exhausted.
                                  return input.run(queryPlusForRunners, responseContext)
                                              .accumulate(AggregateResult.ok(), accumulator);
                                }
                                catch (QueryInterruptedException | QueryTimeoutException e) {
                                  throw e;
                                }
                                catch (Exception e) {
                                  log.error(e, "Exception with one of the sequences!");
                                  throw new RuntimeException(e);
                                }
                              }
                            }
                        );

                        if (isSingleThreaded) {
                          waitForFutureCompletion(
                              query,
                              ImmutableList.of(future),
                              hasTimeout,
                              timeoutAt - System.currentTimeMillis()
                          );
                        }

                        return future;
                      }
                    }
                )
            );

            if (!isSingleThreaded) {
              waitForFutureCompletion(query, futures, hasTimeout, timeoutAt - System.currentTimeMillis());
            }

            return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, resources);
          }
          catch (Throwable t) {
            // Exception caught while setting up the iterator; release resources.
            try {
              resources.close();
            }
            catch (Exception ex) {
              t.addSuppressed(ex);
            }
            throw t;
          }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, ResultRow> iterFromMake)
        {
          iterFromMake.close();
        }
      }
  );
}
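The nested-merge guard described in the comment above works purely through the query context. A hedged sketch of that marking step in isolation (CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is the class's own constant used in run(); its literal string value is internal and not shown here):

// Sketch: what run() does before fanning out to per-sink runners. Marking the
// inner query with the chained-execution flag makes nested mergeRunners calls
// take the ChainedExecutionQueryRunner branch instead of claiming another
// merge buffer.
final GroupByQuery inner = (GroupByQuery) query.withOverriddenContext(
    ImmutableMap.of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true)
);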