use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class GroupByQueryHelper method createBySegmentAccumulatorPair.
/**
 * Builds the seed queue and the accumulator used to gather by-segment results.
 *
 * The seed is an empty {@link ConcurrentLinkedQueue}. The accumulator offers each incoming element to the queue
 * it is handed and returns that same queue; it throws an ISE if it is handed a null element.
 *
 * @param <T> type of the elements being accumulated
 * @return pair whose left side is the seed queue and whose right side is the accumulator
 */
public static <T> Pair<Queue, Accumulator<Queue, T>> createBySegmentAccumulatorPair() {
    // A concurrent queue is needed: in the parallel query runner several threads append to it at once.
    final Queue results = new ConcurrentLinkedQueue<>();
    final Accumulator<Queue, T> appender = (queue, element) -> {
        if (element != null) {
            queue.offer(element);
            return queue;
        }
        throw new ISE("Cannot have null result");
    };
    return new Pair<>(results, appender);
}
use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class RowBasedGrouperHelper method createGrouperAccumulatorPair.
/**
 * Create a {@link Grouper} that groups according to the dimensions and aggregators in "query", along with
 * an {@link Accumulator} that accepts ResultRows and forwards them to the grouper.
 *
 * The pair will operate in one of two modes:
 *
 * 1) Combining mode (used if "subquery" is null). In this mode, filters from the "query" are ignored, and
 * its aggregators are converted into combining form. The input ResultRows are assumed to be partially-grouped
 * results originating from the provided "query".
 *
 * 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters from the "query" (both intervals
 * and dim filters) are respected, and its aggregators are used in standard (not combining) form. The input
 * ResultRows are assumed to be results originating from the provided "subquery".
 *
 * The returned accumulator checks for interruption on every row, passes a non-OK prior result straight through
 * without touching the grouper, and initializes the grouper on first use if it has not been initialized yet.
 *
 * @param query               query that we are grouping for
 * @param subquery            optional subquery that we are receiving results from (see combining vs. subquery
 *                            mode above)
 * @param config              groupBy query config
 * @param bufferSupplier      supplier of merge buffers
 * @param combineBufferHolder holder of combine buffers. Unused if concurrencyHint = -1, and may be null in that case
 * @param concurrencyHint     -1 for single-threaded Grouper, >=1 for concurrent Grouper
 * @param temporaryStorage    temporary storage used for spilling from the Grouper
 * @param spillMapper         object mapper used for spilling from the Grouper
 * @param grouperSorter       executor service used for parallel combining. Unused if concurrencyHint = -1, and may
 *                            be null in that case
 * @param priority            query priority
 * @param hasQueryTimeout     whether or not this query has a timeout
 * @param queryTimeoutAt      when this query times out, in milliseconds since the epoch
 * @param mergeBufferSize     size of the merge buffers from "bufferSupplier"
 *
 * @return pair of the grouper (left) and the accumulator that feeds rows into it (right)
 */
public static Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> createGrouperAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier,
    @Nullable final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder,
    final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage,
    final ObjectMapper spillMapper,
    @Nullable final ListeningExecutorService grouperSorter,
    final int priority,
    final boolean hasQueryTimeout,
    final long queryTimeoutAt,
    final int mergeBufferSize
) {
    // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
    Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");
    if (concurrencyHint >= 1) {
        Preconditions.checkNotNull(grouperSorter, "grouperSorter executor must be provided");
    }

    // See method-level javadoc; we go into combining mode if there is no subquery.
    final boolean combining = subquery == null;
    final List<ColumnType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final boolean includeTimestamp = query.getResultRowHasTimestamp();

    // Holds the row currently being aggregated on the calling thread. The column selector factory reads the row
    // through columnSelectorRow::get, so the accumulator below must set it before aggregating.
    final ThreadLocal<ResultRow> columnSelectorRow = new ThreadLocal<>();
    ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory(
        combining ? query : subquery,
        columnSelectorRow::get,
        RowSignature.Finalization.UNKNOWN
    );

    // Apply virtual columns if we are in subquery (non-combining) mode.
    if (!combining) {
        columnSelectorFactory = query.getVirtualColumns().wrap(columnSelectorFactory);
    }

    final boolean willApplyLimitPushDown = query.isApplyLimitPushDown();
    final DefaultLimitSpec limitSpec = willApplyLimitPushDown ? (DefaultLimitSpec) query.getLimitSpec() : null;
    boolean sortHasNonGroupingFields = false;
    if (willApplyLimitPushDown) {
        sortHasNonGroupingFields = DefaultLimitSpec.sortingOrderHasNonGroupingFields(limitSpec, query.getDimensions());
    }

    final AggregatorFactory[] aggregatorFactories;
    if (combining) {
        aggregatorFactories = query.getAggregatorSpecs()
                                   .stream()
                                   .map(AggregatorFactory::getCombiningFactory)
                                   .toArray(AggregatorFactory[]::new);
    } else {
        aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
    }

    // The merging dictionary budget is divided by concurrencyHint (left undivided when single-threaded).
    final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(
        includeTimestamp,
        query.getContextSortByDimsFirst(),
        query.getDimensions(),
        querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint),
        valueTypes,
        aggregatorFactories,
        limitSpec
    );

    final Grouper<RowBasedKey> grouper;
    if (concurrencyHint == -1) {
        grouper = new SpillingGrouper<>(
            bufferSupplier,
            keySerdeFactory,
            columnSelectorFactory,
            aggregatorFactories,
            querySpecificConfig.getBufferGrouperMaxSize(),
            querySpecificConfig.getBufferGrouperMaxLoadFactor(),
            querySpecificConfig.getBufferGrouperInitialBuckets(),
            temporaryStorage,
            spillMapper,
            true,
            limitSpec,
            sortHasNonGroupingFields,
            mergeBufferSize
        );
    } else {
        final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(
            includeTimestamp,
            query.getContextSortByDimsFirst(),
            query.getDimensions(),
            // use entire dictionary space for combining key serde
            querySpecificConfig.getMaxMergingDictionarySize(),
            valueTypes,
            aggregatorFactories,
            limitSpec
        );
        grouper = new ConcurrentGrouper<>(
            querySpecificConfig,
            bufferSupplier,
            combineBufferHolder,
            keySerdeFactory,
            combineKeySerdeFactory,
            columnSelectorFactory,
            aggregatorFactories,
            temporaryStorage,
            spillMapper,
            concurrencyHint,
            limitSpec,
            sortHasNonGroupingFields,
            grouperSorter,
            priority,
            hasQueryTimeout,
            queryTimeoutAt
        );
    }

    // One key slot per dimension, plus one extra slot when the result row carries a timestamp.
    final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
    final ValueExtractFunction valueExtractFn = makeValueExtractFunction(
        query,
        combining,
        includeTimestamp,
        columnSelectorFactory,
        valueTypes
    );

    final Predicate<ResultRow> rowPredicate;
    if (combining) {
        // Filters are not applied in combining mode.
        rowPredicate = row -> true;
    } else {
        rowPredicate = getResultRowPredicate(query, subquery);
    }

    final Accumulator<AggregateResult, ResultRow> accumulator = (priorResult, row) -> {
        BaseQuery.checkInterrupted();

        if (priorResult != null && !priorResult.isOk()) {
            // Pass-through error returns without doing more work.
            return priorResult;
        }

        if (!grouper.isInitialized()) {
            grouper.init();
        }

        if (!rowPredicate.test(row)) {
            // Filtered-out rows are skipped and reported as success.
            return AggregateResult.ok();
        }

        columnSelectorRow.set(row);
        try {
            final Comparable[] key = new Comparable[keySize];
            valueExtractFn.apply(row, key);
            return grouper.aggregate(new RowBasedKey(key));
        } finally {
            // Clear the thread-local even when key extraction or aggregation throws, so a failed row is not
            // left referenced by the calling thread.
            columnSelectorRow.set(null);
        }
    };

    return new Pair<>(grouper, accumulator);
}
use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class GroupByMergingQueryRunnerV2 method run.
/**
 * Runs the query over every runner in "queryables" and merges their rows through one shared {@link Grouper}.
 *
 * By-segment queries, and calls that arrive with CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION already set, are
 * handed to a {@link ChainedExecutionQueryRunner} instead and do not use a merge buffer here. Otherwise the
 * returned {@link BaseSequence} does all of its work inside the IteratorMaker's make(): it acquires temporary
 * storage and merge buffers, submits one aggregation task per runner to the processing pool, waits for those
 * tasks, and then returns an iterator over the grouper's contents.
 *
 * @param queryPlus       query to run; its query must be a {@link GroupByQuery}
 * @param responseContext response context handed through to every underlying runner
 * @return sequence of merged result rows
 */
@Override
public Sequence<ResultRow> run(final QueryPlus<ResultRow> queryPlus, final ResponseContext responseContext) {
final GroupByQuery query = (GroupByQuery) queryPlus.getQuery();
final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
// CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
// (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
// merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
// will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
// I'm not sure of a better way to do this without tweaking how realtime servers do queries.
final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
// The query passed down to the underlying runners always has the flag set to true, so any nested call of this
// method takes the chained-execution branch below.
final QueryPlus<ResultRow> queryPlusForRunners = queryPlus.withQuery(query.withOverriddenContext(ImmutableMap.of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true))).withoutThreadUnsafeState();
if (QueryContexts.isBySegment(query) || forceChainedExecution) {
ChainedExecutionQueryRunner<ResultRow> runner = new ChainedExecutionQueryRunner<>(queryProcessingPool, queryWatcher, queryables);
return runner.run(queryPlusForRunners, responseContext);
}
final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
// Directory name combines a random UUID with the query id.
final File temporaryStorageDirectory = new File(processingTmpDir, StringUtils.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
final int priority = QueryContexts.getPriority(query);
// Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
// query processing together.
final long queryTimeout = QueryContexts.getTimeout(query);
final boolean hasTimeout = QueryContexts.hasTimeout(query);
// Absolute deadline in epoch milliseconds; computed regardless of hasTimeout.
final long timeoutAt = System.currentTimeMillis() + queryTimeout;
return new BaseSequence<>(new BaseSequence.IteratorMaker<ResultRow, CloseableGrouperIterator<RowBasedKey, ResultRow>>() {
@Override
public CloseableGrouperIterator<RowBasedKey, ResultRow> make() {
// Collects everything acquired during setup. It is closed in the catch block below if setup fails, and is
// otherwise handed to the grouper iterator (which presumably closes it when the iterator is closed).
final Closer resources = Closer.create();
try {
final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder = ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
resources.register(temporaryStorageHolder);
// If parallelCombine is enabled, we need two merge buffers for parallel aggregating and parallel combining
final int numMergeBuffers = querySpecificConfig.getNumParallelCombineThreads() > 1 ? 2 : 1;
final List<ReferenceCountingResourceHolder<ByteBuffer>> mergeBufferHolders = getMergeBuffersHolder(numMergeBuffers, hasTimeout, timeoutAt);
resources.registerAll(mergeBufferHolders);
// First buffer is used for aggregation; the second, when present, is the combine buffer.
final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder = mergeBufferHolders.get(0);
final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder = numMergeBuffers == 2 ? mergeBufferHolders.get(1) : null;
// A null subquery is passed here, so the helper is asked for its no-subquery mode.
Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, null, config, Suppliers.ofInstance(mergeBufferHolder.get()), combineBufferHolder, concurrencyHint, temporaryStorage, spillMapper, // Passed as executor service
queryProcessingPool, priority, hasTimeout, timeoutAt, mergeBufferSize);
final Grouper<RowBasedKey> grouper = pair.lhs;
final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;
// Initialize the grouper here, before any aggregation task is submitted.
grouper.init();
final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder = ReferenceCountingResourceHolder.fromCloseable(grouper);
resources.register(grouperHolder);
// Lists.newArrayList walks the transformed iterable eagerly, so apply() below runs (and submits a task)
// once per runner before this statement completes.
List<ListenableFuture<AggregateResult>> futures = Lists.newArrayList(Iterables.transform(queryables, new Function<QueryRunner<ResultRow>, ListenableFuture<AggregateResult>>() {
@Override
public ListenableFuture<AggregateResult> apply(final QueryRunner<ResultRow> input) {
if (input == null) {
throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
}
ListenableFuture<AggregateResult> future = queryProcessingPool.submitRunnerTask(new AbstractPrioritizedQueryRunnerCallable<AggregateResult, ResultRow>(priority, input) {
@Override
public AggregateResult call() {
try (// These variables are used to close releasers automatically.
@SuppressWarnings("unused") Releaser bufferReleaser = mergeBufferHolder.increment();
@SuppressWarnings("unused") Releaser grouperReleaser = grouperHolder.increment()) {
// Feed this runner's rows through the shared accumulator, starting from an OK result, and return
// the AggregateResult that the accumulation ends with.
return input.run(queryPlusForRunners, responseContext).accumulate(AggregateResult.ok(), accumulator);
} catch (QueryInterruptedException | QueryTimeoutException e) {
// Interruption and timeout are rethrown as-is, without the error log used for other failures.
throw e;
} catch (Exception e) {
log.error(e, "Exception with one of the sequences!");
throw new RuntimeException(e);
}
}
});
// In single-threaded mode, wait for this task before the next runner's task is submitted.
if (isSingleThreaded) {
waitForFutureCompletion(query, ImmutableList.of(future), hasTimeout, timeoutAt - System.currentTimeMillis());
}
return future;
}
}));
// Otherwise all tasks were submitted above; wait for them together with the time left until timeoutAt.
if (!isSingleThreaded) {
waitForFutureCompletion(query, futures, hasTimeout, timeoutAt - System.currentTimeMillis());
}
return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, resources);
} catch (Throwable t) {
// Exception caught while setting up the iterator; release resources.
try {
resources.close();
} catch (Exception ex) {
// Keep the original failure as the primary exception; attach the close failure to it.
t.addSuppressed(ex);
}
throw t;
}
}
@Override
public void cleanup(CloseableGrouperIterator<RowBasedKey, ResultRow> iterFromMake) {
iterFromMake.close();
}
});
}
use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class GroupByRowProcessor method process.
/**
 * Process the input sequence "rows" (output by "subquery") based on "query" and return a {@link ResultSupplier}.
 *
 * In addition to grouping using dimensions and metrics, it will also apply filters (both DimFilter and interval
 * filters).
 *
 * The input sequence is processed synchronously with the call to this method, and result iteration happens lazily
 * upon calls to the {@link ResultSupplier}. Make sure to close it when you're done: closing it releases the
 * temporary storage, the merge buffers taken from "resource", and the grouper.
 *
 * If this method fails (including when the grouper reports a non-OK result), everything acquired so far is
 * released before the exception propagates, because in that case no {@link ResultSupplier} is returned for the
 * caller to close.
 *
 * @param query            outer query that determines grouping and filtering
 * @param subquery         query that produced "rows"
 * @param rows             input rows to be grouped
 * @param config           groupBy query config; overrides from "query" are applied to it
 * @param resource         source of merge buffers
 * @param spillMapper      object mapper used for spilling from the grouper
 * @param processingTmpDir parent directory for the temporary storage directory
 * @param mergeBufferSize  size of the merge buffers obtained from "resource"
 *
 * @return supplier of the grouped results, which owns the resources acquired here
 *
 * @throws ResourceLimitExceededException if accumulating "rows" ends with a non-OK {@link AggregateResult}
 */
public static ResultSupplier process(
    final GroupByQuery query,
    final GroupByQuery subquery,
    final Sequence<ResultRow> rows,
    final GroupByQueryConfig config,
    final GroupByQueryResource resource,
    final ObjectMapper spillMapper,
    final String processingTmpDir,
    final int mergeBufferSize
) {
    final Closer closeOnExit = Closer.create();
    try {
        final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);

        // Directory name combines a random UUID with the query id.
        final File temporaryStorageDirectory = new File(
            processingTmpDir,
            StringUtils.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId())
        );
        final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(
            temporaryStorageDirectory,
            querySpecificConfig.getMaxOnDiskStorage()
        );
        closeOnExit.register(temporaryStorage);

        Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(
            query,
            subquery,
            querySpecificConfig,
            new Supplier<ByteBuffer>() {
                @Override
                public ByteBuffer get() {
                    // Each buffer handed out is registered so that it is released with everything else.
                    final ResourceHolder<ByteBuffer> mergeBufferHolder = resource.getMergeBuffer();
                    closeOnExit.register(mergeBufferHolder);
                    return mergeBufferHolder.get();
                }
            },
            temporaryStorage,
            spillMapper,
            mergeBufferSize
        );
        final Grouper<RowBasedKey> grouper = pair.lhs;
        final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;
        closeOnExit.register(grouper);

        // The whole input is consumed here, before this method returns.
        final AggregateResult retVal = rows.accumulate(AggregateResult.ok(), accumulator);
        if (!retVal.isOk()) {
            throw new ResourceLimitExceededException(retVal.getReason());
        }

        return new ResultSupplier() {
            @Override
            public Sequence<ResultRow> results(@Nullable List<DimensionSpec> dimensionsToInclude) {
                return getRowsFromGrouper(query, grouper, dimensionsToInclude);
            }

            @Override
            public void close() throws IOException {
                closeOnExit.close();
            }
        };
    } catch (Throwable t) {
        // No ResultSupplier reaches the caller on this path, so release what was acquired right here.
        try {
            closeOnExit.close();
        } catch (Exception ex) {
            // Keep the original failure as the primary exception; attach the close failure to it.
            t.addSuppressed(ex);
        }
        throw t;
    }
}
use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class SegmentAnalyzer method analyzeStringColumn.
/**
 * Builds the {@link ColumnAnalysis} for a string column.
 *
 * Each statistic is computed only when the matching analyzing*() check is enabled, and is otherwise left at
 * 0 (cardinality, size) or null (min, max):
 * - cardinality comes from {@link StorageAdapter#getDimensionCardinality};
 * - size is the sum of StringUtils.estimatedBinaryLengthAsUTF8 over every non-null, non-empty value of the column,
 *   found by walking cursors spanning the adapter's min time to max time;
 * - min and max come from the adapter's getMinValue / getMaxValue.
 *
 * @param capabilities   capabilities of the column; supply the type, multi-value and null information
 * @param storageAdapter adapter the statistics are read from
 * @param columnName     name of the column to analyze
 * @return the assembled analysis
 */
private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final StorageAdapter storageAdapter, final String columnName) {
    final int cardinality = analyzingCardinality() ? storageAdapter.getDimensionCardinality(columnName) : 0;

    long size = 0;
    if (analyzingSize()) {
        final Interval fullInterval = new Interval(storageAdapter.getMinTime(), storageAdapter.getMaxTime());
        size = storageAdapter
            .makeCursors(null, fullInterval, VirtualColumns.EMPTY, Granularities.ALL, false, null)
            .accumulate(0L, (bytesSoFar, cursor) -> {
                final DimensionSelector selector = cursor
                    .getColumnSelectorFactory()
                    .makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
                if (selector == null) {
                    // Nothing to read from this cursor; carry the running total forward unchanged.
                    return bytesSoFar;
                }

                long total = bytesSoFar;
                for (; !cursor.isDone(); cursor.advance()) {
                    final IndexedInts row = selector.getRow();
                    final int valueCount = row.size();
                    for (int idx = 0; idx < valueCount; ++idx) {
                        final String value = selector.lookupName(row.get(idx));
                        if (value == null || value.isEmpty()) {
                            continue;
                        }
                        total += StringUtils.estimatedBinaryLengthAsUTF8(value);
                    }
                }
                return total;
            });
    }

    Comparable minValue = null;
    Comparable maxValue = null;
    if (analyzingMinMax()) {
        minValue = storageAdapter.getMinValue(columnName);
        maxValue = storageAdapter.getMaxValue(columnName);
    }

    return new ColumnAnalysis(
        capabilities.toColumnType(),
        capabilities.getType().name(),
        capabilities.hasMultipleValues().isTrue(),
        // if we don't know for sure, then we should plan to check for nulls
        capabilities.hasNulls().isMaybeTrue(),
        size,
        cardinality,
        minValue,
        maxValue,
        null
    );
}
Aggregations