use of io.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class GroupByMergedQueryRunner method run.
/**
 * Runs the group-by query on every underlying runner through {@code exec} and merges what they produce.
 *
 * <p>Each runner is submitted as a prioritized task. In normal mode the task accumulates its rows into one
 * shared {@link IncrementalIndex}; in by-segment mode it accumulates into one shared queue instead. When the
 * query-specific config is single-threaded, {@code waitForFutureCompletion} is invoked for each task right
 * after it is submitted; otherwise it is invoked once on the combined future of all tasks.
 *
 * <p>The merge index is always attached to the returned sequence as baggage, in both modes, so that it is
 * released together with the sequence rather than being dropped on the by-segment path.
 *
 * @param queryParam      the query to run; it is cast to {@link GroupByQuery}
 * @param responseContext context map handed unchanged to every underlying runner
 * @return in by-segment mode, a sequence over the by-segment queue; otherwise a sequence over the rows of the
 *         merge index with post-aggregations applied, ordered according to {@code query.isDescending()}
 * @throws ISE if one of the underlying runners is {@code null}
 */
@Override
public Sequence<T> run(final Query<T> queryParam, final Map<String, Object> responseContext) {
  final GroupByQuery query = (GroupByQuery) queryParam;
  final GroupByQueryConfig querySpecificConfig = configSupplier.get().withOverrides(query);
  final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
  final Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> indexAccumulatorPair =
      GroupByQueryHelper.createIndexAccumulatorPair(query, querySpecificConfig, bufferPool, true);
  final Pair<Queue, Accumulator<Queue, T>> bySegmentAccumulatorPair =
      GroupByQueryHelper.createBySegmentAccumulatorPair();
  final boolean bySegment = BaseQuery.getContextBySegment(query, false);
  final int priority = BaseQuery.getContextPriority(query, 0);

  // Lists.newArrayList forces the lazy transform, so every task is submitted here, in iteration order.
  ListenableFuture<List<Void>> futures = Futures.allAsList(
      Lists.newArrayList(
          Iterables.transform(
              queryables,
              new Function<QueryRunner<T>, ListenableFuture<Void>>() {
                @Override
                public ListenableFuture<Void> apply(final QueryRunner<T> input) {
                  if (input == null) {
                    throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                  }
                  ListenableFuture<Void> future = exec.submit(
                      new AbstractPrioritizedCallable<Void>(priority) {
                        @Override
                        public Void call() throws Exception {
                          try {
                            if (bySegment) {
                              input.run(queryParam, responseContext)
                                   .accumulate(bySegmentAccumulatorPair.lhs, bySegmentAccumulatorPair.rhs);
                            } else {
                              input.run(queryParam, responseContext)
                                   .accumulate(indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);
                            }
                            return null;
                          } catch (QueryInterruptedException e) {
                            // Interruptions are rethrown without the error log used for other failures.
                            throw Throwables.propagate(e);
                          } catch (Exception e) {
                            log.error(e, "Exception with one of the sequences!");
                            throw Throwables.propagate(e);
                          }
                        }
                      }
                  );
                  if (isSingleThreaded) {
                    // Single-threaded mode: handle this task's completion before the next one is submitted.
                    waitForFutureCompletion(query, future, indexAccumulatorPair.lhs);
                  }
                  return future;
                }
              }
          )
      )
  );

  if (!isSingleThreaded) {
    waitForFutureCompletion(query, futures, indexAccumulatorPair.lhs);
  }

  if (bySegment) {
    // The merge index was created above but is not used for by-segment results. Attach it as baggage,
    // exactly like the merged-result path below does, so it is not left unclosed.
    return Sequences.withBaggage(Sequences.simple(bySegmentAccumulatorPair.lhs), indexAccumulatorPair.lhs);
  }

  return Sequences.withBaggage(
      Sequences.simple(
          Iterables.transform(
              indexAccumulatorPair.lhs.iterableWithPostAggregations(null, query.isDescending()),
              new Function<Row, T>() {
                @Override
                public T apply(Row input) {
                  return (T) input;
                }
              }
          )
      ),
      indexAccumulatorPair.lhs
  );
}
use of io.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class RowBasedGrouperHelper method createGrouperAccumulatorPair.
/**
 * Creates a {@link Grouper} together with an {@link Accumulator} that feeds rows into it.
 *
 * <p>If isInputRaw is true, transformations such as timestamp truncation and extraction functions have not
 * been applied to the input rows yet, for example, in a nested query, if an extraction function is being
 * applied in the outer query to a field of the inner query. This method must apply those transformations.
 *
 * <p>The returned accumulator initializes the grouper on first use, passes a {@code null} grouper straight
 * through, and returns {@code null} when {@code Grouper.aggregate} reports that the row could not be
 * aggregated. The thread-local row that backs the column selector factory is cleared after every row,
 * including when aggregation fails or throws.
 *
 * @param query                the group-by query whose dimensions, virtual columns and context are used
 * @param isInputRaw           see above
 * @param rawInputRowSignature column name to type mapping used for the row-based column selector factory
 * @param config               base config; query overrides are applied via {@code withOverrides}
 * @param bufferSupplier       supplier of the buffer handed to the grouper
 * @param concurrencyHint      {@code >= 1} for concurrent groupers, {@code -1} for single-threaded
 * @param temporaryStorage     storage handed to the grouper
 * @param spillMapper          object mapper handed to the grouper
 * @param aggregatorFactories  aggregators the grouper should maintain
 * @return pair of (grouper, accumulator)
 * @throws IllegalArgumentException if {@code concurrencyHint} is neither {@code >= 1} nor {@code -1}
 */
public static Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> createGrouperAccumulatorPair(
    final GroupByQuery query,
    final boolean isInputRaw,
    final Map<String, ValueType> rawInputRowSignature,
    final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier,
    final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage,
    final ObjectMapper spillMapper,
    final AggregatorFactory[] aggregatorFactories
) {
  // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
  Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");

  final List<ValueType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null;

  // The merging dictionary budget is divided by the concurrency hint (or by 1 when single-threaded).
  final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(
      includeTimestamp,
      query.getContextSortByDimsFirst(),
      query.getDimensions().size(),
      querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint),
      valueTypes
  );

  // Holds the row currently being aggregated; the column selector factory reads from it.
  final ThreadLocal<Row> columnSelectorRow = new ThreadLocal<>();
  final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(
      RowBasedColumnSelectorFactory.create(columnSelectorRow, rawInputRowSignature)
  );

  final Grouper<RowBasedKey> grouper;
  if (concurrencyHint == -1) {
    grouper = new SpillingGrouper<>(
        bufferSupplier,
        keySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(),
        querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(),
        temporaryStorage,
        spillMapper,
        true
    );
  } else {
    grouper = new ConcurrentGrouper<>(
        bufferSupplier,
        keySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(),
        querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(),
        temporaryStorage,
        spillMapper,
        concurrencyHint
    );
  }

  // One extra key slot is reserved when the timestamp is part of the key.
  final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
  final ValueExtractFunction valueExtractFn = makeValueExtractFunction(
      query,
      isInputRaw,
      includeTimestamp,
      columnSelectorFactory,
      rawInputRowSignature,
      valueTypes
  );

  final Accumulator<Grouper<RowBasedKey>, Row> accumulator = new Accumulator<Grouper<RowBasedKey>, Row>() {
    @Override
    public Grouper<RowBasedKey> accumulate(final Grouper<RowBasedKey> theGrouper, final Row row) {
      BaseQuery.checkInterrupted();

      if (theGrouper == null) {
        // Pass-through null returns without doing more work.
        return null;
      }

      if (!theGrouper.isInitialized()) {
        theGrouper.init();
      }

      columnSelectorRow.set(row);
      try {
        final Comparable[] key = new Comparable[keySize];
        valueExtractFn.apply(row, key);

        final boolean didAggregate = theGrouper.aggregate(new RowBasedKey(key));
        // null return means grouping resources were exhausted.
        return didAggregate ? theGrouper : null;
      } finally {
        // Always drop the reference, so a failed or throwing aggregation does not leave the last row
        // pinned in this thread's ThreadLocal.
        columnSelectorRow.set(null);
      }
    }
  };

  return new Pair<>(grouper, accumulator);
}
use of io.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class SegmentAnalyzer method analyzeStringColumn.
/**
 * Builds the {@link ColumnAnalysis} for a string column.
 *
 * <p>Cardinality, estimated size and min/max are each gathered only when the corresponding
 * {@code analyzing*()} switch is on; otherwise they stay at {@code 0} / {@code null}. The size is the sum of
 * the estimated UTF-8 lengths of every non-null, non-empty value seen while walking all cursors of the column.
 *
 * @param capabilities   supplies the reported type name and the multi-value flag
 * @param storageAdapter source of cardinality, time bounds, cursors and min/max values
 * @param columnName     name of the column to analyze
 * @return the analysis for the column
 */
private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final StorageAdapter storageAdapter, final String columnName) {
  int distinctValues = 0;
  if (analyzingCardinality()) {
    distinctValues = storageAdapter.getDimensionCardinality(columnName);
  }

  long estimatedBytes = 0;
  if (analyzingSize()) {
    final long firstMillis = storageAdapter.getMinTime().getMillis();
    final long lastMillis = storageAdapter.getMaxTime().getMillis();
    // NOTE(review): a Joda Interval is half-open, so lastMillis itself is outside it; whether rows stamped
    // exactly at the max time are therefore skipped depends on makeCursors - confirm.
    final Interval scanInterval = new Interval(firstMillis, lastMillis);
    final Sequence<Cursor> cursorSequence =
        storageAdapter.makeCursors(null, scanInterval, VirtualColumns.EMPTY, Granularities.ALL, false);

    final Accumulator<Long, Cursor> byteCounter = new Accumulator<Long, Cursor>() {
      @Override
      public Long accumulate(Long runningTotal, Cursor cursor) {
        final DimensionSelector dimSelector =
            cursor.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
        if (dimSelector == null) {
          // No selector for this cursor: carry the total forward unchanged.
          return runningTotal;
        }

        long bytes = runningTotal;
        for (; !cursor.isDone(); cursor.advance()) {
          final IndexedInts rowIds = dimSelector.getRow();
          for (int pos = 0; pos < rowIds.size(); pos++) {
            final String value = dimSelector.lookupName(rowIds.get(pos));
            if (value == null || value.isEmpty()) {
              continue;
            }
            bytes += StringUtils.estimatedBinaryLengthAsUTF8(value);
          }
        }
        return bytes;
      }
    };

    estimatedBytes = cursorSequence.accumulate(0L, byteCounter);
  }

  Comparable smallest = null;
  Comparable largest = null;
  if (analyzingMinMax()) {
    smallest = storageAdapter.getMinValue(columnName);
    largest = storageAdapter.getMaxValue(columnName);
  }

  return new ColumnAnalysis(
      capabilities.getType().name(),
      capabilities.hasMultipleValues(),
      estimatedBytes,
      distinctValues,
      smallest,
      largest,
      null
  );
}
use of io.druid.java.util.common.guava.Accumulator in project druid by druid-io.
the class SpecificSegmentQueryRunner method run.
/**
 * Runs the query restricted to {@code specificSpec} and converts a {@link SegmentMissingException} raised by
 * the base runner's sequence into a call to {@code appendMissingSegment(responseContext)}.
 *
 * <p>Both the initial {@code base.run} call and all later processing of the returned sequence go through
 * {@code doNamed} with a name built from the query type, data source and intervals.
 * NOTE(review): presumably {@code doNamed} renames the calling thread for the duration of the supplier -
 * confirm against its definition.
 *
 * @param input           the incoming query; its segment spec is replaced with {@code specificSpec}
 * @param responseContext context map passed to the base runner and to {@code appendMissingSegment}
 * @return the guarded sequence, wrapped so its processing runs via {@code doNamed}
 */
@Override
public Sequence<T> run(final Query<T> input, final Map<String, Object> responseContext) {
  final Query<T> query = input.withQuerySegmentSpec(specificSpec);

  final Thread callerThread = Thread.currentThread();
  final String originalThreadName = callerThread.getName();
  final String queryThreadName =
      String.format("%s_%s_%s", query.getType(), query.getDataSource(), query.getIntervals());

  final Supplier<Sequence<T>> baseRun = new Supplier<Sequence<T>>() {
    @Override
    public Sequence<T> get() {
      return base.run(query, responseContext);
    }
  };
  final Sequence<T> delegate = doNamed(callerThread, originalThreadName, queryThreadName, baseRun);

  final Sequence<T> missingSegmentGuard = new Sequence<T>() {
    @Override
    public <OutType> OutType accumulate(final OutType seed, final Accumulator<OutType, T> accumulator) {
      try {
        return delegate.accumulate(seed, accumulator);
      } catch (SegmentMissingException e) {
        // Record the segment as missing and answer with the untouched initial value.
        appendMissingSegment(responseContext);
        return seed;
      }
    }

    @Override
    public <OutType> Yielder<OutType> toYielder(final OutType seed, final YieldingAccumulator<OutType, T> accumulator) {
      try {
        return guard(delegate.toYielder(seed, accumulator));
      } catch (SegmentMissingException e) {
        appendMissingSegment(responseContext);
        return Yielders.done(seed, null);
      }
    }

    /**
     * Wraps a yielder so that a SegmentMissingException thrown by its next() is reported as a missing segment.
     * NOTE(review): the yielder returned by a successful next() is handed back as-is, not re-wrapped, so
     * only the first next() call is guarded - confirm that this is intended.
     */
    private <OutType> Yielder<OutType> guard(final Yielder<OutType> inner) {
      return new Yielder<OutType>() {
        @Override
        public OutType get() {
          return inner.get();
        }

        @Override
        public Yielder<OutType> next(final OutType nextSeed) {
          try {
            return inner.next(nextSeed);
          } catch (SegmentMissingException e) {
            appendMissingSegment(responseContext);
            return Yielders.done(nextSeed, null);
          }
        }

        @Override
        public boolean isDone() {
          return inner.isDone();
        }

        @Override
        public void close() throws IOException {
          inner.close();
        }
      };
    }
  };

  final SequenceWrapper namedProcessing = new SequenceWrapper() {
    @Override
    public <RetType> RetType wrap(Supplier<RetType> sequenceProcessing) {
      return doNamed(callerThread, originalThreadName, queryThreadName, sequenceProcessing);
    }
  };

  return Sequences.wrap(missingSegmentGuard, namedProcessing);
}
Aggregations