Use of io.druid.java.util.common.Pair in project druid by druid-io.
From class MoveTimeFiltersToIntervals, method extractConvertibleTimeBounds.
/**
* Extract bound filters on __time that can be converted to query-level "intervals".
*
* @return pair of new dimFilter + RangeSet of __time that should be ANDed together. Either can be null but not both.
*/
private static Pair<DimFilter, RangeSet<Long>> extractConvertibleTimeBounds(final DimFilter filter)
{
  if (filter instanceof AndDimFilter) {
    // AND: convert each child independently; residual filters are re-ANDed and time ranges intersected.
    final List<DimFilter> children = ((AndDimFilter) filter).getFields();
    final List<DimFilter> newChildren = Lists.newArrayList();
    final List<RangeSet<Long>> rangeSets = Lists.newArrayList();

    for (DimFilter child : children) {
      final Pair<DimFilter, RangeSet<Long>> pair = extractConvertibleTimeBounds(child);
      if (pair.lhs != null) {
        newChildren.add(pair.lhs);
      }
      if (pair.rhs != null) {
        rangeSets.add(pair.rhs);
      }
    }

    final DimFilter newFilter;
    if (newChildren.size() == 0) {
      newFilter = null;
    } else if (newChildren.size() == 1) {
      newFilter = newChildren.get(0);
    } else {
      newFilter = new AndDimFilter(newChildren);
    }

    return Pair.of(newFilter, rangeSets.isEmpty() ? null : RangeSets.intersectRangeSets(rangeSets));
  } else if (filter instanceof OrDimFilter) {
    // OR: time ranges are unioned, but the union is only usable if every child contributed one.
    final List<DimFilter> children = ((OrDimFilter) filter).getFields();
    final List<RangeSet<Long>> rangeSets = Lists.newArrayList();

    boolean allCompletelyConverted = true;
    boolean allHadIntervals = true;

    for (DimFilter child : children) {
      final Pair<DimFilter, RangeSet<Long>> pair = extractConvertibleTimeBounds(child);
      if (pair.lhs != null) {
        allCompletelyConverted = false;
      }
      if (pair.rhs != null) {
        rangeSets.add(pair.rhs);
      } else {
        allHadIntervals = false;
      }
    }

    if (allCompletelyConverted) {
      return Pair.of(null, RangeSets.unionRangeSets(rangeSets));
    } else {
      return Pair.of(filter, allHadIntervals ? RangeSets.unionRangeSets(rangeSets) : null);
    }
  } else if (filter instanceof NotDimFilter) {
    // NOT: only convertible if the child converted completely; in that case, complement the range.
    final DimFilter child = ((NotDimFilter) filter).getField();
    final Pair<DimFilter, RangeSet<Long>> pair = extractConvertibleTimeBounds(child);
    if (pair.rhs != null && pair.lhs == null) {
      return Pair.of(null, pair.rhs.complement());
    } else {
      return Pair.of(filter, null);
    }
  } else if (filter instanceof BoundDimFilter) {
    // A bound filter on __time itself converts directly into a range.
    final BoundDimFilter bound = (BoundDimFilter) filter;
    if (BoundRefKey.from(bound).equals(TIME_BOUND_REF_KEY)) {
      return Pair.of(null, RangeSets.of(toLongRange(Bounds.toRange(bound))));
    } else {
      return Pair.of(filter, null);
    }
  } else {
    // Anything else cannot be converted; leave it untouched.
    return Pair.of(filter, null);
  }
}
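As a hedged usage sketch (not from the Druid source; originalFilter is hypothetical), the returned pair splits a filter into a residual dimFilter (lhs) and a RangeSet of __time millis (rhs) that the caller can fold into query-level intervals:

final Pair<DimFilter, RangeSet<Long>> converted = extractConvertibleTimeBounds(originalFilter);
if (converted.rhs != null) {
  // Fold the __time ranges into the query's "intervals" (the caller's responsibility).
}
if (converted.lhs != null) {
  // Keep whatever could not be converted as an ordinary dimension filter.
}
// Per the Javadoc, lhs and rhs may each be null, but never both.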
Use of io.druid.java.util.common.Pair in project druid by druid-io.
From class CombiningSequenceTest, method testCombining.
private void testCombining(
    List<Pair<Integer, Integer>> pairs,
    List<Pair<Integer, Integer>> expected,
    int limit
) throws Exception
{
  // Test that closing works too
  final CountDownLatch closed = new CountDownLatch(1);
  final Closeable closeable = new Closeable()
  {
    @Override
    public void close() throws IOException
    {
      closed.countDown();
    }
  };

  Sequence<Pair<Integer, Integer>> seq = Sequences.limit(
      CombiningSequence.create(
          Sequences.withBaggage(Sequences.simple(pairs), closeable),
          Ordering.natural().onResultOf(Pair.<Integer, Integer>lhsFn()),
          new BinaryFn<Pair<Integer, Integer>, Pair<Integer, Integer>, Pair<Integer, Integer>>()
          {
            @Override
            public Pair<Integer, Integer> apply(Pair<Integer, Integer> lhs, Pair<Integer, Integer> rhs)
            {
              if (lhs == null) {
                return rhs;
              }
              if (rhs == null) {
                return lhs;
              }
              // Pairs with equal lhs are combined by summing their rhs values.
              return Pair.of(lhs.lhs, lhs.rhs + rhs.rhs);
            }
          }
      ),
      limit
  );

  List<Pair<Integer, Integer>> merged = Sequences.toList(seq, Lists.<Pair<Integer, Integer>>newArrayList());
  Assert.assertEquals(expected, merged);

  // yieldEvery is a field of the enclosing test class, varied across test runs.
  Yielder<Pair<Integer, Integer>> yielder = seq.toYielder(
      null,
      new YieldingAccumulator<Pair<Integer, Integer>, Pair<Integer, Integer>>()
      {
        int count = 0;

        @Override
        public Pair<Integer, Integer> accumulate(Pair<Integer, Integer> lhs, Pair<Integer, Integer> rhs)
        {
          count++;
          if (count % yieldEvery == 0) {
            yield();
          }
          return rhs;
        }
      }
  );

  Iterator<Pair<Integer, Integer>> expectedVals = Iterators.filter(
      expected.iterator(),
      new Predicate<Pair<Integer, Integer>>()
      {
        int count = 0;

        @Override
        public boolean apply(@Nullable Pair<Integer, Integer> input)
        {
          count++;
          return count % yieldEvery == 0;
        }
      }
  );

  if (expectedVals.hasNext()) {
    while (!yielder.isDone()) {
      final Pair<Integer, Integer> expectedVal = expectedVals.next();
      final Pair<Integer, Integer> actual = yielder.get();
      Assert.assertEquals(expectedVal, actual);
      yielder = yielder.next(actual);
    }
  }
  Assert.assertTrue(yielder.isDone());
  Assert.assertFalse(expectedVals.hasNext());

  yielder.close();
  Assert.assertTrue("resource closed", closed.await(10000, TimeUnit.MILLISECONDS));
}
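As a hedged illustration, here is a hypothetical invocation (the data is made up): the combining function above sums the rhs values of adjacent pairs sharing an lhs, so three input pairs collapse to two.

testCombining(
    ImmutableList.of(Pair.of(1, 1), Pair.of(1, 2), Pair.of(2, 3)),
    ImmutableList.of(Pair.of(1, 3), Pair.of(2, 3)),
    3
);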
Use of io.druid.java.util.common.Pair in project druid by druid-io.
From class GroupByQueryHelper, method createBySegmentAccumulatorPair.
public static <T> Pair<Queue, Accumulator<Queue, T>> createBySegmentAccumulatorPair()
{
  // In parallel query runner multiple threads add to this queue concurrently
  Queue init = new ConcurrentLinkedQueue<>();

  Accumulator<Queue, T> accumulator = new Accumulator<Queue, T>()
  {
    @Override
    public Queue accumulate(Queue accumulated, T in)
    {
      if (in == null) {
        throw new ISE("Cannot have null result");
      }
      accumulated.offer(in);
      return accumulated;
    }
  };

  return new Pair<>(init, accumulator);
}
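A minimal sketch of consuming the pair, assuming a result Sequence named resultSequence (hypothetical): the lhs queue is the accumulation target and the rhs accumulator offers each result onto it, which is safe under concurrent accumulation because the queue is a ConcurrentLinkedQueue.

final Pair<Queue, Accumulator<Queue, Row>> pair = GroupByQueryHelper.createBySegmentAccumulatorPair();
// Each accumulate() call offers one result onto the shared queue.
final Queue bySegmentResults = resultSequence.accumulate(pair.lhs, pair.rhs);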
Use of io.druid.java.util.common.Pair in project druid by druid-io.
From class GroupByMergingQueryRunnerV2, method run.
@Override
public Sequence<Row> run(final Query queryParam, final Map responseContext)
{
  final GroupByQuery query = (GroupByQuery) queryParam;
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);

  // CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
  // (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
  // merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
  // will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
  // I'm not sure of a better way to do this without tweaking how realtime servers do queries.
  final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
  final GroupByQuery queryForRunners = query.withOverriddenContext(
      ImmutableMap.<String, Object>of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true)
  );

  if (BaseQuery.getContextBySegment(query, false) || forceChainedExecution) {
    return new ChainedExecutionQueryRunner(exec, queryWatcher, queryables).run(query, responseContext);
  }

  final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();

  final AggregatorFactory[] combiningAggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
  for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
    combiningAggregatorFactories[i] = query.getAggregatorSpecs().get(i).getCombiningFactory();
  }

  final File temporaryStorageDirectory = new File(
      processingTmpDir,
      String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId())
  );

  final int priority = BaseQuery.getContextPriority(query, 0);

  // Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
  // query processing together.
  final Number queryTimeout = query.getContextValue(QueryContextKeys.TIMEOUT, null);
  final long timeoutAt = queryTimeout == null
      ? JodaUtils.MAX_INSTANT
      : System.currentTimeMillis() + queryTimeout.longValue();

  return new BaseSequence<>(
      new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>()
      {
        @Override
        public CloseableGrouperIterator<RowBasedKey, Row> make()
        {
          final List<ReferenceCountingResourceHolder> resources = Lists.newArrayList();

          try {
            final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(
                temporaryStorageDirectory,
                querySpecificConfig.getMaxOnDiskStorage()
            );
            final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder =
                ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
            resources.add(temporaryStorageHolder);

            final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder;
            try {
              // This will potentially block if there are no merge buffers left in the pool.
              final long timeout = timeoutAt - System.currentTimeMillis();
              if (timeout <= 0 || (mergeBufferHolder = mergeBufferPool.take(timeout)) == null) {
                throw new TimeoutException();
              }
              resources.add(mergeBufferHolder);
            } catch (Exception e) {
              throw new QueryInterruptedException(e);
            }

            Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair =
                RowBasedGrouperHelper.createGrouperAccumulatorPair(
                    query,
                    false,
                    null,
                    config,
                    Suppliers.ofInstance(mergeBufferHolder.get()),
                    concurrencyHint,
                    temporaryStorage,
                    spillMapper,
                    combiningAggregatorFactories
                );
            final Grouper<RowBasedKey> grouper = pair.lhs;
            final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
            grouper.init();

            final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder =
                ReferenceCountingResourceHolder.fromCloseable(grouper);
            resources.add(grouperHolder);

            ListenableFuture<List<Boolean>> futures = Futures.allAsList(
                Lists.newArrayList(
                    Iterables.transform(
                        queryables,
                        new Function<QueryRunner<Row>, ListenableFuture<Boolean>>()
                        {
                          @Override
                          public ListenableFuture<Boolean> apply(final QueryRunner<Row> input)
                          {
                            if (input == null) {
                              throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                            }

                            ListenableFuture<Boolean> future = exec.submit(
                                new AbstractPrioritizedCallable<Boolean>(priority)
                                {
                                  @Override
                                  public Boolean call() throws Exception
                                  {
                                    try (
                                        Releaser bufferReleaser = mergeBufferHolder.increment();
                                        Releaser grouperReleaser = grouperHolder.increment()
                                    ) {
                                      final Object retVal = input.run(queryForRunners, responseContext)
                                                                 .accumulate(grouper, accumulator);

                                      // Return true if OK, false if resources were exhausted.
                                      return retVal == grouper;
                                    } catch (QueryInterruptedException e) {
                                      throw e;
                                    } catch (Exception e) {
                                      log.error(e, "Exception with one of the sequences!");
                                      throw Throwables.propagate(e);
                                    }
                                  }
                                }
                            );

                            if (isSingleThreaded) {
                              waitForFutureCompletion(
                                  query,
                                  Futures.allAsList(ImmutableList.of(future)),
                                  timeoutAt - System.currentTimeMillis()
                              );
                            }

                            return future;
                          }
                        }
                    )
                )
            );

            if (!isSingleThreaded) {
              waitForFutureCompletion(query, futures, timeoutAt - System.currentTimeMillis());
            }

            return RowBasedGrouperHelper.makeGrouperIterator(
                grouper,
                query,
                new Closeable()
                {
                  @Override
                  public void close() throws IOException
                  {
                    for (Closeable closeable : Lists.reverse(resources)) {
                      CloseQuietly.close(closeable);
                    }
                  }
                }
            );
          } catch (Throwable e) {
            // Exception caught while setting up the iterator; release resources.
            for (Closeable closeable : Lists.reverse(resources)) {
              CloseQuietly.close(closeable);
            }
            throw e;
          }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake)
        {
          iterFromMake.close();
        }
      }
  );
}
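The timeoutAt arithmetic above is a general deadline pattern: compute one absolute deadline up front, then give each subsequent blocking step only the remaining budget, so buffer acquisition and query processing share a single timeout. A standalone sketch of that pattern (illustrative, not Druid source; the 5000ms value is made up):

final Long queryTimeoutMillis = 5000L; // hypothetical per-query timeout; null would mean "no timeout"
final long deadline = queryTimeoutMillis == null
    ? Long.MAX_VALUE
    : System.currentTimeMillis() + queryTimeoutMillis;

// Before each blocking call, re-derive the remaining budget:
final long remaining = deadline - System.currentTimeMillis();
if (remaining <= 0) {
  throw new TimeoutException(); // the budget was spent by earlier steps
}
// block for at most `remaining` millis, e.g. pool.take(remaining)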
Use of io.druid.java.util.common.Pair in project druid by druid-io.
From class RowBasedGrouperHelper, method createGrouperAccumulatorPair.
/**
* If isInputRaw is true, transformations such as timestamp truncation and extraction functions have not
* been applied to the input rows yet, for example, in a nested query, if an extraction function is being
* applied in the outer query to a field of the inner query. This method must apply those transformations.
*/
public static Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> createGrouperAccumulatorPair(
    final GroupByQuery query,
    final boolean isInputRaw,
    final Map<String, ValueType> rawInputRowSignature,
    final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier,
    final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage,
    final ObjectMapper spillMapper,
    final AggregatorFactory[] aggregatorFactories
)
{
  // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
  Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");

  final List<ValueType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null;

  final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(
      includeTimestamp,
      query.getContextSortByDimsFirst(),
      query.getDimensions().size(),
      querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint),
      valueTypes
  );
  final ThreadLocal<Row> columnSelectorRow = new ThreadLocal<>();
  final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(
      RowBasedColumnSelectorFactory.create(columnSelectorRow, rawInputRowSignature)
  );

  final Grouper<RowBasedKey> grouper;
  if (concurrencyHint == -1) {
    grouper = new SpillingGrouper<>(
        bufferSupplier,
        keySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(),
        querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(),
        temporaryStorage,
        spillMapper,
        true
    );
  } else {
    grouper = new ConcurrentGrouper<>(
        bufferSupplier,
        keySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(),
        querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(),
        temporaryStorage,
        spillMapper,
        concurrencyHint
    );
  }

  final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
  final ValueExtractFunction valueExtractFn = makeValueExtractFunction(
      query,
      isInputRaw,
      includeTimestamp,
      columnSelectorFactory,
      rawInputRowSignature,
      valueTypes
  );

  final Accumulator<Grouper<RowBasedKey>, Row> accumulator = new Accumulator<Grouper<RowBasedKey>, Row>()
  {
    @Override
    public Grouper<RowBasedKey> accumulate(final Grouper<RowBasedKey> theGrouper, final Row row)
    {
      BaseQuery.checkInterrupted();

      if (theGrouper == null) {
        // Pass-through null returns without doing more work.
        return null;
      }

      if (!theGrouper.isInitialized()) {
        theGrouper.init();
      }

      columnSelectorRow.set(row);

      final Comparable[] key = new Comparable[keySize];
      valueExtractFn.apply(row, key);

      final boolean didAggregate = theGrouper.aggregate(new RowBasedKey(key));
      if (!didAggregate) {
        // null return means grouping resources were exhausted.
        return null;
      }
      columnSelectorRow.set(null);

      return theGrouper;
    }
  };

  return new Pair<>(grouper, accumulator);
}
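Tying it together, a hedged sketch of how a caller such as GroupByMergingQueryRunnerV2 (shown earlier) might use the pair; rowSequence is hypothetical, and the other arguments are assumed to be in scope. A return value other than the grouper itself signals that grouping resources were exhausted.

final Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair =
    RowBasedGrouperHelper.createGrouperAccumulatorPair(
        query, true, rawInputRowSignature, config, bufferSupplier,
        -1, // single-threaded
        temporaryStorage, spillMapper, aggregatorFactories
    );
final Grouper<RowBasedKey> grouper = pair.lhs;
grouper.init();

// Drive the rows through the accumulator; a non-grouper return means resources ran out.
final Object retVal = rowSequence.accumulate(grouper, pair.rhs);
if (retVal != grouper) {
  // Handle exhaustion (e.g. fail the query).
}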