use of io.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class TimestampGroupByAggregationTest method constructorFeeder.
@Parameterized.Parameters(name = "{index}: Test for {0}, config = {1}")
public static Iterable<Object[]> constructorFeeder() {
final List<Object[]> constructors = Lists.newArrayList();
final List<List<Object>> partialConstructors = ImmutableList.<List<Object>>of(ImmutableList.<Object>of("timeMin", "tmin", "time_min", TimestampMinAggregatorFactory.class, DateTime.parse("2011-01-12T01:00:00.000Z")), ImmutableList.<Object>of("timeMax", "tmax", "time_max", TimestampMaxAggregatorFactory.class, DateTime.parse("2011-01-31T01:00:00.000Z")));
for (final List<Object> partialConstructor : partialConstructors) {
for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) {
final List<Object> constructor = Lists.newArrayList(partialConstructor);
constructor.add(config);
constructors.add(constructor.toArray());
}
}
return constructors;
}
use of io.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class CachingClusteredClientTest method testGroupByCaching.
@Test
public void testGroupByCaching() throws Exception {
List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder().addAll(AGGS).add(new HyperUniquesAggregatorFactory("uniques", "uniques")).build();
final HashFunction hashFn = Hashing.murmur3_128();
GroupByQuery.Builder builder = new GroupByQuery.Builder().setDataSource(DATA_SOURCE).setQuerySegmentSpec(SEG_SPEC).setDimFilter(DIM_FILTER).setGranularity(GRANULARITY).setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a"))).setAggregatorSpecs(aggsWithUniques).setPostAggregatorSpecs(POST_AGGS).setContext(CONTEXT);
final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
testQueryCaching(client, builder.build(), new Interval("2011-01-01/2011-01-02"), makeGroupByResults(new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)), new Interval("2011-01-02/2011-01-03"), makeGroupByResults(new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)), new Interval("2011-01-05/2011-01-10"), makeGroupByResults(new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector), new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector), new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector), new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector), new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)), new Interval("2011-01-05/2011-01-10"), makeGroupByResults(new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector), new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector), new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector), new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector), new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)));
QueryRunner runner = new FinalizeResultsQueryRunner(client, GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest());
HashMap<String, Object> context = new HashMap<String, Object>();
TestHelper.assertExpectedObjects(makeGroupByResults(new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector), new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector), new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector), new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector), new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector), new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector), new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector), new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector), new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector), new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)), runner.run(builder.setInterval("2011-01-05/2011-01-10").build(), context), "");
}
use of io.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class GroupByRowProcessor method process.
public static Sequence<Row> process(final Query queryParam, final Sequence<Row> rows, final Map<String, ValueType> rowSignature, final GroupByQueryConfig config, final GroupByQueryResource resource, final ObjectMapper spillMapper, final String processingTmpDir) {
final GroupByQuery query = (GroupByQuery) queryParam;
final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
final AggregatorFactory[] aggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
aggregatorFactories[i] = query.getAggregatorSpecs().get(i);
}
final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
final List<Interval> queryIntervals = query.getIntervals();
final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));
final SettableSupplier<Row> rowSupplier = new SettableSupplier<>();
final RowBasedColumnSelectorFactory columnSelectorFactory = RowBasedColumnSelectorFactory.create(rowSupplier, rowSignature);
final ValueMatcher filterMatcher = filter == null ? BooleanValueMatcher.of(true) : filter.makeMatcher(columnSelectorFactory);
final FilteredSequence<Row> filteredSequence = new FilteredSequence<>(rows, new Predicate<Row>() {
@Override
public boolean apply(Row input) {
boolean inInterval = false;
DateTime rowTime = input.getTimestamp();
for (Interval queryInterval : queryIntervals) {
if (queryInterval.contains(rowTime)) {
inInterval = true;
break;
}
}
if (!inInterval) {
return false;
}
rowSupplier.set(input);
return filterMatcher.matches();
}
});
return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {
@Override
public CloseableGrouperIterator<RowBasedKey, Row> make() {
// This contains all closeable objects which are closed when the returned iterator iterates all the elements,
// or an exceptions is thrown. The objects are closed in their reverse order.
final List<Closeable> closeOnExit = Lists.newArrayList();
try {
final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
closeOnExit.add(temporaryStorage);
Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, true, rowSignature, querySpecificConfig, new Supplier<ByteBuffer>() {
@Override
public ByteBuffer get() {
final ResourceHolder<ByteBuffer> mergeBufferHolder = resource.getMergeBuffer();
closeOnExit.add(mergeBufferHolder);
return mergeBufferHolder.get();
}
}, -1, temporaryStorage, spillMapper, aggregatorFactories);
final Grouper<RowBasedKey> grouper = pair.lhs;
final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
closeOnExit.add(grouper);
final Grouper<RowBasedKey> retVal = filteredSequence.accumulate(grouper, accumulator);
if (retVal != grouper) {
throw GroupByQueryHelper.throwAccumulationResourceLimitExceededException();
}
return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {
@Override
public void close() throws IOException {
for (Closeable closeable : Lists.reverse(closeOnExit)) {
CloseQuietly.close(closeable);
}
}
});
} catch (Throwable e) {
// Exception caught while setting up the iterator; release resources.
for (Closeable closeable : Lists.reverse(closeOnExit)) {
CloseQuietly.close(closeable);
}
throw e;
}
}
@Override
public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
iterFromMake.close();
}
});
}
use of io.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class ApproximateHistogramGroupByQueryTest method constructorFeeder.
@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> constructorFeeder() throws IOException {
final GroupByQueryConfig v1Config = new GroupByQueryConfig() {
@Override
public String getDefaultStrategy() {
return GroupByStrategySelector.STRATEGY_V1;
}
@Override
public String toString() {
return "v1";
}
};
final GroupByQueryConfig v1SingleThreadedConfig = new GroupByQueryConfig() {
@Override
public boolean isSingleThreaded() {
return true;
}
@Override
public String getDefaultStrategy() {
return GroupByStrategySelector.STRATEGY_V1;
}
@Override
public String toString() {
return "v1SingleThreaded";
}
};
final GroupByQueryConfig v2Config = new GroupByQueryConfig() {
@Override
public String getDefaultStrategy() {
return GroupByStrategySelector.STRATEGY_V2;
}
@Override
public String toString() {
return "v2";
}
};
v1Config.setMaxIntermediateRows(10000);
v1SingleThreadedConfig.setMaxIntermediateRows(10000);
final List<Object[]> constructors = Lists.newArrayList();
final List<GroupByQueryConfig> configs = ImmutableList.of(v1Config, v1SingleThreadedConfig, v2Config);
for (GroupByQueryConfig config : configs) {
final GroupByQueryRunnerFactory factory = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);
for (QueryRunner<Row> runner : QueryRunnerTestHelper.makeQueryRunners(factory)) {
final String testName = String.format("config=%s, runner=%s", config.toString(), runner.toString());
constructors.add(new Object[] { testName, factory, runner });
}
}
return constructors;
}
use of io.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
the class GroupByMergingQueryRunnerV2 method run.
@Override
public Sequence<Row> run(final Query queryParam, final Map responseContext) {
final GroupByQuery query = (GroupByQuery) queryParam;
final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
// CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
// (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
// merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
// will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
// I'm not sure of a better way to do this without tweaking how realtime servers do queries.
final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
final GroupByQuery queryForRunners = query.withOverriddenContext(ImmutableMap.<String, Object>of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true));
if (BaseQuery.getContextBySegment(query, false) || forceChainedExecution) {
return new ChainedExecutionQueryRunner(exec, queryWatcher, queryables).run(query, responseContext);
}
final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
final AggregatorFactory[] combiningAggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
combiningAggregatorFactories[i] = query.getAggregatorSpecs().get(i).getCombiningFactory();
}
final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
final int priority = BaseQuery.getContextPriority(query, 0);
// Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
// query processing together.
final Number queryTimeout = query.getContextValue(QueryContextKeys.TIMEOUT, null);
final long timeoutAt = queryTimeout == null ? JodaUtils.MAX_INSTANT : System.currentTimeMillis() + queryTimeout.longValue();
return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {
@Override
public CloseableGrouperIterator<RowBasedKey, Row> make() {
final List<ReferenceCountingResourceHolder> resources = Lists.newArrayList();
try {
final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder = ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
resources.add(temporaryStorageHolder);
final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder;
try {
// This will potentially block if there are no merge buffers left in the pool.
final long timeout = timeoutAt - System.currentTimeMillis();
if (timeout <= 0 || (mergeBufferHolder = mergeBufferPool.take(timeout)) == null) {
throw new TimeoutException();
}
resources.add(mergeBufferHolder);
} catch (Exception e) {
throw new QueryInterruptedException(e);
}
Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, false, null, config, Suppliers.ofInstance(mergeBufferHolder.get()), concurrencyHint, temporaryStorage, spillMapper, combiningAggregatorFactories);
final Grouper<RowBasedKey> grouper = pair.lhs;
final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
grouper.init();
final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder = ReferenceCountingResourceHolder.fromCloseable(grouper);
resources.add(grouperHolder);
ListenableFuture<List<Boolean>> futures = Futures.allAsList(Lists.newArrayList(Iterables.transform(queryables, new Function<QueryRunner<Row>, ListenableFuture<Boolean>>() {
@Override
public ListenableFuture<Boolean> apply(final QueryRunner<Row> input) {
if (input == null) {
throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
}
ListenableFuture<Boolean> future = exec.submit(new AbstractPrioritizedCallable<Boolean>(priority) {
@Override
public Boolean call() throws Exception {
try (Releaser bufferReleaser = mergeBufferHolder.increment();
Releaser grouperReleaser = grouperHolder.increment()) {
final Object retVal = input.run(queryForRunners, responseContext).accumulate(grouper, accumulator);
// Return true if OK, false if resources were exhausted.
return retVal == grouper;
} catch (QueryInterruptedException e) {
throw e;
} catch (Exception e) {
log.error(e, "Exception with one of the sequences!");
throw Throwables.propagate(e);
}
}
});
if (isSingleThreaded) {
waitForFutureCompletion(query, Futures.allAsList(ImmutableList.of(future)), timeoutAt - System.currentTimeMillis());
}
return future;
}
})));
if (!isSingleThreaded) {
waitForFutureCompletion(query, futures, timeoutAt - System.currentTimeMillis());
}
return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {
@Override
public void close() throws IOException {
for (Closeable closeable : Lists.reverse(resources)) {
CloseQuietly.close(closeable);
}
}
});
} catch (Throwable e) {
// Exception caught while setting up the iterator; release resources.
for (Closeable closeable : Lists.reverse(resources)) {
CloseQuietly.close(closeable);
}
throw e;
}
}
@Override
public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
iterFromMake.close();
}
});
}
Aggregations