Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class VarianceGroupByQueryTest, method testGroupBy.
/**
 * Groups the test data source by "quality" (output name "alias") at day granularity over
 * the first-to-third interval and verifies the row count, long sum of "index", variance of
 * "index", and the stddev post-aggregation for every (day, quality) group.
 */
@Test
public void testGroupBy() {
  // Assemble the query step by step; build() is called once all parts are in place.
  GroupByQuery.Builder queryBuilder = GroupByQuery.builder()
      .setDataSource(QueryRunnerTestHelper.dataSource)
      .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
      .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
      .setAggregatorSpecs(
          Arrays.asList(
              VarianceTestHelper.rowsCount,
              VarianceTestHelper.indexVarianceAggr,
              new LongSumAggregatorFactory("idx", "index")
          )
      )
      .setPostAggregatorSpecs(Arrays.<PostAggregator>asList(VarianceTestHelper.stddevOfIndexPostAggr))
      .setGranularity(QueryRunnerTestHelper.dayGran);
  GroupByQuery query = queryBuilder.build();

  // Column order used by every add(...) call below (after the leading timestamp).
  String[] columnNames = { "alias", "rows", "idx", "index_stddev", "index_var" };
  VarianceTestHelper.RowBuilder rowBuilder = new VarianceTestHelper.RowBuilder(columnNames);
  List<Row> expectedResults = rowBuilder
      .add("2011-04-01", "automotive", 1L, 135L, 0d, 0d)
      .add("2011-04-01", "business", 1L, 118L, 0d, 0d)
      .add("2011-04-01", "entertainment", 1L, 158L, 0d, 0d)
      .add("2011-04-01", "health", 1L, 120L, 0d, 0d)
      .add("2011-04-01", "mezzanine", 3L, 2870L, 737.0179286322613d, 543195.4271253889d)
      .add("2011-04-01", "news", 1L, 121L, 0d, 0d)
      .add("2011-04-01", "premium", 3L, 2900L, 726.6322593583996d, 527994.4403402924d)
      .add("2011-04-01", "technology", 1L, 78L, 0d, 0d)
      .add("2011-04-01", "travel", 1L, 119L, 0d, 0d)
      .add("2011-04-02", "automotive", 1L, 147L, 0d, 0d)
      .add("2011-04-02", "business", 1L, 112L, 0d, 0d)
      .add("2011-04-02", "entertainment", 1L, 166L, 0d, 0d)
      .add("2011-04-02", "health", 1L, 113L, 0d, 0d)
      .add("2011-04-02", "mezzanine", 3L, 2447L, 611.3420766546617d, 373739.13468843425d)
      .add("2011-04-02", "news", 1L, 114L, 0d, 0d)
      .add("2011-04-02", "premium", 3L, 2505L, 621.3898134843073d, 386125.30030206224d)
      .add("2011-04-02", "technology", 1L, 97L, 0d, 0d)
      .add("2011-04-02", "travel", 1L, 126L, 0d, 0d)
      .build();

  Iterable<Row> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, actualResults, "");
}
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class DataSourceTest, method testQueryDataSource.
/**
 * Verifies that a JSON object of type "query" wrapping a serialized groupBy query
 * deserializes into a {@code QueryDataSource} equal to one built directly from that query.
 */
@Test
public void testQueryDataSource() throws IOException {
  GroupByQuery.Builder queryBuilder = GroupByQuery.builder()
      .setDataSource(QueryRunnerTestHelper.dataSource)
      .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
      .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
      .setAggregatorSpecs(
          Arrays.<AggregatorFactory>asList(
              QueryRunnerTestHelper.rowsCount,
              new LongSumAggregatorFactory("idx", "index")
          )
      )
      .setGranularity(QueryRunnerTestHelper.dayGran);
  GroupByQuery query = queryBuilder.build();

  // Embed the serialized query inside a "query"-typed data source envelope.
  String serializedQuery = jsonMapper.writeValueAsString(query);
  String envelopeJson = "{\"type\":\"query\", \"query\":" + serializedQuery + "}";

  DataSource deserialized = jsonMapper.readValue(envelopeJson, DataSource.class);
  Assert.assertEquals(new QueryDataSource(query), deserialized);
}
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class MultiValuedDimensionTest, method testGroupByWithDimFilterAndWithFilteredDimSpec.
/**
 * Runs a groupBy on the multi-valued "tags" dimension with both a selector filter for "t3"
 * and a regex-filtered dimension spec for "t3", against one queryable-index segment and one
 * incremental-index segment, and expects a single "t3" row with a count of 4.
 */
@Test
public void testGroupByWithDimFilterAndWithFilteredDimSpec() throws Exception {
  // Only the "t3" value of the multi-valued dimension should be emitted as a group key.
  DimensionSpec tagsMatchingT3 = new RegexFilteredDimensionSpec(new DefaultDimensionSpec("tags", "tags"), "t3");

  GroupByQuery.Builder queryBuilder = GroupByQuery.builder()
      .setDataSource("xx")
      .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
      .setGranularity(Granularities.ALL)
      .setDimensions(Lists.<DimensionSpec>newArrayList(tagsMatchingT3))
      .setAggregatorSpecs(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("count")))
      .setDimFilter(new SelectorDimFilter("tags", "t3", null));
  GroupByQuery query = queryBuilder.build();

  Segment persistedSegment = new QueryableIndexSegment("sid1", queryableIndex);
  Segment inMemorySegment = new IncrementalIndexSegment(incrementalIndex, "sid2");
  Sequence<Row> result = helper.runQueryOnSegmentsObjs(
      ImmutableList.<Segment>of(persistedSegment, inMemorySegment),
      query
  );

  List<Row> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L)
  );
  List<Row> actualResults = Sequences.toList(result, new ArrayList<Row>());
  TestHelper.assertExpectedObjects(expectedResults, actualResults, "");
}
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg.
/**
 * Indexes three visits (two "iphone", one "android", each with a distinct visitor id) into an
 * on-heap incremental index, then groups by client type ordered descending and checks both the
 * row count and the distinct-count ("UV") aggregation per client type.
 */
@Test
public void testGroupByWithDistinctCountAgg() throws Exception {
  final GroupByQueryConfig config = new GroupByQueryConfig();
  config.setMaxIntermediateRows(10000);
  final GroupByQueryRunnerFactory factory = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);

  IncrementalIndex index = new OnheapIncrementalIndex(
      0,
      Granularities.SECOND,
      new AggregatorFactory[] { new CountAggregatorFactory("cnt") },
      1000
  );

  final String visitorIdColumn = "visitor_id";
  final String clientTypeColumn = "client_type";
  final long baseTimestamp = System.currentTimeMillis();

  // Each entry is {visitor id, client type}; row i is stamped baseTimestamp + i.
  final String[][] visits = {
      { "0", "iphone" },
      { "1", "iphone" },
      { "2", "android" }
  };
  for (int offset = 0; offset < visits.length; offset++) {
    index.add(
        new MapBasedInputRow(
            baseTimestamp + offset,
            Lists.newArrayList(visitorIdColumn, clientTypeColumn),
            ImmutableMap.<String, Object>of(visitorIdColumn, visits[offset][0], clientTypeColumn, visits[offset][1])
        )
    );
  }

  // Order groups by client type, descending, keeping at most 10 rows.
  OrderByColumnSpec clientTypeDescending = new OrderByColumnSpec(clientTypeColumn, OrderByColumnSpec.Direction.DESCENDING);
  DefaultLimitSpec limitSpec = new DefaultLimitSpec(Lists.newArrayList(clientTypeDescending), 10);

  GroupByQuery.Builder queryBuilder = new GroupByQuery.Builder()
      .setDataSource(QueryRunnerTestHelper.dataSource)
      .setGranularity(QueryRunnerTestHelper.allGran)
      .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec(clientTypeColumn, clientTypeColumn)))
      .setInterval(QueryRunnerTestHelper.fullOnInterval)
      .setLimitSpec(limitSpec)
      .setAggregatorSpecs(
          Lists.newArrayList(
              QueryRunnerTestHelper.rowsCount,
              new DistinctCountAggregatorFactory("UV", visitorIdColumn, null)
          )
      );
  GroupByQuery query = queryBuilder.build();

  final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
  Iterable<Row> actualResults = GroupByQueryRunnerTestHelper.runQuery(
      factory,
      factory.createRunner(incrementalIndexSegment),
      query
  );

  List<Row> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", clientTypeColumn, "iphone", "UV", 2L, "rows", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", clientTypeColumn, "android", "UV", 1L, "rows", 1L)
  );
  TestHelper.assertExpectedObjects(expectedResults, actualResults, "distinct-count");
}
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class GroupByMergingQueryRunnerV2, method run.
/**
 * Runs a groupBy query across all of this runner's underlying query runners ({@code queryables})
 * and merges their rows through a single shared {@code Grouper} backed by one merge buffer.
 *
 * <p>If the query is flagged by-segment, or the context already carries
 * {@code CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION} (i.e. this is a nested call), no merge
 * buffer is used and the call is delegated to a {@code ChainedExecutionQueryRunner}.
 *
 * <p>Otherwise the returned sequence's iterator maker acquires, in order: temporary storage,
 * a merge buffer (bounded by the query timeout), and the grouper. It then submits one task per
 * underlying runner to {@code exec}, waits for them, and returns an iterator over the grouper.
 * All acquired resources are closed in reverse order, either when the iterator is closed or
 * immediately if setup fails.
 *
 * @param queryParam the query to run; it is cast to {@code GroupByQuery}
 * @param responseContext response context handed unchanged to the underlying runners
 * @return a sequence of merged result rows
 */
@Override
public Sequence<Row> run(final Query queryParam, final Map responseContext) {
final GroupByQuery query = (GroupByQuery) queryParam;
final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
// CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
// (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
// merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
// will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
// I'm not sure of a better way to do this without tweaking how realtime servers do queries.
final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
// Copy of the query handed to the underlying runners, with the flag set so that any nested
// call of this method takes the chained-execution branch below.
final GroupByQuery queryForRunners = query.withOverriddenContext(ImmutableMap.<String, Object>of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true));
// By-segment queries and nested calls skip the merge buffer entirely. Note the original query
// (not queryForRunners) is passed on here.
if (BaseQuery.getContextBySegment(query, false) || forceChainedExecution) {
return new ChainedExecutionQueryRunner(exec, queryWatcher, queryables).run(query, responseContext);
}
final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
// One combining factory per aggregator in the query, in the same order; these are what the
// merging grouper is created with.
final AggregatorFactory[] combiningAggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
combiningAggregatorFactories[i] = query.getAggregatorSpecs().get(i).getCombiningFactory();
}
// Directory name combines a random UUID with the query id, so each invocation gets its own path.
final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
final int priority = BaseQuery.getContextPriority(query, 0);
// Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
// query processing together.
final Number queryTimeout = query.getContextValue(QueryContextKeys.TIMEOUT, null);
// The context timeout is added to the current time in milliseconds to form an absolute deadline;
// with no timeout in the context the deadline is JodaUtils.MAX_INSTANT.
final long timeoutAt = queryTimeout == null ? JodaUtils.MAX_INSTANT : System.currentTimeMillis() + queryTimeout.longValue();
return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {
@Override
public CloseableGrouperIterator<RowBasedKey, Row> make() {
// Everything acquired below is recorded here, in acquisition order, so it can be closed in
// reverse order later.
final List<ReferenceCountingResourceHolder> resources = Lists.newArrayList();
try {
final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder = ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
resources.add(temporaryStorageHolder);
final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder;
try {
// This will potentially block if there are no merge buffers left in the pool.
final long timeout = timeoutAt - System.currentTimeMillis();
// Either the deadline has already passed, or take() returned null: both are treated as a timeout.
if (timeout <= 0 || (mergeBufferHolder = mergeBufferPool.take(timeout)) == null) {
throw new TimeoutException();
}
resources.add(mergeBufferHolder);
} catch (Exception e) {
// Any failure to obtain the merge buffer, including the TimeoutException above, surfaces as a
// QueryInterruptedException.
throw new QueryInterruptedException(e);
}
Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, false, null, config, Suppliers.ofInstance(mergeBufferHolder.get()), concurrencyHint, temporaryStorage, spillMapper, combiningAggregatorFactories);
final Grouper<RowBasedKey> grouper = pair.lhs;
final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
grouper.init();
final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder = ReferenceCountingResourceHolder.fromCloseable(grouper);
resources.add(grouperHolder);
// Submit one task per underlying runner. The transformed iterable is copied into a list before
// Futures.allAsList is called, so every runner has been visited by then.
ListenableFuture<List<Boolean>> futures = Futures.allAsList(Lists.newArrayList(Iterables.transform(queryables, new Function<QueryRunner<Row>, ListenableFuture<Boolean>>() {
@Override
public ListenableFuture<Boolean> apply(final QueryRunner<Row> input) {
if (input == null) {
throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
}
ListenableFuture<Boolean> future = exec.submit(new AbstractPrioritizedCallable<Boolean>(priority) {
@Override
public Boolean call() throws Exception {
// Hold a reference on both the merge buffer and the grouper for the duration of this task;
// the releasers are closed automatically when the try block exits.
// NOTE(review): presumably this keeps them from being freed while the task is still running; confirm
// against ReferenceCountingResourceHolder.
try (Releaser bufferReleaser = mergeBufferHolder.increment();
Releaser grouperReleaser = grouperHolder.increment()) {
// The runner gets queryForRunners (flag set), and its rows are accumulated into the shared grouper.
final Object retVal = input.run(queryForRunners, responseContext).accumulate(grouper, accumulator);
// Return true if OK, false if resources were exhausted.
return retVal == grouper;
} catch (QueryInterruptedException e) {
// Rethrown as-is, without the error log below.
throw e;
} catch (Exception e) {
log.error(e, "Exception with one of the sequences!");
throw Throwables.propagate(e);
}
}
});
// Single-threaded mode: wait on this task before the next runner's task is submitted.
if (isSingleThreaded) {
waitForFutureCompletion(query, Futures.allAsList(ImmutableList.of(future)), timeoutAt - System.currentTimeMillis());
}
return future;
}
})));
// Multi-threaded mode: all tasks were submitted above; wait on them together with the time that
// remains until the deadline.
if (!isSingleThreaded) {
waitForFutureCompletion(query, futures, timeoutAt - System.currentTimeMillis());
}
// The iterator's closer closes every recorded resource, most recently acquired first.
return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {
@Override
public void close() throws IOException {
for (Closeable closeable : Lists.reverse(resources)) {
CloseQuietly.close(closeable);
}
}
});
} catch (Throwable e) {
// Exception caught while setting up the iterator; release resources.
for (Closeable closeable : Lists.reverse(resources)) {
CloseQuietly.close(closeable);
}
throw e;
}
}
@Override
public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
// Closing the iterator is the only cleanup step performed here.
iterFromMake.close();
}
});
}
Aggregations