Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
From class GroupByMergedQueryRunner, method run:
@Override
public Sequence<T> run(final Query<T> queryParam, final Map<String, Object> responseContext)
{
  final GroupByQuery query = (GroupByQuery) queryParam;
  final GroupByQueryConfig querySpecificConfig = configSupplier.get().withOverrides(query);
  final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
  final Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> indexAccumulatorPair =
      GroupByQueryHelper.createIndexAccumulatorPair(query, querySpecificConfig, bufferPool, true);
  final Pair<Queue, Accumulator<Queue, T>> bySegmentAccumulatorPair =
      GroupByQueryHelper.createBySegmentAccumulatorPair();
  final boolean bySegment = BaseQuery.getContextBySegment(query, false);
  final int priority = BaseQuery.getContextPriority(query, 0);
  ListenableFuture<List<Void>> futures = Futures.allAsList(
      Lists.newArrayList(
          Iterables.transform(
              queryables,
              new Function<QueryRunner<T>, ListenableFuture<Void>>()
              {
                @Override
                public ListenableFuture<Void> apply(final QueryRunner<T> input)
                {
                  if (input == null) {
                    throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                  }
                  ListenableFuture<Void> future = exec.submit(
                      new AbstractPrioritizedCallable<Void>(priority)
                      {
                        @Override
                        public Void call() throws Exception
                        {
                          try {
                            if (bySegment) {
                              input.run(queryParam, responseContext)
                                   .accumulate(bySegmentAccumulatorPair.lhs, bySegmentAccumulatorPair.rhs);
                            } else {
                              input.run(queryParam, responseContext)
                                   .accumulate(indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);
                            }
                            return null;
                          }
                          catch (QueryInterruptedException e) {
                            throw Throwables.propagate(e);
                          }
                          catch (Exception e) {
                            log.error(e, "Exception with one of the sequences!");
                            throw Throwables.propagate(e);
                          }
                        }
                      }
                  );
                  if (isSingleThreaded) {
                    waitForFutureCompletion(query, future, indexAccumulatorPair.lhs);
                  }
                  return future;
                }
              }
          )
      )
  );
  if (!isSingleThreaded) {
    waitForFutureCompletion(query, futures, indexAccumulatorPair.lhs);
  }
  if (bySegment) {
    return Sequences.simple(bySegmentAccumulatorPair.lhs);
  }
  return Sequences.withBaggage(
      Sequences.simple(
          Iterables.transform(
              indexAccumulatorPair.lhs.iterableWithPostAggregations(null, query.isDescending()),
              new Function<Row, T>()
              {
                @Override
                public T apply(Row input)
                {
                  return (T) input;
                }
              }
          )
      ),
      indexAccumulatorPair.lhs
  );
}
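The method fans out one prioritized callable per underlying per-segment runner, lets each callable accumulate its sequence into a shared structure (the IncrementalIndex on the normal path, a plain Queue on the by-segment path), joins all branches with Futures.allAsList, and finally reads the merged rows back out of the index. The same fan-out/fan-in shape can be reproduced with plain Guava. Below is a minimal, self-contained sketch; the executor, the task count, and the produced strings are illustrative assumptions, not Druid's actual wiring:

import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

import java.util.ArrayList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Executors;

public class FanOutFanInSketch
{
  public static void main(String[] args) throws Exception
  {
    // Hypothetical stand-in for Druid's prioritized query executor.
    ListeningExecutorService exec =
        MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));

    // Shared accumulator, playing the role of the IncrementalIndex above.
    Queue<String> results = new ConcurrentLinkedQueue<>();

    List<ListenableFuture<Void>> futures = new ArrayList<>();
    for (int i = 0; i < 8; i++) {
      final int segment = i;
      futures.add(exec.submit(() -> {
        // Each task plays the role of one per-segment QueryRunner.
        results.add("row-from-segment-" + segment);
        return null;
      }));
    }

    // Like waitForFutureCompletion: block until every branch finishes
    // (a real implementation would also handle timeouts and cancellation).
    Futures.allAsList(futures).get();
    System.out.println(results.size() + " rows accumulated");
    exec.shutdown();
  }
}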
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
From class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg:
@Test
public void testGroupByWithDistinctCountAgg() throws Exception
{
  final GroupByQueryConfig config = new GroupByQueryConfig();
  config.setMaxIntermediateRows(10000);
  final GroupByQueryRunnerFactory factory = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);
  IncrementalIndex index = new OnheapIncrementalIndex(
      0,
      Granularities.SECOND,
      new AggregatorFactory[]{new CountAggregatorFactory("cnt")},
      1000
  );
  String visitor_id = "visitor_id";
  String client_type = "client_type";
  long timestamp = System.currentTimeMillis();
  index.add(new MapBasedInputRow(
      timestamp,
      Lists.newArrayList(visitor_id, client_type),
      ImmutableMap.<String, Object>of(visitor_id, "0", client_type, "iphone")
  ));
  index.add(new MapBasedInputRow(
      timestamp + 1,
      Lists.newArrayList(visitor_id, client_type),
      ImmutableMap.<String, Object>of(visitor_id, "1", client_type, "iphone")
  ));
  index.add(new MapBasedInputRow(
      timestamp + 2,
      Lists.newArrayList(visitor_id, client_type),
      ImmutableMap.<String, Object>of(visitor_id, "2", client_type, "android")
  ));
  GroupByQuery query = new GroupByQuery.Builder()
      .setDataSource(QueryRunnerTestHelper.dataSource)
      .setGranularity(QueryRunnerTestHelper.allGran)
      .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec(client_type, client_type)))
      .setInterval(QueryRunnerTestHelper.fullOnInterval)
      .setLimitSpec(new DefaultLimitSpec(
          Lists.newArrayList(new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)),
          10
      ))
      .setAggregatorSpecs(Lists.newArrayList(
          QueryRunnerTestHelper.rowsCount,
          new DistinctCountAggregatorFactory("UV", visitor_id, null)
      ))
      .build();
  final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
  Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(
      factory,
      factory.createRunner(incrementalIndexSegment),
      query
  );
  List<Row> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow(
          "1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L
      ),
      GroupByQueryRunnerTestHelper.createExpectedRow(
          "1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L
      )
  );
  TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}
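The expected values follow directly from the three ingested rows: visitors "0" and "1" carry client_type "iphone" and visitor "2" carries "android", so the exact distinct counts are 2 and 1. As an independent cross-check of the expectation itself (not of DistinctCountAggregatorFactory, which computes the count inside Druid), here is a plain-Java sketch over the same three pairs:

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class DistinctCountExpectation
{
  public static void main(String[] args)
  {
    // The same three (visitor_id, client_type) pairs the test ingests.
    List<String[]> rows = Arrays.asList(
        new String[]{"0", "iphone"},
        new String[]{"1", "iphone"},
        new String[]{"2", "android"}
    );

    // Group by client_type and count distinct visitor_ids per group.
    Map<String, Set<String>> uvPerClient = new HashMap<>();
    for (String[] row : rows) {
      uvPerClient.computeIfAbsent(row[1], k -> new HashSet<>()).add(row[0]);
    }

    // Prints iphone=2 and android=1 (iteration order not guaranteed),
    // matching the expected "UV" values in the test.
    uvPerClient.forEach((client, visitors) ->
        System.out.println(client + "=" + visitors.size()));
  }
}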
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
From class GroupByBenchmark, method setupQueries:
private void setupQueries()
{
  // queries for the basic schema
  Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
  BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
  {
    // basic.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(
            new DefaultDimensionSpec("dimSequential", null),
            new DefaultDimensionSpec("dimZipf", null)
        ))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .build();
    basicQueries.put("A", queryA);
  }
  {
    // basic.nested
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery subqueryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(
            new DefaultDimensionSpec("dimSequential", null),
            new DefaultDimensionSpec("dimZipf", null)
        ))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularities.DAY)
        .build();
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource(subqueryA)
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("dimSequential", null)))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularities.WEEK)
        .build();
    basicQueries.put("nested", queryA);
  }
  SCHEMA_QUERY_MAP.put("basic", basicQueries);
  // simple one-column schema, for testing the performance difference between querying
  // numeric values as Strings and directly as longs
  Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
  BenchmarkSchemaInfo simpleSchema = BenchmarkSchemas.SCHEMA_MAP.get("simple");
  {
    // simple.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(
            new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.STRING)
        ))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .build();
    simpleQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simple", simpleQueries);
  Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
  BenchmarkSchemaInfo simpleLongSchema = BenchmarkSchemas.SCHEMA_MAP.get("simpleLong");
  {
    // simpleLong.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleLongSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(
            new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.LONG)
        ))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .build();
    simpleLongQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);
  Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
  BenchmarkSchemaInfo simpleFloatSchema = BenchmarkSchemas.SCHEMA_MAP.get("simpleFloat");
  {
    // simpleFloat.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleFloatSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(
            new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.FLOAT)
        ))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .build();
    simpleFloatQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
}
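The basic.nested case is the interesting one: the inner GroupByQuery is passed to setDataSource of the outer query, so the outer query consumes the inner query's day-grain rows, keyed on (dimSequential, dimZipf), and re-groups them on dimSequential alone at week grain, re-summing the same metric. Ignoring the time dimension, the re-aggregation step amounts to the following sketch, in which the rows and values are made up for illustration:

import java.util.HashMap;
import java.util.Map;

public class NestedGroupBySketch
{
  // One inner-query result row: two dimensions plus the summed metric.
  static class InnerRow
  {
    final String dimSequential;
    final String dimZipf;
    final long sumLongSequential;

    InnerRow(String dimSequential, String dimZipf, long sumLongSequential)
    {
      this.dimSequential = dimSequential;
      this.dimZipf = dimZipf;
      this.sumLongSequential = sumLongSequential;
    }
  }

  public static void main(String[] args)
  {
    // Hypothetical output of the inner query (already grouped per day).
    InnerRow[] innerRows = {
        new InnerRow("a", "x", 10),
        new InnerRow("a", "y", 5),
        new InnerRow("b", "x", 7)
    };

    // Outer query: regroup by dimSequential only, re-summing the metric.
    Map<String, Long> outer = new HashMap<>();
    for (InnerRow row : innerRows) {
      outer.merge(row.dimSequential, row.sumLongSequential, Long::sum);
    }
    System.out.println(outer); // {a=15, b=7}
  }
}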
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
From class GroupByBenchmark, method queryMultiQueryableIndex:
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole) throws Exception
{
  QueryToolChest<Row, GroupByQuery> toolChest = factory.getToolchest();
  QueryRunner<Row> theRunner = new FinalizeResultsQueryRunner<>(
      toolChest.mergeResults(factory.mergeRunners(executorService, makeMultiRunners())),
      (QueryToolChest) toolChest
  );
  Sequence<Row> queryResult = theRunner.run(query, Maps.<String, Object>newHashMap());
  List<Row> results = Sequences.toList(queryResult, Lists.<Row>newArrayList());
  for (Row result : results) {
    blackhole.consume(result);
  }
}
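The blackhole.consume call at the end is what keeps the measurement honest: handing each row to JMH's Blackhole prevents the JIT from dead-code-eliminating the work that produced it. The fields used above (factory, query, executorService, makeMultiRunners) are set up elsewhere in GroupByBenchmark; a stripped-down, self-contained JMH benchmark using the same annotations and consumption pattern, with a synthetic workload standing in for the Druid query, might look like this:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole;

@State(Scope.Benchmark)
public class ConsumePatternBenchmark
{
  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void consumeAllRows(Blackhole blackhole)
  {
    // Stand-in for materializing a Sequence<Row> into a List<Row>.
    List<Long> results = new ArrayList<>();
    for (long i = 0; i < 1_000; i++) {
      results.add(i * i);
    }
    // Hand every result to the Blackhole so the JIT cannot
    // eliminate the loop that produced it.
    for (Long result : results) {
      blackhole.consume(result);
    }
  }
}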
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
From class GroupByBenchmark, method queryMultiQueryableIndexWithSpilling:
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndexWithSpilling(Blackhole blackhole) throws Exception
{
  QueryToolChest<Row, GroupByQuery> toolChest = factory.getToolchest();
  QueryRunner<Row> theRunner = new FinalizeResultsQueryRunner<>(
      toolChest.mergeResults(factory.mergeRunners(executorService, makeMultiRunners())),
      (QueryToolChest) toolChest
  );
  final GroupByQuery spillingQuery = query.withOverriddenContext(
      ImmutableMap.<String, Object>of("bufferGrouperMaxSize", 4000)
  );
  Sequence<Row> queryResult = theRunner.run(spillingQuery, Maps.<String, Object>newHashMap());
  List<Row> results = Sequences.toList(queryResult, Lists.<Row>newArrayList());
  for (Row result : results) {
    blackhole.consume(result);
  }
}
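The only difference from the previous benchmark is the context override: shrinking bufferGrouperMaxSize to 4000 caps the in-memory grouper so the groupBy engine has to spill to disk while merging, which is the behavior this variant measures. Mechanically, withOverriddenContext merges the supplied entries over the query's existing context, with the overrides winning on key collisions. A toy sketch of that merge semantic, with illustrative keys and values:

import java.util.HashMap;
import java.util.Map;

public class ContextOverrideSketch
{
  public static void main(String[] args)
  {
    // Existing query context (values are illustrative).
    Map<String, Object> context = new HashMap<>();
    context.put("timeout", 60_000);
    context.put("bufferGrouperMaxSize", 1_000_000);

    // Overrides win on key collisions, mirroring how the
    // bufferGrouperMaxSize=4000 override takes effect above.
    Map<String, Object> overridden = new HashMap<>(context);
    overridden.put("bufferGrouperMaxSize", 4000);

    // e.g. {bufferGrouperMaxSize=4000, timeout=60000}
    // (iteration order not guaranteed)
    System.out.println(overridden);
  }
}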