Example 16 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

The class GroupByMergedQueryRunner, method run.

@Override
public Sequence<T> run(final Query<T> queryParam, final Map<String, Object> responseContext) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final GroupByQueryConfig querySpecificConfig = configSupplier.get().withOverrides(query);
    final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
    final Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> indexAccumulatorPair =
        GroupByQueryHelper.createIndexAccumulatorPair(query, querySpecificConfig, bufferPool, true);
    final Pair<Queue, Accumulator<Queue, T>> bySegmentAccumulatorPair = GroupByQueryHelper.createBySegmentAccumulatorPair();
    final boolean bySegment = BaseQuery.getContextBySegment(query, false);
    final int priority = BaseQuery.getContextPriority(query, 0);
    ListenableFuture<List<Void>> futures = Futures.allAsList(Lists.newArrayList(
        Iterables.transform(queryables, new Function<QueryRunner<T>, ListenableFuture<Void>>() {

        @Override
        public ListenableFuture<Void> apply(final QueryRunner<T> input) {
            if (input == null) {
                throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
            }
            ListenableFuture<Void> future = exec.submit(new AbstractPrioritizedCallable<Void>(priority) {

                @Override
                public Void call() throws Exception {
                    try {
                        if (bySegment) {
                            input.run(queryParam, responseContext).accumulate(bySegmentAccumulatorPair.lhs, bySegmentAccumulatorPair.rhs);
                        } else {
                            input.run(queryParam, responseContext).accumulate(indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);
                        }
                        return null;
                    } catch (QueryInterruptedException e) {
                        throw Throwables.propagate(e);
                    } catch (Exception e) {
                        log.error(e, "Exception with one of the sequences!");
                        throw Throwables.propagate(e);
                    }
                }
            });
            if (isSingleThreaded) {
                waitForFutureCompletion(query, future, indexAccumulatorPair.lhs);
            }
            return future;
        }
    })));
    if (!isSingleThreaded) {
        waitForFutureCompletion(query, futures, indexAccumulatorPair.lhs);
    }
    if (bySegment) {
        return Sequences.simple(bySegmentAccumulatorPair.lhs);
    }
    return Sequences.withBaggage(
        Sequences.simple(Iterables.transform(
            indexAccumulatorPair.lhs.iterableWithPostAggregations(null, query.isDescending()),
            new Function<Row, T>() {

                @Override
                public T apply(Row input) {
                    return (T) input;
                }
            })),
        indexAccumulatorPair.lhs);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) TimeoutException(java.util.concurrent.TimeoutException) CancellationException(java.util.concurrent.CancellationException) ExecutionException(java.util.concurrent.ExecutionException) GroupByQuery(io.druid.query.groupby.GroupByQuery) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ISE(io.druid.java.util.common.ISE) List(java.util.List) Row(io.druid.data.input.Row) Queue(java.util.Queue)
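
The pattern worth noting here is the fan-out and join: one prioritized task per segment-level runner, all feeding a shared accumulator, with Futures.allAsList as the join point. Below is a minimal, self-contained sketch of that pattern in plain Guava; the queue stands in for the IncrementalIndex, and none of the names are Druid code.

import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class FanOutAccumulateSketch {
    public static void main(String[] args) throws Exception {
        ListeningExecutorService exec =
            MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));
        // Thread-safe stand-in for the shared IncrementalIndex accumulator.
        final ConcurrentLinkedQueue<String> accumulator = new ConcurrentLinkedQueue<>();
        List<ListenableFuture<Void>> futures = new ArrayList<>();
        for (int i = 0; i < 4; i++) {
            final int segment = i;
            // One task per per-segment runner, mirroring exec.submit(...) above.
            futures.add(exec.submit(() -> {
                accumulator.add("rows-from-segment-" + segment);
                return null;
            }));
        }
        // Join point: block until every per-segment task has fed the
        // accumulator, mirroring waitForFutureCompletion(query, futures, ...).
        Futures.allAsList(futures).get(30, TimeUnit.SECONDS);
        for (String rows : accumulator) {
            System.out.println(rows);
        }
        exec.shutdown();
    }
}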

Example 17 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

The class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg.

@Test
public void testGroupByWithDistinctCountAgg() throws Exception {
    final GroupByQueryConfig config = new GroupByQueryConfig();
    config.setMaxIntermediateRows(10000);
    final GroupByQueryRunnerFactory factory = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);
    IncrementalIndex index = new OnheapIncrementalIndex(
        0, Granularities.SECOND, new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, 1000);
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    long timestamp = System.currentTimeMillis();
    index.add(new MapBasedInputRow(
        timestamp,
        Lists.newArrayList(visitor_id, client_type),
        ImmutableMap.<String, Object>of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(
        timestamp + 1,
        Lists.newArrayList(visitor_id, client_type),
        ImmutableMap.<String, Object>of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(
        timestamp + 2,
        Lists.newArrayList(visitor_id, client_type),
        ImmutableMap.<String, Object>of(visitor_id, "2", client_type, "android")));
    GroupByQuery query = new GroupByQuery.Builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setGranularity(QueryRunnerTestHelper.allGran)
        .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec(client_type, client_type)))
        .setInterval(QueryRunnerTestHelper.fullOnInterval)
        .setLimitSpec(new DefaultLimitSpec(
            Lists.newArrayList(new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)),
            10))
        .setAggregatorSpecs(Lists.newArrayList(
            QueryRunnerTestHelper.rowsCount,
            new DistinctCountAggregatorFactory("UV", visitor_id, null)))
        .build();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, factory.createRunner(incrementalIndexSegment), query);
    List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L));
    TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}
Also used : GroupByQueryRunnerFactory(io.druid.query.groupby.GroupByQueryRunnerFactory) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Segment(io.druid.segment.Segment) OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Row(io.druid.data.input.Row) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)
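
GroupByQueryRunnerTestHelper.createExpectedRow takes a timestamp string followed by alternating column names and values. As a point of reference, here is a minimal sketch of the equivalent row built by hand, assuming the helper is backed by MapBasedRow; that backing is an assumption for illustration, not the helper's actual source.

import com.google.common.collect.ImmutableMap;

import io.druid.data.input.MapBasedRow;
import io.druid.data.input.Row;

import org.joda.time.DateTime;

public class ExpectedRowSketch {
    public static void main(String[] args) {
        // Mirrors createExpectedRow("1970-01-01T00:00:00.000Z",
        //     client_type, "iphone", "UV", 2L, "rows", 2L) above.
        Row expected = new MapBasedRow(
            new DateTime("1970-01-01T00:00:00.000Z"),
            ImmutableMap.<String, Object>of("client_type", "iphone", "UV", 2L, "rows", 2L));
        System.out.println(expected.getDimension("client_type")); // [iphone]
        System.out.println(expected.getLongMetric("UV"));         // 2
    }
}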

Example 18 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

The class GroupByBenchmark, method setupQueries.

private void setupQueries() {
    // queries for the basic schema
    Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", null),
                new DefaultDimensionSpec("dimZipf", null)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .build();
        basicQueries.put("A", queryA);
    }
    {
        // basic.nested
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery subqueryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", null),
                new DefaultDimensionSpec("dimZipf", null)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularities.DAY)
            .build();
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource(subqueryA)
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", null)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularities.WEEK)
            .build();
        basicQueries.put("nested", queryA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
    // simple one column schema, for testing performance difference between querying on numeric values as Strings and
    // directly as longs
    Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo simpleSchema = BenchmarkSchemas.SCHEMA_MAP.get("simple");
    {
        // simple.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.STRING)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .build();
        simpleQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simple", simpleQueries);
    Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo simpleLongSchema = BenchmarkSchemas.SCHEMA_MAP.get("simpleLong");
    {
        // simpleLong.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleLongSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.LONG)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .build();
        simpleLongQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);
    Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo simpleFloatSchema = BenchmarkSchemas.SCHEMA_MAP.get("simpleFloat");
    {
        // simpleFloat.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleFloatSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.FLOAT)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .build();
        simpleFloatQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
}
Also used : GroupByQuery(io.druid.query.groupby.GroupByQuery) BenchmarkSchemaInfo(io.druid.benchmark.datagen.BenchmarkSchemaInfo) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(io.druid.query.spec.MultipleIntervalSegmentSpec) QuerySegmentSpec(io.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) LinkedHashMap(java.util.LinkedHashMap)
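
The maps built here are only half the story: a JMH benchmark still has to pick one query per run. A hypothetical sketch of that selection step via JMH parameters follows; the field and parameter names are assumptions, not copied from GroupByBenchmark.

import java.util.LinkedHashMap;
import java.util.Map;

import io.druid.query.groupby.GroupByQuery;

import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;

@State(Scope.Benchmark)
public class QuerySelectionSketch {
    // Same shape as the map filled by setupQueries(): schema name -> query name -> query.
    static final Map<String, Map<String, GroupByQuery>> SCHEMA_QUERY_MAP = new LinkedHashMap<>();

    // JMH runs the benchmark once per "schema.query" key listed here.
    @Param({ "basic.A", "basic.nested", "simple.A", "simpleLong.A", "simpleFloat.A" })
    public String schemaAndQuery;

    GroupByQuery query;

    @Setup
    public void pickQuery() {
        String[] parts = schemaAndQuery.split("\\.");
        query = SCHEMA_QUERY_MAP.get(parts[0]).get(parts[1]);
    }
}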

Example 19 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

The class GroupByBenchmark, method queryMultiQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole) throws Exception {
    QueryToolChest<Row, GroupByQuery> toolChest = factory.getToolchest();
    QueryRunner<Row> theRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(factory.mergeRunners(executorService, makeMultiRunners())),
        (QueryToolChest) toolChest);
    Sequence<Row> queryResult = theRunner.run(query, Maps.<String, Object>newHashMap());
    List<Row> results = Sequences.toList(queryResult, Lists.<Row>newArrayList());
    for (Row result : results) {
        blackhole.consume(result);
    }
}
Also used : GroupByQuery(io.druid.query.groupby.GroupByQuery) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) InputRow(io.druid.data.input.InputRow) Row(io.druid.data.input.Row) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
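
makeMultiRunners() is not shown in this excerpt. Based on the call site, a hedged sketch of what it presumably does is to wrap each queryable index in a Segment and ask the factory for a per-segment runner; the parameters and the identifier scheme below are assumptions.

import java.util.ArrayList;
import java.util.List;

import io.druid.data.input.Row;
import io.druid.query.QueryRunner;
import io.druid.query.groupby.GroupByQueryRunnerFactory;
import io.druid.segment.QueryableIndex;
import io.druid.segment.QueryableIndexSegment;

class MultiRunnerSketch {
    // Hypothetical reconstruction: one runner per queryable index.
    static List<QueryRunner<Row>> makeMultiRunners(
        GroupByQueryRunnerFactory factory,
        List<QueryableIndex> indexes) {
        List<QueryRunner<Row>> runners = new ArrayList<>();
        for (int i = 0; i < indexes.size(); i++) {
            runners.add(factory.createRunner(
                new QueryableIndexSegment("qIndex_" + i, indexes.get(i))));
        }
        return runners;
    }
}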

Example 20 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

The class GroupByBenchmark, method queryMultiQueryableIndexWithSpilling.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndexWithSpilling(Blackhole blackhole) throws Exception {
    QueryToolChest<Row, GroupByQuery> toolChest = factory.getToolchest();
    QueryRunner<Row> theRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(factory.mergeRunners(executorService, makeMultiRunners())),
        (QueryToolChest) toolChest);
    final GroupByQuery spillingQuery = query.withOverriddenContext(
        ImmutableMap.<String, Object>of("bufferGrouperMaxSize", 4000));
    Sequence<Row> queryResult = theRunner.run(spillingQuery, Maps.<String, Object>newHashMap());
    List<Row> results = Sequences.toList(queryResult, Lists.<Row>newArrayList());
    for (Row result : results) {
        blackhole.consume(result);
    }
}
Also used : GroupByQuery(io.druid.query.groupby.GroupByQuery) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) InputRow(io.druid.data.input.InputRow) Row(io.druid.data.input.Row) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
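
withOverriddenContext returns a copy of the query with the given context entries merged in, which is how this benchmark forces spilling (bufferGrouperMaxSize caps the in-memory grouper) without mutating the query shared by the other benchmarks. A minimal sketch of the same mechanism with another standard Druid context key:

import com.google.common.collect.ImmutableMap;

import io.druid.query.groupby.GroupByQuery;

class ContextOverrideSketch {
    static GroupByQuery withTimeout(GroupByQuery query) {
        // Returns a new query; the receiver is left untouched. "timeout"
        // is a standard Druid query-context key, in milliseconds.
        return query.withOverriddenContext(
            ImmutableMap.<String, Object>of("timeout", 60000));
    }
}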

Aggregations

GroupByQuery (io.druid.query.groupby.GroupByQuery) - 26 uses
Row (io.druid.data.input.Row) - 19 uses
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec) - 15 uses
Test (org.junit.Test) - 13 uses
GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest) - 12 uses
DimensionSpec (io.druid.query.dimension.DimensionSpec) - 10 uses
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory) - 8 uses
ArrayList (java.util.ArrayList) - 7 uses
InputRow (io.druid.data.input.InputRow) - 6 uses
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory) - 6 uses
List (java.util.List) - 6 uses
QueryRunner (io.druid.query.QueryRunner) - 5 uses
PostAggregator (io.druid.query.aggregation.PostAggregator) - 5 uses
Interval (org.joda.time.Interval) - 5 uses
Function (com.google.common.base.Function) - 4 uses
FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner) - 4 uses
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory) - 4 uses
MultipleIntervalSegmentSpec (io.druid.query.spec.MultipleIntervalSegmentSpec) - 4 uses
IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment) - 4 uses
ISE (io.druid.java.util.common.ISE) - 3 uses