Search in sources :

Example 11 with MergeSequence

use of org.apache.druid.java.util.common.guava.MergeSequence in project druid by druid-io.

the class GroupByQueryRunnerTest method testMergeResultsWithLimitPushDownSortByDimDim.

@Test
public void testMergeResultsWithLimitPushDownSortByDimDim() {
    if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) {
        return;
    }
    GroupByQuery.Builder builder = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setInterval("2011-04-02/2011-04-04").setDimensions(new DefaultDimensionSpec("quality", "alias"), new DefaultDimensionSpec("market", "market")).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index")).setLimitSpec(new DefaultLimitSpec(Lists.newArrayList(new OrderByColumnSpec("alias", OrderByColumnSpec.Direction.DESCENDING), new OrderByColumnSpec("market", OrderByColumnSpec.Direction.DESCENDING)), 5)).overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)).setGranularity(Granularities.ALL);
    final GroupByQuery allGranQuery = builder.build();
    QueryRunner mergedRunner = factory.getToolchest().mergeResults(new QueryRunner<ResultRow>() {

        @Override
        public Sequence<ResultRow> run(QueryPlus<ResultRow> queryPlus, ResponseContext responseContext) {
            // simulate two daily segments
            final QueryPlus<ResultRow> queryPlus1 = queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-02/2011-04-03")))));
            final QueryPlus<ResultRow> queryPlus2 = queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-03/2011-04-04")))));
            return factory.getToolchest().mergeResults((queryPlus3, responseContext1) -> new MergeSequence<>(queryPlus3.getQuery().getResultOrdering(), Sequences.simple(Arrays.asList(runner.run(queryPlus1, responseContext1), runner.run(queryPlus2, responseContext1))))).run(queryPlus, responseContext);
        }
    });
    List<ResultRow> allGranExpectedResults = Arrays.asList(makeRow(allGranQuery, "2011-04-02", "alias", "travel", "market", "spot", "rows", 2L, "idx", 243L), makeRow(allGranQuery, "2011-04-02", "alias", "technology", "market", "spot", "rows", 2L, "idx", 177L), makeRow(allGranQuery, "2011-04-02", "alias", "premium", "market", "upfront", "rows", 2L, "idx", 1817L), makeRow(allGranQuery, "2011-04-02", "alias", "premium", "market", "total_market", "rows", 2L, "idx", 2342L), makeRow(allGranQuery, "2011-04-02", "alias", "premium", "market", "spot", "rows", 2L, "idx", 257L));
    Iterable<ResultRow> results = mergedRunner.run(QueryPlus.wrap(allGranQuery)).toList();
    TestHelper.assertExpectedObjects(allGranExpectedResults, results, "merged");
}
Also used : DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) MergeSequence(org.apache.druid.java.util.common.guava.MergeSequence) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) ChainedExecutionQueryRunner(org.apache.druid.query.ChainedExecutionQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ResponseContext(org.apache.druid.query.context.ResponseContext) QueryPlus(org.apache.druid.query.QueryPlus) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 12 with MergeSequence

use of org.apache.druid.java.util.common.guava.MergeSequence in project druid by druid-io.

the class GroupByQueryRunnerTest method testMergeResultsWithLimitPushDown.

@Test
public void testMergeResultsWithLimitPushDown() {
    if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) {
        return;
    }
    GroupByQuery.Builder builder = makeQueryBuilder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setInterval("2011-04-02/2011-04-04").setDimensions(new DefaultDimensionSpec("quality", "alias")).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index")).setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec("alias", OrderByColumnSpec.Direction.DESCENDING)), 5)).overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)).setGranularity(Granularities.ALL);
    final GroupByQuery allGranQuery = builder.build();
    QueryRunner mergedRunner = factory.getToolchest().mergeResults(new QueryRunner<ResultRow>() {

        @Override
        public Sequence<ResultRow> run(QueryPlus<ResultRow> queryPlus, ResponseContext responseContext) {
            // simulate two daily segments
            final QueryPlus<ResultRow> queryPlus1 = queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-02/2011-04-03")))));
            final QueryPlus<ResultRow> queryPlus2 = queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-03/2011-04-04")))));
            return factory.getToolchest().mergeResults((queryPlus3, responseContext1) -> new MergeSequence<>(queryPlus3.getQuery().getResultOrdering(), Sequences.simple(Arrays.asList(runner.run(queryPlus1, responseContext1), runner.run(queryPlus2, responseContext1))))).run(queryPlus, responseContext);
        }
    });
    Map<String, Object> context = new HashMap<>();
    List<ResultRow> allGranExpectedResults = Arrays.asList(makeRow(allGranQuery, "2011-04-02", "alias", "travel", "rows", 2L, "idx", 243L), makeRow(allGranQuery, "2011-04-02", "alias", "technology", "rows", 2L, "idx", 177L), makeRow(allGranQuery, "2011-04-02", "alias", "premium", "rows", 6L, "idx", 4416L), makeRow(allGranQuery, "2011-04-02", "alias", "news", "rows", 2L, "idx", 221L), makeRow(allGranQuery, "2011-04-02", "alias", "mezzanine", "rows", 6L, "idx", 4420L));
    TestHelper.assertExpectedObjects(allGranExpectedResults, mergedRunner.run(QueryPlus.wrap(allGranQuery)), "merged");
}
Also used : DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) HashMap(java.util.HashMap) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) MergeSequence(org.apache.druid.java.util.common.guava.MergeSequence) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) ChainedExecutionQueryRunner(org.apache.druid.query.ChainedExecutionQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ResponseContext(org.apache.druid.query.context.ResponseContext) QueryPlus(org.apache.druid.query.QueryPlus) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 13 with MergeSequence

use of org.apache.druid.java.util.common.guava.MergeSequence in project druid by druid-io.

the class ScanQueryResultOrderingTest method assertResultsEquals.

private void assertResultsEquals(final ScanQuery query, final List<Integer> expectedResults) {
    final List<List<Pair<SegmentId, QueryRunner<ScanResultValue>>>> serverRunners = new ArrayList<>();
    for (int i = 0; i <= segmentToServerMap.stream().max(Comparator.naturalOrder()).orElse(0); i++) {
        serverRunners.add(new ArrayList<>());
    }
    for (int segmentNumber = 0; segmentNumber < segmentToServerMap.size(); segmentNumber++) {
        final SegmentId segmentId = SEGMENTS.get(segmentNumber).getId();
        final int serverNumber = segmentToServerMap.get(segmentNumber);
        serverRunners.get(serverNumber).add(Pair.of(segmentId, segmentRunners.get(segmentNumber)));
    }
    // Simulates what the Historical servers would do.
    final List<QueryRunner<ScanResultValue>> mergedServerRunners = serverRunners.stream().filter(runners -> !runners.isEmpty()).map(runners -> queryRunnerFactory.getToolchest().mergeResults(new QueryRunner<ScanResultValue>() {

        @Override
        public Sequence<ScanResultValue> run(final QueryPlus<ScanResultValue> queryPlus, final ResponseContext responseContext) {
            return queryRunnerFactory.mergeRunners(Execs.directExecutor(), runners.stream().map(p -> p.rhs).collect(Collectors.toList())).run(queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(new MultipleSpecificSegmentSpec(runners.stream().map(p -> p.lhs.toDescriptor()).collect(Collectors.toList())))), responseContext);
        }
    })).collect(Collectors.toList());
    // Simulates what the Broker would do.
    final QueryRunner<ScanResultValue> brokerRunner = queryRunnerFactory.getToolchest().mergeResults((queryPlus, responseContext) -> {
        final List<Sequence<ScanResultValue>> sequences = mergedServerRunners.stream().map(runner -> runner.run(queryPlus.withoutThreadUnsafeState())).collect(Collectors.toList());
        return new MergeSequence<>(queryPlus.getQuery().getResultOrdering(), Sequences.simple(sequences));
    });
    // Finally: run the query.
    final List<Integer> results = runQuery((ScanQuery) Druids.ScanQueryBuilder.copy(query).limit(limit).batchSize(batchSize).build().withOverriddenContext(ImmutableMap.of(ScanQueryConfig.CTX_KEY_MAX_ROWS_QUEUED_FOR_ORDERING, maxRowsQueuedForOrdering)), brokerRunner);
    Assert.assertEquals(expectedResults.stream().limit(limit == 0 ? Long.MAX_VALUE : limit).collect(Collectors.toList()), results);
}
Also used : IntStream(java.util.stream.IntStream) QueryPlus(org.apache.druid.query.QueryPlus) Intervals(org.apache.druid.java.util.common.Intervals) RowBasedSegment(org.apache.druid.segment.RowBasedSegment) RunWith(org.junit.runner.RunWith) TreeSet(java.util.TreeSet) Pair(org.apache.druid.java.util.common.Pair) ArrayList(java.util.ArrayList) MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) Druids(org.apache.druid.query.Druids) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) QueryRunner(org.apache.druid.query.QueryRunner) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequences(org.apache.druid.java.util.common.guava.Sequences) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) ImmutableSortedSet(com.google.common.collect.ImmutableSortedSet) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Execs(org.apache.druid.java.util.common.concurrent.Execs) ImmutableMap(com.google.common.collect.ImmutableMap) MergeSequence(org.apache.druid.java.util.common.guava.MergeSequence) ResponseContext(org.apache.druid.query.context.ResponseContext) DateTime(org.joda.time.DateTime) Set(java.util.Set) Test(org.junit.Test) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) List(java.util.List) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnType(org.apache.druid.segment.column.ColumnType) DefaultGenericQueryMetricsFactory(org.apache.druid.query.DefaultGenericQueryMetricsFactory) SegmentId(org.apache.druid.timeline.SegmentId) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections) MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) SegmentId(org.apache.druid.timeline.SegmentId) ArrayList(java.util.ArrayList) Sequence(org.apache.druid.java.util.common.guava.Sequence) MergeSequence(org.apache.druid.java.util.common.guava.MergeSequence) QueryRunner(org.apache.druid.query.QueryRunner) MergeSequence(org.apache.druid.java.util.common.guava.MergeSequence) ResponseContext(org.apache.druid.query.context.ResponseContext) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List)

Example 14 with MergeSequence

use of org.apache.druid.java.util.common.guava.MergeSequence in project druid by druid-io.

the class UnionQueryRunner method run.

@Override
public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext responseContext) {
    Query<T> query = queryPlus.getQuery();
    final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
    if (analysis.isConcreteTableBased() && analysis.getBaseUnionDataSource().isPresent()) {
        // Union of tables.
        final UnionDataSource unionDataSource = analysis.getBaseUnionDataSource().get();
        if (unionDataSource.getDataSources().isEmpty()) {
            // Shouldn't happen, because UnionDataSource doesn't allow empty unions.
            throw new ISE("Unexpectedly received empty union");
        } else if (unionDataSource.getDataSources().size() == 1) {
            // Single table. Run as a normal query.
            return baseRunner.run(queryPlus.withQuery(Queries.withBaseDataSource(query, Iterables.getOnlyElement(unionDataSource.getDataSources()))), responseContext);
        } else {
            // Split up the tables and merge their results.
            return new MergeSequence<>(query.getResultOrdering(), Sequences.simple(Lists.transform(IntStream.range(0, unionDataSource.getDataSources().size()).mapToObj(i -> new Pair<>(unionDataSource.getDataSources().get(i), i + 1)).collect(Collectors.toList()), (Function<Pair<TableDataSource, Integer>, Sequence<T>>) singleSourceWithIndex -> baseRunner.run(queryPlus.withQuery(Queries.withBaseDataSource(query, singleSourceWithIndex.lhs).withSubQueryId(generateSubqueryId(query.getSubQueryId(), singleSourceWithIndex.lhs.getName(), singleSourceWithIndex.rhs))), responseContext))));
        }
    } else {
        // Not a union of tables. Do nothing special.
        return baseRunner.run(queryPlus, responseContext);
    }
}
Also used : IntStream(java.util.stream.IntStream) Sequence(org.apache.druid.java.util.common.guava.Sequence) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Iterables(com.google.common.collect.Iterables) StringUtils(org.apache.commons.lang.StringUtils) Function(com.google.common.base.Function) MergeSequence(org.apache.druid.java.util.common.guava.MergeSequence) ResponseContext(org.apache.druid.query.context.ResponseContext) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) Pair(org.apache.druid.java.util.common.Pair) Lists(com.google.common.collect.Lists) Sequences(org.apache.druid.java.util.common.guava.Sequences) Function(com.google.common.base.Function) ISE(org.apache.druid.java.util.common.ISE) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Pair(org.apache.druid.java.util.common.Pair)

Aggregations

MergeSequence (org.apache.druid.java.util.common.guava.MergeSequence)14 Sequence (org.apache.druid.java.util.common.guava.Sequence)14 ResponseContext (org.apache.druid.query.context.ResponseContext)14 QueryPlus (org.apache.druid.query.QueryPlus)12 QueryRunner (org.apache.druid.query.QueryRunner)12 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)11 Test (org.junit.Test)11 ChainedExecutionQueryRunner (org.apache.druid.query.ChainedExecutionQueryRunner)10 FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner)10 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)10 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)9 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)9 DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec)5 OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec)5 PeriodGranularity (org.apache.druid.java.util.common.granularity.PeriodGranularity)4 Period (org.joda.time.Period)4 ArrayList (java.util.ArrayList)2 Collectors (java.util.stream.Collectors)2 IntStream (java.util.stream.IntStream)2 Pair (org.apache.druid.java.util.common.Pair)2