Example 31 with Sequence

use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.

the class ClientQuerySegmentWalker method inlineIfNecessary.

/**
 * Replace QueryDataSources with InlineDataSources when necessary and possible. "Necessary" is defined as:
 *
 * 1) For outermost subqueries: inlining is necessary if the toolchest cannot handle it.
 * 2) For all other subqueries (e.g. those nested under a join): inlining is always necessary.
 *
 * @param dataSource           datasource to process.
 * @param toolChestIfOutermost if provided, and if the provided datasource is a {@link QueryDataSource}, this method
 *                             will consider whether the toolchest can handle a subquery on the datasource using
 *                             {@link QueryToolChest#canPerformSubquery}. If the toolchest can handle it, then it will
 *                             not be inlined. See {@link org.apache.druid.query.groupby.GroupByQueryQueryToolChest}
 *                             for an example of a toolchest that can handle subqueries.
 * @param subqueryRowLimitAccumulator an accumulator for tracking the number of accumulated rows in all subqueries
 *                                    for a particular master query
 * @param maxSubqueryRows      Max rows that all the subqueries generated by a master query can have, combined
 * @param dryRun               if true, does not actually execute any subqueries, but will inline empty result sets.
 */
// Subquery, toolchest, runner handling all use raw types
@SuppressWarnings({ "rawtypes", "unchecked" })
private DataSource inlineIfNecessary(
        final DataSource dataSource,
        @Nullable final QueryToolChest toolChestIfOutermost,
        final AtomicInteger subqueryRowLimitAccumulator,
        final int maxSubqueryRows,
        final boolean dryRun
) {
    if (dataSource instanceof QueryDataSource) {
        // This datasource is a subquery.
        final Query subQuery = ((QueryDataSource) dataSource).getQuery();
        final QueryToolChest toolChest = warehouse.getToolChest(subQuery);
        if (toolChestIfOutermost != null && toolChestIfOutermost.canPerformSubquery(subQuery)) {
            // Strip outer queries that are handleable by the toolchest, and inline subqueries that may be underneath
            // them (e.g. subqueries nested under a join).
            final Stack<DataSource> stack = new Stack<>();
            DataSource current = dataSource;
            while (current instanceof QueryDataSource) {
                stack.push(current);
                current = Iterables.getOnlyElement(current.getChildren());
            }
            // lgtm [java/contradictory-type-checks]
            assert !(current instanceof QueryDataSource);
            current = inlineIfNecessary(current, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
            while (!stack.isEmpty()) {
                current = stack.pop().withChildren(Collections.singletonList(current));
            }
            assert current instanceof QueryDataSource;
            if (toolChest.canPerformSubquery(((QueryDataSource) current).getQuery())) {
                return current;
            } else {
                // We need to consider inlining it.
                return inlineIfNecessary(current, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
            }
        } else if (canRunQueryUsingLocalWalker(subQuery) || canRunQueryUsingClusterWalker(subQuery)) {
            // Subquery needs to be inlined. Assign it a subquery id and run it.
            final Sequence<?> queryResults;
            if (dryRun) {
                queryResults = Sequences.empty();
            } else {
                final QueryRunner subqueryRunner = subQuery.getRunner(this);
                queryResults = subqueryRunner.run(QueryPlus.wrap(subQuery), DirectDruidClient.makeResponseContextForQuery());
            }
            return toInlineDataSource(subQuery, queryResults, warehouse.getToolChest(subQuery), subqueryRowLimitAccumulator, maxSubqueryRows);
        } else {
            // Cannot inline subquery. Attempt to inline one level deeper, and then try again.
            return inlineIfNecessary(
                dataSource.withChildren(
                    Collections.singletonList(
                        inlineIfNecessary(
                            Iterables.getOnlyElement(dataSource.getChildren()),
                            null,
                            subqueryRowLimitAccumulator,
                            maxSubqueryRows,
                            dryRun
                        )
                    )
                ),
                toolChestIfOutermost,
                subqueryRowLimitAccumulator,
                maxSubqueryRows,
                dryRun
            );
        }
    } else {
        // Not a query datasource. Walk children and see if there's anything to inline.
        return dataSource.withChildren(
            dataSource.getChildren()
                      .stream()
                      .map(child -> inlineIfNecessary(child, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun))
                      .collect(Collectors.toList())
        );
    }
}
Also used : QueryDataSource(org.apache.druid.query.QueryDataSource) Query(org.apache.druid.query.Query) QueryToolChest(org.apache.druid.query.QueryToolChest) Sequence(org.apache.druid.java.util.common.guava.Sequence) ResultLevelCachingQueryRunner(org.apache.druid.query.ResultLevelCachingQueryRunner) RetryQueryRunner(org.apache.druid.query.RetryQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) DataSource(org.apache.druid.query.DataSource) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) InlineDataSource(org.apache.druid.query.InlineDataSource) TableDataSource(org.apache.druid.query.TableDataSource) Stack(java.util.Stack)
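
The stack manipulation above (push QueryDataSource layers while descending, rewrite the innermost child, then pop to re-wrap) is easy to see in isolation. Below is a minimal, self-contained sketch of that traversal in plain Java (16+); Node, Wrapper, and Leaf are hypothetical stand-ins for Druid's DataSource types, used here only to illustrate the idiom:

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.function.UnaryOperator;

// Hypothetical stand-ins for DataSource and QueryDataSource, for illustration only.
interface Node {
    Node child();                // null for leaves
    Node withChild(Node child);  // copy with a replaced child
}

record Wrapper(Node inner) implements Node {
    public Node child() { return inner; }
    public Node withChild(Node c) { return new Wrapper(c); }
}

record Leaf(String name) implements Node {
    public Node child() { return null; }
    public Node withChild(Node c) { throw new UnsupportedOperationException(); }
}

public class UnwrapRewrite {
    // Mirrors inlineIfNecessary's stack: descend to the innermost non-wrapper
    // node, transform it, then re-apply the wrappers in their original order.
    static Node rewriteInnermost(Node root, UnaryOperator<Node> transform) {
        Deque<Node> stack = new ArrayDeque<>();
        Node current = root;
        while (current instanceof Wrapper) {
            stack.push(current);
            current = current.child();
        }
        current = transform.apply(current);
        while (!stack.isEmpty()) {
            current = stack.pop().withChild(current);
        }
        return current;
    }

    public static void main(String[] args) {
        Node tree = new Wrapper(new Wrapper(new Leaf("table")));
        // Prints Wrapper[inner=Wrapper[inner=Leaf[name=inlined]]]
        System.out.println(rewriteInnermost(tree, leaf -> new Leaf("inlined")));
    }
}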

Example 32 with Sequence

use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.

the class DumpSegmentTest method testExecuteQuery.

@Test
public void testExecuteQuery() {
    Injector injector = Mockito.mock(Injector.class);
    QueryRunnerFactoryConglomerate conglomerate = Mockito.mock(QueryRunnerFactoryConglomerate.class);
    QueryRunnerFactory factory = Mockito.mock(QueryRunnerFactory.class, Mockito.RETURNS_DEEP_STUBS);
    QueryRunner runner = Mockito.mock(QueryRunner.class);
    QueryRunner mergeRunner = Mockito.mock(QueryRunner.class);
    Query query = Mockito.mock(Query.class);
    Sequence expected = Sequences.simple(Collections.singletonList(123));
    Mockito.when(injector.getInstance(QueryRunnerFactoryConglomerate.class)).thenReturn(conglomerate);
    Mockito.when(conglomerate.findFactory(ArgumentMatchers.any())).thenReturn(factory);
    Mockito.when(factory.createRunner(ArgumentMatchers.any())).thenReturn(runner);
    Mockito.when(factory.getToolchest().mergeResults(factory.mergeRunners(DirectQueryProcessingPool.INSTANCE, ImmutableList.of(runner)))).thenReturn(mergeRunner);
    Mockito.when(mergeRunner.run(ArgumentMatchers.any(), ArgumentMatchers.any())).thenReturn(expected);
    Sequence actual = DumpSegment.executeQuery(injector, null, query);
    Assert.assertSame(expected, actual);
}
Also used : QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) QueryRunnerFactory(org.apache.druid.query.QueryRunnerFactory) Query(org.apache.druid.query.Query) Injector(com.google.inject.Injector) Sequence(org.apache.druid.java.util.common.guava.Sequence) QueryRunner(org.apache.druid.query.QueryRunner) Test(org.junit.Test)
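
For context on the Sequence type these examples revolve around: it is Druid's lazy result-stream abstraction, consumed by folding. A minimal sketch of its two basic operations, assuming the Druid core artifact is on the classpath:

import java.util.Arrays;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;

public class SequenceBasics {
    public static void main(String[] args) {
        Sequence<Integer> seq = Sequences.simple(Arrays.asList(1, 2, 3));
        // accumulate() folds the sequence without materializing it...
        Integer sum = seq.accumulate(0, (acc, in) -> acc + in);
        System.out.println(sum);           // 6
        // ...while toList() drains it into a List.
        System.out.println(seq.toList());  // [1, 2, 3]
    }
}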

Example 33 with Sequence

use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.

the class TimewarpOperatorTest method testEmptyFutureInterval.

@Test
public void testEmptyFutureInterval() {
    QueryRunner<Result<TimeseriesResultValue>> queryRunner = testOperator.postProcess(new QueryRunner<Result<TimeseriesResultValue>>() {

        @Override
        public Sequence<Result<TimeseriesResultValue>> run(QueryPlus<Result<TimeseriesResultValue>> queryPlus, ResponseContext responseContext) {
            final Query<Result<TimeseriesResultValue>> query = queryPlus.getQuery();
            return Sequences.simple(
                ImmutableList.of(
                    new Result<>(
                        query.getIntervals().get(0).getStart(),
                        new TimeseriesResultValue(ImmutableMap.of("metric", 2))
                    ),
                    new Result<>(
                        query.getIntervals().get(0).getEnd(),
                        new TimeseriesResultValue(ImmutableMap.of("metric", 3))
                    )
                )
            );
        }
    }, DateTimes.of("2014-08-02").getMillis());
    final Query<Result<TimeseriesResultValue>> query = Druids.newTimeseriesQueryBuilder()
        .dataSource("dummy")
        .intervals("2014-08-06/2014-08-08")
        .aggregators(Collections.singletonList(new CountAggregatorFactory("count")))
        .build();
    Assert.assertEquals(
        Lists.newArrayList(
            new Result<>(DateTimes.of("2014-08-02"), new TimeseriesResultValue(ImmutableMap.of("metric", 2))),
            new Result<>(DateTimes.of("2014-08-02"), new TimeseriesResultValue(ImmutableMap.of("metric", 3)))
        ),
        queryRunner.run(QueryPlus.wrap(query)).toList()
    );
}
Also used : TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ResponseContext(org.apache.druid.query.context.ResponseContext) Sequence(org.apache.druid.java.util.common.guava.Sequence) Test(org.junit.Test)
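
A side note on the anonymous class above: QueryRunner declares a single abstract method, run(QueryPlus, ResponseContext), so the stub can equivalently be written as a lambda. A sketch of the same runner in that form (imports shown for completeness; this is a drop-in for the anonymous class, not new behavior):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.Result;
import org.apache.druid.query.timeseries.TimeseriesResultValue;

// The same stub runner, written as a lambda.
QueryRunner<Result<TimeseriesResultValue>> stub = (queryPlus, responseContext) -> {
    final Query<Result<TimeseriesResultValue>> query = queryPlus.getQuery();
    return Sequences.simple(ImmutableList.of(
        new Result<>(query.getIntervals().get(0).getStart(), new TimeseriesResultValue(ImmutableMap.of("metric", 2))),
        new Result<>(query.getIntervals().get(0).getEnd(), new TimeseriesResultValue(ImmutableMap.of("metric", 3)))
    ));
};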

Example 34 with Sequence

use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.

the class TimewarpOperatorTest method testPostProcess.

@Test
public void testPostProcess() {
    QueryRunner<Result<TimeseriesResultValue>> queryRunner = testOperator.postProcess(new QueryRunner<Result<TimeseriesResultValue>>() {

        @Override
        public Sequence<Result<TimeseriesResultValue>> run(QueryPlus<Result<TimeseriesResultValue>> queryPlus, ResponseContext responseContext) {
            return Sequences.simple(
                ImmutableList.of(
                    new Result<>(DateTimes.of("2014-01-09"), new TimeseriesResultValue(ImmutableMap.of("metric", 2))),
                    new Result<>(DateTimes.of("2014-01-11"), new TimeseriesResultValue(ImmutableMap.of("metric", 3))),
                    new Result<>(
                        queryPlus.getQuery().getIntervals().get(0).getEnd(),
                        new TimeseriesResultValue(ImmutableMap.of("metric", 5))
                    )
                )
            );
        }
    }, DateTimes.of("2014-08-02").getMillis());
    final Query<Result<TimeseriesResultValue>> query = Druids.newTimeseriesQueryBuilder()
        .dataSource("dummy")
        .intervals("2014-07-31/2014-08-05")
        .aggregators(Collections.singletonList(new CountAggregatorFactory("count")))
        .build();
    Assert.assertEquals(
        Lists.newArrayList(
            new Result<>(DateTimes.of("2014-07-31"), new TimeseriesResultValue(ImmutableMap.of("metric", 2))),
            new Result<>(DateTimes.of("2014-08-02"), new TimeseriesResultValue(ImmutableMap.of("metric", 3))),
            new Result<>(DateTimes.of("2014-08-02"), new TimeseriesResultValue(ImmutableMap.of("metric", 5)))
        ),
        queryRunner.run(QueryPlus.wrap(query)).toList()
    );
    TimewarpOperator<Result<TimeBoundaryResultValue>> timeBoundaryOperator = new TimewarpOperator<>(
        new Interval(DateTimes.of("2014-01-01"), DateTimes.of("2014-01-15")),
        new Period("P1W"),
        // align on Monday
        DateTimes.of("2014-01-06")
    );
    QueryRunner<Result<TimeBoundaryResultValue>> timeBoundaryRunner = timeBoundaryOperator.postProcess(new QueryRunner<Result<TimeBoundaryResultValue>>() {

        @Override
        public Sequence<Result<TimeBoundaryResultValue>> run(QueryPlus<Result<TimeBoundaryResultValue>> queryPlus, ResponseContext responseContext) {
            return Sequences.simple(
                ImmutableList.of(
                    new Result<>(
                        DateTimes.of("2014-01-12"),
                        new TimeBoundaryResultValue(ImmutableMap.<String, Object>of("maxTime", DateTimes.of("2014-01-12")))
                    )
                )
            );
        }
    }, DateTimes.of("2014-08-02").getMillis());
    final Query<Result<TimeBoundaryResultValue>> timeBoundaryQuery = Druids.newTimeBoundaryQueryBuilder().dataSource("dummy").build();
    Assert.assertEquals(
        Collections.singletonList(
            new Result<>(
                DateTimes.of("2014-08-02"),
                new TimeBoundaryResultValue(ImmutableMap.<String, Object>of("maxTime", DateTimes.of("2014-08-02")))
            )
        ),
        timeBoundaryRunner.run(QueryPlus.wrap(timeBoundaryQuery)).toList()
    );
}
Also used : TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) Period(org.joda.time.Period) Sequence(org.apache.druid.java.util.common.guava.Sequence) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ResponseContext(org.apache.druid.query.context.ResponseContext) TimeBoundaryResultValue(org.apache.druid.query.timeboundary.TimeBoundaryResultValue) Interval(org.joda.time.Interval) Test(org.junit.Test)
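
Both assertions pin results to 2014-08-02 because the operator re-maps timestamps onto a reference cycle. As a rough, standalone illustration of that kind of period-modulo arithmetic (not TimewarpOperator's actual implementation), consider this Joda-Time sketch:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;

public class TimewarpSketch {
    // Map an instant onto the reference cycle anchored at `origin`,
    // repeating with the given period (illustrative only).
    static DateTime warp(DateTime t, DateTime origin, Period period) {
        long periodMillis = period.toStandardDuration().getMillis();
        long offset = (t.getMillis() - origin.getMillis()) % periodMillis;
        if (offset < 0) {
            offset += periodMillis;
        }
        return origin.plus(offset);
    }

    public static void main(String[] args) {
        DateTime origin = new DateTime("2014-01-06T00:00:00Z", DateTimeZone.UTC); // a Monday
        // 2014-08-02 is a Saturday; it lands 5 days into the weekly cycle.
        System.out.println(warp(new DateTime("2014-08-02T00:00:00Z", DateTimeZone.UTC), origin, new Period("P1W")));
        // Prints 2014-01-11T00:00:00.000Z
    }
}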

Example 35 with Sequence

use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.

the class ForegroundCachePopulator method wrap.

@Override
public <T, CacheType> Sequence<T> wrap(final Sequence<T> sequence, final Function<T, CacheType> cacheFn, final Cache cache, final Cache.NamedKey cacheKey) {
    final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    final MutableBoolean tooBig = new MutableBoolean(false);
    final JsonGenerator jsonGenerator;
    try {
        jsonGenerator = objectMapper.getFactory().createGenerator(bytes);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return Sequences.wrap(Sequences.map(sequence, input -> {
        if (!tooBig.isTrue()) {
            try {
                jsonGenerator.writeObject(cacheFn.apply(input));
                // The size check below is approximate, since the generator is not flushed first;
                // entries are typically just a few KB, and we don't want to waste cycles flushing.
                if (maxEntrySize > 0 && bytes.size() > maxEntrySize) {
                    tooBig.setValue(true);
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        return input;
    }), new SequenceWrapper() {

        @Override
        public void after(final boolean isDone, final Throwable thrown) throws Exception {
            jsonGenerator.close();
            if (isDone) {
                // Check tooBig, then check maxEntrySize one more time, after closing/flushing jsonGenerator.
                if (tooBig.isTrue() || (maxEntrySize > 0 && bytes.size() > maxEntrySize)) {
                    cachePopulatorStats.incrementOversized();
                    return;
                }
                try {
                    cache.put(cacheKey, bytes.toByteArray());
                    cachePopulatorStats.incrementOk();
                } catch (Exception e) {
                    log.warn(e, "Unable to write to cache");
                    cachePopulatorStats.incrementError();
                }
            }
        }
    });
}
Also used : Logger(org.apache.druid.java.util.common.logger.Logger) Sequence(org.apache.druid.java.util.common.guava.Sequence) ByteArrayOutputStream(java.io.ByteArrayOutputStream) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Preconditions(com.google.common.base.Preconditions) IOException(java.io.IOException) SequenceWrapper(org.apache.druid.java.util.common.guava.SequenceWrapper) Function(java.util.function.Function) Sequences(org.apache.druid.java.util.common.guava.Sequences)
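
The important mechanism here is the SequenceWrapper: because a Sequence is lazy, the cache entry can only be committed once the results have actually been streamed to the caller, which is what the after() callback provides. A minimal standalone sketch of that hook (assuming the Druid core artifact on the classpath):

import java.util.Arrays;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.SequenceWrapper;
import org.apache.druid.java.util.common.guava.Sequences;

public class WrapSketch {
    public static void main(String[] args) {
        Sequence<Integer> base = Sequences.simple(Arrays.asList(1, 2, 3));
        Sequence<Integer> wrapped = Sequences.wrap(base, new SequenceWrapper() {
            @Override
            public void after(boolean isDone, Throwable thrown) {
                // Runs exactly once, after consumption finishes or fails;
                // isDone is true only on complete, successful consumption.
                System.out.println("fully consumed: " + isDone);
            }
        });
        // toList() drains the sequence, firing after(true, null) before returning.
        System.out.println(wrapped.toList());
    }
}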

Aggregations

Sequence (org.apache.druid.java.util.common.guava.Sequence)102 Test (org.junit.Test)53 List (java.util.List)44 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)37 ResponseContext (org.apache.druid.query.context.ResponseContext)32 ImmutableList (com.google.common.collect.ImmutableList)29 Intervals (org.apache.druid.java.util.common.Intervals)28 Granularities (org.apache.druid.java.util.common.granularity.Granularities)28 QueryRunner (org.apache.druid.query.QueryRunner)28 ArrayList (java.util.ArrayList)27 VirtualColumns (org.apache.druid.segment.VirtualColumns)26 Cursor (org.apache.druid.segment.Cursor)25 QueryPlus (org.apache.druid.query.QueryPlus)24 Result (org.apache.druid.query.Result)24 NullHandling (org.apache.druid.common.config.NullHandling)22 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)22 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)21 QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter)20 DataSegment (org.apache.druid.timeline.DataSegment)20 ImmutableMap (com.google.common.collect.ImmutableMap)18