Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
The class ClientQuerySegmentWalker, method inlineIfNecessary.
/**
* Replace QueryDataSources with InlineDataSources when necessary and possible. "Necessary" is defined as:
*
* 1) For outermost subqueries: inlining is necessary if the toolchest cannot handle it.
* 2) For all other subqueries (e.g. those nested under a join): inlining is always necessary.
*
* @param dataSource datasource to process.
* @param toolChestIfOutermost if provided, and if the provided datasource is a {@link QueryDataSource}, this method
* will consider whether the toolchest can handle a subquery on the datasource using
* {@link QueryToolChest#canPerformSubquery}. If the toolchest can handle it, then it will
* not be inlined. See {@link org.apache.druid.query.groupby.GroupByQueryQueryToolChest}
* for an example of a toolchest that can handle subqueries.
* @param subqueryRowLimitAccumulator an accumulator for tracking the number of accumulated rows in all subqueries
* for a particular master query
* @param maxSubqueryRows Max rows that all the subqueries generated by a master query can have, combined
* @param dryRun if true, does not actually execute any subqueries, but will inline empty result sets.
*/
// Subquery, toolchest, runner handling all use raw types
@SuppressWarnings({ "rawtypes", "unchecked" })
private DataSource inlineIfNecessary(
    final DataSource dataSource,
    @Nullable final QueryToolChest toolChestIfOutermost,
    final AtomicInteger subqueryRowLimitAccumulator,
    final int maxSubqueryRows,
    final boolean dryRun
) {
if (dataSource instanceof QueryDataSource) {
// This datasource is a subquery.
final Query subQuery = ((QueryDataSource) dataSource).getQuery();
final QueryToolChest toolChest = warehouse.getToolChest(subQuery);
if (toolChestIfOutermost != null && toolChestIfOutermost.canPerformSubquery(subQuery)) {
// Strip outer queries that are handleable by the toolchest, and inline subqueries that may be underneath
// them (e.g. subqueries nested under a join).
final Stack<DataSource> stack = new Stack<>();
DataSource current = dataSource;
while (current instanceof QueryDataSource) {
stack.push(current);
current = Iterables.getOnlyElement(current.getChildren());
}
// lgtm [java/contradictory-type-checks]
assert !(current instanceof QueryDataSource);
current = inlineIfNecessary(current, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
while (!stack.isEmpty()) {
current = stack.pop().withChildren(Collections.singletonList(current));
}
assert current instanceof QueryDataSource;
if (toolChest.canPerformSubquery(((QueryDataSource) current).getQuery())) {
return current;
} else {
// We need to consider inlining it.
return inlineIfNecessary(current, toolChestIfOutermost, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun);
}
} else if (canRunQueryUsingLocalWalker(subQuery) || canRunQueryUsingClusterWalker(subQuery)) {
// Subquery needs to be inlined. Assign it a subquery id and run it.
final Sequence<?> queryResults;
if (dryRun) {
queryResults = Sequences.empty();
} else {
final QueryRunner subqueryRunner = subQuery.getRunner(this);
queryResults = subqueryRunner.run(QueryPlus.wrap(subQuery), DirectDruidClient.makeResponseContextForQuery());
}
return toInlineDataSource(
    subQuery,
    queryResults,
    warehouse.getToolChest(subQuery),
    subqueryRowLimitAccumulator,
    maxSubqueryRows
);
} else {
// Cannot inline subquery. Attempt to inline one level deeper, and then try again.
return inlineIfNecessary(
    dataSource.withChildren(
        Collections.singletonList(
            inlineIfNecessary(
                Iterables.getOnlyElement(dataSource.getChildren()),
                null,
                subqueryRowLimitAccumulator,
                maxSubqueryRows,
                dryRun
            )
        )
    ),
    toolChestIfOutermost,
    subqueryRowLimitAccumulator,
    maxSubqueryRows,
    dryRun
);
}
} else {
// Not a query datasource. Walk children and see if there's anything to inline.
return dataSource.withChildren(
    dataSource.getChildren()
              .stream()
              .map(child -> inlineIfNecessary(child, null, subqueryRowLimitAccumulator, maxSubqueryRows, dryRun))
              .collect(Collectors.toList())
);
}
}
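The subqueryRowLimitAccumulator and maxSubqueryRows parameters above cap how many rows all inlined subqueries of a single master query may materialize, combined. The following is a minimal sketch of that accumulate-with-a-shared-limit idea using only the public Sequence API; it is not Druid's toInlineDataSource, the helper name materializeWithLimit is hypothetical, and IllegalStateException stands in for Druid's actual resource-limit error.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;

public class SubqueryLimitSketch
{
  // Materialize a subquery result Sequence into a List while enforcing the shared row limit.
  static <T> List<T> materializeWithLimit(
      final Sequence<T> results,
      final AtomicInteger subqueryRowLimitAccumulator,
      final int maxSubqueryRows
  )
  {
    return results.accumulate(
        new ArrayList<>(),
        (acc, row) -> {
          if (maxSubqueryRows > 0 && subqueryRowLimitAccumulator.incrementAndGet() > maxSubqueryRows) {
            // Stand-in for Druid's real resource-limit error handling.
            throw new IllegalStateException("Subqueries generated more than " + maxSubqueryRows + " rows");
          }
          acc.add(row);
          return acc;
        }
    );
  }

  public static void main(String[] args)
  {
    final AtomicInteger accumulator = new AtomicInteger();
    // Prints [1, 2, 3]; rows beyond the limit would throw instead.
    System.out.println(materializeWithLimit(Sequences.simple(List.of(1, 2, 3)), accumulator, 100));
  }
}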
Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
The class DumpSegmentTest, method testExecuteQuery.
@Test
public void testExecuteQuery() {
Injector injector = Mockito.mock(Injector.class);
QueryRunnerFactoryConglomerate conglomerate = Mockito.mock(QueryRunnerFactoryConglomerate.class);
QueryRunnerFactory factory = Mockito.mock(QueryRunnerFactory.class, Mockito.RETURNS_DEEP_STUBS);
QueryRunner runner = Mockito.mock(QueryRunner.class);
QueryRunner mergeRunner = Mockito.mock(QueryRunner.class);
Query query = Mockito.mock(Query.class);
Sequence expected = Sequences.simple(Collections.singletonList(123));
Mockito.when(injector.getInstance(QueryRunnerFactoryConglomerate.class)).thenReturn(conglomerate);
Mockito.when(conglomerate.findFactory(ArgumentMatchers.any())).thenReturn(factory);
Mockito.when(factory.createRunner(ArgumentMatchers.any())).thenReturn(runner);
Mockito.when(
    factory.getToolchest()
           .mergeResults(factory.mergeRunners(DirectQueryProcessingPool.INSTANCE, ImmutableList.of(runner)))
).thenReturn(mergeRunner);
Mockito.when(mergeRunner.run(ArgumentMatchers.any(), ArgumentMatchers.any())).thenReturn(expected);
Sequence actual = DumpSegment.executeQuery(injector, null, query);
Assert.assertSame(expected, actual);
}
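DumpSegment.executeQuery is expected to hand back the merge runner's Sequence untouched, which is why the test uses assertSame rather than comparing contents. A smaller, self-contained sketch of the same Mockito stubbing pattern (a hypothetical test method, not part of DumpSegmentTest; ResponseContext.createEmpty() is used only to satisfy the two-argument run signature):

@Test
public void testStubbedRunnerReturnsSequence()
{
  QueryRunner runner = Mockito.mock(QueryRunner.class);
  Query query = Mockito.mock(Query.class);
  Sequence expected = Sequences.simple(ImmutableList.of(1, 2, 3));
  Mockito.when(runner.run(ArgumentMatchers.any(), ArgumentMatchers.any())).thenReturn(expected);

  Sequence actual = runner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());

  // assertSame checks the Sequence object was passed through unchanged; toList() forces the
  // lazy Sequence so its contents can be compared as well.
  Assert.assertSame(expected, actual);
  Assert.assertEquals(ImmutableList.of(1, 2, 3), actual.toList());
}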
Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
The class TimewarpOperatorTest, method testEmptyFutureInterval.
@Test
public void testEmptyFutureInterval() {
QueryRunner<Result<TimeseriesResultValue>> queryRunner = testOperator.postProcess(new QueryRunner<Result<TimeseriesResultValue>>() {
@Override
public Sequence<Result<TimeseriesResultValue>> run(QueryPlus<Result<TimeseriesResultValue>> queryPlus, ResponseContext responseContext) {
final Query<Result<TimeseriesResultValue>> query = queryPlus.getQuery();
return Sequences.simple(ImmutableList.of(
    new Result<>(query.getIntervals().get(0).getStart(), new TimeseriesResultValue(ImmutableMap.of("metric", 2))),
    new Result<>(query.getIntervals().get(0).getEnd(), new TimeseriesResultValue(ImmutableMap.of("metric", 3)))
));
}
}, DateTimes.of("2014-08-02").getMillis());
final Query<Result<TimeseriesResultValue>> query = Druids.newTimeseriesQueryBuilder()
    .dataSource("dummy")
    .intervals("2014-08-06/2014-08-08")
    .aggregators(Collections.singletonList(new CountAggregatorFactory("count")))
    .build();
Assert.assertEquals(
    Lists.newArrayList(
        new Result<>(DateTimes.of("2014-08-02"), new TimeseriesResultValue(ImmutableMap.of("metric", 2))),
        new Result<>(DateTimes.of("2014-08-02"), new TimeseriesResultValue(ImmutableMap.of("metric", 3)))
    ),
    queryRunner.run(QueryPlus.wrap(query)).toList()
);
}
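Both results come back stamped 2014-08-02 because the requested interval lies entirely after the "now" value (2014-08-02) passed to postProcess, so the operator warps the query backwards before handing it to the delegate and clamps the returned timestamps. A hedged variation on the same test (hypothetical names, reusing the testOperator fixture from above) that captures the warped interval the delegate actually receives instead of inferring it from result timestamps:

@Test
public void testFutureIntervalIsWarpedBeforeReachingDelegate()
{
  final AtomicReference<Interval> seenInterval = new AtomicReference<>();
  QueryRunner<Result<TimeseriesResultValue>> capturingRunner = testOperator.postProcess(
      (queryPlus, responseContext) -> {
        // Record the interval of the rewritten (time-warped) query handed to the delegate.
        seenInterval.set(queryPlus.getQuery().getIntervals().get(0));
        return Sequences.empty();
      },
      DateTimes.of("2014-08-02").getMillis()
  );
  final Query<Result<TimeseriesResultValue>> query = Druids.newTimeseriesQueryBuilder()
      .dataSource("dummy")
      .intervals("2014-08-06/2014-08-08")
      .aggregators(Collections.singletonList(new CountAggregatorFactory("count")))
      .build();
  capturingRunner.run(QueryPlus.wrap(query)).toList();
  // The delegate was invoked with a rewritten query; seenInterval now holds the warped interval.
  Assert.assertNotNull(seenInterval.get());
}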
Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
The class TimewarpOperatorTest, method testPostProcess.
@Test
public void testPostProcess() {
QueryRunner<Result<TimeseriesResultValue>> queryRunner = testOperator.postProcess(new QueryRunner<Result<TimeseriesResultValue>>() {
@Override
public Sequence<Result<TimeseriesResultValue>> run(QueryPlus<Result<TimeseriesResultValue>> queryPlus, ResponseContext responseContext) {
return Sequences.simple(ImmutableList.of(
    new Result<>(DateTimes.of("2014-01-09"), new TimeseriesResultValue(ImmutableMap.of("metric", 2))),
    new Result<>(DateTimes.of("2014-01-11"), new TimeseriesResultValue(ImmutableMap.of("metric", 3))),
    new Result<>(queryPlus.getQuery().getIntervals().get(0).getEnd(), new TimeseriesResultValue(ImmutableMap.of("metric", 5)))
));
}
}, DateTimes.of("2014-08-02").getMillis());
final Query<Result<TimeseriesResultValue>> query = Druids.newTimeseriesQueryBuilder()
    .dataSource("dummy")
    .intervals("2014-07-31/2014-08-05")
    .aggregators(Collections.singletonList(new CountAggregatorFactory("count")))
    .build();
Assert.assertEquals(
    Lists.newArrayList(
        new Result<>(DateTimes.of("2014-07-31"), new TimeseriesResultValue(ImmutableMap.of("metric", 2))),
        new Result<>(DateTimes.of("2014-08-02"), new TimeseriesResultValue(ImmutableMap.of("metric", 3))),
        new Result<>(DateTimes.of("2014-08-02"), new TimeseriesResultValue(ImmutableMap.of("metric", 5)))
    ),
    queryRunner.run(QueryPlus.wrap(query)).toList()
);
TimewarpOperator<Result<TimeBoundaryResultValue>> timeBoundaryOperator = new TimewarpOperator<>(
    new Interval(DateTimes.of("2014-01-01"), DateTimes.of("2014-01-15")),
    new Period("P1W"),
    // align on Monday
    DateTimes.of("2014-01-06")
);
QueryRunner<Result<TimeBoundaryResultValue>> timeBoundaryRunner = timeBoundaryOperator.postProcess(new QueryRunner<Result<TimeBoundaryResultValue>>() {
@Override
public Sequence<Result<TimeBoundaryResultValue>> run(QueryPlus<Result<TimeBoundaryResultValue>> queryPlus, ResponseContext responseContext) {
return Sequences.simple(ImmutableList.of(
    new Result<>(
        DateTimes.of("2014-01-12"),
        new TimeBoundaryResultValue(ImmutableMap.<String, Object>of("maxTime", DateTimes.of("2014-01-12")))
    )
));
}
}, DateTimes.of("2014-08-02").getMillis());
final Query<Result<TimeBoundaryResultValue>> timeBoundaryQuery = Druids.newTimeBoundaryQueryBuilder().dataSource("dummy").build();
Assert.assertEquals(
    Collections.singletonList(
        new Result<>(
            DateTimes.of("2014-08-02"),
            new TimeBoundaryResultValue(ImmutableMap.<String, Object>of("maxTime", DateTimes.of("2014-08-02")))
        )
    ),
    timeBoundaryRunner.run(QueryPlus.wrap(timeBoundaryQuery)).toList()
);
}
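Both TimewarpOperator tests exercise the same decorator shape: postProcess wraps a delegate QueryRunner, rewrites the query going in, and maps timestamps on the results coming back out. A minimal sketch of just the outbound half of that pattern (an illustrative helper, not the real TimewarpOperator, which also rewrites the query interval and clamps against "now"):

// Shift every result timestamp by a fixed Joda-Time period on the way out of a delegate runner.
static QueryRunner<Result<TimeseriesResultValue>> shiftResultTimestamps(
    final QueryRunner<Result<TimeseriesResultValue>> delegate,
    final Period offset
)
{
  return (queryPlus, responseContext) -> Sequences.map(
      delegate.run(queryPlus, responseContext),
      result -> new Result<>(result.getTimestamp().plus(offset), result.getValue())
  );
}

Because Sequences.map is lazy, the timestamps are rewritten only as the caller consumes the returned Sequence.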
Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
The class ForegroundCachePopulator, method wrap.
@Override
public <T, CacheType> Sequence<T> wrap(
    final Sequence<T> sequence,
    final Function<T, CacheType> cacheFn,
    final Cache cache,
    final Cache.NamedKey cacheKey
) {
final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final MutableBoolean tooBig = new MutableBoolean(false);
final JsonGenerator jsonGenerator;
try {
jsonGenerator = objectMapper.getFactory().createGenerator(bytes);
} catch (IOException e) {
throw new RuntimeException(e);
}
return Sequences.wrap(Sequences.map(sequence, input -> {
if (!tooBig.isTrue()) {
try {
jsonGenerator.writeObject(cacheFn.apply(input));
// Check the size without flushing jsonGenerator first; anything still buffered is typically just a few KB, and we don't want to waste cycles flushing.
if (maxEntrySize > 0 && bytes.size() > maxEntrySize) {
tooBig.setValue(true);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
return input;
}), new SequenceWrapper() {
@Override
public void after(final boolean isDone, final Throwable thrown) throws Exception {
jsonGenerator.close();
if (isDone) {
// Check tooBig, then check maxEntrySize one more time, after closing/flushing jsonGenerator.
if (tooBig.isTrue() || (maxEntrySize > 0 && bytes.size() > maxEntrySize)) {
cachePopulatorStats.incrementOversized();
return;
}
try {
cache.put(cacheKey, bytes.toByteArray());
cachePopulatorStats.incrementOk();
} catch (Exception e) {
log.warn(e, "Unable to write to cache");
cachePopulatorStats.incrementError();
}
}
}
});
}
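The SequenceWrapper above is what defers the cache write: nothing is stored until after() reports that the Sequence was fully consumed, and oversized entries are skipped. A generic sketch of that run-a-side-effect-only-on-successful-completion pattern (illustrative helper name, not Druid code):

// Run onComplete only after the wrapped Sequence has been consumed to the end without error,
// mirroring how the populator above only calls cache.put(...) when isDone is true.
static <T> Sequence<T> onSuccessfulCompletion(final Sequence<T> sequence, final Runnable onComplete)
{
  return Sequences.wrap(
      sequence,
      new SequenceWrapper()
      {
        @Override
        public void after(final boolean isDone, final Throwable thrown)
        {
          if (isDone && thrown == null) {
            onComplete.run();
          }
        }
      }
  );
}

Because Sequences are lazy, the side effect fires only when a caller actually drains the returned Sequence, for example via toList() or accumulate().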