Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class GroupByQueryRunnerTest, method testNestedGroupByWithStringArray:
@Test
public void testNestedGroupByWithStringArray() {
if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
expectedException.expect(UnsupportedOperationException.class);
expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING");
}
cannotVectorize();
GroupByQuery inner = makeQueryBuilder()
    .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
    .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
    .setVirtualColumns(new ExpressionVirtualColumn("v0", "mv_to_array(placementish)", ColumnType.STRING_ARRAY, ExprMacroTable.nil()))
    .setDimensions(new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
    .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
    .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
    .build();
GroupByQuery outer = makeQueryBuilder()
    .setDataSource(new QueryDataSource(inner))
    .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
    .setDimensions(new DefaultDimensionSpec("alias", "alias_outer", ColumnType.STRING_ARRAY))
    .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
    .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
    .build();
List<ResultRow> expectedResults = Arrays.asList(
    makeRow(outer, "2011-04-01", "alias_outer", ComparableStringArray.of("a", "preferred"), "rows", 1L),
    makeRow(outer, "2011-04-01", "alias_outer", ComparableStringArray.of("b", "preferred"), "rows", 1L),
    makeRow(outer, "2011-04-01", "alias_outer", ComparableStringArray.of("e", "preferred"), "rows", 1L),
    makeRow(outer, "2011-04-01", "alias_outer", ComparableStringArray.of("h", "preferred"), "rows", 1L),
    makeRow(outer, "2011-04-01", "alias_outer", ComparableStringArray.of("m", "preferred"), "rows", 1L),
    makeRow(outer, "2011-04-01", "alias_outer", ComparableStringArray.of("n", "preferred"), "rows", 1L),
    makeRow(outer, "2011-04-01", "alias_outer", ComparableStringArray.of("p", "preferred"), "rows", 1L),
    makeRow(outer, "2011-04-01", "alias_outer", ComparableStringArray.of("preferred", "t"), "rows", 1L)
);
Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, outer);
TestHelper.assertExpectedObjects(expectedResults, results, "multi-value-dim-nested-groupby-arrays");
}
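The essential wiring exercised by this test is that the inner query becomes the outer query's datasource. A stripped-down sketch of just that pattern, assuming the same test helpers (makeQueryBuilder, QueryRunnerTestHelper) are on the classpath; the dimension names here are illustrative, not taken from the test:

// Minimal sketch (not part of the test): wrap an inner GroupByQuery so an outer
// GroupByQuery aggregates over the inner query's result rows.
GroupByQuery innerSketch = makeQueryBuilder()
    .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)              // concrete table datasource
    .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
    .setDimensions(new DefaultDimensionSpec("quality", "alias"))   // illustrative dimension
    .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
    .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
    .build();
GroupByQuery outerSketch = makeQueryBuilder()
    .setDataSource(new QueryDataSource(innerSketch))               // inner query as datasource
    .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
    .setDimensions(new DefaultDimensionSpec("alias", "alias_outer"))
    .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
    .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
    .build();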
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class ClientQuerySegmentWalker, method canRunQueryUsingLocalWalker:
/**
* Checks if a query can be handled wholly by {@link #localClient}. Assumes that it is a
* {@link LocalQuerySegmentWalker} or something that behaves similarly.
*/
private <T> boolean canRunQueryUsingLocalWalker(Query<T> query) {
final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
// 1) Must be based on a concrete datasource that is not a table.
// 2) Must be based on globally available data (so we have a copy here on the Broker).
// 3) If there is an outer query, it must be handleable by the query toolchest (the local walker does not handle
//    subqueries on its own).
return analysis.isConcreteBased() && !analysis.isConcreteTableBased() && analysis.isGlobal()
    && (!analysis.isQuery() || toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery()));
}
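For context, a hedged sketch of how a caller could use this check to decide where the query runs. localClient comes from the javadoc above; clusterClient and the surrounding if/else are assumptions, not the actual Druid dispatch code, though getQueryRunnerForIntervals(query, intervals) is the standard QuerySegmentWalker entry point:

// Hypothetical dispatch sketch built around canRunQueryUsingLocalWalker().
if (canRunQueryUsingLocalWalker(query)) {
  // The whole query (including any subquery the toolchest can handle) stays on this process.
  return localClient.getQueryRunnerForIntervals(query, intervals);
} else {
  // Otherwise the query is handed to the cluster-facing walker (assumed field name).
  return clusterClient.getQueryRunnerForIntervals(query, intervals);
}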
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class ClientQuerySegmentWalker, method insertSubqueryIds:
/**
* To be used in conjunction with the {@code generateSubqueryIds()} method. This does the actual task of populating the
* query's id, subQueryId and sqlQueryId
*
* @param currentDataSource The datasource to be populated with the subqueries
* @param queryDataSourceToSubqueryIds Map of the datasources to their level and sibling order
* @param parentQueryId Parent query's id
* @param parentSqlQueryId Parent query's sqlQueryId
* @return The datasource tree with subquery ids populated from the map
*/
private DataSource insertSubqueryIds(
    DataSource currentDataSource,
    Map<QueryDataSource, Pair<Integer, Integer>> queryDataSourceToSubqueryIds,
    @Nullable final String parentQueryId,
    @Nullable final String parentSqlQueryId
) {
if (currentDataSource instanceof QueryDataSource && queryDataSourceToSubqueryIds.containsKey((QueryDataSource) currentDataSource)) {
QueryDataSource queryDataSource = (QueryDataSource) currentDataSource;
Pair<Integer, Integer> nestingInfo = queryDataSourceToSubqueryIds.get(queryDataSource);
String subQueryId = nestingInfo.lhs + "." + nestingInfo.rhs;
Query<?> query = queryDataSource.getQuery();
if (StringUtils.isEmpty(query.getSubQueryId())) {
query = query.withSubQueryId(subQueryId);
}
if (StringUtils.isEmpty(query.getId()) && StringUtils.isNotEmpty(parentQueryId)) {
query = query.withId(parentQueryId);
}
if (StringUtils.isEmpty(query.getSqlQueryId()) && StringUtils.isNotEmpty(parentSqlQueryId)) {
query = query.withSqlQueryId(parentSqlQueryId);
}
currentDataSource = new QueryDataSource(query);
}
return currentDataSource.withChildren(
    currentDataSource.getChildren()
        .stream()
        .map(childDataSource -> insertSubqueryIds(childDataSource, queryDataSourceToSubqueryIds, parentQueryId, parentSqlQueryId))
        .collect(Collectors.toList())
);
}
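As a worked illustration of the id format (hypothetical values, not from the Druid source): a map entry pairing nesting level 1 with sibling order 2 yields the subQueryId "1.2", and the parent's ids are inherited only where the subquery has not already set its own:

// Hypothetical example; innerDataSource and outerDataSource are placeholder datasource values.
Map<QueryDataSource, Pair<Integer, Integer>> subqueryIds = new HashMap<>();
subqueryIds.put(innerDataSource, new Pair<>(1, 2));   // nesting level 1, second sibling -> "1.2"

DataSource rewritten = insertSubqueryIds(outerDataSource, subqueryIds, "parent-query-id", "parent-sql-query-id");
// Each QueryDataSource reachable from outerDataSource now carries subQueryId "level.siblingOrder",
// and picks up the parent's queryId / sqlQueryId only if it did not already have them.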
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class SinkQuerySegmentWalker, method getQueryRunnerForSegments:
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
// We only handle one particular dataSource. Make sure that's what we have, then ignore from here on out.
final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
// Sanity check: make sure the query is based on the table we're meant to handle.
if (!analysis.getBaseTableDataSource().filter(ds -> dataSource.equals(ds.getName())).isPresent()) {
throw new ISE("Cannot handle datasource: %s", analysis.getDataSource());
}
final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
if (factory == null) {
throw new ISE("Unknown query type[%s].", query.getClass());
}
final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
final boolean skipIncrementalSegment = query.getContextValue(CONTEXT_SKIP_INCREMENTAL_SEGMENT, false);
final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);
// Make sure this query type can handle the subquery, if present.
if (analysis.isQuery() && !toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery())) {
throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
}
// segmentMapFn maps each base Segment into a joined Segment if necessary.
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
    analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
    analysis.getPreJoinableClauses(),
    cpuTimeAccumulator,
    analysis.getBaseQuery().orElse(query)
);
// Compute the join cache key here so it doesn't need to be re-computed for every segment.
final Optional<byte[]> cacheKeyPrefix = analysis.isJoin()
    ? joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis)
    : Optional.of(StringUtils.EMPTY_BYTES);
Iterable<QueryRunner<T>> perSegmentRunners = Iterables.transform(specs, descriptor -> {
final PartitionChunk<Sink> chunk = sinkTimeline.findChunk(descriptor.getInterval(), descriptor.getVersion(), descriptor.getPartitionNumber());
if (chunk == null) {
return new ReportTimelineMissingSegmentQueryRunner<>(descriptor);
}
final Sink theSink = chunk.getObject();
final SegmentId sinkSegmentId = theSink.getSegment().getId();
Iterable<QueryRunner<T>> perHydrantRunners = new SinkQueryRunners<>(Iterables.transform(theSink, hydrant -> {
// Hydrant might swap at any point, but if it's swapped at the start
// then we know it's *definitely* swapped.
final boolean hydrantDefinitelySwapped = hydrant.hasSwapped();
if (skipIncrementalSegment && !hydrantDefinitelySwapped) {
return new Pair<>(hydrant.getSegmentDataInterval(), new NoopQueryRunner<>());
}
// Prevent the underlying segment from swapping while it is being iterated
final Optional<Pair<SegmentReference, Closeable>> maybeSegmentAndCloseable = hydrant.getSegmentForQuery(segmentMapFn);
// If the optional isn't present, we failed to acquire a reference to the segment or any joinables
if (!maybeSegmentAndCloseable.isPresent()) {
return new Pair<>(hydrant.getSegmentDataInterval(), new ReportTimelineMissingSegmentQueryRunner<>(descriptor));
}
final Pair<SegmentReference, Closeable> segmentAndCloseable = maybeSegmentAndCloseable.get();
try {
QueryRunner<T> runner = factory.createRunner(segmentAndCloseable.lhs);
// 1) Only use caching if data is immutable
// 2) Hydrants are not the same between replicas, make sure cache is local
if (hydrantDefinitelySwapped && cache.isLocal()) {
StorageAdapter storageAdapter = segmentAndCloseable.lhs.asStorageAdapter();
long segmentMinTime = storageAdapter.getMinTime().getMillis();
long segmentMaxTime = storageAdapter.getMaxTime().getMillis();
Interval actualDataInterval = Intervals.utc(segmentMinTime, segmentMaxTime + 1);
runner = new CachingQueryRunner<>(
    makeHydrantCacheIdentifier(hydrant),
    cacheKeyPrefix,
    descriptor,
    actualDataInterval,
    objectMapper,
    cache,
    toolChest,
    runner,
    // Always populate in foreground regardless of config
    new ForegroundCachePopulator(objectMapper, cachePopulatorStats, cacheConfig.getMaxEntrySize()),
    cacheConfig
);
}
// Make it always use Closeable to decrement()
runner = QueryRunnerHelper.makeClosingQueryRunner(runner, segmentAndCloseable.rhs);
return new Pair<>(segmentAndCloseable.lhs.getDataInterval(), runner);
} catch (Throwable e) {
throw CloseableUtils.closeAndWrapInCatch(e, segmentAndCloseable.rhs);
}
}));
return new SpecificSegmentQueryRunner<>(
    withPerSinkMetrics(
        new BySegmentQueryRunner<>(
            sinkSegmentId,
            descriptor.getInterval().getStart(),
            factory.mergeRunners(DirectQueryProcessingPool.INSTANCE, perHydrantRunners)
        ),
        toolChest,
        sinkSegmentId,
        cpuTimeAccumulator
    ),
    new SpecificSegmentSpec(descriptor)
);
});
final QueryRunner<T> mergedRunner = toolChest.mergeResults(factory.mergeRunners(queryProcessingPool, perSegmentRunners));
return CPUTimeMetricQueryRunner.safeBuild(
    new FinalizeResultsQueryRunner<>(mergedRunner, toolChest),
    toolChest,
    emitter,
    cpuTimeAccumulator,
    true
);
}
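A hedged usage sketch of the runner returned above; walker, query, and specs are assumed to be in scope, while QueryPlus.wrap and ResponseContext.createEmpty are the standard way to invoke a Druid QueryRunner:

// Sketch only: obtain the per-segment runner and materialize its results.
final QueryRunner<T> runner = walker.getQueryRunnerForSegments(query, specs);
final Sequence<T> results = runner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
final List<T> rows = results.toList();   // or accumulate(...) to consume the sequence lazily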
Use of org.apache.druid.query.QueryDataSource in project druid by druid-io.
From the class QuantileSqlAggregatorTest, method testQuantileOnInnerQuery:
@Test
public void testQuantileOnInnerQuery() throws Exception {
final List<Object[]> expectedResults;
if (NullHandling.replaceWithDefault()) {
expectedResults = ImmutableList.of(new Object[] { 7.0, 8.26386833190918 });
} else {
expectedResults = ImmutableList.of(new Object[] { 5.25, 6.59091854095459 });
}
testQuery(
    "SELECT AVG(x), APPROX_QUANTILE(x, 0.98)\n"
        + "FROM (SELECT dim2, SUM(m1) AS x FROM foo GROUP BY dim2)",
    ImmutableList.of(
        GroupByQuery.builder()
            .setDataSource(new QueryDataSource(
                GroupByQuery.builder()
                    .setDataSource(CalciteTests.DATASOURCE1)
                    .setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                    .setGranularity(Granularities.ALL)
                    .setDimensions(new DefaultDimensionSpec("dim2", "d0"))
                    .setAggregatorSpecs(ImmutableList.of(new DoubleSumAggregatorFactory("a0", "m1")))
                    .setContext(QUERY_CONTEXT_DEFAULT)
                    .build()
            ))
            .setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
            .setGranularity(Granularities.ALL)
            .setAggregatorSpecs(
                new DoubleSumAggregatorFactory("_a0:sum", "a0"),
                new CountAggregatorFactory("_a0:count"),
                new ApproximateHistogramAggregatorFactory("_a1:agg", "a0", null, null, null, null, false)
            )
            .setPostAggregatorSpecs(ImmutableList.of(
                new ArithmeticPostAggregator(
                    "_a0",
                    "quotient",
                    ImmutableList.of(
                        new FieldAccessPostAggregator(null, "_a0:sum"),
                        new FieldAccessPostAggregator(null, "_a0:count")
                    )
                ),
                new QuantilePostAggregator("_a1", "_a1:agg", 0.98f)
            ))
            .setContext(QUERY_CONTEXT_DEFAULT)
            .build()
    ),
    expectedResults
);
}