Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class Queries, method withSpecificSegments.
/**
* Rewrite "query" to refer to some specific segment descriptors.
*
* The dataSource for "query" must be based on a single table for this operation to be valid. Otherwise, this
* function will throw an exception.
*
* Unlike the seemingly-similar {@code query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(descriptors))},
* this method will walk down subqueries found within the query datasource, if any, and modify the lowest-level
* subquery. The effect is that
* {@code DataSourceAnalysis.forDataSource(query.getDataSource()).getBaseQuerySegmentSpec()} is guaranteed to return
* either {@code new MultipleSpecificSegmentSpec(descriptors)} or empty.
*
* Because {@link BaseQuery#getRunner} is implemented using {@link DataSourceAnalysis#getBaseQuerySegmentSpec}, this
* method will cause the runner to be a specific-segments runner.
*/
public static <T> Query<T> withSpecificSegments(final Query<T> query, final List<SegmentDescriptor> descriptors) {
  final Query<T> retVal;
  if (query.getDataSource() instanceof QueryDataSource) {
    final Query<?> subQuery = ((QueryDataSource) query.getDataSource()).getQuery();
    retVal = query.withDataSource(new QueryDataSource(withSpecificSegments(subQuery, descriptors)));
  } else {
    retVal = query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(descriptors));
  }
  // Verify preconditions and invariants, just in case.
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(retVal.getDataSource());
  // Sanity check: query must be based on a single table.
  if (!analysis.getBaseTableDataSource().isPresent()) {
    throw new ISE("Unable to apply specific segments to non-table-based dataSource[%s]", query.getDataSource());
  }
  if (analysis.getBaseQuerySegmentSpec().isPresent()
      && !analysis.getBaseQuerySegmentSpec().get().equals(new MultipleSpecificSegmentSpec(descriptors))) {
    // If you see the error message below, it's a bug in either this function or in DataSourceAnalysis.
    throw new ISE("Unable to apply specific segments to query with dataSource[%s]", query.getDataSource());
  }
  return retVal;
}
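For illustration, a caller that already knows which segments it wants to hit could apply the rewrite as in the following sketch. The interval, version, and partition numbers are made up, and the surrounding query variable is assumed to exist.

// Hypothetical usage sketch: pin an existing query to two specific segments.
// The interval, version, and partition numbers below are illustrative only.
final List<SegmentDescriptor> descriptors = ImmutableList.of(
    new SegmentDescriptor(Intervals.of("2020-01-01/2020-01-02"), "v1", 0),
    new SegmentDescriptor(Intervals.of("2020-01-01/2020-01-02"), "v1", 1)
);
final Query<T> pinned = Queries.withSpecificSegments(query, descriptors);
// Per the contract above, DataSourceAnalysis.forDataSource(pinned.getDataSource())
//   .getBaseQuerySegmentSpec() now returns new MultipleSpecificSegmentSpec(descriptors).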
Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class UnionQueryRunner, method run.
@Override
public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext responseContext) {
  Query<T> query = queryPlus.getQuery();
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
  if (analysis.isConcreteTableBased() && analysis.getBaseUnionDataSource().isPresent()) {
    // Union of tables.
    final UnionDataSource unionDataSource = analysis.getBaseUnionDataSource().get();
    if (unionDataSource.getDataSources().isEmpty()) {
      // Shouldn't happen, because UnionDataSource doesn't allow empty unions.
      throw new ISE("Unexpectedly received empty union");
    } else if (unionDataSource.getDataSources().size() == 1) {
      // Single table. Run as a normal query.
      return baseRunner.run(
          queryPlus.withQuery(Queries.withBaseDataSource(query, Iterables.getOnlyElement(unionDataSource.getDataSources()))),
          responseContext
      );
    } else {
      // Split up the tables and merge their results.
      return new MergeSequence<>(
          query.getResultOrdering(),
          Sequences.simple(Lists.transform(
              IntStream.range(0, unionDataSource.getDataSources().size())
                       .mapToObj(i -> new Pair<>(unionDataSource.getDataSources().get(i), i + 1))
                       .collect(Collectors.toList()),
              (Function<Pair<TableDataSource, Integer>, Sequence<T>>) singleSourceWithIndex -> baseRunner.run(
                  queryPlus.withQuery(
                      Queries.withBaseDataSource(query, singleSourceWithIndex.lhs)
                             .withSubQueryId(generateSubqueryId(query.getSubQueryId(), singleSourceWithIndex.lhs.getName(), singleSourceWithIndex.rhs))
                  ),
                  responseContext
              )
          ))
      );
    }
  } else {
    // Not a union of tables. Do nothing special.
    return baseRunner.run(queryPlus, responseContext);
  }
}
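The helper generateSubqueryId is referenced above but not shown on this page. A plausible sketch, under the assumption that the id simply combines the parent subquery id, the table name, and the 1-based position of the table in the union (the actual Druid implementation may differ), is:

// Assumed sketch of the helper used above; not necessarily the project's real implementation.
// It derives a distinct subquery id per union member from the parent id, table name, and position.
private String generateSubqueryId(String parentSubqueryId, String dataSourceName, int dataSourceIndex) {
  final String suffix = dataSourceName + "." + dataSourceIndex;
  return parentSubqueryId == null || parentSubqueryId.isEmpty() ? suffix : parentSubqueryId + "." + suffix;
}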
Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class JoinableFactoryWrapper, method createSegmentMapFn.
/**
* Creates a Function that maps base segments to {@link HashJoinSegment} if needed (i.e. if the number of join
* clauses is > 0). If mapping is not needed, this method will return {@link Function#identity()}.
*
* @param baseFilter Filter to apply before the join takes place
* @param clauses Pre-joinable clauses
* @param cpuTimeAccumulator An accumulator that we will add CPU nanos to; this is part of the function to encourage
* callers to remember to track metrics on CPU time required for creation of Joinables
* @param query The query that will be run on the mapped segments. Usually this should be
* {@code analysis.getBaseQuery().orElse(query)}, where "analysis" is a
* {@link DataSourceAnalysis} and "query" is the original
* query from the end user.
*/
public Function<SegmentReference, SegmentReference> createSegmentMapFn(
    @Nullable final Filter baseFilter,
    final List<PreJoinableClause> clauses,
    final AtomicLong cpuTimeAccumulator,
    final Query<?> query
) {
  // compute column correlations here and RHS correlated values
  return JvmUtils.safeAccumulateThreadCpuTime(cpuTimeAccumulator, () -> {
    if (clauses.isEmpty()) {
      return Function.identity();
    } else {
      final JoinableClauses joinableClauses = JoinableClauses.createClauses(clauses, joinableFactory);
      final JoinFilterRewriteConfig filterRewriteConfig = JoinFilterRewriteConfig.forQuery(query);
      // Pick off any join clauses that can be converted into filters.
      final Set<String> requiredColumns = query.getRequiredColumns();
      final Filter baseFilterToUse;
      final List<JoinableClause> clausesToUse;
      if (requiredColumns != null && filterRewriteConfig.isEnableRewriteJoinToFilter()) {
        final Pair<List<Filter>, List<JoinableClause>> conversionResult = convertJoinsToFilters(
            joinableClauses.getJoinableClauses(),
            requiredColumns,
            Ints.checkedCast(Math.min(filterRewriteConfig.getFilterRewriteMaxSize(), Integer.MAX_VALUE))
        );
        baseFilterToUse = Filters.maybeAnd(
            Lists.newArrayList(Iterables.concat(Collections.singleton(baseFilter), conversionResult.lhs))
        ).orElse(null);
        clausesToUse = conversionResult.rhs;
      } else {
        baseFilterToUse = baseFilter;
        clausesToUse = joinableClauses.getJoinableClauses();
      }
      // Analyze remaining join clauses to see if filters on them can be pushed down.
      final JoinFilterPreAnalysis joinFilterPreAnalysis = JoinFilterAnalyzer.computeJoinFilterPreAnalysis(
          new JoinFilterPreAnalysisKey(
              filterRewriteConfig,
              clausesToUse,
              query.getVirtualColumns(),
              Filters.maybeAnd(Arrays.asList(baseFilterToUse, Filters.toFilter(query.getFilter()))).orElse(null)
          )
      );
      return baseSegment -> new HashJoinSegment(baseSegment, baseFilterToUse, clausesToUse, joinFilterPreAnalysis);
    }
  });
}
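The SinkQuerySegmentWalker example further down shows the typical call pattern; in condensed form it looks like the sketch below, where joinableFactoryWrapper, cpuTimeAccumulator, and baseSegment are assumed to be in scope and are named only for illustration.

// Condensed usage sketch, mirroring the call in SinkQuerySegmentWalker below.
final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
    analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
    analysis.getPreJoinableClauses(),
    cpuTimeAccumulator,
    analysis.getBaseQuery().orElse(query)
);
// Each base segment is then wrapped in a HashJoinSegment (or passed through unchanged) before querying:
final SegmentReference mappedSegment = segmentMapFn.apply(baseSegment);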
Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class ServerViewUtil, method getTargetLocations.
public static List<LocatedSegmentDescriptor> getTargetLocations(
    TimelineServerView serverView,
    DataSource datasource,
    List<Interval> intervals,
    int numCandidates
) {
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(datasource);
  final Optional<? extends TimelineLookup<String, ServerSelector>> maybeTimeline = serverView.getTimeline(analysis);
  if (!maybeTimeline.isPresent()) {
    return Collections.emptyList();
  }
  List<LocatedSegmentDescriptor> located = new ArrayList<>();
  for (Interval interval : intervals) {
    for (TimelineObjectHolder<String, ServerSelector> holder : maybeTimeline.get().lookup(interval)) {
      for (PartitionChunk<ServerSelector> chunk : holder.getObject()) {
        ServerSelector selector = chunk.getObject();
        final SegmentDescriptor descriptor = new SegmentDescriptor(holder.getInterval(), holder.getVersion(), chunk.getChunkNumber());
        long size = selector.getSegment().getSize();
        List<DruidServerMetadata> candidates = selector.getCandidates(numCandidates);
        located.add(new LocatedSegmentDescriptor(descriptor, size, candidates));
      }
    }
  }
  return located;
}
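A hypothetical call, locating up to three candidate servers per segment for one day of a table datasource (the datasource name and interval are made up; serverView is assumed to be an available TimelineServerView):

// Hypothetical usage sketch: datasource name and interval are illustrative only.
final List<LocatedSegmentDescriptor> located = ServerViewUtil.getTargetLocations(
    serverView,
    new TableDataSource("wikipedia"),
    ImmutableList.of(Intervals.of("2020-01-01/2020-01-02")),
    3
);
// Each entry pairs a SegmentDescriptor with its size and up to three candidate server locations.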
Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class SinkQuerySegmentWalker, method getQueryRunnerForSegments.
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
  // We only handle one particular dataSource. Make sure that's what we have, then ignore from here on out.
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
  // Sanity check: make sure the query is based on the table we're meant to handle.
  if (!analysis.getBaseTableDataSource().filter(ds -> dataSource.equals(ds.getName())).isPresent()) {
    throw new ISE("Cannot handle datasource: %s", analysis.getDataSource());
  }
  final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
  if (factory == null) {
    throw new ISE("Unknown query type[%s].", query.getClass());
  }
  final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
  final boolean skipIncrementalSegment = query.getContextValue(CONTEXT_SKIP_INCREMENTAL_SEGMENT, false);
  final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);
  // Make sure this query type can handle the subquery, if present.
  if (analysis.isQuery() && !toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery())) {
    throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
  }
  // segmentMapFn maps each base Segment into a joined Segment if necessary.
  final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
      analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
      analysis.getPreJoinableClauses(),
      cpuTimeAccumulator,
      analysis.getBaseQuery().orElse(query)
  );
  // Compute the join cache key once here so it doesn't need to be re-computed for every segment.
  final Optional<byte[]> cacheKeyPrefix = analysis.isJoin()
                                          ? joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis)
                                          : Optional.of(StringUtils.EMPTY_BYTES);
  Iterable<QueryRunner<T>> perSegmentRunners = Iterables.transform(specs, descriptor -> {
    final PartitionChunk<Sink> chunk = sinkTimeline.findChunk(
        descriptor.getInterval(),
        descriptor.getVersion(),
        descriptor.getPartitionNumber()
    );
    if (chunk == null) {
      return new ReportTimelineMissingSegmentQueryRunner<>(descriptor);
    }
    final Sink theSink = chunk.getObject();
    final SegmentId sinkSegmentId = theSink.getSegment().getId();
    Iterable<QueryRunner<T>> perHydrantRunners = new SinkQueryRunners<>(Iterables.transform(theSink, hydrant -> {
      // Hydrant might swap at any point, but if it's swapped at the start
      // then we know it's *definitely* swapped.
      final boolean hydrantDefinitelySwapped = hydrant.hasSwapped();
      if (skipIncrementalSegment && !hydrantDefinitelySwapped) {
        return new Pair<>(hydrant.getSegmentDataInterval(), new NoopQueryRunner<>());
      }
      // Prevent the underlying segment from swapping while it's being iterated.
      final Optional<Pair<SegmentReference, Closeable>> maybeSegmentAndCloseable =
          hydrant.getSegmentForQuery(segmentMapFn);
      // If the optional isn't present, we failed to acquire a reference to the segment or any joinables.
      if (!maybeSegmentAndCloseable.isPresent()) {
        return new Pair<>(hydrant.getSegmentDataInterval(), new ReportTimelineMissingSegmentQueryRunner<>(descriptor));
      }
      final Pair<SegmentReference, Closeable> segmentAndCloseable = maybeSegmentAndCloseable.get();
      try {
        QueryRunner<T> runner = factory.createRunner(segmentAndCloseable.lhs);
        // 2) Hydrants are not the same between replicas, make sure cache is local
        if (hydrantDefinitelySwapped && cache.isLocal()) {
          StorageAdapter storageAdapter = segmentAndCloseable.lhs.asStorageAdapter();
          long segmentMinTime = storageAdapter.getMinTime().getMillis();
          long segmentMaxTime = storageAdapter.getMaxTime().getMillis();
          Interval actualDataInterval = Intervals.utc(segmentMinTime, segmentMaxTime + 1);
          runner = new CachingQueryRunner<>(
              makeHydrantCacheIdentifier(hydrant),
              cacheKeyPrefix,
              descriptor,
              actualDataInterval,
              objectMapper,
              cache,
              toolChest,
              runner,
              // Always populate in foreground regardless of config
              new ForegroundCachePopulator(objectMapper, cachePopulatorStats, cacheConfig.getMaxEntrySize()),
              cacheConfig
          );
        }
        // Make it always use Closeable to decrement()
        runner = QueryRunnerHelper.makeClosingQueryRunner(runner, segmentAndCloseable.rhs);
        return new Pair<>(segmentAndCloseable.lhs.getDataInterval(), runner);
      } catch (Throwable e) {
        throw CloseableUtils.closeAndWrapInCatch(e, segmentAndCloseable.rhs);
      }
    }));
    return new SpecificSegmentQueryRunner<>(
        withPerSinkMetrics(
            new BySegmentQueryRunner<>(
                sinkSegmentId,
                descriptor.getInterval().getStart(),
                factory.mergeRunners(DirectQueryProcessingPool.INSTANCE, perHydrantRunners)
            ),
            toolChest,
            sinkSegmentId,
            cpuTimeAccumulator
        ),
        new SpecificSegmentSpec(descriptor)
    );
  });
  final QueryRunner<T> mergedRunner = toolChest.mergeResults(factory.mergeRunners(queryProcessingPool, perSegmentRunners));
  return CPUTimeMetricQueryRunner.safeBuild(
      new FinalizeResultsQueryRunner<>(mergedRunner, toolChest),
      toolChest,
      emitter,
      cpuTimeAccumulator,
      true
  );
}
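A caller that already holds the walker and a set of segment descriptors might drive the returned runner roughly as in the sketch below; walker, query, and descriptors are assumed to be in scope, and this only illustrates the typical calling pattern.

// Hypothetical usage sketch: run the query against previously obtained segment descriptors.
final QueryRunner<T> runner = walker.getQueryRunnerForSegments(query, descriptors);
final Sequence<T> results = runner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());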