Use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey in project druid by druid-io.
Class HashJoinSegmentStorageAdapter, method makeCursors:
@Override
public Sequence<Cursor> makeCursors(
    @Nullable final Filter filter,
    @Nonnull final Interval interval,
    @Nonnull final VirtualColumns virtualColumns,
    @Nonnull final Granularity gran,
    final boolean descending,
    @Nullable final QueryMetrics<?> queryMetrics
)
{
  final Filter combinedFilter = baseFilterAnd(filter);

  if (clauses.isEmpty()) {
    return baseAdapter.makeCursors(combinedFilter, interval, virtualColumns, gran, descending, queryMetrics);
  }

  // Filter pre-analysis key implied by the call to "makeCursors". We need to sanity-check that it matches
  // the actual pre-analysis that was done. Note: we can't infer a rewrite config from the "makeCursors" call
  // (it requires access to the query context), so we skip sanity-checking it by re-using the one present
  // in the cached key.
  final JoinFilterPreAnalysisKey keyIn = new JoinFilterPreAnalysisKey(
      joinFilterPreAnalysis.getKey().getRewriteConfig(),
      clauses,
      virtualColumns,
      combinedFilter
  );
  final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey();

  if (!keyIn.equals(keyCached)) {
    // It is a bug if this happens. The implied key and the cached key should always match.
    throw new ISE("Pre-analysis mismatch, cannot execute query");
  }

  final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
  final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
  determineBaseColumnsWithPreAndPostJoinVirtualColumns(virtualColumns, preJoinVirtualColumns, postJoinVirtualColumns);

  // Merge the base-table filter specified by the user with the base-table filter pushed down from the join.
  JoinFilterSplit joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter);
  preJoinVirtualColumns.addAll(joinFilterSplit.getPushDownVirtualColumns());

  final Sequence<Cursor> baseCursorSequence = baseAdapter.makeCursors(
      joinFilterSplit.getBaseTableFilter().isPresent() ? joinFilterSplit.getBaseTableFilter().get() : null,
      interval,
      VirtualColumns.create(preJoinVirtualColumns),
      gran,
      descending,
      queryMetrics
  );

  Closer joinablesCloser = Closer.create();
  return Sequences.<Cursor, Cursor>map(baseCursorSequence, cursor -> {
    assert cursor != null;
    Cursor retVal = cursor;
    for (JoinableClause clause : clauses) {
      retVal = HashJoinEngine.makeJoinCursor(retVal, clause, descending, joinablesCloser);
    }
    return PostJoinCursor.wrap(
        retVal,
        VirtualColumns.create(postJoinVirtualColumns),
        joinFilterSplit.getJoinTableFilter().orElse(null)
    );
  }).withBaggage(joinablesCloser);
}
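For orientation, the sketch below shows one way a caller could consume the Sequence<Cursor> returned by makeCursors. It is an illustrative assumption rather than code from the Druid repository: the helper method, its columnName parameter, and the choice of Intervals.ETERNITY / Granularities.ALL are hypothetical, and the makeCursors signature matches the adapter above but may differ in other Druid versions.

import java.util.ArrayList;
import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.StorageAdapter;
import org.apache.druid.segment.VirtualColumns;

// Hypothetical helper: collects every value of one column from the cursors produced by an adapter
// (for example a HashJoinSegmentStorageAdapter). Assumed to live in some utility class.
public static List<Object> readColumn(final StorageAdapter adapter, final String columnName)
{
  final Sequence<Cursor> cursors = adapter.makeCursors(
      null,                 // no caller-supplied filter; the join adapter ANDs this with its base filter
      Intervals.ETERNITY,   // scan the full time range
      VirtualColumns.EMPTY,
      Granularities.ALL,    // one cursor covering the whole interval
      false,                // ascending time order
      null                  // no QueryMetrics
  );

  // Accumulating over the sequence drives each cursor to completion and releases the
  // "baggage" Closer that holds the per-clause join resources.
  return cursors.accumulate(new ArrayList<>(), (acc, cursor) -> {
    final ColumnValueSelector<?> selector =
        cursor.getColumnSelectorFactory().makeColumnValueSelector(columnName);
    while (!cursor.isDone()) {
      acc.add(selector.getObject());
      cursor.advance();
    }
    return acc;
  });
}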
Use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey in project druid by druid-io.
Class JoinableFactoryWrapper, method createSegmentMapFn:
/**
 * Creates a Function that maps base segments to {@link HashJoinSegment} if needed (i.e. if the number of join
 * clauses is > 0). If mapping is not needed, this method will return {@link Function#identity()}.
 *
 * @param baseFilter         Filter to apply before the join takes place
 * @param clauses            Pre-joinable clauses
 * @param cpuTimeAccumulator An accumulator that we will add CPU nanos to; this is part of the function to encourage
 *                           callers to remember to track metrics on CPU time required for creation of Joinables
 * @param query              The query that will be run on the mapped segments. Usually this should be
 *                           {@code analysis.getBaseQuery().orElse(query)}, where "analysis" is a
 *                           {@link DataSourceAnalysis} and "query" is the original query from the end user.
 */
public Function<SegmentReference, SegmentReference> createSegmentMapFn(
    @Nullable final Filter baseFilter,
    final List<PreJoinableClause> clauses,
    final AtomicLong cpuTimeAccumulator,
    final Query<?> query
)
{
  // compute column correlations here and RHS correlated values
  return JvmUtils.safeAccumulateThreadCpuTime(
      cpuTimeAccumulator,
      () -> {
        if (clauses.isEmpty()) {
          return Function.identity();
        } else {
          final JoinableClauses joinableClauses = JoinableClauses.createClauses(clauses, joinableFactory);
          final JoinFilterRewriteConfig filterRewriteConfig = JoinFilterRewriteConfig.forQuery(query);

          // Pick off any join clauses that can be converted into filters.
          final Set<String> requiredColumns = query.getRequiredColumns();
          final Filter baseFilterToUse;
          final List<JoinableClause> clausesToUse;
          if (requiredColumns != null && filterRewriteConfig.isEnableRewriteJoinToFilter()) {
            final Pair<List<Filter>, List<JoinableClause>> conversionResult = convertJoinsToFilters(
                joinableClauses.getJoinableClauses(),
                requiredColumns,
                Ints.checkedCast(Math.min(filterRewriteConfig.getFilterRewriteMaxSize(), Integer.MAX_VALUE))
            );
            baseFilterToUse = Filters.maybeAnd(
                Lists.newArrayList(Iterables.concat(Collections.singleton(baseFilter), conversionResult.lhs))
            ).orElse(null);
            clausesToUse = conversionResult.rhs;
          } else {
            baseFilterToUse = baseFilter;
            clausesToUse = joinableClauses.getJoinableClauses();
          }

          // Analyze remaining join clauses to see if filters on them can be pushed down.
          final JoinFilterPreAnalysis joinFilterPreAnalysis = JoinFilterAnalyzer.computeJoinFilterPreAnalysis(
              new JoinFilterPreAnalysisKey(
                  filterRewriteConfig,
                  clausesToUse,
                  query.getVirtualColumns(),
                  Filters.maybeAnd(Arrays.asList(baseFilterToUse, Filters.toFilter(query.getFilter()))).orElse(null)
              )
          );
          return baseSegment -> new HashJoinSegment(baseSegment, baseFilterToUse, clausesToUse, joinFilterPreAnalysis);
        }
      }
  );
}
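To show how the pieces above fit together, here is a hedged sketch of applying the returned mapping function to a base segment. The wiring is hypothetical: the wrapper, clause list, query, and baseSegment are assumed inputs from the query stack, and the import paths follow the Druid version this example appears to come from, so verify them against your own version.

import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import org.apache.druid.query.Query;
import org.apache.druid.query.planning.PreJoinableClause;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.join.JoinableFactoryWrapper;

// Hypothetical glue code: wraps one base segment with the join clauses of a query.
public static SegmentReference wrapWithJoin(
    final JoinableFactoryWrapper wrapper,
    final List<PreJoinableClause> clauses,
    final Query<?> query,
    final SegmentReference baseSegment
)
{
  final AtomicLong cpuTimeNs = new AtomicLong();

  final Function<SegmentReference, SegmentReference> segmentMapFn = wrapper.createSegmentMapFn(
      null,        // no extra filter ahead of the join
      clauses,     // typically DataSourceAnalysis.getPreJoinableClauses()
      cpuTimeNs,   // accumulates CPU nanos spent creating Joinables
      query        // usually analysis.getBaseQuery().orElse(query)
  );

  // With zero clauses this is Function.identity(); otherwise the segment becomes a HashJoinSegment.
  return segmentMapFn.apply(baseSegment);
}

The accumulated cpuTimeNs value can afterwards be reported as query CPU time, mirroring how the javadoc above asks callers to track the cost of building Joinables.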