Use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysis in project druid by druid-io.
From the class HashJoinSegmentStorageAdapterTest, method test_makeCursors_factToCountryLeft.
@Test
public void test_makeCursors_factToCountryLeft() {
  List<JoinableClause> joinableClauses = ImmutableList.of(factToCountryOnIsoCode(JoinType.LEFT));
  JoinFilterPreAnalysis joinFilterPreAnalysis = makeDefaultConfigPreAnalysis(null, joinableClauses, VirtualColumns.EMPTY);
  JoinTestHelper.verifyCursors(
      new HashJoinSegmentStorageAdapter(
          factSegment.asStorageAdapter(),
          joinableClauses,
          joinFilterPreAnalysis
      ).makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null),
      ImmutableList.of(
          "page",
          "countryIsoCode",
          FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryIsoCode",
          FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName",
          FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryNumber"
      ),
      ImmutableList.of(
          new Object[]{"Talk:Oswald Tilghman", null, null, null, NULL_COUNTRY},
          new Object[]{"Rallicula", null, null, null, NULL_COUNTRY},
          new Object[]{"Peremptory norm", "AU", "AU", "Australia", 0L},
          new Object[]{"Apamea abruzzorum", null, null, null, NULL_COUNTRY},
          new Object[]{"Atractus flammigerus", null, null, null, NULL_COUNTRY},
          new Object[]{"Agama mossambica", null, null, null, NULL_COUNTRY},
          new Object[]{"Mathis Bolly", "MX", "MX", "Mexico", 10L},
          new Object[]{"유희왕 GX", "KR", "KR", "Republic of Korea", 9L},
          new Object[]{"青野武", "JP", "JP", "Japan", 8L},
          new Object[]{"Golpe de Estado en Chile de 1973", "CL", "CL", "Chile", 2L},
          new Object[]{"President of India", "US", "US", "United States", 13L},
          new Object[]{"Diskussion:Sebastian Schulz", "DE", "DE", "Germany", 3L},
          new Object[]{"Saison 9 de Secret Story", "FR", "FR", "France", 5L},
          new Object[]{"Glasgow", "GB", "GB", "United Kingdom", 6L},
          new Object[]{"Didier Leclair", "CA", "CA", "Canada", 1L},
          new Object[]{"Les Argonautes", "CA", "CA", "Canada", 1L},
          new Object[]{"Otjiwarongo Airport", "US", "US", "United States", 13L},
          new Object[]{"Sarah Michelle Gellar", "CA", "CA", "Canada", 1L},
          new Object[]{"DirecTV", "US", "US", "United States", 13L},
          new Object[]{"Carlo Curti", "US", "US", "United States", 13L},
          new Object[]{"Giusy Ferreri discography", "IT", "IT", "Italy", 7L},
          new Object[]{"Roma-Bangkok", "IT", "IT", "Italy", 7L},
          new Object[]{"Wendigo", "SV", "SV", "El Salvador", 12L},
          new Object[]{"Алиса в Зазеркалье", "NO", "NO", "Norway", 11L},
          new Object[]{"Gabinete Ministerial de Rafael Correa", "EC", "EC", "Ecuador", 4L},
          new Object[]{"Old Anatolian Turkish", "US", "US", "United States", 13L},
          new Object[]{"Cream Soda", "SU", "SU", "States United", 15L},
          new Object[]{"Orange Soda", "MatchNothing", null, null, NULL_COUNTRY},
          new Object[]{"History of Fourems", "MMMM", "MMMM", "Fourems", 205L}
      )
  );
}
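The test builds its JoinFilterPreAnalysis through a base-class helper, makeDefaultConfigPreAnalysis. A minimal sketch of what such a helper can look like, assuming a shared DEFAULT_JOIN_FILTER_REWRITE_CONFIG constant in the test base class (the constant name is an assumption; the JoinFilterAnalyzer and JoinFilterPreAnalysisKey calls mirror their use in createSegmentMapFn further down this page):

// Sketch only: builds a pre-analysis from a default rewrite config.
// DEFAULT_JOIN_FILTER_REWRITE_CONFIG is assumed to be a shared test constant.
protected JoinFilterPreAnalysis makeDefaultConfigPreAnalysis(
    @Nullable Filter originalFilter,
    List<JoinableClause> joinableClauses,
    VirtualColumns virtualColumns
) {
  return JoinFilterAnalyzer.computeJoinFilterPreAnalysis(
      new JoinFilterPreAnalysisKey(
          DEFAULT_JOIN_FILTER_REWRITE_CONFIG,
          joinableClauses,
          virtualColumns,
          originalFilter
      )
  );
}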
Use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysis in project druid by druid-io.
From the class HashJoinSegmentStorageAdapterTest, method test_determineBaseColumnsWithPreAndPostJoinVirtualColumns.
@Test
public void test_determineBaseColumnsWithPreAndPostJoinVirtualColumns() {
  List<JoinableClause> joinableClauses = ImmutableList.of(factToCountryOnIsoCode(JoinType.LEFT));
  JoinFilterPreAnalysis analysis = makeDefaultConfigPreAnalysis(null, joinableClauses, VirtualColumns.EMPTY);
  HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter(
      factSegment.asStorageAdapter(),
      joinableClauses,
      analysis
  );
  List<VirtualColumn> expectedPreJoin = ImmutableList.of(
      makeExpressionVirtualColumn("concat(countryIsoCode,'L')", "v0"),
      makeExpressionVirtualColumn("concat(countryIsoCode, countryNumber)", "v1"),
      makeExpressionVirtualColumn("channel_uniques - 1", "v2"),
      makeExpressionVirtualColumn("channel_uniques - __time", "v3")
  );
  List<VirtualColumn> expectedPostJoin = ImmutableList.of(
      makeExpressionVirtualColumn("concat(countryIsoCode, dummyColumn)", "v4"),
      makeExpressionVirtualColumn("dummyMetric - __time", "v5")
  );
  List<VirtualColumn> actualPreJoin = new ArrayList<>();
  List<VirtualColumn> actualPostJoin = new ArrayList<>();
  List<VirtualColumn> allVirtualColumns = new ArrayList<>();
  allVirtualColumns.addAll(expectedPreJoin);
  allVirtualColumns.addAll(expectedPostJoin);
  adapter.determineBaseColumnsWithPreAndPostJoinVirtualColumns(
      VirtualColumns.create(allVirtualColumns),
      actualPreJoin,
      actualPostJoin
  );
  Assert.assertEquals(expectedPreJoin, actualPreJoin);
  Assert.assertEquals(expectedPostJoin, actualPostJoin);
}
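The expected columns above come from a makeExpressionVirtualColumn helper. A plausible sketch, assuming it wraps Druid's ExpressionVirtualColumn with a string output type and the default macro table (both the output type and the macro table choice are assumptions; exact constructor arguments vary across Druid versions):

// Sketch only: wraps a Druid expression in a named virtual column.
// ColumnType.STRING and ExprMacroTable.nil() are assumed defaults.
private static VirtualColumn makeExpressionVirtualColumn(String expression, String name) {
  return new ExpressionVirtualColumn(name, expression, ColumnType.STRING, ExprMacroTable.nil());
}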
Use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysis in project druid by druid-io.
From the class HashJoinSegmentStorageAdapter, method makeCursors.
@Override
public Sequence<Cursor> makeCursors(
    @Nullable final Filter filter,
    @Nonnull final Interval interval,
    @Nonnull final VirtualColumns virtualColumns,
    @Nonnull final Granularity gran,
    final boolean descending,
    @Nullable final QueryMetrics<?> queryMetrics
) {
  final Filter combinedFilter = baseFilterAnd(filter);
  if (clauses.isEmpty()) {
    return baseAdapter.makeCursors(combinedFilter, interval, virtualColumns, gran, descending, queryMetrics);
  }
  // Filter pre-analysis key implied by the call to "makeCursors". We need to sanity-check that it matches
  // the actual pre-analysis that was done. Note: we can't infer a rewrite config from the "makeCursors" call
  // (it requires access to the query context), so we skip sanity-checking it by re-using the one present
  // in the cached key.
  final JoinFilterPreAnalysisKey keyIn = new JoinFilterPreAnalysisKey(
      joinFilterPreAnalysis.getKey().getRewriteConfig(),
      clauses,
      virtualColumns,
      combinedFilter
  );
  final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey();
  if (!keyIn.equals(keyCached)) {
    // It is a bug if this happens. The implied key and the cached key should always match.
    throw new ISE("Pre-analysis mismatch, cannot execute query");
  }
  final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
  final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
  determineBaseColumnsWithPreAndPostJoinVirtualColumns(virtualColumns, preJoinVirtualColumns, postJoinVirtualColumns);
  // Merge the base-table filter specified by the user with the base-table filter pushed down from the join.
  JoinFilterSplit joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter);
  preJoinVirtualColumns.addAll(joinFilterSplit.getPushDownVirtualColumns());
  final Sequence<Cursor> baseCursorSequence = baseAdapter.makeCursors(
      joinFilterSplit.getBaseTableFilter().orElse(null),
      interval,
      VirtualColumns.create(preJoinVirtualColumns),
      gran,
      descending,
      queryMetrics
  );
  Closer joinablesCloser = Closer.create();
  return Sequences.<Cursor, Cursor>map(baseCursorSequence, cursor -> {
    assert cursor != null;
    Cursor retVal = cursor;
    for (JoinableClause clause : clauses) {
      retVal = HashJoinEngine.makeJoinCursor(retVal, clause, descending, joinablesCloser);
    }
    return PostJoinCursor.wrap(
        retVal,
        VirtualColumns.create(postJoinVirtualColumns),
        joinFilterSplit.getJoinTableFilter().orElse(null)
    );
  }).withBaggage(joinablesCloser);
}
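The returned Sequence<Cursor> is consumed lazily, and the Closer attached via withBaggage releases the joinables once iteration finishes. A hedged sketch of reading one column off the joined cursors, roughly what JoinTestHelper.verifyCursors does internally (the adapter variable and the "page" column name are illustrative):

// Sketch only: accumulate the "page" column from every row of every cursor.
List<Object> pages = adapter
    .makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null)
    .accumulate(new ArrayList<>(), (acc, cursor) -> {
      ColumnValueSelector<?> selector =
          cursor.getColumnSelectorFactory().makeColumnValueSelector("page");
      while (!cursor.isDone()) {
        acc.add(selector.getObject());
        cursor.advance();
      }
      return acc;
    });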
Use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysis in project druid by druid-io.
From the class JoinableFactoryWrapper, method createSegmentMapFn.
/**
* Creates a Function that maps base segments to {@link HashJoinSegment} if needed (i.e. if the number of join
* clauses is > 0). If mapping is not needed, this method will return {@link Function#identity()}.
*
* @param baseFilter Filter to apply before the join takes place
* @param clauses Pre-joinable clauses
* @param cpuTimeAccumulator An accumulator that we will add CPU nanos to; this is part of the function to encourage
* callers to remember to track metrics on CPU time required for creation of Joinables
* @param query The query that will be run on the mapped segments. Usually this should be
* {@code analysis.getBaseQuery().orElse(query)}, where "analysis" is a
* {@link DataSourceAnalysis} and "query" is the original
* query from the end user.
*/
public Function<SegmentReference, SegmentReference> createSegmentMapFn(
    @Nullable final Filter baseFilter,
    final List<PreJoinableClause> clauses,
    final AtomicLong cpuTimeAccumulator,
    final Query<?> query
) {
  // compute column correlations here and RHS correlated values
  return JvmUtils.safeAccumulateThreadCpuTime(cpuTimeAccumulator, () -> {
    if (clauses.isEmpty()) {
      return Function.identity();
    } else {
      final JoinableClauses joinableClauses = JoinableClauses.createClauses(clauses, joinableFactory);
      final JoinFilterRewriteConfig filterRewriteConfig = JoinFilterRewriteConfig.forQuery(query);
      // Pick off any join clauses that can be converted into filters.
      final Set<String> requiredColumns = query.getRequiredColumns();
      final Filter baseFilterToUse;
      final List<JoinableClause> clausesToUse;
      if (requiredColumns != null && filterRewriteConfig.isEnableRewriteJoinToFilter()) {
        final Pair<List<Filter>, List<JoinableClause>> conversionResult = convertJoinsToFilters(
            joinableClauses.getJoinableClauses(),
            requiredColumns,
            Ints.checkedCast(Math.min(filterRewriteConfig.getFilterRewriteMaxSize(), Integer.MAX_VALUE))
        );
        baseFilterToUse = Filters.maybeAnd(
            Lists.newArrayList(Iterables.concat(Collections.singleton(baseFilter), conversionResult.lhs))
        ).orElse(null);
        clausesToUse = conversionResult.rhs;
      } else {
        baseFilterToUse = baseFilter;
        clausesToUse = joinableClauses.getJoinableClauses();
      }
      // Analyze remaining join clauses to see if filters on them can be pushed down.
      final JoinFilterPreAnalysis joinFilterPreAnalysis = JoinFilterAnalyzer.computeJoinFilterPreAnalysis(
          new JoinFilterPreAnalysisKey(
              filterRewriteConfig,
              clausesToUse,
              query.getVirtualColumns(),
              Filters.maybeAnd(Arrays.asList(baseFilterToUse, Filters.toFilter(query.getFilter()))).orElse(null)
          )
      );
      return baseSegment -> new HashJoinSegment(baseSegment, baseFilterToUse, clausesToUse, joinFilterPreAnalysis);
    }
  });
}
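A hedged usage sketch, following the Javadoc's suggestion of passing analysis.getBaseQuery().orElse(query) for the query argument (the joinableFactoryWrapper, analysis, query, and baseSegment variables are assumed to exist in the caller):

// Sketch only: map a base segment into a HashJoinSegment when join clauses are present.
AtomicLong cpuAccumulator = new AtomicLong();
Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
    null,                                 // no extra base filter
    analysis.getPreJoinableClauses(),     // from a DataSourceAnalysis of the query's datasource
    cpuAccumulator,
    analysis.getBaseQuery().orElse(query)
);
SegmentReference mapped = segmentMapFn.apply(baseSegment);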