Search in sources:

Example 76 with JoinFilterPreAnalysis

use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysis in project druid by druid-io.

the class HashJoinSegmentStorageAdapterTest method test_makeCursors_factToCountryLeft.

/**
 * Builds a {@link HashJoinSegmentStorageAdapter} over the fact segment with a single LEFT join clause
 * (fact to country, on ISO code) and checks the cursor output against a fixed table of rows.
 *
 * Each expected row lists: page, countryIsoCode, then the prefixed join-side countryIsoCode, countryName and
 * countryNumber. Rows that are expected to have no join match carry {@code null} for the join-side strings
 * and {@code NULL_COUNTRY} for the join-side number.
 */
@Test
public void test_makeCursors_factToCountryLeft() {
    final List<JoinableClause> leftJoinClauses = ImmutableList.of(factToCountryOnIsoCode(JoinType.LEFT));
    final JoinFilterPreAnalysis preAnalysis = makeDefaultConfigPreAnalysis(
        null,
        leftJoinClauses,
        VirtualColumns.EMPTY
    );
    final HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter(
        factSegment.asStorageAdapter(),
        leftJoinClauses,
        preAnalysis
    );

    JoinTestHelper.verifyCursors(
        // No filter, all time, no virtual columns, a single ALL-granularity bucket, ascending, no metrics.
        adapter.makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null),
        ImmutableList.of(
            "page",
            "countryIsoCode",
            FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryIsoCode",
            FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName",
            FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryNumber"
        ),
        ImmutableList.of(
            new Object[] { "Talk:Oswald Tilghman", null, null, null, NULL_COUNTRY },
            new Object[] { "Rallicula", null, null, null, NULL_COUNTRY },
            new Object[] { "Peremptory norm", "AU", "AU", "Australia", 0L },
            new Object[] { "Apamea abruzzorum", null, null, null, NULL_COUNTRY },
            new Object[] { "Atractus flammigerus", null, null, null, NULL_COUNTRY },
            new Object[] { "Agama mossambica", null, null, null, NULL_COUNTRY },
            new Object[] { "Mathis Bolly", "MX", "MX", "Mexico", 10L },
            new Object[] { "유희왕 GX", "KR", "KR", "Republic of Korea", 9L },
            new Object[] { "青野武", "JP", "JP", "Japan", 8L },
            new Object[] { "Golpe de Estado en Chile de 1973", "CL", "CL", "Chile", 2L },
            new Object[] { "President of India", "US", "US", "United States", 13L },
            new Object[] { "Diskussion:Sebastian Schulz", "DE", "DE", "Germany", 3L },
            new Object[] { "Saison 9 de Secret Story", "FR", "FR", "France", 5L },
            new Object[] { "Glasgow", "GB", "GB", "United Kingdom", 6L },
            new Object[] { "Didier Leclair", "CA", "CA", "Canada", 1L },
            new Object[] { "Les Argonautes", "CA", "CA", "Canada", 1L },
            new Object[] { "Otjiwarongo Airport", "US", "US", "United States", 13L },
            new Object[] { "Sarah Michelle Gellar", "CA", "CA", "Canada", 1L },
            new Object[] { "DirecTV", "US", "US", "United States", 13L },
            new Object[] { "Carlo Curti", "US", "US", "United States", 13L },
            new Object[] { "Giusy Ferreri discography", "IT", "IT", "Italy", 7L },
            new Object[] { "Roma-Bangkok", "IT", "IT", "Italy", 7L },
            new Object[] { "Wendigo", "SV", "SV", "El Salvador", 12L },
            new Object[] { "Алиса в Зазеркалье", "NO", "NO", "Norway", 11L },
            new Object[] { "Gabinete Ministerial de Rafael Correa", "EC", "EC", "Ecuador", 4L },
            new Object[] { "Old Anatolian Turkish", "US", "US", "United States", 13L },
            new Object[] { "Cream Soda", "SU", "SU", "States United", 15L },
            new Object[] { "Orange Soda", "MatchNothing", null, null, NULL_COUNTRY },
            new Object[] { "History of Fourems", "MMMM", "MMMM", "Fourems", 205L }
        )
    );
}
Also used : JoinFilterPreAnalysis(org.apache.druid.segment.join.filter.JoinFilterPreAnalysis) Test(org.junit.Test)

Example 77 with JoinFilterPreAnalysis

use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysis in project druid by druid-io.

the class HashJoinSegmentStorageAdapterTest method test_determineBaseColumnsWithPreAndPostJoinVirtualColumns.

/**
 * Feeds six expression virtual columns (v0..v5) to
 * {@code determineBaseColumnsWithPreAndPostJoinVirtualColumns} on an adapter with one LEFT join clause and
 * asserts that v0..v3 land in the pre-join output list and v4..v5 in the post-join output list, in order.
 */
@Test
public void test_determineBaseColumnsWithPreAndPostJoinVirtualColumns() {
    final List<JoinableClause> leftJoinClauses = ImmutableList.of(factToCountryOnIsoCode(JoinType.LEFT));
    final JoinFilterPreAnalysis preAnalysis = makeDefaultConfigPreAnalysis(
        null,
        leftJoinClauses,
        VirtualColumns.EMPTY
    );
    final HashJoinSegmentStorageAdapter storageAdapter = new HashJoinSegmentStorageAdapter(
        factSegment.asStorageAdapter(),
        leftJoinClauses,
        preAnalysis
    );

    final List<VirtualColumn> wantPreJoin = ImmutableList.of(
        makeExpressionVirtualColumn("concat(countryIsoCode,'L')", "v0"),
        makeExpressionVirtualColumn("concat(countryIsoCode, countryNumber)", "v1"),
        makeExpressionVirtualColumn("channel_uniques - 1", "v2"),
        makeExpressionVirtualColumn("channel_uniques - __time", "v3")
    );
    final List<VirtualColumn> wantPostJoin = ImmutableList.of(
        makeExpressionVirtualColumn("concat(countryIsoCode, dummyColumn)", "v4"),
        makeExpressionVirtualColumn("dummyMetric - __time", "v5")
    );

    // The input is the pre-join columns followed by the post-join columns.
    final List<VirtualColumn> everyVirtualColumn = new ArrayList<>(wantPreJoin);
    everyVirtualColumn.addAll(wantPostJoin);

    // Output lists that the method under test fills in.
    final List<VirtualColumn> gotPreJoin = new ArrayList<>();
    final List<VirtualColumn> gotPostJoin = new ArrayList<>();
    storageAdapter.determineBaseColumnsWithPreAndPostJoinVirtualColumns(
        VirtualColumns.create(everyVirtualColumn),
        gotPreJoin,
        gotPostJoin
    );

    Assert.assertEquals(wantPreJoin, gotPreJoin);
    Assert.assertEquals(wantPostJoin, gotPostJoin);
}
Also used : JoinFilterPreAnalysis(org.apache.druid.segment.join.filter.JoinFilterPreAnalysis) ArrayList(java.util.ArrayList) VirtualColumn(org.apache.druid.segment.VirtualColumn) Test(org.junit.Test)

Example 78 with JoinFilterPreAnalysis

use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysis in project druid by druid-io.

the class HashJoinSegmentStorageAdapter method makeCursors.

/**
 * Creates cursors over the joined data.
 *
 * With no join clauses this delegates straight to the base adapter, passing the caller's filter combined with
 * the base filter via {@code baseFilterAnd}. Otherwise it checks that the stored filter pre-analysis matches
 * this call, splits the filter into a base-table part and a join-table part, builds base cursors with the
 * base-table part and the pre-join virtual columns, and wraps each base cursor in one join cursor per clause
 * followed by a post-join cursor.
 *
 * @throws ISE if the pre-analysis key implied by this call differs from the key of the stored pre-analysis
 */
@Override
public Sequence<Cursor> makeCursors(
    @Nullable final Filter filter,
    @Nonnull final Interval interval,
    @Nonnull final VirtualColumns virtualColumns,
    @Nonnull final Granularity gran,
    final boolean descending,
    @Nullable final QueryMetrics<?> queryMetrics
) {
    final Filter combinedFilter = baseFilterAnd(filter);

    // Nothing to join: the base adapter can serve the request by itself.
    if (clauses.isEmpty()) {
        return baseAdapter.makeCursors(combinedFilter, interval, virtualColumns, gran, descending, queryMetrics);
    }

    // Sanity-check that the pre-analysis we were handed corresponds to this "makeCursors" call. A rewrite config
    // cannot be derived from the call itself (that would need the query context), so the one stored in the cached
    // key is re-used and is therefore not part of the check.
    final JoinFilterPreAnalysisKey cachedKey = joinFilterPreAnalysis.getKey();
    final JoinFilterPreAnalysisKey impliedKey = new JoinFilterPreAnalysisKey(
        cachedKey.getRewriteConfig(),
        clauses,
        virtualColumns,
        combinedFilter
    );
    if (!impliedKey.equals(cachedKey)) {
        // Reaching this point indicates a bug: the implied key and the cached key should always agree.
        throw new ISE("Pre-analysis mismatch, cannot execute query");
    }

    final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
    final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
    determineBaseColumnsWithPreAndPostJoinVirtualColumns(
        virtualColumns,
        preJoinVirtualColumns,
        postJoinVirtualColumns
    );

    // The user-specified base-table filter is merged with whatever base-table filter is pushed down from the join.
    final JoinFilterSplit split = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter);
    preJoinVirtualColumns.addAll(split.getPushDownVirtualColumns());

    final Filter baseTableFilter = split.getBaseTableFilter().orElse(null);
    final Sequence<Cursor> baseCursors = baseAdapter.makeCursors(
        baseTableFilter,
        interval,
        VirtualColumns.create(preJoinVirtualColumns),
        gran,
        descending,
        queryMetrics
    );

    // Shared by every join cursor and attached to the returned sequence as baggage.
    final Closer joinablesCloser = Closer.create();
    return Sequences.<Cursor, Cursor>map(
        baseCursors,
        baseCursor -> {
            assert baseCursor != null;
            // Stack one join cursor per clause, in clause order, on top of the base cursor.
            Cursor joined = baseCursor;
            for (JoinableClause clause : clauses) {
                joined = HashJoinEngine.makeJoinCursor(joined, clause, descending, joinablesCloser);
            }
            return PostJoinCursor.wrap(
                joined,
                VirtualColumns.create(postJoinVirtualColumns),
                split.getJoinTableFilter().orElse(null)
            );
        }
    ).withBaggage(joinablesCloser);
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Indexed(org.apache.druid.segment.data.Indexed) Arrays(java.util.Arrays) Granularity(org.apache.druid.java.util.common.granularity.Granularity) QueryMetrics(org.apache.druid.query.QueryMetrics) Metadata(org.apache.druid.segment.Metadata) StorageAdapter(org.apache.druid.segment.StorageAdapter) ArrayList(java.util.ArrayList) JoinFilterSplit(org.apache.druid.segment.join.filter.JoinFilterSplit) HashSet(java.util.HashSet) VectorCursor(org.apache.druid.segment.vector.VectorCursor) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ListIndexed(org.apache.druid.segment.data.ListIndexed) JoinFilterPreAnalysisKey(org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey) Nonnull(javax.annotation.Nonnull) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) LinkedHashSet(java.util.LinkedHashSet) Sequence(org.apache.druid.java.util.common.guava.Sequence) VirtualColumns(org.apache.druid.segment.VirtualColumns) Closer(org.apache.druid.java.util.common.io.Closer) VirtualColumn(org.apache.druid.segment.VirtualColumn) DateTime(org.joda.time.DateTime) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) JoinFilterPreAnalysis(org.apache.druid.segment.join.filter.JoinFilterPreAnalysis) List(java.util.List) Cursor(org.apache.druid.segment.Cursor) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Optional(java.util.Optional) JoinFilterAnalyzer(org.apache.druid.segment.join.filter.JoinFilterAnalyzer) Filters(org.apache.druid.segment.filter.Filters) Filter(org.apache.druid.query.filter.Filter) Filter(org.apache.druid.query.filter.Filter) JoinFilterPreAnalysisKey(org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey) JoinFilterSplit(org.apache.druid.segment.join.filter.JoinFilterSplit) ArrayList(java.util.ArrayList) ISE(org.apache.druid.java.util.common.ISE) VirtualColumn(org.apache.druid.segment.VirtualColumn) 
VectorCursor(org.apache.druid.segment.vector.VectorCursor) Cursor(org.apache.druid.segment.Cursor)

Example 79 with JoinFilterPreAnalysis

use of org.apache.druid.segment.join.filter.JoinFilterPreAnalysis in project druid by druid-io.

the class JoinableFactoryWrapper method createSegmentMapFn.

/**
 * Builds the function that turns a base segment into a {@link HashJoinSegment}. When there are no join clauses
 * no mapping is needed and {@link Function#identity()} is returned instead.
 *
 * @param baseFilter         Filter to apply before the join takes place
 * @param clauses            Pre-joinable clauses
 * @param cpuTimeAccumulator An accumulator that we will add CPU nanos to; this is part of the function to encourage
 *                           callers to remember to track metrics on CPU time required for creation of Joinables
 * @param query              The query that will be run on the mapped segments. Usually this should be
 *                           {@code analysis.getBaseQuery().orElse(query)}, where "analysis" is a
 *                           {@link DataSourceAnalysis} and "query" is the original
 *                           query from the end user.
 * @return the segment-mapping function, or the identity function when {@code clauses} is empty
 */
public Function<SegmentReference, SegmentReference> createSegmentMapFn(
    @Nullable final Filter baseFilter,
    final List<PreJoinableClause> clauses,
    final AtomicLong cpuTimeAccumulator,
    final Query<?> query
) {
    // compute column correlations here and RHS correlated values
    return JvmUtils.safeAccumulateThreadCpuTime(cpuTimeAccumulator, () -> {
        if (clauses.isEmpty()) {
            return Function.identity();
        }

        final JoinableClauses joinableClauses = JoinableClauses.createClauses(clauses, joinableFactory);
        final JoinFilterRewriteConfig rewriteConfig = JoinFilterRewriteConfig.forQuery(query);

        // Pick off any join clauses that can be converted into filters.
        final Set<String> requiredColumns = query.getRequiredColumns();
        final Filter baseFilterToUse;
        final List<JoinableClause> clausesToUse;
        if (requiredColumns == null || !rewriteConfig.isEnableRewriteJoinToFilter()) {
            // No conversion attempted: keep the base filter and every clause as-is.
            baseFilterToUse = baseFilter;
            clausesToUse = joinableClauses.getJoinableClauses();
        } else {
            final Pair<List<Filter>, List<JoinableClause>> converted = convertJoinsToFilters(
                joinableClauses.getJoinableClauses(),
                requiredColumns,
                Ints.checkedCast(Math.min(rewriteConfig.getFilterRewriteMaxSize(), Integer.MAX_VALUE))
            );
            // lhs holds the filters produced by the conversion; they are ANDed onto the base filter.
            // rhs holds the clauses that still have to be executed as joins.
            baseFilterToUse = Filters.maybeAnd(
                Lists.newArrayList(Iterables.concat(Collections.singleton(baseFilter), converted.lhs))
            ).orElse(null);
            clausesToUse = converted.rhs;
        }

        // Analyze remaining join clauses to see if filters on them can be pushed down.
        final JoinFilterPreAnalysisKey preAnalysisKey = new JoinFilterPreAnalysisKey(
            rewriteConfig,
            clausesToUse,
            query.getVirtualColumns(),
            Filters.maybeAnd(Arrays.asList(baseFilterToUse, Filters.toFilter(query.getFilter()))).orElse(null)
        );
        final JoinFilterPreAnalysis joinFilterPreAnalysis =
            JoinFilterAnalyzer.computeJoinFilterPreAnalysis(preAnalysisKey);

        return baseSegment -> new HashJoinSegment(baseSegment, baseFilterToUse, clausesToUse, joinFilterPreAnalysis);
    });
}
Also used : Logger(org.apache.druid.java.util.common.logger.Logger) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) Multiset(com.google.common.collect.Multiset) CacheKeyBuilder(org.apache.druid.query.cache.CacheKeyBuilder) Function(java.util.function.Function) Pair(org.apache.druid.java.util.common.Pair) ArrayList(java.util.ArrayList) SegmentReference(org.apache.druid.segment.SegmentReference) Lists(com.google.common.collect.Lists) Query(org.apache.druid.query.Query) HashMultiset(com.google.common.collect.HashMultiset) JoinFilterPreAnalysisKey(org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey) IAE(org.apache.druid.java.util.common.IAE) JoinFilterRewriteConfig(org.apache.druid.segment.join.filter.rewrite.JoinFilterRewriteConfig) Nullable(javax.annotation.Nullable) JvmUtils(org.apache.druid.utils.JvmUtils) PreJoinableClause(org.apache.druid.query.planning.PreJoinableClause) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) JoinableClauses(org.apache.druid.segment.join.filter.JoinableClauses) Ints(com.google.common.primitives.Ints) Sets(com.google.common.collect.Sets) JoinFilterPreAnalysis(org.apache.druid.segment.join.filter.JoinFilterPreAnalysis) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) JoinFilterAnalyzer(org.apache.druid.segment.join.filter.JoinFilterAnalyzer) VisibleForTesting(com.google.common.annotations.VisibleForTesting) InDimFilter(org.apache.druid.query.filter.InDimFilter) Filters(org.apache.druid.segment.filter.Filters) Collections(java.util.Collections) Filter(org.apache.druid.query.filter.Filter) JoinFilterPreAnalysisKey(org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey) JoinFilterRewriteConfig(org.apache.druid.segment.join.filter.rewrite.JoinFilterRewriteConfig) 
PreJoinableClause(org.apache.druid.query.planning.PreJoinableClause) JoinFilterPreAnalysis(org.apache.druid.segment.join.filter.JoinFilterPreAnalysis) InDimFilter(org.apache.druid.query.filter.InDimFilter) Filter(org.apache.druid.query.filter.Filter) ArrayList(java.util.ArrayList) List(java.util.List) JoinableClauses(org.apache.druid.segment.join.filter.JoinableClauses)

Aggregations

JoinFilterPreAnalysis (org.apache.druid.segment.join.filter.JoinFilterPreAnalysis): 79; Test (org.junit.Test): 74; Filter (org.apache.druid.query.filter.Filter): 63; ExpressionDimFilter (org.apache.druid.query.filter.ExpressionDimFilter): 61; SelectorFilter (org.apache.druid.segment.filter.SelectorFilter): 61; InDimFilter (org.apache.druid.query.filter.InDimFilter): 36; BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter): 35; AndFilter (org.apache.druid.segment.filter.AndFilter): 35; BoundFilter (org.apache.druid.segment.filter.BoundFilter): 35; FalseFilter (org.apache.druid.segment.filter.FalseFilter): 35; OrFilter (org.apache.druid.segment.filter.OrFilter): 35; JoinFilterSplit (org.apache.druid.segment.join.filter.JoinFilterSplit): 35; OrDimFilter (org.apache.druid.query.filter.OrDimFilter): 26; SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter): 26; IndexedTableJoinable (org.apache.druid.segment.join.table.IndexedTableJoinable): 19; JoinFilterPreAnalysisKey (org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey): 7; ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 7; JoinFilterRewriteConfig (org.apache.druid.segment.join.filter.rewrite.JoinFilterRewriteConfig): 6; VirtualColumns (org.apache.druid.segment.VirtualColumns): 5; ArrayList (java.util.ArrayList): 3