
Example 1 with Joinable

Use of org.apache.druid.segment.join.Joinable in project druid by druid-io.

The class JoinableClauses, method createClauses.

/**
 * Builds a list of {@link JoinableClause} corresponding to a list of {@link PreJoinableClause}. This will call
 * {@link JoinableFactory#build} on each one and therefore may be an expensive operation.
 */
public static JoinableClauses createClauses(final List<PreJoinableClause> preClauses, final JoinableFactory joinableFactory) {
    // Since building a JoinableClause can be expensive, check for prefix conflicts before building
    checkPreJoinableClausesForDuplicatesAndShadowing(preClauses);
    List<JoinableClause> joinableClauses = preClauses.stream().map(preJoinableClause -> {
        final Optional<Joinable> joinable = joinableFactory.build(
            preJoinableClause.getDataSource(),
            preJoinableClause.getCondition()
        );
        return new JoinableClause(
            preJoinableClause.getPrefix(),
            joinable.orElseThrow(() -> new ISE("dataSource is not joinable: %s", preJoinableClause.getDataSource())),
            preJoinableClause.getJoinType(),
            preJoinableClause.getCondition()
        );
    }).collect(Collectors.toList());
    return new JoinableClauses(joinableClauses);
}
Also used : VirtualColumns(org.apache.druid.segment.VirtualColumns) Joinable(org.apache.druid.segment.join.Joinable) PreJoinableClause(org.apache.druid.query.planning.PreJoinableClause) JoinableClause(org.apache.druid.segment.join.JoinableClause) VirtualColumn(org.apache.druid.segment.VirtualColumn) Collection(java.util.Collection) ISE(org.apache.druid.java.util.common.ISE) JoinableFactory(org.apache.druid.segment.join.JoinableFactory) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) List(java.util.List) Optional(java.util.Optional) Nonnull(javax.annotation.Nonnull) JoinPrefixUtils(org.apache.druid.segment.join.JoinPrefixUtils) Nullable(javax.annotation.Nullable)
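
For orientation, a caller might use createClauses roughly as follows. This is a minimal sketch, not taken from the Druid codebase: the package of JoinableClauses and its getJoinableClauses() accessor are assumed from the surrounding source layout, and the helper class and method names are hypothetical.

import java.util.List;
import org.apache.druid.query.planning.PreJoinableClause;
import org.apache.druid.segment.join.JoinableClause;
import org.apache.druid.segment.join.JoinableFactory;
// assumed package for JoinableClauses, based on the Druid source layout
import org.apache.druid.segment.join.filter.JoinableClauses;

public class JoinableClausesExample
{
    /**
     * Hypothetical helper: build every clause up front so that an un-joinable
     * datasource fails fast with the ISE thrown inside createClauses().
     */
    static List<JoinableClause> buildOrFail(
        final List<PreJoinableClause> preClauses,
        final JoinableFactory joinableFactory
    )
    {
        final JoinableClauses clauses = JoinableClauses.createClauses(preClauses, joinableFactory);
        // assumed accessor; returns the built clauses in the same order as the pre-clauses
        return clauses.getJoinableClauses();
    }
}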

Example 2 with Joinable

Use of org.apache.druid.segment.join.Joinable in project druid by druid-io.

The class ClientQuerySegmentWalkerTest, method initWalker.

/**
 * Initialize (or reinitialize) our {@link #walker} and {@link #closer}.
 */
private void initWalker(final Map<String, String> serverProperties, QueryScheduler schedulerForTest) {
    final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
    final ServerConfig serverConfig = jsonMapper.convertValue(serverProperties, ServerConfig.class);
    final SegmentWrangler segmentWrangler = new MapSegmentWrangler(
        ImmutableMap.<Class<? extends DataSource>, SegmentWrangler>builder()
                    .put(InlineDataSource.class, new InlineSegmentWrangler())
                    .build()
    );
    final JoinableFactory globalFactory = new JoinableFactory() {

        @Override
        public boolean isDirectlyJoinable(DataSource dataSource) {
            return ((GlobalTableDataSource) dataSource).getName().equals(GLOBAL);
        }

        @Override
        public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition) {
            return Optional.empty();
        }
    };
    final JoinableFactory joinableFactory = new MapJoinableFactory(
        ImmutableSet.of(globalFactory, new InlineJoinableFactory()),
        ImmutableMap.<Class<? extends JoinableFactory>, Class<? extends DataSource>>builder()
                    .put(InlineJoinableFactory.class, InlineDataSource.class)
                    .put(globalFactory.getClass(), GlobalTableDataSource.class)
                    .build()
    );
    class CapturingWalker implements QuerySegmentWalker {

        private QuerySegmentWalker baseWalker;

        private ClusterOrLocal how;

        CapturingWalker(QuerySegmentWalker baseWalker, ClusterOrLocal how) {
            this.baseWalker = baseWalker;
            this.how = how;
        }

        @Override
        public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
            final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForIntervals(query, intervals);
            return (queryPlus, responseContext) -> {
                log.info("Query (%s): %s", how, queryPlus.getQuery());
                issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
                return baseRunner.run(queryPlus, responseContext);
            };
        }

        @Override
        public <T> QueryRunner<T> getQueryRunnerForSegments(Query<T> query, Iterable<SegmentDescriptor> specs) {
            final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForSegments(query, specs);
            return (queryPlus, responseContext) -> {
                log.info("Query (%s): %s", how, queryPlus.getQuery());
                issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
                return baseRunner.run(queryPlus, responseContext);
            };
        }
    }
    walker = QueryStackTests.createClientQuerySegmentWalker(
        new CapturingWalker(
            QueryStackTests.createClusterQuerySegmentWalker(
                ImmutableMap.<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>>builder()
                            .put(FOO, makeTimeline(FOO, FOO_INLINE))
                            .put(BAR, makeTimeline(BAR, BAR_INLINE))
                            .put(MULTI, makeTimeline(MULTI, MULTI_VALUE_INLINE))
                            .put(GLOBAL, makeTimeline(GLOBAL, FOO_INLINE))
                            .put(ARRAY, makeTimeline(ARRAY, ARRAY_INLINE))
                            .put(ARRAY_UNKNOWN, makeTimeline(ARRAY_UNKNOWN, ARRAY_INLINE_UNKNOWN))
                            .build(),
                joinableFactory,
                conglomerate,
                schedulerForTest
            ),
            ClusterOrLocal.CLUSTER
        ),
        new CapturingWalker(
            QueryStackTests.createLocalQuerySegmentWalker(conglomerate, segmentWrangler, joinableFactory, schedulerForTest),
            ClusterOrLocal.LOCAL
        ),
        conglomerate,
        joinableFactory,
        serverConfig
    );
}
Also used : QueryToolChestTestHelper(org.apache.druid.query.QueryToolChestTestHelper) QueryPlus(org.apache.druid.query.QueryPlus) Arrays(java.util.Arrays) RowBasedSegment(org.apache.druid.segment.RowBasedSegment) SegmentWrangler(org.apache.druid.segment.SegmentWrangler) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) After(org.junit.After) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) ManualQueryPrioritizationStrategy(org.apache.druid.server.scheduling.ManualQueryPrioritizationStrategy) Sequence(org.apache.druid.java.util.common.guava.Sequence) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) DataSource(org.apache.druid.query.DataSource) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) JoinConditionAnalysis(org.apache.druid.segment.join.JoinConditionAnalysis) MapJoinableFactory(org.apache.druid.segment.join.MapJoinableFactory) QueryContexts(org.apache.druid.query.QueryContexts) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) Objects(java.util.Objects) InlineJoinableFactory(org.apache.druid.segment.join.InlineJoinableFactory) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) MapSegmentWrangler(org.apache.druid.segment.MapSegmentWrangler) Optional(java.util.Optional) SegmentId(org.apache.druid.timeline.SegmentId) Logger(org.apache.druid.java.util.common.logger.Logger) ComparableList(org.apache.druid.segment.data.ComparableList) Joinable(org.apache.druid.segment.join.Joinable) Intervals(org.apache.druid.java.util.common.Intervals) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) JoinType(org.apache.druid.segment.join.JoinType) InlineSegmentWrangler(org.apache.druid.segment.InlineSegmentWrangler) JoinableFactory(org.apache.druid.segment.join.JoinableFactory) DirectDruidClient(org.apache.druid.client.DirectDruidClient) ScanQuery(org.apache.druid.query.scan.ScanQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) JoinDataSource(org.apache.druid.query.JoinDataSource) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) ExpectedException(org.junit.rules.ExpectedException) Sequences(org.apache.druid.java.util.common.guava.Sequences) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) Before(org.junit.Before) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) InlineDataSource(org.apache.druid.query.InlineDataSource) GroupByStrategyV2(org.apache.druid.query.groupby.strategy.GroupByStrategyV2) ResponseContext(org.apache.druid.query.context.ResponseContext) ServerConfig(org.apache.druid.server.initialization.ServerConfig) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder)
ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) Test(org.junit.Test) IOException(java.io.IOException) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) TableDataSource(org.apache.druid.query.TableDataSource) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) UnionDataSource(org.apache.druid.query.UnionDataSource) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) GroupByQueryHelper(org.apache.druid.query.groupby.GroupByQueryHelper) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) ColumnType(org.apache.druid.segment.column.ColumnType) NoQueryLaningStrategy(org.apache.druid.server.scheduling.NoQueryLaningStrategy) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections)
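
To see the factory wiring above in isolation from the test harness, the same composition can be exercised directly. The sketch below is not part of the Druid test suite; it assumes the MapJoinableFactory constructor, InlineDataSource.fromIterable, and JoinConditionAnalysis.forExpression behave as in the version of Druid shown here, and the class name is hypothetical.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import java.util.Optional;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.InlineDataSource;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.join.InlineJoinableFactory;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.Joinable;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.MapJoinableFactory;

public class MapJoinableFactoryExample
{
    public static void main(String[] args)
    {
        // Route InlineDataSource to InlineJoinableFactory, mirroring the wiring in initWalker() above.
        final JoinableFactory factory = new MapJoinableFactory(
            ImmutableSet.<JoinableFactory>of(new InlineJoinableFactory()),
            ImmutableMap.<Class<? extends JoinableFactory>, Class<? extends DataSource>>builder()
                        .put(InlineJoinableFactory.class, InlineDataSource.class)
                        .build()
        );

        // A one-row inline table with a single string column "k".
        final InlineDataSource inline = InlineDataSource.fromIterable(
            ImmutableList.of(new Object[]{"x"}),
            RowSignature.builder().add("k", ColumnType.STRING).build()
        );

        // An equi-join condition against the right-hand column, prefixed with "j.".
        final JoinConditionAnalysis condition =
            JoinConditionAnalysis.forExpression("k == \"j.k\"", "j.", ExprMacroTable.nil());

        // The factory dispatches on the concrete DataSource class, so this goes to InlineJoinableFactory.
        final Optional<Joinable> joinable = factory.build(inline, condition);
        System.out.println("joinable present: " + joinable.isPresent());
    }
}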

Example 3 with Joinable

Use of org.apache.druid.segment.join.Joinable in project druid by druid-io.

The class SegmentManagerBroadcastJoinIndexedTableTest, method testLoadMultipleIndexedTable.

@Test
public void testLoadMultipleIndexedTable() throws IOException, SegmentLoadingException {
    final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
    Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));
    final String version = DateTimes.nowUtc().toString();
    final String version2 = DateTimes.nowUtc().plus(1000L).toString();
    final String interval = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
    final String interval2 = "2011-01-12T00:00:00.000Z/2011-03-28T00:00:00.000Z";
    IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
    IncrementalIndex data2 = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.top");
    Assert.assertTrue(segmentManager.loadSegment(createSegment(data, interval, version), false, SegmentLazyLoadFailCallback.NOOP));
    Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));
    Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
    Assert.assertTrue(maybeJoinable.isPresent());
    Joinable joinable = maybeJoinable.get();
    // cardinality is currently tied to the number of rows
    Assert.assertEquals(733, joinable.getCardinality("market"));
    Assert.assertEquals(733, joinable.getCardinality("placement"));
    Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
    // add another segment with a smaller interval; it only partially overshadows the first, so the timeline will contain 2 segments
    Assert.assertTrue(segmentManager.loadSegment(createSegment(data2, interval2, version2), false, SegmentLazyLoadFailCallback.NOOP));
    expectedException.expect(ISE.class);
    expectedException.expectMessage(
        StringUtils.format(
            "Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.",
            TABLE_NAME
        )
    );
    // this will throw because the datasource has multiple segments, which is an invalid state for the joinable factory
    makeJoinable(dataSource);
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) Joinable(org.apache.druid.segment.join.Joinable) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DataSource(org.apache.druid.query.DataSource) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 4 with Joinable

Use of org.apache.druid.segment.join.Joinable in project druid by druid-io.

The class SegmentManagerBroadcastJoinIndexedTableTest, method testLoadMultipleIndexedTableOverwrite.

@Test
public void testLoadMultipleIndexedTableOverwrite() throws IOException, SegmentLoadingException {
    final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
    Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));
    // larger interval overwrites smaller interval
    final String version = DateTimes.nowUtc().toString();
    final String version2 = DateTimes.nowUtc().plus(1000L).toString();
    final String interval = "2011-01-12T00:00:00.000Z/2011-03-28T00:00:00.000Z";
    final String interval2 = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
    IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.top");
    IncrementalIndex data2 = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
    DataSegment segment1 = createSegment(data, interval, version);
    DataSegment segment2 = createSegment(data2, interval2, version2);
    Assert.assertTrue(segmentManager.loadSegment(segment1, false, SegmentLazyLoadFailCallback.NOOP));
    Assert.assertTrue(segmentManager.loadSegment(segment2, false, SegmentLazyLoadFailCallback.NOOP));
    Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));
    Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
    Assert.assertTrue(maybeJoinable.isPresent());
    Joinable joinable = maybeJoinable.get();
    // cardinality is currently tied to the number of rows
    Assert.assertEquals(733, joinable.getCardinality("market"));
    Assert.assertEquals(733, joinable.getCardinality("placement"));
    Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
    Optional<byte[]> cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
    Assert.assertTrue(cacheKey.isPresent());
    assertSegmentIdEquals(segment2.getId(), cacheKey.get());
    segmentManager.dropSegment(segment2);
    // if the newer segment is dropped (which in practice should rarely, if ever, happen), the old table should still exist
    maybeJoinable = makeJoinable(dataSource);
    Assert.assertTrue(maybeJoinable.isPresent());
    joinable = maybeJoinable.get();
    // cardinality is currently tied to the number of rows
    Assert.assertEquals(478, joinable.getCardinality("market"));
    Assert.assertEquals(478, joinable.getCardinality("placement"));
    Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
    cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
    Assert.assertTrue(cacheKey.isPresent());
    assertSegmentIdEquals(segment1.getId(), cacheKey.get());
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) Joinable(org.apache.druid.segment.join.Joinable) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DataSegment(org.apache.druid.timeline.DataSegment) DataSource(org.apache.druid.query.DataSource) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 5 with Joinable

Use of org.apache.druid.segment.join.Joinable in project druid by druid-io.

The class SegmentManagerBroadcastJoinIndexedTableTest, method testLoadIndexedTable.

@Test
public void testLoadIndexedTable() throws IOException, SegmentLoadingException {
    final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
    Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));
    final String version = DateTimes.nowUtc().toString();
    IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
    final String interval = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
    DataSegment segment = createSegment(data, interval, version);
    Assert.assertTrue(segmentManager.loadSegment(segment, false, SegmentLazyLoadFailCallback.NOOP));
    Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));
    Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
    Assert.assertTrue(maybeJoinable.isPresent());
    Joinable joinable = maybeJoinable.get();
    // cardinality is currently tied to the number of rows
    Assert.assertEquals(1210, joinable.getCardinality("market"));
    Assert.assertEquals(1210, joinable.getCardinality("placement"));
    Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
    Optional<byte[]> bytes = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
    Assert.assertTrue(bytes.isPresent());
    assertSegmentIdEquals(segment.getId(), bytes.get());
    // dropping the segment should make the table no longer available
    segmentManager.dropSegment(segment);
    maybeJoinable = makeJoinable(dataSource);
    Assert.assertFalse(maybeJoinable.isPresent());
    bytes = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
    Assert.assertFalse(bytes.isPresent());
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) Joinable(org.apache.druid.segment.join.Joinable) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DataSegment(org.apache.druid.timeline.DataSegment) DataSource(org.apache.druid.query.DataSource) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
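
The three tests above follow one pattern: construct a GlobalTableDataSource, check isDirectlyJoinable, then build and inspect the Joinable. A condensed sketch of that pattern, detached from the test harness, is shown below; the class and method names are hypothetical, and it relies only on the JoinableFactory and Joinable calls exercised in the tests.

import java.util.Optional;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.GlobalTableDataSource;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.Joinable;
import org.apache.druid.segment.join.JoinableFactory;

public class BroadcastJoinableProbe
{
    /**
     * Hypothetical helper: return the Joinable backing a broadcast table,
     * or empty if the table is not currently loaded as a joinable.
     */
    static Optional<Joinable> probe(
        final JoinableFactory joinableFactory,
        final String tableName,
        final JoinConditionAnalysis condition
    )
    {
        final DataSource dataSource = new GlobalTableDataSource(tableName);
        if (!joinableFactory.isDirectlyJoinable(dataSource)) {
            // mirrors the assertFalse(...) checks before loading and after dropping segments
            return Optional.empty();
        }
        // build(...) can itself return empty, and (as tested above) throws ISE when the
        // broadcast datasource is backed by more than one segment
        return joinableFactory.build(dataSource, condition);
    }
}

A caller could then inspect the table much as the tests do, for example probe(factory, "broadcast", condition).ifPresent(j -> System.out.println(j.getCardinality("market"))).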

Aggregations

Joinable (org.apache.druid.segment.join.Joinable): 5 uses
DataSource (org.apache.druid.query.DataSource): 4 uses
GlobalTableDataSource (org.apache.druid.query.GlobalTableDataSource): 4 uses
Test (org.junit.Test): 4 uses
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 3 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 3 uses
ArrayList (java.util.ArrayList): 2 uses
List (java.util.List): 2 uses
Optional (java.util.Optional): 2 uses
JoinableFactory (org.apache.druid.segment.join.JoinableFactory): 2 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1 use
ImmutableList (com.google.common.collect.ImmutableList): 1 use
ImmutableMap (com.google.common.collect.ImmutableMap): 1 use
ImmutableSet (com.google.common.collect.ImmutableSet): 1 use
IOException (java.io.IOException): 1 use
Arrays (java.util.Arrays): 1 use
Collection (java.util.Collection): 1 use
Collections (java.util.Collections): 1 use
Comparator (java.util.Comparator): 1 use
Map (java.util.Map): 1 use