Search in sources:

Example 1 with GlobalTableDataSource

Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.

From class DruidSchema, method buildDruidTable.

@VisibleForTesting
DruidTable buildDruidTable(final String dataSource) {
    ConcurrentSkipListMap<SegmentId, AvailableSegmentMetadata> segmentsMap = segmentMetadataInfo.get(dataSource);
    final Map<String, ColumnType> columnTypes = new TreeMap<>();
    if (segmentsMap != null) {
        for (AvailableSegmentMetadata availableSegmentMetadata : segmentsMap.values()) {
            final RowSignature rowSignature = availableSegmentMetadata.getRowSignature();
            if (rowSignature != null) {
                for (String column : rowSignature.getColumnNames()) {
                    // Newer column types should override older ones.
                    final ColumnType columnType = rowSignature.getColumnType(column).orElseThrow(() -> new ISE("Encountered null type for column[%s]", column));
                    columnTypes.putIfAbsent(column, columnType);
                }
            }
        }
    }
    final RowSignature.Builder builder = RowSignature.builder();
    columnTypes.forEach(builder::add);
    final TableDataSource tableDataSource;
    // to be a GlobalTableDataSource instead of a TableDataSource, it must appear on all servers (inferred by existing
    // in the segment cache, which in this case belongs to the broker meaning only broadcast segments live here)
    // to be joinable, it must be possibly joinable according to the factory. we only consider broadcast datasources
    // at this time, and isGlobal is currently strongly coupled with joinable, so only make a global table datasource
    // if also joinable
    final GlobalTableDataSource maybeGlobal = new GlobalTableDataSource(dataSource);
    final boolean isJoinable = joinableFactory.isDirectlyJoinable(maybeGlobal);
    final boolean isBroadcast = segmentManager.getDataSourceNames().contains(dataSource);
    if (isBroadcast && isJoinable) {
        tableDataSource = maybeGlobal;
    } else {
        tableDataSource = new TableDataSource(dataSource);
    }
    return new DruidTable(tableDataSource, builder.build(), null, isJoinable, isBroadcast);
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) SegmentId(org.apache.druid.timeline.SegmentId) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DruidTable(org.apache.druid.sql.calcite.table.DruidTable) TreeMap(java.util.TreeMap) TableDataSource(org.apache.druid.query.TableDataSource) ISE(org.apache.druid.java.util.common.ISE) RowSignature(org.apache.druid.segment.column.RowSignature) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
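
For context, the only behavioral difference the planner cares about here is the isGlobal() flag: GlobalTableDataSource extends TableDataSource and reports itself as global, which is what allows the table to be joined against directly on data servers. A minimal sketch of that distinction, not taken from the Druid sources (the datasource name is illustrative):

// TableDataSource: an ordinary table, not assumed to exist on every server.
DataSource plain = new TableDataSource("wikipedia");
// GlobalTableDataSource: same name, but flagged as available everywhere (broadcast).
DataSource broadcast = new GlobalTableDataSource("wikipedia");

plain.isGlobal();      // false
broadcast.isGlobal();  // true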

Example 2 with GlobalTableDataSource

Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.

From class ClientQuerySegmentWalkerTest, method initWalker.

/**
 * Initialize (or reinitialize) our {@link #walker} and {@link #closer}.
 */
private void initWalker(final Map<String, String> serverProperties, QueryScheduler schedulerForTest) {
    final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
    final ServerConfig serverConfig = jsonMapper.convertValue(serverProperties, ServerConfig.class);
    final SegmentWrangler segmentWrangler = new MapSegmentWrangler(
        ImmutableMap.<Class<? extends DataSource>, SegmentWrangler>builder()
            .put(InlineDataSource.class, new InlineSegmentWrangler())
            .build()
    );
    final JoinableFactory globalFactory = new JoinableFactory() {

        @Override
        public boolean isDirectlyJoinable(DataSource dataSource) {
            return ((GlobalTableDataSource) dataSource).getName().equals(GLOBAL);
        }

        @Override
        public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition) {
            return Optional.empty();
        }
    };
    final JoinableFactory joinableFactory = new MapJoinableFactory(
        ImmutableSet.of(globalFactory, new InlineJoinableFactory()),
        ImmutableMap.<Class<? extends JoinableFactory>, Class<? extends DataSource>>builder()
            .put(InlineJoinableFactory.class, InlineDataSource.class)
            .put(globalFactory.getClass(), GlobalTableDataSource.class)
            .build()
    );
    class CapturingWalker implements QuerySegmentWalker {

        private QuerySegmentWalker baseWalker;

        private ClusterOrLocal how;

        CapturingWalker(QuerySegmentWalker baseWalker, ClusterOrLocal how) {
            this.baseWalker = baseWalker;
            this.how = how;
        }

        @Override
        public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
            final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForIntervals(query, intervals);
            return (queryPlus, responseContext) -> {
                log.info("Query (%s): %s", how, queryPlus.getQuery());
                issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
                return baseRunner.run(queryPlus, responseContext);
            };
        }

        @Override
        public <T> QueryRunner<T> getQueryRunnerForSegments(Query<T> query, Iterable<SegmentDescriptor> specs) {
            final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForSegments(query, specs);
            return (queryPlus, responseContext) -> {
                log.info("Query (%s): %s", how, queryPlus.getQuery());
                issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
                return baseRunner.run(queryPlus, responseContext);
            };
        }
    }
    walker = QueryStackTests.createClientQuerySegmentWalker(
        new CapturingWalker(
            QueryStackTests.createClusterQuerySegmentWalker(
                ImmutableMap.<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>>builder()
                    .put(FOO, makeTimeline(FOO, FOO_INLINE))
                    .put(BAR, makeTimeline(BAR, BAR_INLINE))
                    .put(MULTI, makeTimeline(MULTI, MULTI_VALUE_INLINE))
                    .put(GLOBAL, makeTimeline(GLOBAL, FOO_INLINE))
                    .put(ARRAY, makeTimeline(ARRAY, ARRAY_INLINE))
                    .put(ARRAY_UNKNOWN, makeTimeline(ARRAY_UNKNOWN, ARRAY_INLINE_UNKNOWN))
                    .build(),
                joinableFactory,
                conglomerate,
                schedulerForTest
            ),
            ClusterOrLocal.CLUSTER
        ),
        new CapturingWalker(
            QueryStackTests.createLocalQuerySegmentWalker(conglomerate, segmentWrangler, joinableFactory, schedulerForTest),
            ClusterOrLocal.LOCAL
        ),
        conglomerate,
        joinableFactory,
        serverConfig
    );
}
Also used : QueryToolChestTestHelper(org.apache.druid.query.QueryToolChestTestHelper) QueryPlus(org.apache.druid.query.QueryPlus) Arrays(java.util.Arrays) RowBasedSegment(org.apache.druid.segment.RowBasedSegment) SegmentWrangler(org.apache.druid.segment.SegmentWrangler) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) After(org.junit.After) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) ManualQueryPrioritizationStrategy(org.apache.druid.server.scheduling.ManualQueryPrioritizationStrategy) Sequence(org.apache.druid.java.util.common.guava.Sequence) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) DataSource(org.apache.druid.query.DataSource) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) JoinConditionAnalysis(org.apache.druid.segment.join.JoinConditionAnalysis) MapJoinableFactory(org.apache.druid.segment.join.MapJoinableFactory) QueryContexts(org.apache.druid.query.QueryContexts) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) Objects(java.util.Objects) InlineJoinableFactory(org.apache.druid.segment.join.InlineJoinableFactory) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) MapSegmentWrangler(org.apache.druid.segment.MapSegmentWrangler) Optional(java.util.Optional) SegmentId(org.apache.druid.timeline.SegmentId) Logger(org.apache.druid.java.util.common.logger.Logger) ComparableList(org.apache.druid.segment.data.ComparableList) Joinable(org.apache.druid.segment.join.Joinable) Intervals(org.apache.druid.java.util.common.Intervals) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) JoinType(org.apache.druid.segment.join.JoinType) InlineSegmentWrangler(org.apache.druid.segment.InlineSegmentWrangler) JoinableFactory(org.apache.druid.segment.join.JoinableFactory) DirectDruidClient(org.apache.druid.client.DirectDruidClient) ScanQuery(org.apache.druid.query.scan.ScanQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) JoinDataSource(org.apache.druid.query.JoinDataSource) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) ExpectedException(org.junit.rules.ExpectedException) Sequences(org.apache.druid.java.util.common.guava.Sequences) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) Before(org.junit.Before) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) InlineDataSource(org.apache.druid.query.InlineDataSource) GroupByStrategyV2(org.apache.druid.query.groupby.strategy.GroupByStrategyV2) ResponseContext(org.apache.druid.query.context.ResponseContext) ServerConfig(org.apache.druid.server.initialization.ServerConfig) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) 
ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) Test(org.junit.Test) IOException(java.io.IOException) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) TableDataSource(org.apache.druid.query.TableDataSource) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) UnionDataSource(org.apache.druid.query.UnionDataSource) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) GroupByQueryHelper(org.apache.druid.query.groupby.GroupByQueryHelper) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) ColumnType(org.apache.druid.segment.column.ColumnType) NoQueryLaningStrategy(org.apache.druid.server.scheduling.NoQueryLaningStrategy) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections)
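
The CapturingWalker defined above is just a decorator: it wraps whichever QueryRunner the base walker hands back, records the query, and delegates. A minimal standalone sketch of the same pattern, assuming an externally supplied list for the captured queries (the helper name is illustrative, not part of the test):

static <T> QueryRunner<T> capturing(QueryRunner<T> baseRunner, List<Query<?>> captured) {
    // QueryRunner is a functional interface: run(QueryPlus<T>, ResponseContext) -> Sequence<T>
    return (queryPlus, responseContext) -> {
        // record the query that reached this layer (it may have been rewritten upstream) ...
        captured.add(queryPlus.getQuery());
        // ... then delegate to the wrapped runner unchanged
        return baseRunner.run(queryPlus, responseContext);
    };
}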

Example 3 with GlobalTableDataSource

Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.

From class SegmentManagerBroadcastJoinIndexedTableTest, method testLoadMultipleIndexedTable.

@Test
public void testLoadMultipleIndexedTable() throws IOException, SegmentLoadingException {
    final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
    Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));
    final String version = DateTimes.nowUtc().toString();
    final String version2 = DateTimes.nowUtc().plus(1000L).toString();
    final String interval = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
    final String interval2 = "2011-01-12T00:00:00.000Z/2011-03-28T00:00:00.000Z";
    IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
    IncrementalIndex data2 = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.top");
    Assert.assertTrue(segmentManager.loadSegment(createSegment(data, interval, version), false, SegmentLazyLoadFailCallback.NOOP));
    Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));
    Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
    Assert.assertTrue(maybeJoinable.isPresent());
    Joinable joinable = maybeJoinable.get();
    // cardinality currently tied to number of rows,
    Assert.assertEquals(733, joinable.getCardinality("market"));
    Assert.assertEquals(733, joinable.getCardinality("placement"));
    Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
    // add another segment with smaller interval, only partially overshadows so there will be 2 segments in timeline
    Assert.assertTrue(segmentManager.loadSegment(createSegment(data2, interval2, version2), false, SegmentLazyLoadFailCallback.NOOP));
    expectedException.expect(ISE.class);
    expectedException.expectMessage(StringUtils.format("Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.", TABLE_NAME));
    // this will explode because datasource has multiple segments which is an invalid state for the joinable factory
    makeJoinable(dataSource);
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) Joinable(org.apache.druid.segment.join.Joinable) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DataSource(org.apache.druid.query.DataSource) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
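
The getCorrelatedColumnValues assertion is easier to read with the arguments spelled out. A hedged paraphrase (the parameter names and exact return type below are approximations, not copied from the Joinable interface):

// search column = "market", search value = "spot", retrieval column = "placement",
// max correlation set size = Long.MAX_VALUE, allow non-key-column search = false.
// Result: the distinct "placement" values on rows where market == "spot", i.e. {"preferred"}.
Optional<Set<String>> correlated =
    joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false);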

Example 4 with GlobalTableDataSource

Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.

From class SegmentManagerBroadcastJoinIndexedTableTest, method emptyCacheKeyForUnsupportedCondition.

@Test
public void emptyCacheKeyForUnsupportedCondition() {
    final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
    JoinConditionAnalysis condition = EasyMock.mock(JoinConditionAnalysis.class);
    EasyMock.expect(condition.canHashJoin()).andReturn(false);
    EasyMock.replay(condition);
    Assert.assertNull(joinableFactory.build(dataSource, condition).orElse(null));
}
Also used : GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) JoinConditionAnalysis(org.apache.druid.segment.join.JoinConditionAnalysis) DataSource(org.apache.druid.query.DataSource) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
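
What the mock exercises is a simple guard on the join condition. A minimal sketch of the shape such a factory method takes, assumed for illustration rather than copied from the actual factory implementation:

public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition) {
    // Conditions that cannot be executed as a hash join produce no Joinable,
    // which is why the test above expects the returned Optional to be empty.
    if (!condition.canHashJoin()) {
        return Optional.empty();
    }
    // ... otherwise the broadcast segment would be looked up and wrapped as a Joinable (omitted)
    return Optional.empty();
}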

Example 5 with GlobalTableDataSource

Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.

From class SegmentManagerBroadcastJoinIndexedTableTest, method testLoadMultipleIndexedTableOverwrite.

@Test
public void testLoadMultipleIndexedTableOverwrite() throws IOException, SegmentLoadingException {
    final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
    Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));
    // larger interval overwrites smaller interval
    final String version = DateTimes.nowUtc().toString();
    final String version2 = DateTimes.nowUtc().plus(1000L).toString();
    final String interval = "2011-01-12T00:00:00.000Z/2011-03-28T00:00:00.000Z";
    final String interval2 = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
    IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.top");
    IncrementalIndex data2 = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
    DataSegment segment1 = createSegment(data, interval, version);
    DataSegment segment2 = createSegment(data2, interval2, version2);
    Assert.assertTrue(segmentManager.loadSegment(segment1, false, SegmentLazyLoadFailCallback.NOOP));
    Assert.assertTrue(segmentManager.loadSegment(segment2, false, SegmentLazyLoadFailCallback.NOOP));
    Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));
    Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
    Assert.assertTrue(maybeJoinable.isPresent());
    Joinable joinable = maybeJoinable.get();
    // cardinality currently tied to number of rows,
    Assert.assertEquals(733, joinable.getCardinality("market"));
    Assert.assertEquals(733, joinable.getCardinality("placement"));
    Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
    Optional<byte[]> cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
    Assert.assertTrue(cacheKey.isPresent());
    assertSegmentIdEquals(segment2.getId(), cacheKey.get());
    segmentManager.dropSegment(segment2);
    // if new segment is dropped for some reason that probably never happens, old table should still exist..
    maybeJoinable = makeJoinable(dataSource);
    Assert.assertTrue(maybeJoinable.isPresent());
    joinable = maybeJoinable.get();
    // cardinality currently tied to number of rows,
    Assert.assertEquals(478, joinable.getCardinality("market"));
    Assert.assertEquals(478, joinable.getCardinality("placement"));
    Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
    cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
    Assert.assertTrue(cacheKey.isPresent());
    assertSegmentIdEquals(segment1.getId(), cacheKey.get());
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) Joinable(org.apache.druid.segment.join.Joinable) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DataSegment(org.apache.druid.timeline.DataSegment) DataSource(org.apache.druid.query.DataSource) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
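
The cache-key assertions show that the key is derived from whichever single segment currently backs the broadcast table, so it changes when that segment is overwritten or dropped. A short usage sketch under that reading (variable names illustrative):

// A present key means join results against this datasource can safely be cached;
// the bytes would typically be folded into a wider result-cache key.
Optional<byte[]> cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
if (cacheKey.isPresent()) {
    byte[] keyBytes = cacheKey.get();
    // incorporate keyBytes into the cache key for the join query (application-specific)
}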

Aggregations

GlobalTableDataSource (org.apache.druid.query.GlobalTableDataSource): 13
DataSource (org.apache.druid.query.DataSource): 8
Test (org.junit.Test): 8
TableDataSource (org.apache.druid.query.TableDataSource): 6
Joinable (org.apache.druid.segment.join.Joinable): 4
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 4
ISE (org.apache.druid.java.util.common.ISE): 3
InlineDataSource (org.apache.druid.query.InlineDataSource): 3
QueryDataSource (org.apache.druid.query.QueryDataSource): 3
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 3
JoinConditionAnalysis (org.apache.druid.segment.join.JoinConditionAnalysis): 3
DataSegment (org.apache.druid.timeline.DataSegment): 3
ArrayList (java.util.ArrayList): 2
Optional (java.util.Optional): 2
CountDownLatch (java.util.concurrent.CountDownLatch): 2
Query (org.apache.druid.query.Query): 2
ColumnType (org.apache.druid.segment.column.ColumnType): 2
RowSignature (org.apache.druid.segment.column.RowSignature): 2
DruidTable (org.apache.druid.sql.calcite.table.DruidTable): 2
SegmentId (org.apache.druid.timeline.SegmentId): 2