Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
From the class DruidSchema, method buildDruidTable:
@VisibleForTesting
DruidTable buildDruidTable(final String dataSource)
{
  ConcurrentSkipListMap<SegmentId, AvailableSegmentMetadata> segmentsMap = segmentMetadataInfo.get(dataSource);
  final Map<String, ColumnType> columnTypes = new TreeMap<>();

  if (segmentsMap != null) {
    for (AvailableSegmentMetadata availableSegmentMetadata : segmentsMap.values()) {
      final RowSignature rowSignature = availableSegmentMetadata.getRowSignature();
      if (rowSignature != null) {
        for (String column : rowSignature.getColumnNames()) {
          // Newer column types should override older ones.
          final ColumnType columnType = rowSignature.getColumnType(column)
                                                    .orElseThrow(() -> new ISE("Encountered null type for column[%s]", column));
          columnTypes.putIfAbsent(column, columnType);
        }
      }
    }
  }

  final RowSignature.Builder builder = RowSignature.builder();
  columnTypes.forEach(builder::add);

  final TableDataSource tableDataSource;

  // To be a GlobalTableDataSource instead of a TableDataSource, it must appear on all servers (inferred by existing
  // in the segment cache, which in this case belongs to the broker, meaning only broadcast segments live here).
  // To be joinable, it must be possibly joinable according to the factory. We only consider broadcast datasources
  // at this time, and isGlobal is currently strongly coupled with joinable, so only make a global table datasource
  // if it is also joinable.
  final GlobalTableDataSource maybeGlobal = new GlobalTableDataSource(dataSource);
  final boolean isJoinable = joinableFactory.isDirectlyJoinable(maybeGlobal);
  final boolean isBroadcast = segmentManager.getDataSourceNames().contains(dataSource);

  if (isBroadcast && isJoinable) {
    tableDataSource = maybeGlobal;
  } else {
    tableDataSource = new TableDataSource(dataSource);
  }

  return new DruidTable(tableDataSource, builder.build(), null, isJoinable, isBroadcast);
}
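The promotion at the end of the method is the part worth internalizing: being broadcast alone, or joinable alone, is not enough; both must hold before the table becomes a GlobalTableDataSource. A minimal standalone sketch of just that rule, where chooseDataSource is a hypothetical helper name and the two flags are supplied by the caller:

import org.apache.druid.query.DataSource;
import org.apache.druid.query.GlobalTableDataSource;
import org.apache.druid.query.TableDataSource;

public class DataSourcePromotion
{
  // Hypothetical helper mirroring the rule above: promote to GlobalTableDataSource
  // only when the datasource is both broadcast and directly joinable.
  static DataSource chooseDataSource(String name, boolean isBroadcast, boolean isJoinable)
  {
    return (isBroadcast && isJoinable) ? new GlobalTableDataSource(name) : new TableDataSource(name);
  }
}

Note that DruidTable still records isJoinable and isBroadcast separately, so the planner can tell a broadcast-but-not-joinable table apart from a fully global one.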
Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
From the class ClientQuerySegmentWalkerTest, method initWalker:
/**
 * Initialize (or reinitialize) our {@link #walker} and {@link #closer}.
 */
private void initWalker(final Map<String, String> serverProperties, QueryScheduler schedulerForTest)
{
  final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
  final ServerConfig serverConfig = jsonMapper.convertValue(serverProperties, ServerConfig.class);
  final SegmentWrangler segmentWrangler = new MapSegmentWrangler(
      ImmutableMap.<Class<? extends DataSource>, SegmentWrangler>builder()
                  .put(InlineDataSource.class, new InlineSegmentWrangler())
                  .build()
  );

  final JoinableFactory globalFactory = new JoinableFactory()
  {
    @Override
    public boolean isDirectlyJoinable(DataSource dataSource)
    {
      return ((GlobalTableDataSource) dataSource).getName().equals(GLOBAL);
    }

    @Override
    public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
    {
      return Optional.empty();
    }
  };

  final JoinableFactory joinableFactory = new MapJoinableFactory(
      ImmutableSet.of(globalFactory, new InlineJoinableFactory()),
      ImmutableMap.<Class<? extends JoinableFactory>, Class<? extends DataSource>>builder()
                  .put(InlineJoinableFactory.class, InlineDataSource.class)
                  .put(globalFactory.getClass(), GlobalTableDataSource.class)
                  .build()
  );

  class CapturingWalker implements QuerySegmentWalker
  {
    private QuerySegmentWalker baseWalker;
    private ClusterOrLocal how;

    CapturingWalker(QuerySegmentWalker baseWalker, ClusterOrLocal how)
    {
      this.baseWalker = baseWalker;
      this.how = how;
    }

    @Override
    public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals)
    {
      final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForIntervals(query, intervals);
      return (queryPlus, responseContext) -> {
        log.info("Query (%s): %s", how, queryPlus.getQuery());
        issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
        return baseRunner.run(queryPlus, responseContext);
      };
    }

    @Override
    public <T> QueryRunner<T> getQueryRunnerForSegments(Query<T> query, Iterable<SegmentDescriptor> specs)
    {
      final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForSegments(query, specs);
      return (queryPlus, responseContext) -> {
        log.info("Query (%s): %s", how, queryPlus.getQuery());
        issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
        return baseRunner.run(queryPlus, responseContext);
      };
    }
  }

  walker = QueryStackTests.createClientQuerySegmentWalker(
      new CapturingWalker(
          QueryStackTests.createClusterQuerySegmentWalker(
              ImmutableMap.<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>>builder()
                          .put(FOO, makeTimeline(FOO, FOO_INLINE))
                          .put(BAR, makeTimeline(BAR, BAR_INLINE))
                          .put(MULTI, makeTimeline(MULTI, MULTI_VALUE_INLINE))
                          .put(GLOBAL, makeTimeline(GLOBAL, FOO_INLINE))
                          .put(ARRAY, makeTimeline(ARRAY, ARRAY_INLINE))
                          .put(ARRAY_UNKNOWN, makeTimeline(ARRAY_UNKNOWN, ARRAY_INLINE_UNKNOWN))
                          .build(),
              joinableFactory,
              conglomerate,
              schedulerForTest
          ),
          ClusterOrLocal.CLUSTER
      ),
      new CapturingWalker(
          QueryStackTests.createLocalQuerySegmentWalker(conglomerate, segmentWrangler, joinableFactory, schedulerForTest),
          ClusterOrLocal.LOCAL
      ),
      conglomerate,
      joinableFactory,
      serverConfig
  );
}
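CapturingWalker is a plain decorator: it delegates to a base walker and records each query before running it. The same idea can be expressed as a generic QueryRunner wrapper; a minimal sketch, where RecordingRunner and the sink queue are illustrative names rather than part of the test:

import java.util.Queue;
import org.apache.druid.query.QueryRunner;

public class RecordingRunner
{
  // Illustrative helper: wrap any QueryRunner so each query passed through it is
  // recorded in the sink before being handed to the delegate unchanged.
  static <T> QueryRunner<T> recording(QueryRunner<T> delegate, Queue<Object> sink)
  {
    return (queryPlus, responseContext) -> {
      sink.add(queryPlus.getQuery());
      return delegate.run(queryPlus, responseContext);
    };
  }
}

Capturing on the runner rather than on the walker means the test records exactly the queries that reach the cluster or local execution path, after any rewriting by the client walker.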
Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
From the class SegmentManagerBroadcastJoinIndexedTableTest, method testLoadMultipleIndexedTable:
@Test
public void testLoadMultipleIndexedTable() throws IOException, SegmentLoadingException
{
  final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
  Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));

  final String version = DateTimes.nowUtc().toString();
  final String version2 = DateTimes.nowUtc().plus(1000L).toString();
  final String interval = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
  final String interval2 = "2011-01-12T00:00:00.000Z/2011-03-28T00:00:00.000Z";
  IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
  IncrementalIndex data2 = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.top");

  Assert.assertTrue(segmentManager.loadSegment(createSegment(data, interval, version), false, SegmentLazyLoadFailCallback.NOOP));
  Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));

  Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
  Assert.assertTrue(maybeJoinable.isPresent());
  Joinable joinable = maybeJoinable.get();

  // Cardinality is currently tied to the number of rows.
  Assert.assertEquals(733, joinable.getCardinality("market"));
  Assert.assertEquals(733, joinable.getCardinality("placement"));
  Assert.assertEquals(
      Optional.of(ImmutableSet.of("preferred")),
      joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false)
  );

  // Add another segment with a smaller interval; it only partially overshadows the first,
  // so there will be 2 segments in the timeline.
  Assert.assertTrue(segmentManager.loadSegment(createSegment(data2, interval2, version2), false, SegmentLazyLoadFailCallback.NOOP));

  expectedException.expect(ISE.class);
  expectedException.expectMessage(
      StringUtils.format(
          "Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.",
          TABLE_NAME
      )
  );

  // This will explode because the datasource has multiple segments, which is an invalid state for the joinable factory.
  makeJoinable(dataSource);
}
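makeJoinable is a private helper of the test that this snippet does not show. A plausible sketch, assuming it simply asks the factory to build a Joinable for some hash-joinable equi-condition; the join expression and the "j." prefix below are assumptions, not the test's actual values:

private Optional<Joinable> makeJoinable(DataSource dataSource)
{
  // Assumed condition: any hash-joinable equi-join is enough to exercise the factory.
  return joinableFactory.build(
      dataSource,
      JoinConditionAnalysis.forExpression("market == \"j.market\"", "j.", ExprMacroTable.nil())
  );
}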
Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
From the class SegmentManagerBroadcastJoinIndexedTableTest, method emptyCacheKeyForUnsupportedCondition:
@Test
public void emptyCacheKeyForUnsupportedCondition()
{
  final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
  JoinConditionAnalysis condition = EasyMock.mock(JoinConditionAnalysis.class);
  EasyMock.expect(condition.canHashJoin()).andReturn(false);
  EasyMock.replay(condition);
  Assert.assertNull(joinableFactory.build(dataSource, condition).orElse(null));
}
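The empty result comes from the factory declining conditions it cannot hash-join. A hedged sketch of the guard a build implementation typically applies; this is illustrative, not the actual BroadcastTableJoinableFactory source:

@Override
public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
{
  // Broadcast tables are served from an indexed (hash) table, so non-equi
  // conditions cannot be used; declining lets the caller handle the miss.
  if (!condition.canHashJoin()) {
    return Optional.empty();
  }
  // ... otherwise look up the broadcast segment's indexed table and wrap it as a Joinable ...
  return Optional.empty(); // placeholder: the lookup is elided in this sketch
}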
Use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
From the class SegmentManagerBroadcastJoinIndexedTableTest, method testLoadMultipleIndexedTableOverwrite:
@Test
public void testLoadMultipleIndexedTableOverwrite() throws IOException, SegmentLoadingException
{
  final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
  Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));

  // The segment with the larger interval overwrites the one with the smaller interval.
  final String version = DateTimes.nowUtc().toString();
  final String version2 = DateTimes.nowUtc().plus(1000L).toString();
  final String interval = "2011-01-12T00:00:00.000Z/2011-03-28T00:00:00.000Z";
  final String interval2 = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
  IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.top");
  IncrementalIndex data2 = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
  DataSegment segment1 = createSegment(data, interval, version);
  DataSegment segment2 = createSegment(data2, interval2, version2);

  Assert.assertTrue(segmentManager.loadSegment(segment1, false, SegmentLazyLoadFailCallback.NOOP));
  Assert.assertTrue(segmentManager.loadSegment(segment2, false, SegmentLazyLoadFailCallback.NOOP));
  Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));

  Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
  Assert.assertTrue(maybeJoinable.isPresent());
  Joinable joinable = maybeJoinable.get();

  // Cardinality is currently tied to the number of rows.
  Assert.assertEquals(733, joinable.getCardinality("market"));
  Assert.assertEquals(733, joinable.getCardinality("placement"));
  Assert.assertEquals(
      Optional.of(ImmutableSet.of("preferred")),
      joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false)
  );

  Optional<byte[]> cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
  Assert.assertTrue(cacheKey.isPresent());
  assertSegmentIdEquals(segment2.getId(), cacheKey.get());

  segmentManager.dropSegment(segment2);

  // If the new segment is dropped (for some reason that probably never happens), the old table should still exist.
  maybeJoinable = makeJoinable(dataSource);
  Assert.assertTrue(maybeJoinable.isPresent());
  joinable = maybeJoinable.get();

  // Cardinality is currently tied to the number of rows.
  Assert.assertEquals(478, joinable.getCardinality("market"));
  Assert.assertEquals(478, joinable.getCardinality("placement"));
  Assert.assertEquals(
      Optional.of(ImmutableSet.of("preferred")),
      joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false)
  );

  cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
  Assert.assertTrue(cacheKey.isPresent());
  assertSegmentIdEquals(segment1.getId(), cacheKey.get());
}
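The cache-key assertions are the point of the second half of the test: computeJoinCacheKey must track whichever segment currently backs the broadcast table, so cached join results are invalidated when the segment changes. A hedged sketch of how a caller might consume it; cache, computeCacheKey, query, and resultBytes are illustrative, not Druid API:

Optional<byte[]> joinKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
if (joinKey.isPresent()) {
  // Safe to cache: the key pins the exact segment version backing the join table.
  cache.put(computeCacheKey(query, joinKey.get()), resultBytes);
} else {
  // No stable key (e.g. the condition is not hash-joinable): skip caching this result.
}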