use of org.apache.druid.segment.join.Joinable in project druid by druid-io.
the class JoinableClauses method createClauses.
/**
* Builds a list of {@link JoinableClause} corresponding to a list of {@link PreJoinableClause}. This will call
* {@link JoinableFactory#build} on each one and therefore may be an expensive operation.
*/
public static JoinableClauses createClauses(final List<PreJoinableClause> preClauses, final JoinableFactory joinableFactory) {
// Since building a JoinableClause can be expensive, check for prefix conflicts before building
checkPreJoinableClausesForDuplicatesAndShadowing(preClauses);
List<JoinableClause> joinableClauses = preClauses.stream().map(preJoinableClause -> {
final Optional<Joinable> joinable = joinableFactory.build(preJoinableClause.getDataSource(), preJoinableClause.getCondition());
return new JoinableClause(preJoinableClause.getPrefix(), joinable.orElseThrow(() -> new ISE("dataSource is not joinable: %s", preJoinableClause.getDataSource())), preJoinableClause.getJoinType(), preJoinableClause.getCondition());
}).collect(Collectors.toList());
return new JoinableClauses(joinableClauses);
}
use of org.apache.druid.segment.join.Joinable in project druid by druid-io.
the class ClientQuerySegmentWalkerTest method initWalker.
/**
* Initialize (or reinitialize) our {@link #walker} and {@link #closer}.
*/
private void initWalker(final Map<String, String> serverProperties, QueryScheduler schedulerForTest) {
final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
final ServerConfig serverConfig = jsonMapper.convertValue(serverProperties, ServerConfig.class);
final SegmentWrangler segmentWrangler = new MapSegmentWrangler(ImmutableMap.<Class<? extends DataSource>, SegmentWrangler>builder().put(InlineDataSource.class, new InlineSegmentWrangler()).build());
final JoinableFactory globalFactory = new JoinableFactory() {
@Override
public boolean isDirectlyJoinable(DataSource dataSource) {
return ((GlobalTableDataSource) dataSource).getName().equals(GLOBAL);
}
@Override
public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition) {
return Optional.empty();
}
};
final JoinableFactory joinableFactory = new MapJoinableFactory(ImmutableSet.of(globalFactory, new InlineJoinableFactory()), ImmutableMap.<Class<? extends JoinableFactory>, Class<? extends DataSource>>builder().put(InlineJoinableFactory.class, InlineDataSource.class).put(globalFactory.getClass(), GlobalTableDataSource.class).build());
class CapturingWalker implements QuerySegmentWalker {
private QuerySegmentWalker baseWalker;
private ClusterOrLocal how;
CapturingWalker(QuerySegmentWalker baseWalker, ClusterOrLocal how) {
this.baseWalker = baseWalker;
this.how = how;
}
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForIntervals(query, intervals);
return (queryPlus, responseContext) -> {
log.info("Query (%s): %s", how, queryPlus.getQuery());
issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
return baseRunner.run(queryPlus, responseContext);
};
}
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(Query<T> query, Iterable<SegmentDescriptor> specs) {
final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForSegments(query, specs);
return (queryPlus, responseContext) -> {
log.info("Query (%s): %s", how, queryPlus.getQuery());
issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
return baseRunner.run(queryPlus, responseContext);
};
}
}
walker = QueryStackTests.createClientQuerySegmentWalker(new CapturingWalker(QueryStackTests.createClusterQuerySegmentWalker(ImmutableMap.<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>>builder().put(FOO, makeTimeline(FOO, FOO_INLINE)).put(BAR, makeTimeline(BAR, BAR_INLINE)).put(MULTI, makeTimeline(MULTI, MULTI_VALUE_INLINE)).put(GLOBAL, makeTimeline(GLOBAL, FOO_INLINE)).put(ARRAY, makeTimeline(ARRAY, ARRAY_INLINE)).put(ARRAY_UNKNOWN, makeTimeline(ARRAY_UNKNOWN, ARRAY_INLINE_UNKNOWN)).build(), joinableFactory, conglomerate, schedulerForTest), ClusterOrLocal.CLUSTER), new CapturingWalker(QueryStackTests.createLocalQuerySegmentWalker(conglomerate, segmentWrangler, joinableFactory, schedulerForTest), ClusterOrLocal.LOCAL), conglomerate, joinableFactory, serverConfig);
}
use of org.apache.druid.segment.join.Joinable in project druid by druid-io.
the class SegmentManagerBroadcastJoinIndexedTableTest method testLoadMultipleIndexedTable.
@Test
public void testLoadMultipleIndexedTable() throws IOException, SegmentLoadingException {
final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));
final String version = DateTimes.nowUtc().toString();
final String version2 = DateTimes.nowUtc().plus(1000L).toString();
final String interval = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
final String interval2 = "2011-01-12T00:00:00.000Z/2011-03-28T00:00:00.000Z";
IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
IncrementalIndex data2 = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.top");
Assert.assertTrue(segmentManager.loadSegment(createSegment(data, interval, version), false, SegmentLazyLoadFailCallback.NOOP));
Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));
Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
Assert.assertTrue(maybeJoinable.isPresent());
Joinable joinable = maybeJoinable.get();
// cardinality currently tied to number of rows,
Assert.assertEquals(733, joinable.getCardinality("market"));
Assert.assertEquals(733, joinable.getCardinality("placement"));
Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
// add another segment with smaller interval, only partially overshadows so there will be 2 segments in timeline
Assert.assertTrue(segmentManager.loadSegment(createSegment(data2, interval2, version2), false, SegmentLazyLoadFailCallback.NOOP));
expectedException.expect(ISE.class);
expectedException.expectMessage(StringUtils.format("Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.", TABLE_NAME));
// this will explode because datasource has multiple segments which is an invalid state for the joinable factory
makeJoinable(dataSource);
}
use of org.apache.druid.segment.join.Joinable in project druid by druid-io.
the class SegmentManagerBroadcastJoinIndexedTableTest method testLoadMultipleIndexedTableOverwrite.
@Test
public void testLoadMultipleIndexedTableOverwrite() throws IOException, SegmentLoadingException {
final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));
// larger interval overwrites smaller interval
final String version = DateTimes.nowUtc().toString();
final String version2 = DateTimes.nowUtc().plus(1000L).toString();
final String interval = "2011-01-12T00:00:00.000Z/2011-03-28T00:00:00.000Z";
final String interval2 = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.top");
IncrementalIndex data2 = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
DataSegment segment1 = createSegment(data, interval, version);
DataSegment segment2 = createSegment(data2, interval2, version2);
Assert.assertTrue(segmentManager.loadSegment(segment1, false, SegmentLazyLoadFailCallback.NOOP));
Assert.assertTrue(segmentManager.loadSegment(segment2, false, SegmentLazyLoadFailCallback.NOOP));
Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));
Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
Assert.assertTrue(maybeJoinable.isPresent());
Joinable joinable = maybeJoinable.get();
// cardinality currently tied to number of rows,
Assert.assertEquals(733, joinable.getCardinality("market"));
Assert.assertEquals(733, joinable.getCardinality("placement"));
Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
Optional<byte[]> cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
Assert.assertTrue(cacheKey.isPresent());
assertSegmentIdEquals(segment2.getId(), cacheKey.get());
segmentManager.dropSegment(segment2);
// if new segment is dropped for some reason that probably never happens, old table should still exist..
maybeJoinable = makeJoinable(dataSource);
Assert.assertTrue(maybeJoinable.isPresent());
joinable = maybeJoinable.get();
// cardinality currently tied to number of rows,
Assert.assertEquals(478, joinable.getCardinality("market"));
Assert.assertEquals(478, joinable.getCardinality("placement"));
Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
Assert.assertTrue(cacheKey.isPresent());
assertSegmentIdEquals(segment1.getId(), cacheKey.get());
}
use of org.apache.druid.segment.join.Joinable in project druid by druid-io.
the class SegmentManagerBroadcastJoinIndexedTableTest method testLoadIndexedTable.
@Test
public void testLoadIndexedTable() throws IOException, SegmentLoadingException {
final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
Assert.assertFalse(joinableFactory.isDirectlyJoinable(dataSource));
final String version = DateTimes.nowUtc().toString();
IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
final String interval = "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z";
DataSegment segment = createSegment(data, interval, version);
Assert.assertTrue(segmentManager.loadSegment(segment, false, SegmentLazyLoadFailCallback.NOOP));
Assert.assertTrue(joinableFactory.isDirectlyJoinable(dataSource));
Optional<Joinable> maybeJoinable = makeJoinable(dataSource);
Assert.assertTrue(maybeJoinable.isPresent());
Joinable joinable = maybeJoinable.get();
// cardinality currently tied to number of rows,
Assert.assertEquals(1210, joinable.getCardinality("market"));
Assert.assertEquals(1210, joinable.getCardinality("placement"));
Assert.assertEquals(Optional.of(ImmutableSet.of("preferred")), joinable.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false));
Optional<byte[]> bytes = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
Assert.assertTrue(bytes.isPresent());
assertSegmentIdEquals(segment.getId(), bytes.get());
// dropping the segment should make the table no longer available
segmentManager.dropSegment(segment);
maybeJoinable = makeJoinable(dataSource);
Assert.assertFalse(maybeJoinable.isPresent());
bytes = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
Assert.assertFalse(bytes.isPresent());
}
Aggregations