Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class JoinableFactoryWrapperTest, method test_computeJoinDataSourceCacheKey_keyChangesWithBaseFilter.
@Test
public void test_computeJoinDataSourceCacheKey_keyChangesWithBaseFilter()
{
  DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
  EasyMock.expect(analysis.getJoinBaseTableFilter()).andReturn(Optional.of(TrueDimFilter.instance())).anyTimes();
  JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
  PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "abc == xyz", "ab");
  EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause1)).anyTimes();
  EasyMock.replay(analysis);

  Optional<byte[]> cacheKey1 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
  Assert.assertTrue(cacheKey1.isPresent());
  Assert.assertNotEquals(0, cacheKey1.get().length);

  // Same join clause, but the base-table filter flips from TRUE to FALSE: the cache key must change.
  PreJoinableClause clause2 = makeGlobalPreJoinableClause("dataSource_1", "abc == xyz", "ab");
  EasyMock.reset(analysis);
  EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause2)).anyTimes();
  EasyMock.expect(analysis.getJoinBaseTableFilter()).andReturn(Optional.of(FalseDimFilter.instance())).anyTimes();
  EasyMock.replay(analysis);

  Optional<byte[]> cacheKey2 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
  Assert.assertTrue(cacheKey2.isPresent());
  Assert.assertFalse(Arrays.equals(cacheKey1.get(), cacheKey2.get()));
}
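The test relies on a makeGlobalPreJoinableClause helper defined elsewhere in JoinableFactoryWrapperTest. A minimal sketch of what such a helper plausibly looks like, assuming Druid's JoinConditionAnalysis.forExpression and the (prefix, dataSource, joinType, condition) shape of the PreJoinableClause constructor; the real helper may differ:

// Sketch only: builds a LEFT-join clause against a broadcast (global) table.
// JoinType.LEFT and ExprMacroTable.nil() are assumptions, not the test's actual helper.
private PreJoinableClause makeGlobalPreJoinableClause(String tableName, String expression, String prefix)
{
  GlobalTableDataSource dataSource = new GlobalTableDataSource(tableName);
  JoinConditionAnalysis conditionAnalysis = JoinConditionAnalysis.forExpression(expression, prefix, ExprMacroTable.nil());
  return new PreJoinableClause(prefix, dataSource, JoinType.LEFT, conditionAnalysis);
}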
Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class BroadcastTableJoinableFactory, method getOnlyIndexedTable.
private Optional<ReferenceCountingIndexedTable> getOnlyIndexedTable(DataSource dataSource)
{
  GlobalTableDataSource broadcastDataSource = (GlobalTableDataSource) dataSource;
  DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(dataSource);
  return segmentManager.getIndexedTables(analysis).flatMap(tables -> {
    Iterator<ReferenceCountingIndexedTable> tableIterator = tables.iterator();
    if (!tableIterator.hasNext()) {
      return Optional.empty();
    }
    try {
      return Optional.of(Iterators.getOnlyElement(tableIterator));
    }
    catch (IllegalArgumentException iae) {
      throw new ISE(
          "Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple"
          + " segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.",
          broadcastDataSource.getName()
      );
    }
  });
}
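This private helper is reached through the factory's build method. A hedged sketch of such a caller, assuming the JoinableFactory.build signature and Druid's IndexedTableJoinable wrapper; the actual BroadcastTableJoinableFactory.build may differ:

// Sketch only: broadcast tables are joinable only when the condition supports a hash join.
@Override
public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
{
  if (condition.canHashJoin()) {
    return getOnlyIndexedTable(dataSource).map(IndexedTableJoinable::new);
  }
  return Optional.empty();
}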
Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class ClientQuerySegmentWalker, method canRunQueryUsingLocalWalker.
/**
* Checks if a query can be handled wholly by {@link #localClient}. Assumes that it is a
* {@link LocalQuerySegmentWalker} or something that behaves similarly.
*/
private <T> boolean canRunQueryUsingLocalWalker(Query<T> query)
{
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
  final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);

  // 1) Must be based on a concrete datasource that is not a table.
  // 2) Must be based on globally available data (so we have a copy here on the Broker).
  // 3) If there is an outer query, it must be handleable by the query toolchest (the local walker does not handle
  //    subqueries on its own).
  return analysis.isConcreteBased()
         && !analysis.isConcreteTableBased()
         && analysis.isGlobal()
         && (!analysis.isQuery()
             || toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery()));
}
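For intuition, a lookup is the canonical datasource that passes the first three checks: it is concrete and globally replicated on every server, but it is not a table. An illustrative snippet (the lookup name is hypothetical):

// Illustrative only: "country_lookup" is a made-up lookup name.
DataSourceAnalysis lookupAnalysis = DataSourceAnalysis.forDataSource(new LookupDataSource("country_lookup"));
// lookupAnalysis.isConcreteBased()      -> true
// lookupAnalysis.isConcreteTableBased() -> false (not a TableDataSource)
// lookupAnalysis.isGlobal()             -> true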
Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class LocalQuerySegmentWalker, method getQueryRunnerForIntervals.
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(final Query<T> query, final Iterable<Interval> intervals)
{
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
  if (!analysis.isConcreteBased() || !analysis.isGlobal()) {
    throw new IAE("Cannot query dataSource locally: %s", analysis.getDataSource());
  }

  // wrap in ReferenceCountingSegment, these aren't currently managed by SegmentManager so reference tracking doesn't
  // matter, but at least some or all will be in a future PR
  final Iterable<ReferenceCountingSegment> segments = FunctionalIterable
      .create(segmentWrangler.getSegmentsForIntervals(analysis.getBaseDataSource(), intervals))
      .transform(ReferenceCountingSegment::wrapRootGenerationSegment);

  final AtomicLong cpuAccumulator = new AtomicLong(0L);
  final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
      analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
      analysis.getPreJoinableClauses(),
      cpuAccumulator,
      analysis.getBaseQuery().orElse(query)
  );

  final QueryRunnerFactory<T, Query<T>> queryRunnerFactory = conglomerate.findFactory(query);
  final QueryRunner<T> baseRunner = queryRunnerFactory.mergeRunners(
      DirectQueryProcessingPool.INSTANCE,
      () -> StreamSupport.stream(segments.spliterator(), false)
                         .map(segmentMapFn)
                         .map(queryRunnerFactory::createRunner)
                         .iterator()
  );

  // Note: not calling 'postProcess'; it isn't official or documented functionality, so we only support it where
  // it is already supported.
  return new FluentQueryRunnerBuilder<>(queryRunnerFactory.getToolchest())
      .create(scheduler.wrapQueryRunner(baseRunner))
      .applyPreMergeDecoration()
      .mergeResults()
      .applyPostMergeDecoration()
      .emitCPUTimeMetric(emitter, cpuAccumulator);
}
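A hedged usage sketch: groupByQuery below is a hypothetical query whose datasource passes the checks above, and QueryPlus.wrap and ResponseContext.createEmpty are Druid's standard execution wrappers:

// Sketch only: execute over all time against the local walker.
final QueryRunner<ResultRow> runner = walker.getQueryRunnerForIntervals(groupByQuery, Intervals.ONLY_ETERNITY);
final Sequence<ResultRow> results = runner.run(QueryPlus.wrap(groupByQuery), ResponseContext.createEmpty());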
Use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.
The class SegmentManager, method getIndexedTables.
/**
 * Returns the collection of {@link IndexedTable} for the entire timeline (since join conditions do not currently
 * consider the query's intervals), if a timeline exists for the analysis' base datasource. Segments that have no
 * corresponding IndexedTable are filtered out.
 */
public Optional<Stream<ReferenceCountingIndexedTable>> getIndexedTables(DataSourceAnalysis analysis)
{
  return getTimeline(analysis).map(timeline -> {
    // join doesn't currently consider intervals, so just consider all segments
    final Stream<ReferenceCountingSegment> segments =
        timeline.lookup(Intervals.ETERNITY)
                .stream()
                .flatMap(x -> StreamSupport.stream(x.getObject().payloads().spliterator(), false));
    final TableDataSource tableDataSource = getTableDataSource(analysis);
    ConcurrentHashMap<SegmentId, ReferenceCountingIndexedTable> tables =
        Optional.ofNullable(dataSources.get(tableDataSource.getName()))
                .map(DataSourceState::getTablesLookup)
                .orElseThrow(() -> new ISE("Datasource %s does not have IndexedTables", tableDataSource.getName()));
    return segments.map(segment -> tables.get(segment.getId())).filter(Objects::nonNull);
  });
}
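A hedged usage sketch, counting the joinable segments of a broadcast datasource ("broadcast_ds" and the segmentManager reference are hypothetical):

// Illustrative only: an empty Optional means no timeline exists for the datasource.
DataSourceAnalysis broadcastAnalysis = DataSourceAnalysis.forDataSource(new GlobalTableDataSource("broadcast_ds"));
long joinableSegmentCount = segmentManager.getIndexedTables(broadcastAnalysis)
                                          .map(Stream::count)
                                          .orElse(0L);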