Search in sources :

Example 16 with DataSourceAnalysis

use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.

From the class JoinableFactoryWrapperTest, the method test_computeJoinDataSourceCacheKey_keyChangesWithBaseFilter:

@Test
public void test_computeJoinDataSourceCacheKey_keyChangesWithBaseFilter() {
    final DataSourceAnalysis mockAnalysis = EasyMock.mock(DataSourceAnalysis.class);
    final JoinableFactoryWrapper wrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());

    // First pass: identical join clause, TRUE base-table filter.
    final PreJoinableClause firstClause = makeGlobalPreJoinableClause("dataSource_1", "abc == xyz", "ab");
    EasyMock.expect(mockAnalysis.getJoinBaseTableFilter()).andReturn(Optional.of(TrueDimFilter.instance())).anyTimes();
    EasyMock.expect(mockAnalysis.getPreJoinableClauses()).andReturn(Collections.singletonList(firstClause)).anyTimes();
    EasyMock.replay(mockAnalysis);
    final Optional<byte[]> keyWithTrueFilter = wrapper.computeJoinDataSourceCacheKey(mockAnalysis);
    Assert.assertTrue(keyWithTrueFilter.isPresent());
    Assert.assertNotEquals(0, keyWithTrueFilter.get().length);

    // Second pass: same clause but FALSE base-table filter — the cache key must change.
    final PreJoinableClause secondClause = makeGlobalPreJoinableClause("dataSource_1", "abc == xyz", "ab");
    EasyMock.reset(mockAnalysis);
    EasyMock.expect(mockAnalysis.getJoinBaseTableFilter()).andReturn(Optional.of(FalseDimFilter.instance())).anyTimes();
    EasyMock.expect(mockAnalysis.getPreJoinableClauses()).andReturn(Collections.singletonList(secondClause)).anyTimes();
    EasyMock.replay(mockAnalysis);
    final Optional<byte[]> keyWithFalseFilter = wrapper.computeJoinDataSourceCacheKey(mockAnalysis);
    Assert.assertTrue(keyWithFalseFilter.isPresent());
    Assert.assertFalse(Arrays.equals(keyWithTrueFilter.get(), keyWithFalseFilter.get()));
}
Also used : PreJoinableClause(org.apache.druid.query.planning.PreJoinableClause) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) NullHandlingTest(org.apache.druid.common.config.NullHandlingTest) Test(org.junit.Test)

Example 17 with DataSourceAnalysis

use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.

From the class BroadcastTableJoinableFactory, the method getOnlyIndexedTable:

private Optional<ReferenceCountingIndexedTable> getOnlyIndexedTable(DataSource dataSource) {
    // This factory only handles globally-broadcast tables; the cast is expected to succeed.
    final GlobalTableDataSource globalDataSource = (GlobalTableDataSource) dataSource;
    final DataSourceAnalysis dataSourceAnalysis = DataSourceAnalysis.forDataSource(dataSource);
    return segmentManager.getIndexedTables(dataSourceAnalysis).flatMap(indexedTables -> {
        final Iterator<ReferenceCountingIndexedTable> tableIterator = indexedTables.iterator();
        if (!tableIterator.hasNext()) {
            // No indexed tables at all for this datasource.
            return Optional.empty();
        }
        try {
            // Exactly one table is expected; getOnlyElement throws if more than one remains.
            return Optional.of(Iterators.getOnlyElement(tableIterator));
        } catch (IllegalArgumentException iae) {
            throw new ISE("Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.", globalDataSource.getName());
        }
    });
}
Also used : ReferenceCountingIndexedTable(org.apache.druid.segment.join.table.ReferenceCountingIndexedTable) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) ISE(org.apache.druid.java.util.common.ISE) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis)

Example 18 with DataSourceAnalysis

use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.

From the class ClientQuerySegmentWalker, the method canRunQueryUsingLocalWalker:

/**
 * Checks if a query can be handled wholly by {@link #localClient}. Assumes that it is a
 * {@link LocalQuerySegmentWalker} or something that behaves similarly.
 */
private <T> boolean canRunQueryUsingLocalWalker(Query<T> query) {
    final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
    final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
    // Concrete data that is NOT table-based (tables live on data servers, not locally).
    final boolean concreteNonTable = analysis.isConcreteBased() && !analysis.isConcreteTableBased();
    // If the datasource is itself a query, the toolchest must be able to run that subquery,
    // since the local walker cannot process subqueries on its own.
    final boolean subqueryOk =
            !analysis.isQuery()
            || toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery());
    return concreteNonTable && analysis.isGlobal() && subqueryOk;
}
Also used : Query(org.apache.druid.query.Query) QueryDataSource(org.apache.druid.query.QueryDataSource) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis)

Example 19 with DataSourceAnalysis

use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.

From the class LocalQuerySegmentWalker, the method getQueryRunnerForIntervals:

@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(final Query<T> query, final Iterable<Interval> intervals) {
    final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
    if (!analysis.isConcreteBased() || !analysis.isGlobal()) {
        throw new IAE("Cannot query dataSource locally: %s", analysis.getDataSource());
    }

    // Wrap in ReferenceCountingSegment; these aren't currently managed by SegmentManager, so reference
    // tracking doesn't matter yet, but at least some or all will be in a future PR.
    final Iterable<ReferenceCountingSegment> segments = FunctionalIterable
            .create(segmentWrangler.getSegmentsForIntervals(analysis.getBaseDataSource(), intervals))
            .transform(ReferenceCountingSegment::wrapRootGenerationSegment);

    final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);

    // Per-segment mapping that applies the pre-joinable clauses and the optional base-table filter.
    final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
            analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
            analysis.getPreJoinableClauses(),
            cpuTimeAccumulator,
            analysis.getBaseQuery().orElse(query)
    );

    final QueryRunnerFactory<T, Query<T>> queryRunnerFactory = conglomerate.findFactory(query);
    final QueryRunner<T> baseRunner = queryRunnerFactory.mergeRunners(
            DirectQueryProcessingPool.INSTANCE,
            () -> StreamSupport.stream(segments.spliterator(), false)
                    .map(segmentMapFn)
                    .map(queryRunnerFactory::createRunner)
                    .iterator()
    );

    return new FluentQueryRunnerBuilder<>(queryRunnerFactory.getToolchest())
            .create(scheduler.wrapQueryRunner(baseRunner))
            .applyPreMergeDecoration()
            .mergeResults()
            .applyPostMergeDecoration()
            .emitCPUTimeMetric(emitter, cpuTimeAccumulator);
}
Also used : ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) Query(org.apache.druid.query.Query) SegmentReference(org.apache.druid.segment.SegmentReference) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) IAE(org.apache.druid.java.util.common.IAE) AtomicLong(java.util.concurrent.atomic.AtomicLong) Filters(org.apache.druid.segment.filter.Filters)

Example 20 with DataSourceAnalysis

use of org.apache.druid.query.planning.DataSourceAnalysis in project druid by druid-io.

From the class SegmentManager, the method getIndexedTables:

/**
 * Returns the collection of {@link IndexedTable} for the entire timeline (since join conditions do not currently
 * consider the queries intervals), if the timeline exists for each of its segments that are joinable.
 */
public Optional<Stream<ReferenceCountingIndexedTable>> getIndexedTables(DataSourceAnalysis analysis) {
    return getTimeline(analysis).map(timeline -> {
        final TableDataSource tableDataSource = getTableDataSource(analysis);
        // Look up the per-segment table map for this datasource; fail loudly if none exists.
        final ConcurrentHashMap<SegmentId, ReferenceCountingIndexedTable> tableLookup =
                Optional.ofNullable(dataSources.get(tableDataSource.getName()))
                        .map(DataSourceState::getTablesLookup)
                        .orElseThrow(() -> new ISE("Datasource %s does not have IndexedTables", tableDataSource.getName()));
        // Join doesn't currently consider intervals, so just consider all segments.
        return timeline.lookup(Intervals.ETERNITY)
                .stream()
                .flatMap(holder -> StreamSupport.stream(holder.getObject().payloads().spliterator(), false))
                .map(segment -> tableLookup.get(segment.getId()))
                .filter(Objects::nonNull);
    });
}
Also used : ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) ReferenceCountingIndexedTable(org.apache.druid.segment.join.table.ReferenceCountingIndexedTable) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Intervals(org.apache.druid.java.util.common.Intervals) Inject(com.google.inject.Inject) SegmentLazyLoadFailCallback(org.apache.druid.segment.SegmentLazyLoadFailCallback) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) CollectionUtils(org.apache.druid.utils.CollectionUtils) SegmentLoader(org.apache.druid.segment.loading.SegmentLoader) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) Map(java.util.Map) StreamSupport(java.util.stream.StreamSupport) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) Closer(org.apache.druid.java.util.common.io.Closer) IndexedTable(org.apache.druid.segment.join.table.IndexedTable) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) TableDataSource(org.apache.druid.query.TableDataSource) Objects(java.util.Objects) Stream(java.util.stream.Stream) Ordering(com.google.common.collect.Ordering) DataSegment(org.apache.druid.timeline.DataSegment) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SegmentId(org.apache.druid.timeline.SegmentId) ReferenceCountingIndexedTable(org.apache.druid.segment.join.table.ReferenceCountingIndexedTable) TableDataSource(org.apache.druid.query.TableDataSource) SegmentId(org.apache.druid.timeline.SegmentId) Objects(java.util.Objects) ISE(org.apache.druid.java.util.common.ISE)

Aggregations

DataSourceAnalysis (org.apache.druid.query.planning.DataSourceAnalysis)27 PreJoinableClause (org.apache.druid.query.planning.PreJoinableClause)11 Test (org.junit.Test)11 NullHandlingTest (org.apache.druid.common.config.NullHandlingTest)10 ISE (org.apache.druid.java.util.common.ISE)10 Query (org.apache.druid.query.Query)7 AtomicLong (java.util.concurrent.atomic.AtomicLong)5 NoopQueryRunner (org.apache.druid.query.NoopQueryRunner)5 ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment)5 VersionedIntervalTimeline (org.apache.druid.timeline.VersionedIntervalTimeline)5 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 ArrayList (java.util.ArrayList)4 Optional (java.util.Optional)4 QueryDataSource (org.apache.druid.query.QueryDataSource)4 QueryRunner (org.apache.druid.query.QueryRunner)4 TableDataSource (org.apache.druid.query.TableDataSource)4 SegmentReference (org.apache.druid.segment.SegmentReference)4 Filters (org.apache.druid.segment.filter.Filters)4 Preconditions (com.google.common.base.Preconditions)3 Iterables (com.google.common.collect.Iterables)3