use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
the class ClientQuerySegmentWalker method getQueryRunnerForIntervals.
/**
 * Builds a runner for {@code query} restricted to {@code intervals}.
 *
 * <p>The query's datasource is rewritten in stages: subquery ids are populated, table datasources are
 * replaced by {@link GlobalTableDataSource} where possible, and subqueries are inlined if necessary. An
 * inlining dry run is performed first so that an unworkable structure is rejected before any real inlining
 * is attempted. The rewritten query is then handed to the local walker if it can run it, otherwise to the
 * cluster walker.
 *
 * @param query     the query as received; it is also passed to the returned {@link QuerySwappingQueryRunner}
 * @param intervals the intervals forwarded to whichever walker is chosen
 * @return a {@link QuerySwappingQueryRunner} built from the chosen walker's runner, the original query and
 *         the rewritten query
 * @throws ISE if the dry run shows that neither walker can run the query, or if the inlined query cannot be
 *             run by either walker
 */
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
  final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);

  // Populate the subquery ids of the subqueries present in the main query.
  final Query<T> queryWithSubqueryIds = query.withDataSource(
      generateSubqueryIds(query.getDataSource(), query.getId(), query.getSqlQueryId())
  );

  // Transform TableDataSource to GlobalTableDataSource when eligible, before any further transformation
  // that may inline subqueries.
  final DataSource freeTradeDataSource = globalizeIfPossible(queryWithSubqueryIds.getDataSource());

  // Do an inlining dry run to see if any inlining is necessary, without actually running the queries.
  final int maxSubqueryRows = QueryContexts.getMaxSubqueryRows(query, serverConfig.getMaxSubqueryRows());
  final DataSource inlineDryRun = inlineIfNecessary(
      freeTradeDataSource,
      toolChest,
      new AtomicInteger(),
      maxSubqueryRows,
      true
  );

  final boolean clusterCanRunDryRun = canRunQueryUsingClusterWalker(query.withDataSource(inlineDryRun));
  if (!clusterCanRunDryRun && !canRunQueryUsingLocalWalker(query.withDataSource(inlineDryRun))) {
    // Dry run didn't go well.
    throw new ISE("Cannot handle subquery structure for dataSource: %s", query.getDataSource());
  }

  // Now that we know the structure is workable, actually do the inlining (if necessary).
  final Query<T> inlinedQuery = queryWithSubqueryIds.withDataSource(
      inlineIfNecessary(freeTradeDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, false)
  );

  if (canRunQueryUsingLocalWalker(inlinedQuery)) {
    // No need to decorate since LocalQuerySegmentWalker does its own.
    final QueryRunner<T> localRunner = localClient.getQueryRunnerForIntervals(inlinedQuery, intervals);
    return new QuerySwappingQueryRunner<>(localRunner, query, inlinedQuery);
  }

  if (canRunQueryUsingClusterWalker(inlinedQuery)) {
    // See https://github.com/apache/druid/issues/9229 for details.
    final QueryRunner<T> clusterRunner = decorateClusterRunner(
        inlinedQuery,
        clusterClient.getQueryRunnerForIntervals(inlinedQuery, intervals)
    );
    return new QuerySwappingQueryRunner<>(clusterRunner, query, inlinedQuery);
  }

  // Not expected to be reachable: the dry run above should already have rejected any query
  // that can't be run with either the local or cluster walkers. If this message ever shows up it is a bug.
  throw new ISE("Inlined query could not be run");
}
use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
the class ClientQuerySegmentWalker method globalizeIfPossible.
/**
 * Recursively replaces table datasources with {@link GlobalTableDataSource} wherever the joinable factory
 * reports the global form as directly joinable.
 *
 * @param dataSource the datasource tree to rewrite
 * @return for a {@link TableDataSource}: a {@link GlobalTableDataSource} of the same name if it is directly
 *         joinable, otherwise the input itself; for any other datasource: the result of
 *         {@code withChildren} applied to its recursively rewritten children
 */
private DataSource globalizeIfPossible(final DataSource dataSource) {
  if (!(dataSource instanceof TableDataSource)) {
    // Not a table: rewrite every child and rebuild this node around the results.
    final List<DataSource> originalChildren = dataSource.getChildren();
    final List<DataSource> rewrittenChildren = new ArrayList<>(originalChildren.size());
    originalChildren.forEach(child -> rewrittenChildren.add(globalizeIfPossible(child)));
    return dataSource.withChildren(rewrittenChildren);
  }

  // A table: try the global form with the same name, and keep the original if it is not joinable.
  final String tableName = ((TableDataSource) dataSource).getName();
  final GlobalTableDataSource globalCandidate = new GlobalTableDataSource(tableName);
  return joinableFactory.isDirectlyJoinable(globalCandidate) ? globalCandidate : dataSource;
}
use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
the class BroadcastTableJoinableFactory method getOnlyIndexedTable.
/**
 * Looks up the single indexed table backing a broadcast datasource.
 *
 * @param dataSource the datasource to look up; it is cast to {@link GlobalTableDataSource}, so any other
 *                   type fails with a {@link ClassCastException}
 * @return the only indexed table the segment manager returns for the datasource, or an empty
 *         {@link Optional} when the segment manager returns nothing or returns no tables
 * @throws ISE if more than one indexed table is found for the datasource
 */
private Optional<ReferenceCountingIndexedTable> getOnlyIndexedTable(DataSource dataSource) {
  final GlobalTableDataSource globalTable = (GlobalTableDataSource) dataSource;
  final DataSourceAnalysis dataSourceAnalysis = DataSourceAnalysis.forDataSource(dataSource);

  return segmentManager.getIndexedTables(dataSourceAnalysis).flatMap(candidates -> {
    final Iterator<ReferenceCountingIndexedTable> remaining = candidates.iterator();
    if (remaining.hasNext()) {
      try {
        // getOnlyElement throws IllegalArgumentException when a second element follows the first.
        return Optional.of(Iterators.getOnlyElement(remaining));
      } catch (IllegalArgumentException iae) {
        throw new ISE("Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.", globalTable.getName());
      }
    }
    // No tables at all for this datasource.
    return Optional.empty();
  });
}
use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
the class SegmentManagerBroadcastJoinIndexedTableTest method testLoadIndexedTable.
/**
 * Checks that a global table datasource is not directly joinable before its segment is loaded, becomes
 * joinable (with the expected cardinalities, correlated values and cache key) after loading, and yields
 * neither a joinable nor a cache key once the segment is dropped.
 */
@Test
public void testLoadIndexedTable() throws IOException, SegmentLoadingException {
  final DataSource broadcastTable = new GlobalTableDataSource(TABLE_NAME);

  // Before any segment is loaded the datasource must not be reported as joinable.
  Assert.assertFalse(joinableFactory.isDirectlyJoinable(broadcastTable));

  final String segmentVersion = DateTimes.nowUtc().toString();
  final IncrementalIndex sampleIndex = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
  final DataSegment loadedSegment = createSegment(
      sampleIndex,
      "2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z",
      segmentVersion
  );
  Assert.assertTrue(segmentManager.loadSegment(loadedSegment, false, SegmentLazyLoadFailCallback.NOOP));

  // Once loaded, the datasource is joinable and a joinable can be built from it.
  Assert.assertTrue(joinableFactory.isDirectlyJoinable(broadcastTable));
  final Optional<Joinable> joinableAfterLoad = makeJoinable(broadcastTable);
  Assert.assertTrue(joinableAfterLoad.isPresent());
  final Joinable table = joinableAfterLoad.get();

  // cardinality is currently tied to the number of rows
  Assert.assertEquals(1210, table.getCardinality("market"));
  Assert.assertEquals(1210, table.getCardinality("placement"));
  Assert.assertEquals(
      Optional.of(ImmutableSet.of("preferred")),
      table.getCorrelatedColumnValues("market", "spot", "placement", Long.MAX_VALUE, false)
  );

  // The join cache key is checked against the loaded segment's id.
  final Optional<byte[]> cacheKeyAfterLoad = joinableFactory.computeJoinCacheKey(broadcastTable, JOIN_CONDITION_ANALYSIS);
  Assert.assertTrue(cacheKeyAfterLoad.isPresent());
  assertSegmentIdEquals(loadedSegment.getId(), cacheKeyAfterLoad.get());

  // dropping the segment should make the table no longer available
  segmentManager.dropSegment(loadedSegment);
  Assert.assertFalse(makeJoinable(broadcastTable).isPresent());
  Assert.assertFalse(joinableFactory.computeJoinCacheKey(broadcastTable, JOIN_CONDITION_ANALYSIS).isPresent());
}
use of org.apache.druid.query.GlobalTableDataSource in project druid by druid-io.
the class ClientQuerySegmentWalkerTest method testTimeseriesOnAutomaticGlobalTable.
/**
 * Submits a timeseries query that names the global table as a plain datasource and expects a single cluster
 * query against the equivalent {@link GlobalTableDataSource}, with every scheduler counter at exactly one.
 */
@Test
public void testTimeseriesOnAutomaticGlobalTable() {
  final TimeseriesQuery submittedQuery = (TimeseriesQuery) Druids
      .newTimeseriesQueryBuilder()
      .dataSource(GLOBAL)
      .granularity(Granularities.ALL)
      .intervals(Collections.singletonList(INTERVAL))
      .aggregators(new LongSumAggregatorFactory("sum", "n"))
      .context(ImmutableMap.of(TimeseriesQuery.CTX_GRAND_TOTAL, false))
      .build()
      .withId(DUMMY_QUERY_ID);

  // The global/joinable datasource is expected to be automatically translated into a GlobalTableDataSource;
  // everything else about the query stays the same.
  final TimeseriesQuery globalizedQuery = (TimeseriesQuery) Druids
      .newTimeseriesQueryBuilder()
      .dataSource(new GlobalTableDataSource(GLOBAL))
      .granularity(Granularities.ALL)
      .intervals(Collections.singletonList(INTERVAL))
      .aggregators(new LongSumAggregatorFactory("sum", "n"))
      .context(ImmutableMap.of(TimeseriesQuery.CTX_GRAND_TOTAL, false))
      .build()
      .withId(DUMMY_QUERY_ID);

  final Object[] expectedRow = new Object[] { INTERVAL.getStartMillis(), 10L };
  testQuery(
      submittedQuery,
      ImmutableList.of(ExpectedQuery.cluster(globalizedQuery)),
      ImmutableList.of(expectedRow)
  );

  // Each scheduler counter is expected to be exactly one.
  Assert.assertEquals(1, scheduler.getTotalRun().get());
  Assert.assertEquals(1, scheduler.getTotalPrioritizedAndLaned().get());
  Assert.assertEquals(1, scheduler.getTotalAcquired().get());
  Assert.assertEquals(1, scheduler.getTotalReleased().get());
}
Aggregations