Use of org.apache.druid.query.TableDataSource in project druid by druid-io.
The class ClientQuerySegmentWalker, method getQueryRunnerForIntervals.
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
// Transform TableDataSource to GlobalTableDataSource when eligible, before any further
// transformation that may inline sub-queries.
// Populate subquery ids for the subqueries present in the main query.
Query<T> newQuery = query.withDataSource(generateSubqueryIds(query.getDataSource(), query.getId(), query.getSqlQueryId()));
final DataSource freeTradeDataSource = globalizeIfPossible(newQuery.getDataSource());
// do an inlining dry run to see if any inlining is necessary, without actually running the queries.
final int maxSubqueryRows = QueryContexts.getMaxSubqueryRows(query, serverConfig.getMaxSubqueryRows());
final DataSource inlineDryRun = inlineIfNecessary(freeTradeDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, true);
if (!canRunQueryUsingClusterWalker(query.withDataSource(inlineDryRun)) && !canRunQueryUsingLocalWalker(query.withDataSource(inlineDryRun))) {
// Dry run didn't go well.
throw new ISE("Cannot handle subquery structure for dataSource: %s", query.getDataSource());
}
// Now that we know the structure is workable, actually do the inlining (if necessary).
newQuery = newQuery.withDataSource(inlineIfNecessary(freeTradeDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, false));
if (canRunQueryUsingLocalWalker(newQuery)) {
// No need to decorate since LocalQuerySegmentWalker does its own.
return new QuerySwappingQueryRunner<>(localClient.getQueryRunnerForIntervals(newQuery, intervals), query, newQuery);
} else if (canRunQueryUsingClusterWalker(newQuery)) {
// See https://github.com/apache/druid/issues/9229 for details.
return new QuerySwappingQueryRunner<>(decorateClusterRunner(newQuery, clusterClient.getQueryRunnerForIntervals(newQuery, intervals)), query, newQuery);
} else {
// Not expected to happen: the dry run above should have rejected any query structure that can't be run with either the local or cluster walkers. If this message ever shows up it is a bug.
throw new ISE("Inlined query could not be run");
}
}
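For orientation, here is a minimal caller-side sketch of this entry point. It assumes a ClientQuerySegmentWalker instance named walker obtained through injection and a simple timeseries query over a TableDataSource; the datasource name, builder calls, and aggregator below are illustrative and not taken from the class above.
// Hypothetical usage sketch: build a timeseries query over a plain TableDataSource and hand it
// to the walker, which may rewrite the dataSource (globalize/inline) before running it.
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
    .dataSource(new TableDataSource("wikipedia"))   // assumed datasource name
    .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-08-31/2011-09-01"))))
    .granularity(Granularities.ALL)
    .aggregators(Collections.singletonList(new CountAggregatorFactory("rows")))
    .build();
QueryRunner<Result<TimeseriesResultValue>> runner =
    walker.getQueryRunnerForIntervals(query, query.getIntervals());
Sequence<Result<TimeseriesResultValue>> results =
    runner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());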
Use of org.apache.druid.query.TableDataSource in project druid by druid-io.
The class ClientQuerySegmentWalker, method globalizeIfPossible.
private DataSource globalizeIfPossible(final DataSource dataSource) {
if (dataSource instanceof TableDataSource) {
GlobalTableDataSource maybeGlobal = new GlobalTableDataSource(((TableDataSource) dataSource).getName());
if (joinableFactory.isDirectlyJoinable(maybeGlobal)) {
return maybeGlobal;
}
return dataSource;
} else {
List<DataSource> currentChildren = dataSource.getChildren();
List<DataSource> newChildren = new ArrayList<>(currentChildren.size());
for (DataSource child : currentChildren) {
newChildren.add(globalizeIfPossible(child));
}
return dataSource.withChildren(newChildren);
}
}
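For reference, a small sketch of the relationship this method relies on: GlobalTableDataSource extends TableDataSource and reports itself as global, so swapping it in changes how the query can be planned without changing the table name. The datasource name is illustrative.
// Illustrative only: a plain table vs. its "globalized" counterpart.
TableDataSource table = new TableDataSource("wikipedia");          // assumed name
GlobalTableDataSource global = new GlobalTableDataSource(table.getName());
// Same underlying table name:
//   table.getName().equals(global.getName())  -> true
// ...but only the global variant reports itself as a global (broadcast) datasource:
//   table.isGlobal()   -> false
//   global.isGlobal()  -> true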
Use of org.apache.druid.query.TableDataSource in project druid by druid-io.
The class QueryHostFinderTest, method testFindServer.
@Test
public void testFindServer() {
QueryHostFinder queryRunner = new QueryHostFinder(brokerSelector, new RendezvousHashAvaticaConnectionBalancer());
Server server = queryRunner.findServer(new TimeBoundaryQuery(
    new TableDataSource("test"),
    new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-08-31/2011-09-01"))),
    null,
    null,
    null));
Assert.assertEquals("foo", server.getHost());
}
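The same query can be written more compactly with the Druids builder, the idiomatic way to construct native queries in Druid tests. A small sketch, assuming that only the query's dataSource matters for host selection here and that the builder fills in defaults for the omitted TimeBoundaryQuery parameters.
// Sketch: equivalent construction via the builder; the host finder only needs the dataSource.
TimeBoundaryQuery query = Druids.newTimeBoundaryQueryBuilder()
    .dataSource(new TableDataSource("test"))
    .build();
Server server = queryRunner.findServer(query);
Assert.assertEquals("foo", server.getHost());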
Use of org.apache.druid.query.TableDataSource in project druid by druid-io.
The class DataSourceOptimizer, method optimize.
/**
 * Does the main work of materialized view selection: transforms the user query into one or more sub-queries.
 *
 * In each sub-query, the dataSource is a derivative of the dataSource in the user query, and together the
 * sub-queries' intervals cover the interval of the user query.
 *
 * The derived dataSource with the smallest average data size per segment granularity has the highest priority
 * to replace the dataSource in the user query.
 *
 * @param query only TopNQuery/TimeseriesQuery/GroupByQuery can be optimized
 * @return a list of queries with the selected derived dataSources and intervals
 */
public List<Query> optimize(Query query) {
long start = System.currentTimeMillis();
// only TopN/Timeseries/GroupBy queries over a TableDataSource can be optimized
if (!(query instanceof TopNQuery || query instanceof TimeseriesQuery || query instanceof GroupByQuery) || !(query.getDataSource() instanceof TableDataSource)) {
return Collections.singletonList(query);
}
String datasourceName = ((TableDataSource) query.getDataSource()).getName();
// get all derivatives for the datasource in the query. The derivatives set is sorted by average data size
// per segment granularity.
Set<DerivativeDataSource> derivatives = DerivativeDataSourceManager.getDerivatives(datasourceName);
if (derivatives.isEmpty()) {
return Collections.singletonList(query);
}
lock.readLock().lock();
try {
totalCount.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0)).incrementAndGet();
hitCount.putIfAbsent(datasourceName, new AtomicLong(0));
AtomicLong costTimeOfDataSource = costTime.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0));
// get all fields which the query required
Set<String> requiredFields = MaterializedViewUtils.getRequiredFields(query);
Set<DerivativeDataSource> derivativesWithRequiredFields = new HashSet<>();
for (DerivativeDataSource derivativeDataSource : derivatives) {
derivativesHitCount.putIfAbsent(derivativeDataSource.getName(), new AtomicLong(0));
if (derivativeDataSource.getColumns().containsAll(requiredFields)) {
derivativesWithRequiredFields.add(derivativeDataSource);
}
}
// if no derivative contains all required fields, materialized view selection fails.
if (derivativesWithRequiredFields.isEmpty()) {
missFields.computeIfAbsent(datasourceName, dsName -> new ConcurrentHashMap<>()).computeIfAbsent(requiredFields, rf -> new AtomicLong(0)).incrementAndGet();
costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
return Collections.singletonList(query);
}
List<Query> queries = new ArrayList<>();
List<Interval> remainingQueryIntervals = (List<Interval>) query.getIntervals();
for (DerivativeDataSource derivativeDataSource : ImmutableSortedSet.copyOf(derivativesWithRequiredFields)) {
final List<Interval> derivativeIntervals = remainingQueryIntervals.stream()
    .flatMap(interval -> serverView
        .getTimeline(DataSourceAnalysis.forDataSource(new TableDataSource(derivativeDataSource.getName())))
        .orElseThrow(() -> new ISE("No timeline for dataSource: %s", derivativeDataSource.getName()))
        .lookup(interval)
        .stream()
        .map(TimelineObjectHolder::getInterval))
    .collect(Collectors.toList());
// if the derivative has no segments overlapping the remaining query intervals, it will not be selected.
if (derivativeIntervals.isEmpty()) {
continue;
}
remainingQueryIntervals = MaterializedViewUtils.minus(remainingQueryIntervals, derivativeIntervals);
queries.add(query.withDataSource(new TableDataSource(derivativeDataSource.getName())).withQuerySegmentSpec(new MultipleIntervalSegmentSpec(derivativeIntervals)));
derivativesHitCount.get(derivativeDataSource.getName()).incrementAndGet();
if (remainingQueryIntervals.isEmpty()) {
break;
}
}
if (queries.isEmpty()) {
costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
return Collections.singletonList(query);
}
// any intervals not covered by a derivative are queried against the original datasource.
if (!remainingQueryIntervals.isEmpty()) {
queries.add(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(remainingQueryIntervals)));
}
hitCount.get(datasourceName).incrementAndGet();
costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
return queries;
} finally {
lock.readLock().unlock();
}
}
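To make the rewrite concrete, here is a sketch of the shape of the input and output. The base datasource name base, the derivative name base_derived_view, the dimension, and the intervals are all hypothetical; the builder calls mirror the GroupByQuery construction used by MovingAverageQueryRunner further below in this listing.
// Hypothetical input: a groupBy over the base datasource.
GroupByQuery userQuery = GroupByQuery.builder()
    .setDataSource(new TableDataSource("base"))
    .setInterval(Collections.singletonList(Intervals.of("2011-08-31/2011-09-02")))
    .setGranularity(Granularities.ALL)
    .setDimensions(new DefaultDimensionSpec("dim1", "dim1"))
    .setAggregatorSpecs(new CountAggregatorFactory("rows"))
    .build();
// Roughly what optimize() produces when the derivative covers only part of the interval:
// one sub-query against the derivative for the covered interval, plus one against the
// original datasource for the remainder (names and intervals are illustrative).
Query derivedPart = userQuery
    .withDataSource(new TableDataSource("base_derived_view"))
    .withQuerySegmentSpec(new MultipleIntervalSegmentSpec(
        Collections.singletonList(Intervals.of("2011-08-31/2011-09-01"))));
Query basePart = userQuery
    .withQuerySegmentSpec(new MultipleIntervalSegmentSpec(
        Collections.singletonList(Intervals.of("2011-09-01/2011-09-02"))));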
Use of org.apache.druid.query.TableDataSource in project druid by druid-io.
The class MovingAverageQueryRunner, method run.
@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext) {
MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
List<Interval> intervals;
final Period period;
// Get the largest bucket from the list of averagers
Optional<Integer> opt = maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
int buckets = opt.orElse(0);
// Extend the interval beginning by (buckets - 1) periods
if (maq.getGranularity() instanceof PeriodGranularity) {
period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
int offset = buckets <= 0 ? 0 : (1 - buckets);
intervals = maq.getIntervals().stream()
    .map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd()))
    .collect(Collectors.toList());
} else {
throw new ISE("Only PeriodGranularity is supported for movingAverage queries");
}
Sequence<Row> resultsSeq;
DataSource dataSource = maq.getDataSource();
if (maq.getDimensions() != null && !maq.getDimensions().isEmpty() && (dataSource instanceof TableDataSource || dataSource instanceof UnionDataSource || dataSource instanceof QueryDataSource)) {
// build groupBy query from movingAverage query
GroupByQuery.Builder builder = GroupByQuery.builder()
    .setDataSource(dataSource)
    .setInterval(intervals)
    .setDimFilter(maq.getFilter())
    .setGranularity(maq.getGranularity())
    .setDimensions(maq.getDimensions())
    .setAggregatorSpecs(maq.getAggregatorSpecs())
    .setPostAggregatorSpecs(maq.getPostAggregatorSpecs())
    .setContext(maq.getContext());
GroupByQuery gbq = builder.build();
ResponseContext gbqResponseContext = ResponseContext.createEmpty();
gbqResponseContext.merge(responseContext);
gbqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(gbq));
Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
try {
// use localhost for remote address
requestLogger.logNativeQuery(RequestLogLine.forNative(
    gbq,
    DateTimes.nowUtc(),
    "127.0.0.1",
    new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
} catch (Exception e) {
throw Throwables.propagate(e);
}
resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
} else {
// no dimensions, so optimize this as a TimeSeries
TimeseriesQuery tsq = new TimeseriesQuery(
    dataSource,
    new MultipleIntervalSegmentSpec(intervals),
    false,
    null,
    maq.getFilter(),
    maq.getGranularity(),
    maq.getAggregatorSpecs(),
    maq.getPostAggregatorSpecs(),
    0,
    maq.getContext());
ResponseContext tsqResponseContext = ResponseContext.createEmpty();
tsqResponseContext.merge(responseContext);
tsqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(tsq));
Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
try {
// use localhost for remote address
requestLogger.logNativeQuery(RequestLogLine.forNative(
    tsq,
    DateTimes.nowUtc(),
    "127.0.0.1",
    new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
} catch (Exception e) {
throw Throwables.propagate(e);
}
resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
}
// Process into period buckets
Sequence<RowBucket> bucketedMovingAvgResults = Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));
// Apply the window analysis (moving average) functions
Sequence<Row> movingAvgResults = Sequences.simple(new MovingAverageIterable(bucketedMovingAvgResults, maq.getDimensions(), maq.getAveragerSpecs(), maq.getPostAggregatorSpecs(), maq.getAggregatorSpecs()));
// Apply any postAveragers
Sequence<Row> movingAvgResultsWithPostAveragers = Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));
// remove rows outside the reporting window
List<Interval> reportingIntervals = maq.getIntervals();
movingAvgResults = Sequences.filter(movingAvgResultsWithPostAveragers, row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp())));
// Apply any having, sorting, and limits
movingAvgResults = maq.applyLimit(movingAvgResults);
return movingAvgResults;
}
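As a concrete check of the offset arithmetic above: with a daily period and an averager spanning 7 buckets, the offset is 1 - 7 = -6, so each query interval is stretched six periods earlier. A small Joda-Time sketch with an illustrative interval follows.
// Illustrative: how the interval start is pulled back by (buckets - 1) periods.
Period period = new Period("P1D");
int buckets = 7;
int offset = buckets <= 0 ? 0 : (1 - buckets);   // -6 here
Interval queryInterval = Intervals.of("2020-01-10/2020-01-11");
Interval extended = new Interval(
    queryInterval.getStart().withPeriodAdded(period, offset),
    queryInterval.getEnd());
// extended is 2020-01-04T00:00:00.000Z/2020-01-11T00:00:00.000Z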