use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
the class CachingClusteredClientTest method testTopNCaching.
@Test
@SuppressWarnings("unchecked")
public void testTopNCaching() {
final TopNQueryBuilder builder = new TopNQueryBuilder().dataSource(DATA_SOURCE).dimension(TOP_DIM).metric("imps").threshold(3).intervals(SEG_SPEC).filters(DIM_FILTER).granularity(GRANULARITY).aggregators(AGGS).postAggregators(POST_AGGS).context(CONTEXT);
QueryRunner runner = new FinalizeResultsQueryRunner(getDefaultQueryRunner(), new TopNQueryQueryToolChest(new TopNQueryConfig()));
testQueryCaching(runner, builder.randomQueryId().build(), Intervals.of("2011-01-01/2011-01-02"), makeTopNResultsWithoutRename(DateTimes.of("2011-01-01"), "a", 50, 5000, "b", 50, 4999, "c", 50, 4998), Intervals.of("2011-01-02/2011-01-03"), makeTopNResultsWithoutRename(DateTimes.of("2011-01-02"), "a", 50, 4997, "b", 50, 4996, "c", 50, 4995), Intervals.of("2011-01-05/2011-01-10"), makeTopNResultsWithoutRename(DateTimes.of("2011-01-05"), "a", 50, 4994, "b", 50, 4993, "c", 50, 4992, DateTimes.of("2011-01-06"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989, DateTimes.of("2011-01-07"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989, DateTimes.of("2011-01-08"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986, DateTimes.of("2011-01-09"), "c1", 50, 4985, "b", 50, 4984, "c", 50, 4983), Intervals.of("2011-01-05/2011-01-10"), makeTopNResultsWithoutRename(DateTimes.of("2011-01-05T01"), "a", 50, 4994, "b", 50, 4993, "c", 50, 4992, DateTimes.of("2011-01-06T01"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989, DateTimes.of("2011-01-07T01"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989, DateTimes.of("2011-01-08T01"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986, DateTimes.of("2011-01-09T01"), "c2", 50, 4985, "b", 50, 4984, "c", 50, 4983));
TopNQuery query = builder.intervals("2011-01-01/2011-01-10").metric("imps").aggregators(RENAMED_AGGS).postAggregators(DIFF_ORDER_POST_AGGS).randomQueryId().build();
TestHelper.assertExpectedResults(makeRenamedTopNResults(DateTimes.of("2011-01-01"), "a", 50, 5000, "b", 50, 4999, "c", 50, 4998, DateTimes.of("2011-01-02"), "a", 50, 4997, "b", 50, 4996, "c", 50, 4995, DateTimes.of("2011-01-05"), "a", 50, 4994, "b", 50, 4993, "c", 50, 4992, DateTimes.of("2011-01-05T01"), "a", 50, 4994, "b", 50, 4993, "c", 50, 4992, DateTimes.of("2011-01-06"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989, DateTimes.of("2011-01-06T01"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989, DateTimes.of("2011-01-07"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989, DateTimes.of("2011-01-07T01"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989, DateTimes.of("2011-01-08"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986, DateTimes.of("2011-01-08T01"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986, DateTimes.of("2011-01-09"), "c1", 50, 4985, "b", 50, 4984, "c", 50, 4983, DateTimes.of("2011-01-09T01"), "c2", 50, 4985, "b", 50, 4984, "c", 50, 4983), runner.run(QueryPlus.wrap(query)));
}
use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
the class CachingClusteredClientTest method testQueryCaching.
@SuppressWarnings("unchecked")
public void testQueryCaching(final QueryRunner runner, final int numTimesToQuery, boolean expectBySegment, final Query query, // does this assume query intervals must be ordered?
Object... args) {
final List<Interval> queryIntervals = Lists.newArrayListWithCapacity(args.length / 2);
final List<List<Iterable<Result<Object>>>> expectedResults = Lists.newArrayListWithCapacity(queryIntervals.size());
parseResults(queryIntervals, expectedResults, args);
for (int i = 0; i < queryIntervals.size(); ++i) {
List<Object> mocks = new ArrayList<>();
mocks.add(serverView);
final Interval actualQueryInterval = new Interval(queryIntervals.get(0).getStart(), queryIntervals.get(i).getEnd());
final List<Map<DruidServer, ServerExpectations>> serverExpectationList = populateTimeline(queryIntervals, expectedResults, i, mocks);
List<Capture> queryCaptures = new ArrayList<>();
final Map<DruidServer, ServerExpectations> finalExpectation = serverExpectationList.get(serverExpectationList.size() - 1);
for (Map.Entry<DruidServer, ServerExpectations> entry : finalExpectation.entrySet()) {
DruidServer server = entry.getKey();
ServerExpectations expectations = entry.getValue();
EasyMock.expect(serverView.getQueryRunner(server)).andReturn(expectations.getQueryRunner()).once();
final Capture<? extends QueryPlus> capture = Capture.newInstance();
final Capture<? extends ResponseContext> context = Capture.newInstance();
queryCaptures.add(capture);
QueryRunner queryable = expectations.getQueryRunner();
if (query instanceof TimeseriesQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<Result<TimeseriesResultValue>>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableTimeseriesResults(expectBySegment, segmentIds, intervals, results)).once();
} else if (query instanceof TopNQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<Result<TopNResultValue>>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableTopNResults(segmentIds, intervals, results)).once();
} else if (query instanceof SearchQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<Result<SearchResultValue>>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableSearchResults(segmentIds, intervals, results)).once();
} else if (query instanceof GroupByQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<ResultRow>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableGroupByResults((GroupByQuery) query, segmentIds, intervals, results)).once();
} else if (query instanceof TimeBoundaryQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<Result<TimeBoundaryResultValue>>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableTimeBoundaryResults(segmentIds, intervals, results)).once();
} else {
throw new ISE("Unknown query type[%s]", query.getClass());
}
}
final int expectedResultsRangeStart;
final int expectedResultsRangeEnd;
if (query instanceof TimeBoundaryQuery) {
expectedResultsRangeStart = i;
expectedResultsRangeEnd = i + 1;
} else {
expectedResultsRangeStart = 0;
expectedResultsRangeEnd = i + 1;
}
runWithMocks(new Runnable() {
@Override
public void run() {
for (int i = 0; i < numTimesToQuery; ++i) {
TestHelper.assertExpectedResults(new MergeIterable(query instanceof GroupByQuery ? ((GroupByQuery) query).getResultOrdering() : Comparators.naturalNullsFirst(), FunctionalIterable.create(new RangeIterable(expectedResultsRangeStart, expectedResultsRangeEnd)).transformCat(new Function<Integer, Iterable<Iterable<Result<Object>>>>() {
@Override
public Iterable<Iterable<Result<Object>>> apply(@Nullable Integer input) {
List<Iterable<Result<Object>>> retVal = new ArrayList<>();
final Map<DruidServer, ServerExpectations> exps = serverExpectationList.get(input);
for (ServerExpectations expectations : exps.values()) {
for (ServerExpectation expectation : expectations) {
retVal.add(expectation.getResults());
}
}
return retVal;
}
})), runner.run(QueryPlus.wrap(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(actualQueryInterval)))), initializeResponseContext()));
if (queryCompletedCallback != null) {
queryCompletedCallback.run();
}
}
}
}, mocks.toArray());
// make sure all the queries were sent down as 'bySegment'
for (Capture queryCapture : queryCaptures) {
QueryPlus capturedQueryPlus = (QueryPlus) queryCapture.getValue();
Query capturedQuery = capturedQueryPlus.getQuery();
if (expectBySegment) {
Assert.assertEquals(true, capturedQuery.getContextValue(QueryContexts.BY_SEGMENT_KEY));
} else {
Assert.assertTrue(capturedQuery.getContextValue(QueryContexts.BY_SEGMENT_KEY) == null || capturedQuery.getContextValue(QueryContexts.BY_SEGMENT_KEY).equals(false));
}
}
}
}
use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
the class DruidQuery method computeQuery.
/**
* Return this query as some kind of Druid query. The returned query will either be {@link TopNQuery},
* {@link TimeseriesQuery}, {@link GroupByQuery}, {@link ScanQuery}
*
* @return Druid query
*/
private Query computeQuery(final QueryFeatureInspector queryFeatureInspector) {
if (dataSource instanceof QueryDataSource) {
// If there is a subquery, then we prefer the outer query to be a groupBy if possible, since this potentially
// enables more efficient execution. (The groupBy query toolchest can handle some subqueries by itself, without
// requiring the Broker to inline results.)
final GroupByQuery outerQuery = toGroupByQuery(queryFeatureInspector);
if (outerQuery != null) {
return outerQuery;
}
}
final TimeseriesQuery tsQuery = toTimeseriesQuery(queryFeatureInspector);
if (tsQuery != null) {
return tsQuery;
}
final TopNQuery topNQuery = toTopNQuery(queryFeatureInspector);
if (topNQuery != null) {
return topNQuery;
}
final GroupByQuery groupByQuery = toGroupByQuery(queryFeatureInspector);
if (groupByQuery != null) {
return groupByQuery;
}
final ScanQuery scanQuery = toScanQuery(queryFeatureInspector);
if (scanQuery != null) {
return scanQuery;
}
throw new CannotBuildQueryException("Cannot convert query parts into an actual query");
}
use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
the class DruidQuery method toTopNQuery.
/**
* Return this query as a TopN query, or null if this query is not compatible with TopN.
*
* @return query or null
*/
@Nullable
private TopNQuery toTopNQuery(final QueryFeatureInspector queryFeatureInspector) {
// Must be allowed by the QueryMaker.
if (!queryFeatureInspector.feature(QueryFeature.CAN_RUN_TOPN)) {
return null;
}
// Must have GROUP BY one column, no GROUPING SETS, ORDER BY ≤ 1 column, LIMIT > 0 and ≤ maxTopNLimit,
// no OFFSET, no HAVING.
final boolean topNOk = grouping != null && grouping.getDimensions().size() == 1 && !grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) && sorting != null && (sorting.getOrderBys().size() <= 1 && sorting.getOffsetLimit().hasLimit() && sorting.getOffsetLimit().getLimit() > 0 && sorting.getOffsetLimit().getLimit() <= plannerContext.getPlannerConfig().getMaxTopNLimit() && !sorting.getOffsetLimit().hasOffset()) && grouping.getHavingFilter() == null;
if (!topNOk) {
return null;
}
final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions()).toDimensionSpec();
// grouping col cannot be type array
if (dimensionSpec.getOutputType().isArray()) {
return null;
}
final OrderByColumnSpec limitColumn;
if (sorting.getOrderBys().isEmpty()) {
limitColumn = new OrderByColumnSpec(dimensionSpec.getOutputName(), OrderByColumnSpec.Direction.ASCENDING, Calcites.getStringComparatorForValueType(dimensionSpec.getOutputType()));
} else {
limitColumn = Iterables.getOnlyElement(sorting.getOrderBys());
}
final TopNMetricSpec topNMetricSpec;
if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {
// DimensionTopNMetricSpec is exact; always return it even if allowApproximate is false.
final DimensionTopNMetricSpec baseMetricSpec = new DimensionTopNMetricSpec(null, limitColumn.getDimensionComparator());
topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? baseMetricSpec : new InvertedTopNMetricSpec(baseMetricSpec);
} else if (plannerContext.getPlannerConfig().isUseApproximateTopN()) {
// ORDER BY metric
final NumericTopNMetricSpec baseMetricSpec = new NumericTopNMetricSpec(limitColumn.getDimension());
topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? new InvertedTopNMetricSpec(baseMetricSpec) : baseMetricSpec;
} else {
return null;
}
final Pair<DataSource, Filtration> dataSourceFiltrationPair = getFiltration(dataSource, filter, virtualColumnRegistry);
final DataSource newDataSource = dataSourceFiltrationPair.lhs;
final Filtration filtration = dataSourceFiltrationPair.rhs;
final List<PostAggregator> postAggregators = new ArrayList<>(grouping.getPostAggregators());
if (sorting.getProjection() != null) {
postAggregators.addAll(sorting.getProjection().getPostAggregators());
}
return new TopNQuery(newDataSource, getVirtualColumns(true), dimensionSpec, topNMetricSpec, Ints.checkedCast(sorting.getOffsetLimit().getLimit()), filtration.getQuerySegmentSpec(), filtration.getDimFilter(), Granularities.ALL, grouping.getAggregatorFactories(), postAggregators, ImmutableSortedMap.copyOf(plannerContext.getQueryContext()));
}
use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
the class ClientQuerySegmentWalkerTest method testTopNOnArraysStrings.
@Test
public void testTopNOnArraysStrings() {
final TopNQuery query = (TopNQuery) new TopNQueryBuilder().dataSource(ARRAY).granularity(Granularities.ALL).intervals(Intervals.ONLY_ETERNITY).dimension(DefaultDimensionSpec.of("as")).metric("sum_n").threshold(1000).aggregators(new LongSumAggregatorFactory("sum_n", "n")).build().withId(DUMMY_QUERY_ID);
// group by cannot handle true array types, expect this, RuntimeExeception with IAE in stack trace
expectedException.expect(RuntimeException.class);
expectedException.expectMessage("Cannot create query type helper from invalid type [ARRAY<STRING>]");
testQuery(query, ImmutableList.of(ExpectedQuery.cluster(query)), ImmutableList.of());
}
Aggregations