use of org.apache.druid.query.QueryPlus in project druid by druid-io.
the class GroupByTimeseriesQueryRunnerTest method constructorFeeder.
@SuppressWarnings("unchecked")
@Parameterized.Parameters(name = "{0}, vectorize = {1}")
public static Iterable<Object[]> constructorFeeder() {
GroupByQueryConfig config = new GroupByQueryConfig();
config.setMaxIntermediateRows(10000);
final Pair<GroupByQueryRunnerFactory, Closer> factoryAndCloser = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);
final GroupByQueryRunnerFactory factory = factoryAndCloser.lhs;
RESOURCE_CLOSER.register(factoryAndCloser.rhs);
final List<Object[]> constructors = new ArrayList<>();
for (QueryRunner<ResultRow> runner : QueryRunnerTestHelper.makeQueryRunners(factory)) {
final QueryRunner modifiedRunner = new QueryRunner() {
@Override
public Sequence run(QueryPlus queryPlus, ResponseContext responseContext) {
TimeseriesQuery tsQuery = (TimeseriesQuery) queryPlus.getQuery();
QueryRunner<ResultRow> newRunner = factory.mergeRunners(Execs.directExecutor(), ImmutableList.of(runner));
QueryToolChest toolChest = factory.getToolchest();
newRunner = new FinalizeResultsQueryRunner<>(toolChest.mergeResults(toolChest.preMergeQueryDecoration(newRunner)), toolChest);
final String timeDimension = tsQuery.getTimestampResultField();
final List<VirtualColumn> virtualColumns = new ArrayList<>(Arrays.asList(tsQuery.getVirtualColumns().getVirtualColumns()));
Map<String, Object> theContext = tsQuery.getContext();
if (timeDimension != null) {
theContext = new HashMap<>(tsQuery.getContext());
final PeriodGranularity granularity = (PeriodGranularity) tsQuery.getGranularity();
virtualColumns.add(new ExpressionVirtualColumn("v0", StringUtils.format("timestamp_floor(__time, '%s')", granularity.getPeriod()), ColumnType.LONG, TestExprMacroTable.INSTANCE));
theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, timeDimension);
theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_GRANULARITY, granularity);
theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX, 0);
}
GroupByQuery newQuery = GroupByQuery.builder().setDataSource(tsQuery.getDataSource()).setQuerySegmentSpec(tsQuery.getQuerySegmentSpec()).setGranularity(tsQuery.getGranularity()).setDimFilter(tsQuery.getDimensionsFilter()).setDimensions(timeDimension == null ? ImmutableList.of() : ImmutableList.of(new DefaultDimensionSpec("v0", timeDimension, ColumnType.LONG))).setAggregatorSpecs(tsQuery.getAggregatorSpecs()).setPostAggregatorSpecs(tsQuery.getPostAggregatorSpecs()).setVirtualColumns(VirtualColumns.create(virtualColumns)).setContext(theContext).build();
return Sequences.map(newRunner.run(queryPlus.withQuery(newQuery), responseContext), new Function<ResultRow, Result<TimeseriesResultValue>>() {
@Override
public Result<TimeseriesResultValue> apply(final ResultRow input) {
final MapBasedRow mapBasedRow = input.toMapBasedRow(newQuery);
return new Result<>(mapBasedRow.getTimestamp(), new TimeseriesResultValue(mapBasedRow.getEvent()));
}
});
}
@Override
public String toString() {
return runner.toString();
}
};
for (boolean vectorize : ImmutableList.of(false, true)) {
// Add vectorization tests for any indexes that support it.
if (!vectorize || QueryRunnerTestHelper.isTestRunnerVectorizable(runner)) {
constructors.add(new Object[] { modifiedRunner, vectorize });
}
}
}
return constructors;
}
use of org.apache.druid.query.QueryPlus in project druid by druid-io.
the class CachingClusteredClientTest method testSingleDimensionPruning.
@Test
public void testSingleDimensionPruning() {
DimFilter filter = new AndDimFilter(new OrDimFilter(new SelectorDimFilter("dim1", "a", null), new BoundDimFilter("dim1", "from", "to", false, false, false, null, StringComparators.LEXICOGRAPHIC)), new AndDimFilter(new InDimFilter("dim2", Arrays.asList("a", "c", "e", "g"), null), new BoundDimFilter("dim2", "aaa", "hi", false, false, false, null, StringComparators.LEXICOGRAPHIC), new BoundDimFilter("dim2", "e", "zzz", true, true, false, null, StringComparators.LEXICOGRAPHIC)));
final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder().dataSource(DATA_SOURCE).filters(filter).granularity(GRANULARITY).intervals(SEG_SPEC).context(CONTEXT).intervals("2011-01-05/2011-01-10").aggregators(RENAMED_AGGS).postAggregators(RENAMED_POST_AGGS);
TimeseriesQuery query = builder.randomQueryId().build();
final Interval interval1 = Intervals.of("2011-01-06/2011-01-07");
final Interval interval2 = Intervals.of("2011-01-07/2011-01-08");
final Interval interval3 = Intervals.of("2011-01-08/2011-01-09");
QueryRunner runner = new FinalizeResultsQueryRunner(getDefaultQueryRunner(), new TimeseriesQueryQueryToolChest());
final DruidServer lastServer = servers[random.nextInt(servers.length)];
ServerSelector selector1 = makeMockSingleDimensionSelector(lastServer, "dim1", null, "b", 0);
ServerSelector selector2 = makeMockSingleDimensionSelector(lastServer, "dim1", "e", "f", 1);
ServerSelector selector3 = makeMockSingleDimensionSelector(lastServer, "dim1", "hi", "zzz", 2);
ServerSelector selector4 = makeMockSingleDimensionSelector(lastServer, "dim2", "a", "e", 0);
ServerSelector selector5 = makeMockSingleDimensionSelector(lastServer, "dim2", null, null, 1);
ServerSelector selector6 = makeMockSingleDimensionSelector(lastServer, "other", "b", null, 0);
timeline.add(interval1, "v", new NumberedPartitionChunk<>(0, 3, selector1));
timeline.add(interval1, "v", new NumberedPartitionChunk<>(1, 3, selector2));
timeline.add(interval1, "v", new NumberedPartitionChunk<>(2, 3, selector3));
timeline.add(interval2, "v", new NumberedPartitionChunk<>(0, 2, selector4));
timeline.add(interval2, "v", new NumberedPartitionChunk<>(1, 2, selector5));
timeline.add(interval3, "v", new NumberedPartitionChunk<>(0, 1, selector6));
final Capture<QueryPlus> capture = Capture.newInstance();
final Capture<ResponseContext> contextCap = Capture.newInstance();
QueryRunner mockRunner = EasyMock.createNiceMock(QueryRunner.class);
EasyMock.expect(mockRunner.run(EasyMock.capture(capture), EasyMock.capture(contextCap))).andReturn(Sequences.empty()).anyTimes();
EasyMock.expect(serverView.getQueryRunner(lastServer)).andReturn(mockRunner).anyTimes();
EasyMock.replay(serverView);
EasyMock.replay(mockRunner);
List<SegmentDescriptor> descriptors = new ArrayList<>();
descriptors.add(new SegmentDescriptor(interval1, "v", 0));
descriptors.add(new SegmentDescriptor(interval1, "v", 2));
descriptors.add(new SegmentDescriptor(interval2, "v", 1));
descriptors.add(new SegmentDescriptor(interval3, "v", 0));
MultipleSpecificSegmentSpec expected = new MultipleSpecificSegmentSpec(descriptors);
runner.run(QueryPlus.wrap(query)).toList();
Assert.assertEquals(expected, ((TimeseriesQuery) capture.getValue().getQuery()).getQuerySegmentSpec());
}
use of org.apache.druid.query.QueryPlus in project druid by druid-io.
the class CachingClusteredClientTest method testQueryCaching.
@SuppressWarnings("unchecked")
public void testQueryCaching(final QueryRunner runner, final int numTimesToQuery, boolean expectBySegment, final Query query, // does this assume query intervals must be ordered?
Object... args) {
final List<Interval> queryIntervals = Lists.newArrayListWithCapacity(args.length / 2);
final List<List<Iterable<Result<Object>>>> expectedResults = Lists.newArrayListWithCapacity(queryIntervals.size());
parseResults(queryIntervals, expectedResults, args);
for (int i = 0; i < queryIntervals.size(); ++i) {
List<Object> mocks = new ArrayList<>();
mocks.add(serverView);
final Interval actualQueryInterval = new Interval(queryIntervals.get(0).getStart(), queryIntervals.get(i).getEnd());
final List<Map<DruidServer, ServerExpectations>> serverExpectationList = populateTimeline(queryIntervals, expectedResults, i, mocks);
List<Capture> queryCaptures = new ArrayList<>();
final Map<DruidServer, ServerExpectations> finalExpectation = serverExpectationList.get(serverExpectationList.size() - 1);
for (Map.Entry<DruidServer, ServerExpectations> entry : finalExpectation.entrySet()) {
DruidServer server = entry.getKey();
ServerExpectations expectations = entry.getValue();
EasyMock.expect(serverView.getQueryRunner(server)).andReturn(expectations.getQueryRunner()).once();
final Capture<? extends QueryPlus> capture = Capture.newInstance();
final Capture<? extends ResponseContext> context = Capture.newInstance();
queryCaptures.add(capture);
QueryRunner queryable = expectations.getQueryRunner();
if (query instanceof TimeseriesQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<Result<TimeseriesResultValue>>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableTimeseriesResults(expectBySegment, segmentIds, intervals, results)).once();
} else if (query instanceof TopNQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<Result<TopNResultValue>>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableTopNResults(segmentIds, intervals, results)).once();
} else if (query instanceof SearchQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<Result<SearchResultValue>>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableSearchResults(segmentIds, intervals, results)).once();
} else if (query instanceof GroupByQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<ResultRow>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableGroupByResults((GroupByQuery) query, segmentIds, intervals, results)).once();
} else if (query instanceof TimeBoundaryQuery) {
List<SegmentId> segmentIds = new ArrayList<>();
List<Interval> intervals = new ArrayList<>();
List<Iterable<Result<TimeBoundaryResultValue>>> results = new ArrayList<>();
for (ServerExpectation expectation : expectations) {
segmentIds.add(expectation.getSegmentId());
intervals.add(expectation.getInterval());
results.add(expectation.getResults());
}
EasyMock.expect(queryable.run(EasyMock.capture(capture), EasyMock.capture(context))).andReturn(toQueryableTimeBoundaryResults(segmentIds, intervals, results)).once();
} else {
throw new ISE("Unknown query type[%s]", query.getClass());
}
}
final int expectedResultsRangeStart;
final int expectedResultsRangeEnd;
if (query instanceof TimeBoundaryQuery) {
expectedResultsRangeStart = i;
expectedResultsRangeEnd = i + 1;
} else {
expectedResultsRangeStart = 0;
expectedResultsRangeEnd = i + 1;
}
runWithMocks(new Runnable() {
@Override
public void run() {
for (int i = 0; i < numTimesToQuery; ++i) {
TestHelper.assertExpectedResults(new MergeIterable(query instanceof GroupByQuery ? ((GroupByQuery) query).getResultOrdering() : Comparators.naturalNullsFirst(), FunctionalIterable.create(new RangeIterable(expectedResultsRangeStart, expectedResultsRangeEnd)).transformCat(new Function<Integer, Iterable<Iterable<Result<Object>>>>() {
@Override
public Iterable<Iterable<Result<Object>>> apply(@Nullable Integer input) {
List<Iterable<Result<Object>>> retVal = new ArrayList<>();
final Map<DruidServer, ServerExpectations> exps = serverExpectationList.get(input);
for (ServerExpectations expectations : exps.values()) {
for (ServerExpectation expectation : expectations) {
retVal.add(expectation.getResults());
}
}
return retVal;
}
})), runner.run(QueryPlus.wrap(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(actualQueryInterval)))), initializeResponseContext()));
if (queryCompletedCallback != null) {
queryCompletedCallback.run();
}
}
}
}, mocks.toArray());
// make sure all the queries were sent down as 'bySegment'
for (Capture queryCapture : queryCaptures) {
QueryPlus capturedQueryPlus = (QueryPlus) queryCapture.getValue();
Query capturedQuery = capturedQueryPlus.getQuery();
if (expectBySegment) {
Assert.assertEquals(true, capturedQuery.getContextValue(QueryContexts.BY_SEGMENT_KEY));
} else {
Assert.assertTrue(capturedQuery.getContextValue(QueryContexts.BY_SEGMENT_KEY) == null || capturedQuery.getContextValue(QueryContexts.BY_SEGMENT_KEY).equals(false));
}
}
}
}
use of org.apache.druid.query.QueryPlus in project druid by druid-io.
the class CachingClusteredClientPerfTest method testGetQueryRunnerForSegments_singleIntervalLargeSegments.
@Test(timeout = 10_000)
public void testGetQueryRunnerForSegments_singleIntervalLargeSegments() {
final int segmentCount = 30_000;
final Interval interval = Intervals.of("2021-02-13/2021-02-14");
final List<SegmentDescriptor> segmentDescriptors = new ArrayList<>(segmentCount);
final List<DataSegment> dataSegments = new ArrayList<>(segmentCount);
final VersionedIntervalTimeline<String, ServerSelector> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
final DruidServer server = new DruidServer("server", "localhost:9000", null, Long.MAX_VALUE, ServerType.HISTORICAL, DruidServer.DEFAULT_TIER, DruidServer.DEFAULT_PRIORITY);
for (int ii = 0; ii < segmentCount; ii++) {
segmentDescriptors.add(new SegmentDescriptor(interval, "1", ii));
DataSegment segment = makeDataSegment("test", interval, "1", ii);
dataSegments.add(segment);
}
timeline.addAll(Iterators.transform(dataSegments.iterator(), segment -> {
ServerSelector ss = new ServerSelector(segment, new HighestPriorityTierSelectorStrategy(new RandomServerSelectorStrategy()));
ss.addServerAndUpdateSegment(new QueryableDruidServer(server, new MockQueryRunner()), segment);
return new VersionedIntervalTimeline.PartitionChunkEntry<>(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(ss));
}));
TimelineServerView serverView = Mockito.mock(TimelineServerView.class);
QueryScheduler queryScheduler = Mockito.mock(QueryScheduler.class);
// mock scheduler to return same sequence as argument
Mockito.when(queryScheduler.run(any(), any())).thenAnswer(i -> i.getArgument(1));
Mockito.when(queryScheduler.prioritizeAndLaneQuery(any(), any())).thenAnswer(i -> ((QueryPlus) i.getArgument(0)).getQuery());
Mockito.doReturn(Optional.of(timeline)).when(serverView).getTimeline(any());
Mockito.doReturn(new MockQueryRunner()).when(serverView).getQueryRunner(any());
CachingClusteredClient cachingClusteredClient = new CachingClusteredClient(new MockQueryToolChestWareHouse(), serverView, MapCache.create(1024), TestHelper.makeJsonMapper(), Mockito.mock(CachePopulator.class), new CacheConfig(), Mockito.mock(DruidHttpClientConfig.class), Mockito.mock(DruidProcessingConfig.class), ForkJoinPool.commonPool(), queryScheduler, NoopJoinableFactory.INSTANCE, new NoopServiceEmitter());
Query<SegmentDescriptor> fakeQuery = makeFakeQuery(interval);
QueryRunner<SegmentDescriptor> queryRunner = cachingClusteredClient.getQueryRunnerForSegments(fakeQuery, segmentDescriptors);
Sequence<SegmentDescriptor> sequence = queryRunner.run(QueryPlus.wrap(fakeQuery));
Assert.assertEquals(segmentDescriptors, sequence.toList());
}
use of org.apache.druid.query.QueryPlus in project druid by druid-io.
the class CachingClusteredClientTest method testNoSegmentPruningForHashPartitionedSegments.
private void testNoSegmentPruningForHashPartitionedSegments(boolean enableSegmentPruning, @Nullable HashPartitionFunction partitionFunction, boolean useEmptyPartitionDimensions) {
DimFilter filter = new AndDimFilter(new SelectorDimFilter("dim1", "a", null), new BoundDimFilter("dim2", "e", "zzz", true, true, false, null, StringComparators.LEXICOGRAPHIC), // Equivalent filter of dim3 below is InDimFilter("dim3", Arrays.asList("c"), null)
new AndDimFilter(new InDimFilter("dim3", Arrays.asList("a", "c", "e", "g"), null), new BoundDimFilter("dim3", "aaa", "ddd", false, false, false, null, StringComparators.LEXICOGRAPHIC)));
final Map<String, Object> context = new HashMap<>(CONTEXT);
context.put(QueryContexts.SECONDARY_PARTITION_PRUNING_KEY, enableSegmentPruning);
final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder().dataSource(DATA_SOURCE).filters(filter).granularity(GRANULARITY).intervals(SEG_SPEC).intervals("2011-01-05/2011-01-10").aggregators(RENAMED_AGGS).postAggregators(RENAMED_POST_AGGS).context(context).randomQueryId();
TimeseriesQuery query = builder.build();
QueryRunner runner = new FinalizeResultsQueryRunner(getDefaultQueryRunner(), new TimeseriesQueryQueryToolChest());
final Interval interval1 = Intervals.of("2011-01-06/2011-01-07");
final Interval interval2 = Intervals.of("2011-01-07/2011-01-08");
final Interval interval3 = Intervals.of("2011-01-08/2011-01-09");
final DruidServer lastServer = servers[random.nextInt(servers.length)];
List<String> partitionDimensions = useEmptyPartitionDimensions ? ImmutableList.of() : ImmutableList.of("dim1");
final int numPartitions1 = 6;
for (int i = 0; i < numPartitions1; i++) {
ServerSelector selector = makeMockHashBasedSelector(lastServer, partitionDimensions, partitionFunction, i, numPartitions1);
timeline.add(interval1, "v", new NumberedPartitionChunk<>(i, numPartitions1, selector));
}
partitionDimensions = useEmptyPartitionDimensions ? ImmutableList.of() : ImmutableList.of("dim2");
final int numPartitions2 = 3;
for (int i = 0; i < numPartitions2; i++) {
ServerSelector selector = makeMockHashBasedSelector(lastServer, partitionDimensions, partitionFunction, i, numPartitions2);
timeline.add(interval2, "v", new NumberedPartitionChunk<>(i, numPartitions2, selector));
}
partitionDimensions = useEmptyPartitionDimensions ? ImmutableList.of() : ImmutableList.of("dim1", "dim3");
final int numPartitions3 = 4;
for (int i = 0; i < numPartitions3; i++) {
ServerSelector selector = makeMockHashBasedSelector(lastServer, partitionDimensions, partitionFunction, i, numPartitions3);
timeline.add(interval3, "v", new NumberedPartitionChunk<>(i, numPartitions3, selector));
}
final Capture<QueryPlus> capture = Capture.newInstance();
final Capture<ResponseContext> contextCap = Capture.newInstance();
QueryRunner mockRunner = EasyMock.createNiceMock(QueryRunner.class);
EasyMock.expect(mockRunner.run(EasyMock.capture(capture), EasyMock.capture(contextCap))).andReturn(Sequences.empty()).anyTimes();
EasyMock.expect(serverView.getQueryRunner(lastServer)).andReturn(mockRunner).anyTimes();
EasyMock.replay(serverView);
EasyMock.replay(mockRunner);
// Expected to read all segments
Set<SegmentDescriptor> expcetedDescriptors = new HashSet<>();
IntStream.range(0, numPartitions1).forEach(i -> expcetedDescriptors.add(new SegmentDescriptor(interval1, "v", i)));
IntStream.range(0, numPartitions2).forEach(i -> expcetedDescriptors.add(new SegmentDescriptor(interval2, "v", i)));
IntStream.range(0, numPartitions3).forEach(i -> expcetedDescriptors.add(new SegmentDescriptor(interval3, "v", i)));
runner.run(QueryPlus.wrap(query)).toList();
QuerySegmentSpec querySegmentSpec = ((TimeseriesQuery) capture.getValue().getQuery()).getQuerySegmentSpec();
Assert.assertSame(MultipleSpecificSegmentSpec.class, querySegmentSpec.getClass());
final Set<SegmentDescriptor> actualDescriptors = new HashSet<>(((MultipleSpecificSegmentSpec) querySegmentSpec).getDescriptors());
Assert.assertEquals(expcetedDescriptors, actualDescriptors);
}
Aggregations