Use of io.trino.server.DynamicFilterService in project trino by trinodb.
Class TestHttpRemoteTask, method testOutboundDynamicFilters:
@Test(timeOut = 30_000)
public void testOutboundDynamicFilters()
        throws Exception
{
    DynamicFilterId filterId1 = new DynamicFilterId("df1");
    DynamicFilterId filterId2 = new DynamicFilterId("df2");
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol1 = symbolAllocator.newSymbol("DF_SYMBOL1", BIGINT);
    Symbol symbol2 = symbolAllocator.newSymbol("DF_SYMBOL2", BIGINT);
    SymbolReference df1 = symbol1.toSymbolReference();
    SymbolReference df2 = symbol2.toSymbolReference();
    ColumnHandle handle1 = new TestingColumnHandle("column1");
    ColumnHandle handle2 = new TestingColumnHandle("column2");
    QueryId queryId = new QueryId("test");

    TestingTaskResource testingTaskResource = new TestingTaskResource(new AtomicLong(System.nanoTime()), FailureScenario.NO_FAILURE);
    DynamicFilterService dynamicFilterService = new DynamicFilterService(
            PLANNER_CONTEXT.getMetadata(),
            PLANNER_CONTEXT.getFunctionManager(),
            new TypeOperators(),
            newDirectExecutorService());
    dynamicFilterService.registerQuery(
            queryId,
            TEST_SESSION,
            ImmutableSet.of(filterId1, filterId2),
            ImmutableSet.of(filterId1, filterId2),
            ImmutableSet.of());
    dynamicFilterService.stageCannotScheduleMoreTasks(new StageId(queryId, 1), 0, 1);
    DynamicFilter dynamicFilter = dynamicFilterService.createDynamicFilter(
            queryId,
            ImmutableList.of(
                    new DynamicFilters.Descriptor(filterId1, df1),
                    new DynamicFilters.Descriptor(filterId2, df2)),
            ImmutableMap.of(symbol1, handle1, symbol2, handle2),
            symbolAllocator.getTypes());

    // make sure initial dynamic filter is collected
    CompletableFuture<?> future = dynamicFilter.isBlocked();
    dynamicFilterService.addTaskDynamicFilters(
            new TaskId(new StageId(queryId.getId(), 1), 1, 0),
            ImmutableMap.of(filterId1, Domain.singleValue(BIGINT, 1L)));
    future.get();
    assertEquals(
            dynamicFilter.getCurrentPredicate(),
            TupleDomain.withColumnDomains(ImmutableMap.of(handle1, Domain.singleValue(BIGINT, 1L))));

    // Create remote task after dynamic filter is created to simulate new nodes joining
    HttpRemoteTaskFactory httpRemoteTaskFactory = createHttpRemoteTaskFactory(testingTaskResource, dynamicFilterService);
    RemoteTask remoteTask = createRemoteTask(httpRemoteTaskFactory, ImmutableSet.of(filterId1, filterId2));
    testingTaskResource.setInitialTaskInfo(remoteTask.getTaskInfo());
    remoteTask.start();
    assertEventually(new Duration(10, SECONDS), () -> assertEquals(testingTaskResource.getDynamicFiltersSentCounter(), 1L));
    assertEquals(testingTaskResource.getCreateOrUpdateCounter(), 1L);

    // schedule a couple of splits to trigger task updates
    addSplit(remoteTask, testingTaskResource, 1);
    addSplit(remoteTask, testingTaskResource, 2);

    // make sure dynamic filter was sent in task updates only once
    assertEquals(testingTaskResource.getDynamicFiltersSentCounter(), 1L);
    assertEquals(testingTaskResource.getCreateOrUpdateCounter(), 3L);
    assertEquals(
            testingTaskResource.getLatestDynamicFilterFromCoordinator(),
            ImmutableMap.of(filterId1, Domain.singleValue(BIGINT, 1L)));

    future = dynamicFilter.isBlocked();
    dynamicFilterService.addTaskDynamicFilters(
            new TaskId(new StageId(queryId.getId(), 1), 1, 0),
            ImmutableMap.of(filterId2, Domain.singleValue(BIGINT, 2L)));
    future.get();
    assertEquals(
            dynamicFilter.getCurrentPredicate(),
            TupleDomain.withColumnDomains(ImmutableMap.of(
                    handle1, Domain.singleValue(BIGINT, 1L),
                    handle2, Domain.singleValue(BIGINT, 2L))));

    // dynamic filter should be sent even though there were no further splits scheduled
    assertEventually(new Duration(10, SECONDS), () -> assertEquals(testingTaskResource.getDynamicFiltersSentCounter(), 2L));
    assertEquals(testingTaskResource.getCreateOrUpdateCounter(), 4L);

    // previously sent dynamic filter should not be repeated
    assertEquals(
            testingTaskResource.getLatestDynamicFilterFromCoordinator(),
            ImmutableMap.of(filterId2, Domain.singleValue(BIGINT, 2L)));

    httpRemoteTaskFactory.stop();
    dynamicFilterService.stop();
}
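The predicates asserted above are built from SPI types only: each collected dynamic filter contributes a per-column Domain, and the predicate visible through getCurrentPredicate() is their intersection. A minimal, self-contained sketch of that composition (using plain String column keys instead of the test's ColumnHandles; this snippet is illustrative and not part of the test):

import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;

import static io.trino.spi.type.BigintType.BIGINT;

public final class CollectedPredicateSketch
{
    private CollectedPredicateSketch() {}

    public static void main(String[] args)
    {
        // first collected filter constrains column1 to a single value
        TupleDomain<String> afterFirstFilter = TupleDomain.withColumnDomains(
                ImmutableMap.of("column1", Domain.singleValue(BIGINT, 1L)));
        // second collected filter constrains column2
        TupleDomain<String> secondFilter = TupleDomain.withColumnDomains(
                ImmutableMap.of("column2", Domain.singleValue(BIGINT, 2L)));

        // mirrors the final assertEquals above: both columns end up constrained
        TupleDomain<String> combined = afterFirstFilter.intersect(secondFilter);
        System.out.println(combined.getDomains().orElseThrow());
    }
}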
Use of io.trino.server.DynamicFilterService in project trino by trinodb.
Class TestSourcePartitionedScheduler, method testNewTaskScheduledWhenChildStageBufferIsUnderutilized:
@Test
public void testNewTaskScheduledWhenChildStageBufferIsUnderutilized()
{
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    // use private node manager so we can add a node later
    InMemoryNodeManager nodeManager = new InMemoryNodeManager();
    nodeManager.addNode(
            CONNECTOR_ID,
            new InternalNode("other1", URI.create("http://127.0.0.1:11"), NodeVersion.UNKNOWN, false),
            new InternalNode("other2", URI.create("http://127.0.0.1:12"), NodeVersion.UNKNOWN, false),
            new InternalNode("other3", URI.create("http://127.0.0.1:13"), NodeVersion.UNKNOWN, false));
    NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory(
            nodeManager,
            new NodeSchedulerConfig().setIncludeCoordinator(false),
            nodeTaskMap,
            new Duration(0, SECONDS)));
    PlanFragment plan = createFragment();
    StageExecution stage = createStageExecution(plan, nodeTaskMap);

    // setting under utilized child output buffer
    StageScheduler scheduler = newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(500, TestingSplit::createRemoteSplit)),
            new DynamicSplitPlacementPolicy(nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)), stage::getAllTasks),
            500,
            new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig()),
            new TableExecuteContextManager(),
            () -> false);

    // the queues of 3 running nodes should be full
    ScheduleResult scheduleResult = scheduler.schedule();
    assertEquals(scheduleResult.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(scheduleResult.getNewTasks().size(), 3);
    assertEquals(scheduleResult.getSplitsScheduled(), 300);
    for (RemoteTask remoteTask : scheduleResult.getNewTasks()) {
        PartitionedSplitsInfo splitsInfo = remoteTask.getPartitionedSplitsInfo();
        assertEquals(splitsInfo.getCount(), 100);
    }

    // new node added - the pending splits should go to it since the child tasks are not blocked
    nodeManager.addNode(CONNECTOR_ID, new InternalNode("other4", URI.create("http://127.0.0.4:14"), NodeVersion.UNKNOWN, false));
    scheduleResult = scheduler.schedule();
    // split queue is full but still the source task creation isn't blocked
    assertEquals(scheduleResult.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(scheduleResult.getNewTasks().size(), 1);
    assertEquals(scheduleResult.getSplitsScheduled(), 100);
}
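The numbers asserted above follow from simple split accounting: each task queues 100 splits (the per-task count asserted in the loop), so the three initial nodes absorb 300 of the 500 splits, and the fourth node later absorbs another 100. A stand-alone recap of that arithmetic, with the per-node capacity of 100 read off the assertions rather than from the scheduler configuration:

public final class SplitAccountingSketch
{
    private SplitAccountingSketch() {}

    public static void main(String[] args)
    {
        int totalSplits = 500;       // createFixedSplitSource(500, ...)
        int perNodeCapacity = 100;   // splitsInfo.getCount() asserted per task
        int initialNodes = 3;

        int scheduledInitially = Math.min(totalSplits, initialNodes * perNodeCapacity);
        System.out.println(scheduledInitially); // 300 == scheduleResult.getSplitsScheduled()

        int scheduledOnNewNode = Math.min(totalSplits - scheduledInitially, perNodeCapacity);
        System.out.println(scheduledOnNewNode); // 100 more after the fourth node joins
    }
}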
Use of io.trino.server.DynamicFilterService in project trino by trinodb.
Class TestSourcePartitionedScheduler, method testNoNewTaskScheduledWhenChildStageBufferIsOverutilized:
@Test
public void testNoNewTaskScheduledWhenChildStageBufferIsOverutilized()
{
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    // use private node manager so we can add a node later
    InMemoryNodeManager nodeManager = new InMemoryNodeManager();
    nodeManager.addNode(
            CONNECTOR_ID,
            new InternalNode("other1", URI.create("http://127.0.0.1:11"), NodeVersion.UNKNOWN, false),
            new InternalNode("other2", URI.create("http://127.0.0.1:12"), NodeVersion.UNKNOWN, false),
            new InternalNode("other3", URI.create("http://127.0.0.1:13"), NodeVersion.UNKNOWN, false));
    NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory(
            nodeManager,
            new NodeSchedulerConfig().setIncludeCoordinator(false),
            nodeTaskMap,
            new Duration(0, SECONDS)));
    PlanFragment plan = createFragment();
    StageExecution stage = createStageExecution(plan, nodeTaskMap);

    // setting over utilized child output buffer
    StageScheduler scheduler = newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(400, TestingSplit::createRemoteSplit)),
            new DynamicSplitPlacementPolicy(nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)), stage::getAllTasks),
            400,
            new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig()),
            new TableExecuteContextManager(),
            () -> true);

    // the queues of 3 running nodes should be full
    ScheduleResult scheduleResult = scheduler.schedule();
    assertEquals(scheduleResult.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(scheduleResult.getNewTasks().size(), 3);
    assertEquals(scheduleResult.getSplitsScheduled(), 300);
    for (RemoteTask remoteTask : scheduleResult.getNewTasks()) {
        PartitionedSplitsInfo splitsInfo = remoteTask.getPartitionedSplitsInfo();
        assertEquals(splitsInfo.getCount(), 100);
    }

    // new node added but 1 child's output buffer is overutilized - so lockdown the tasks
    nodeManager.addNode(CONNECTOR_ID, new InternalNode("other4", URI.create("http://127.0.0.4:14"), NodeVersion.UNKNOWN, false));
    scheduleResult = scheduler.schedule();
    assertEquals(scheduleResult.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(scheduleResult.getNewTasks().size(), 0);
    assertEquals(scheduleResult.getSplitsScheduled(), 0);
}
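Both scheduler tests call schedule() one step at a time so they can assert on each intermediate ScheduleResult (getBlockedReason(), getNewTasks(), getSplitsScheduled()). Outside a test, a caller would typically loop and block on the result's future between iterations. A hedged sketch of such a driver loop, assuming the start()/schedule()/close() lifecycle and the ScheduleResult accessors used in these tests:

import io.trino.execution.scheduler.ScheduleResult;
import io.trino.execution.scheduler.StageScheduler;

public final class SchedulerDriverSketch
{
    private SchedulerDriverSketch() {}

    public static void driveToCompletion(StageScheduler scheduler)
            throws Exception
    {
        scheduler.start();
        ScheduleResult result;
        do {
            result = scheduler.schedule();
            if (!result.isFinished()) {
                // wait for queue space or more splits, e.g. while blocked on SPLIT_QUEUES_FULL
                result.getBlocked().get();
            }
        }
        while (!result.isFinished());
        scheduler.close();
    }
}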
Use of io.trino.server.DynamicFilterService in project trino by trinodb.
Class TestSourcePartitionedScheduler, method getSourcePartitionedScheduler:
private StageScheduler getSourcePartitionedScheduler(
        ConnectorSplitSource splitSource,
        StageExecution stage,
        InternalNodeManager nodeManager,
        NodeTaskMap nodeTaskMap,
        int splitBatchSize,
        SplitsBalancingPolicy splitsBalancingPolicy)
{
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig()
            .setIncludeCoordinator(false)
            .setMaxSplitsPerNode(20)
            .setMaxPendingSplitsPerTask(0)
            .setSplitsBalancingPolicy(splitsBalancingPolicy);
    NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory(nodeManager, nodeSchedulerConfig, nodeTaskMap, new Duration(0, SECONDS)));
    SplitPlacementPolicy placementPolicy = new DynamicSplitPlacementPolicy(
            nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)),
            stage::getAllTasks);
    return newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            new ConnectorAwareSplitSource(CONNECTOR_ID, splitSource),
            placementPolicy,
            splitBatchSize,
            new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig()),
            new TableExecuteContextManager(),
            () -> false);
}
Use of io.trino.server.DynamicFilterService in project trino by trinodb.
Class TestSourcePartitionedScheduler, method testDynamicFiltersUnblockedOnBlockedBuildSource:
@Test
public void testDynamicFiltersUnblockedOnBlockedBuildSource()
{
    PlanFragment plan = createFragment();
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    StageExecution stage = createStageExecution(plan, nodeTaskMap);
    NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory(
            nodeManager,
            new NodeSchedulerConfig().setIncludeCoordinator(false),
            nodeTaskMap));
    DynamicFilterService dynamicFilterService = new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig());
    dynamicFilterService.registerQuery(
            QUERY_ID,
            TEST_SESSION,
            ImmutableSet.of(DYNAMIC_FILTER_ID),
            ImmutableSet.of(DYNAMIC_FILTER_ID),
            ImmutableSet.of(DYNAMIC_FILTER_ID));
    StageScheduler scheduler = newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            new ConnectorAwareSplitSource(CONNECTOR_ID, createBlockedSplitSource()),
            new DynamicSplitPlacementPolicy(nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)), stage::getAllTasks),
            2,
            dynamicFilterService,
            new TableExecuteContextManager(),
            () -> true);
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol = symbolAllocator.newSymbol("DF_SYMBOL1", BIGINT);
    DynamicFilter dynamicFilter = dynamicFilterService.createDynamicFilter(
            QUERY_ID,
            ImmutableList.of(new DynamicFilters.Descriptor(DYNAMIC_FILTER_ID, symbol.toSymbolReference())),
            ImmutableMap.of(symbol, new TestingColumnHandle("probeColumnA")),
            symbolAllocator.getTypes());

    // make sure dynamic filtering collecting task was created immediately
    assertEquals(stage.getState(), PLANNED);
    scheduler.start();
    assertEquals(stage.getAllTasks().size(), 1);
    assertEquals(stage.getState(), SCHEDULING);

    // make sure dynamic filter is initially blocked
    assertFalse(dynamicFilter.isBlocked().isDone());

    // make sure dynamic filter is unblocked due to build side source tasks being blocked
    ScheduleResult scheduleResult = scheduler.schedule();
    assertTrue(dynamicFilter.isBlocked().isDone());

    // no new probe splits should be scheduled
    assertEquals(scheduleResult.getSplitsScheduled(), 0);
}
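The behavior this test pins down matters to the consumer side: a probe that waits on isBlocked() must not stall forever when the build source cannot make progress. A hedged sketch of how a consumer might wait on a DynamicFilter like the one created above (the helper name is illustrative, and the single-wait shape is a simplification of what real connectors do):

import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.DynamicFilter;
import io.trino.spi.predicate.TupleDomain;

public final class DynamicFilterWaitSketch
{
    private DynamicFilterWaitSketch() {}

    public static TupleDomain<ColumnHandle> currentPredicateAfterOneWait(DynamicFilter dynamicFilter)
            throws Exception
    {
        if (!dynamicFilter.isComplete() && dynamicFilter.isAwaitable()) {
            // completes when more domains are collected, or when waiting no longer makes sense
            // (as in the test above, where the build-side source itself is blocked)
            dynamicFilter.isBlocked().get();
        }
        return dynamicFilter.getCurrentPredicate();
    }
}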