Search in sources:

Example 1 with DynamicFilterService

Use of io.trino.server.DynamicFilterService in project trino by trinodb.

In class TestHttpRemoteTask, method testOutboundDynamicFilters.

/**
 * Checks how dynamic filters registered with a {@link DynamicFilterService} are delivered to a
 * remote task created by an {@link HttpRemoteTaskFactory}.
 *
 * <p>The test asserts, in order, that:
 * <ul>
 *   <li>a filter collected before the remote task exists is sent once the task starts;</li>
 *   <li>subsequent split-driven task updates do not resend that filter;</li>
 *   <li>a filter collected later is sent without any further splits being scheduled;</li>
 *   <li>the later update carries only the new filter, not the previously sent one.</li>
 * </ul>
 *
 * <p>The factory and the service are stopped in {@code finally} blocks so that a failed assertion
 * or a timed-out wait does not skip their shutdown.
 *
 * @throws Exception if waiting on the dynamic filter future fails
 */
@Test(timeOut = 30_000)
public void testOutboundDynamicFilters() throws Exception {
    DynamicFilterId filterId1 = new DynamicFilterId("df1");
    DynamicFilterId filterId2 = new DynamicFilterId("df2");
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol1 = symbolAllocator.newSymbol("DF_SYMBOL1", BIGINT);
    Symbol symbol2 = symbolAllocator.newSymbol("DF_SYMBOL2", BIGINT);
    SymbolReference df1 = symbol1.toSymbolReference();
    SymbolReference df2 = symbol2.toSymbolReference();
    ColumnHandle handle1 = new TestingColumnHandle("column1");
    ColumnHandle handle2 = new TestingColumnHandle("column2");
    QueryId queryId = new QueryId("test");
    TestingTaskResource testingTaskResource = new TestingTaskResource(new AtomicLong(System.nanoTime()), FailureScenario.NO_FAILURE);
    DynamicFilterService dynamicFilterService = new DynamicFilterService(
            PLANNER_CONTEXT.getMetadata(),
            PLANNER_CONTEXT.getFunctionManager(),
            new TypeOperators(),
            newDirectExecutorService());
    try {
        dynamicFilterService.registerQuery(
                queryId,
                TEST_SESSION,
                ImmutableSet.of(filterId1, filterId2),
                ImmutableSet.of(filterId1, filterId2),
                ImmutableSet.of());
        dynamicFilterService.stageCannotScheduleMoreTasks(new StageId(queryId, 1), 0, 1);
        DynamicFilter dynamicFilter = dynamicFilterService.createDynamicFilter(
                queryId,
                ImmutableList.of(new DynamicFilters.Descriptor(filterId1, df1), new DynamicFilters.Descriptor(filterId2, df2)),
                ImmutableMap.of(symbol1, handle1, symbol2, handle2),
                symbolAllocator.getTypes());
        // both filter domains below are reported on behalf of the same task
        TaskId collectingTaskId = new TaskId(new StageId(queryId.getId(), 1), 1, 0);

        // make sure initial dynamic filter is collected
        CompletableFuture<?> future = dynamicFilter.isBlocked();
        dynamicFilterService.addTaskDynamicFilters(collectingTaskId, ImmutableMap.of(filterId1, Domain.singleValue(BIGINT, 1L)));
        future.get();
        assertEquals(
                dynamicFilter.getCurrentPredicate(),
                TupleDomain.withColumnDomains(ImmutableMap.of(handle1, Domain.singleValue(BIGINT, 1L))));

        // Create remote task after dynamic filter is created to simulate new nodes joining
        HttpRemoteTaskFactory httpRemoteTaskFactory = createHttpRemoteTaskFactory(testingTaskResource, dynamicFilterService);
        try {
            RemoteTask remoteTask = createRemoteTask(httpRemoteTaskFactory, ImmutableSet.of(filterId1, filterId2));
            testingTaskResource.setInitialTaskInfo(remoteTask.getTaskInfo());
            remoteTask.start();
            assertEventually(new Duration(10, SECONDS), () -> assertEquals(testingTaskResource.getDynamicFiltersSentCounter(), 1L));
            assertEquals(testingTaskResource.getCreateOrUpdateCounter(), 1L);

            // schedule a couple of splits to trigger task updates
            addSplit(remoteTask, testingTaskResource, 1);
            addSplit(remoteTask, testingTaskResource, 2);
            // make sure dynamic filter was sent in task updates only once
            assertEquals(testingTaskResource.getDynamicFiltersSentCounter(), 1L);
            assertEquals(testingTaskResource.getCreateOrUpdateCounter(), 3L);
            assertEquals(
                    testingTaskResource.getLatestDynamicFilterFromCoordinator(),
                    ImmutableMap.of(filterId1, Domain.singleValue(BIGINT, 1L)));

            future = dynamicFilter.isBlocked();
            dynamicFilterService.addTaskDynamicFilters(collectingTaskId, ImmutableMap.of(filterId2, Domain.singleValue(BIGINT, 2L)));
            future.get();
            assertEquals(
                    dynamicFilter.getCurrentPredicate(),
                    TupleDomain.withColumnDomains(ImmutableMap.of(
                            handle1, Domain.singleValue(BIGINT, 1L),
                            handle2, Domain.singleValue(BIGINT, 2L))));

            // dynamic filter should be sent even though there were no further splits scheduled
            assertEventually(new Duration(10, SECONDS), () -> assertEquals(testingTaskResource.getDynamicFiltersSentCounter(), 2L));
            assertEquals(testingTaskResource.getCreateOrUpdateCounter(), 4L);
            // previously sent dynamic filter should not be repeated
            assertEquals(
                    testingTaskResource.getLatestDynamicFilterFromCoordinator(),
                    ImmutableMap.of(filterId2, Domain.singleValue(BIGINT, 2L)));
        } finally {
            // stop the factory even when an assertion above fails
            httpRemoteTaskFactory.stop();
        }
    } finally {
        // stopped last, matching the original shutdown order (factory first, then service)
        dynamicFilterService.stop();
    }
}
Also used : SymbolAllocator(io.trino.sql.planner.SymbolAllocator) TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) TaskId(io.trino.execution.TaskId) DynamicFilter(io.trino.spi.connector.DynamicFilter) Symbol(io.trino.sql.planner.Symbol) SymbolReference(io.trino.sql.tree.SymbolReference) QueryId(io.trino.spi.QueryId) StageId(io.trino.execution.StageId) RemoteTask(io.trino.execution.RemoteTask) Duration(io.airlift.units.Duration) TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) AtomicLong(java.util.concurrent.atomic.AtomicLong) HttpRemoteTaskFactory(io.trino.server.HttpRemoteTaskFactory) DynamicFilterService(io.trino.server.DynamicFilterService) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) TypeOperators(io.trino.spi.type.TypeOperators) Test(org.testng.annotations.Test)

Example 2 with DynamicFilterService

Use of io.trino.server.DynamicFilterService in project trino by trinodb.

In class TestSourcePartitionedScheduler, method testNewTaskScheduledWhenChildStageBufferIsUnderutilized.

/**
 * Schedules 500 fixed splits over three nodes, then registers a fourth node and schedules again.
 *
 * <p>The first pass is expected to create three tasks with 100 splits each and to report
 * {@code SPLIT_QUEUES_FULL}. With the last scheduler argument supplying {@code false}, the second
 * pass is expected to create one more task and place another 100 splits.
 */
@Test
public void testNewTaskScheduledWhenChildStageBufferIsUnderutilized() {
    NodeTaskMap taskMap = new NodeTaskMap(finalizerService);

    // a test-local node manager, so that a node can be registered part-way through
    InMemoryNodeManager localNodeManager = new InMemoryNodeManager();
    InternalNode first = new InternalNode("other1", URI.create("http://127.0.0.1:11"), NodeVersion.UNKNOWN, false);
    InternalNode second = new InternalNode("other2", URI.create("http://127.0.0.1:12"), NodeVersion.UNKNOWN, false);
    InternalNode third = new InternalNode("other3", URI.create("http://127.0.0.1:13"), NodeVersion.UNKNOWN, false);
    localNodeManager.addNode(CONNECTOR_ID, first, second, third);

    NodeSchedulerConfig schedulerConfig = new NodeSchedulerConfig().setIncludeCoordinator(false);
    UniformNodeSelectorFactory selectorFactory = new UniformNodeSelectorFactory(localNodeManager, schedulerConfig, taskMap, new Duration(0, SECONDS));
    NodeScheduler nodeScheduler = new NodeScheduler(selectorFactory);

    PlanFragment fragment = createFragment();
    StageExecution stage = createStageExecution(fragment, taskMap);

    ConnectorAwareSplitSource splits = new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(500, TestingSplit::createRemoteSplit));
    DynamicSplitPlacementPolicy placement = new DynamicSplitPlacementPolicy(
            nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)),
            stage::getAllTasks);
    DynamicFilterService filterService = new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig());
    // the trailing supplier returns false: the child output buffer is treated as under utilized
    StageScheduler scheduler = newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            splits,
            placement,
            500,
            filterService,
            new TableExecuteContextManager(),
            () -> false);

    // first pass: the split queues of the 3 running nodes fill up
    ScheduleResult firstPass = scheduler.schedule();
    assertEquals(firstPass.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(firstPass.getNewTasks().size(), 3);
    assertEquals(firstPass.getSplitsScheduled(), 300);
    for (RemoteTask task : firstPass.getNewTasks()) {
        assertEquals(task.getPartitionedSplitsInfo().getCount(), 100);
    }

    // a node joins - pending splits should be placed on it because the child tasks are not blocked
    InternalNode fourth = new InternalNode("other4", URI.create("http://127.0.0.4:14"), NodeVersion.UNKNOWN, false);
    localNodeManager.addNode(CONNECTOR_ID, fourth);
    ScheduleResult secondPass = scheduler.schedule();

    // the split queue is full again, yet a source task was still created for the new node
    assertEquals(secondPass.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(secondPass.getNewTasks().size(), 1);
    assertEquals(secondPass.getSplitsScheduled(), 100);
}
Also used : NodeTaskMap(io.trino.execution.NodeTaskMap) PipelinedStageExecution.createPipelinedStageExecution(io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution) PartitionedSplitsInfo(io.trino.execution.PartitionedSplitsInfo) MockRemoteTask(io.trino.execution.MockRemoteTaskFactory.MockRemoteTask) RemoteTask(io.trino.execution.RemoteTask) Duration(io.airlift.units.Duration) PlanFragment(io.trino.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.trino.split.ConnectorAwareSplitSource) InMemoryNodeManager(io.trino.metadata.InMemoryNodeManager) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) TableExecuteContextManager(io.trino.execution.TableExecuteContextManager) InternalNode(io.trino.metadata.InternalNode) DynamicFilterService(io.trino.server.DynamicFilterService) TestingSplit(io.trino.testing.TestingSplit) DynamicFilterConfig(io.trino.execution.DynamicFilterConfig) Test(org.testng.annotations.Test)

Example 3 with DynamicFilterService

Use of io.trino.server.DynamicFilterService in project trino by trinodb.

In class TestSourcePartitionedScheduler, method testNoNewTaskScheduledWhenChildStageBufferIsOverutilized.

/**
 * Schedules 400 fixed splits over three nodes, then registers a fourth node and schedules again.
 *
 * <p>The first pass is expected to create three tasks with 100 splits each and to report
 * {@code SPLIT_QUEUES_FULL}. With the last scheduler argument supplying {@code true}, the second
 * pass is expected to create no task and place no split, even though a new node is available.
 */
@Test
public void testNoNewTaskScheduledWhenChildStageBufferIsOverutilized() {
    NodeTaskMap taskMap = new NodeTaskMap(finalizerService);

    // a test-local node manager, so that a node can be registered part-way through
    InMemoryNodeManager localNodeManager = new InMemoryNodeManager();
    InternalNode first = new InternalNode("other1", URI.create("http://127.0.0.1:11"), NodeVersion.UNKNOWN, false);
    InternalNode second = new InternalNode("other2", URI.create("http://127.0.0.1:12"), NodeVersion.UNKNOWN, false);
    InternalNode third = new InternalNode("other3", URI.create("http://127.0.0.1:13"), NodeVersion.UNKNOWN, false);
    localNodeManager.addNode(CONNECTOR_ID, first, second, third);

    NodeSchedulerConfig schedulerConfig = new NodeSchedulerConfig().setIncludeCoordinator(false);
    UniformNodeSelectorFactory selectorFactory = new UniformNodeSelectorFactory(localNodeManager, schedulerConfig, taskMap, new Duration(0, SECONDS));
    NodeScheduler nodeScheduler = new NodeScheduler(selectorFactory);

    PlanFragment fragment = createFragment();
    StageExecution stage = createStageExecution(fragment, taskMap);

    ConnectorAwareSplitSource splits = new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(400, TestingSplit::createRemoteSplit));
    DynamicSplitPlacementPolicy placement = new DynamicSplitPlacementPolicy(
            nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)),
            stage::getAllTasks);
    DynamicFilterService filterService = new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig());
    // the trailing supplier returns true: the child output buffer is treated as over utilized
    StageScheduler scheduler = newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            splits,
            placement,
            400,
            filterService,
            new TableExecuteContextManager(),
            () -> true);

    // first pass: the split queues of the 3 running nodes fill up
    ScheduleResult firstPass = scheduler.schedule();
    assertEquals(firstPass.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(firstPass.getNewTasks().size(), 3);
    assertEquals(firstPass.getSplitsScheduled(), 300);
    for (RemoteTask task : firstPass.getNewTasks()) {
        assertEquals(task.getPartitionedSplitsInfo().getCount(), 100);
    }

    // a node joins, but one child's output buffer is overutilized - so the tasks stay locked down
    InternalNode fourth = new InternalNode("other4", URI.create("http://127.0.0.4:14"), NodeVersion.UNKNOWN, false);
    localNodeManager.addNode(CONNECTOR_ID, fourth);
    ScheduleResult secondPass = scheduler.schedule();

    assertEquals(secondPass.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(secondPass.getNewTasks().size(), 0);
    assertEquals(secondPass.getSplitsScheduled(), 0);
}
Also used : NodeTaskMap(io.trino.execution.NodeTaskMap) PipelinedStageExecution.createPipelinedStageExecution(io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution) PartitionedSplitsInfo(io.trino.execution.PartitionedSplitsInfo) MockRemoteTask(io.trino.execution.MockRemoteTaskFactory.MockRemoteTask) RemoteTask(io.trino.execution.RemoteTask) Duration(io.airlift.units.Duration) PlanFragment(io.trino.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.trino.split.ConnectorAwareSplitSource) InMemoryNodeManager(io.trino.metadata.InMemoryNodeManager) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) TableExecuteContextManager(io.trino.execution.TableExecuteContextManager) InternalNode(io.trino.metadata.InternalNode) DynamicFilterService(io.trino.server.DynamicFilterService) TestingSplit(io.trino.testing.TestingSplit) DynamicFilterConfig(io.trino.execution.DynamicFilterConfig) Test(org.testng.annotations.Test)

Example 4 with DynamicFilterService

Use of io.trino.server.DynamicFilterService in project trino by trinodb.

In class TestSourcePartitionedScheduler, method getSourcePartitionedScheduler.

/**
 * Builds a source-partitioned {@link StageScheduler} for the given stage and split source.
 *
 * <p>The node scheduler is configured to exclude the coordinator, with 20 max splits per node,
 * 0 max pending splits per task, and the supplied balancing policy. A fresh
 * {@link DynamicFilterService} and {@link TableExecuteContextManager} are created for each call,
 * and the final supplier argument always yields {@code false}.
 *
 * @param splitSource connector split source, wrapped with {@code CONNECTOR_ID}
 * @param stage stage whose tasks the placement policy observes
 * @param nodeManager node manager handed to the node selector factory
 * @param nodeTaskMap node-to-task bookkeeping handed to the node selector factory
 * @param splitBatchSize batch size forwarded to the scheduler factory method
 * @param splitsBalancingPolicy balancing policy applied to the node scheduler config
 * @return the scheduler produced by {@code newSourcePartitionedSchedulerAsStageScheduler}
 */
private StageScheduler getSourcePartitionedScheduler(ConnectorSplitSource splitSource, StageExecution stage, InternalNodeManager nodeManager, NodeTaskMap nodeTaskMap, int splitBatchSize, SplitsBalancingPolicy splitsBalancingPolicy) {
    NodeSchedulerConfig config = new NodeSchedulerConfig()
            .setIncludeCoordinator(false)
            .setMaxSplitsPerNode(20)
            .setMaxPendingSplitsPerTask(0)
            .setSplitsBalancingPolicy(splitsBalancingPolicy);
    UniformNodeSelectorFactory selectorFactory = new UniformNodeSelectorFactory(nodeManager, config, nodeTaskMap, new Duration(0, SECONDS));
    NodeScheduler nodeScheduler = new NodeScheduler(selectorFactory);
    SplitPlacementPolicy policy = new DynamicSplitPlacementPolicy(
            nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)),
            stage::getAllTasks);

    ConnectorAwareSplitSource wrappedSource = new ConnectorAwareSplitSource(CONNECTOR_ID, splitSource);
    DynamicFilterService filterService = new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig());
    return newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            wrappedSource,
            policy,
            splitBatchSize,
            filterService,
            new TableExecuteContextManager(),
            () -> false);
}
Also used : TableExecuteContextManager(io.trino.execution.TableExecuteContextManager) Duration(io.airlift.units.Duration) DynamicFilterService(io.trino.server.DynamicFilterService) ConnectorAwareSplitSource(io.trino.split.ConnectorAwareSplitSource) DynamicFilterConfig(io.trino.execution.DynamicFilterConfig)

Example 5 with DynamicFilterService

Use of io.trino.server.DynamicFilterService in project trino by trinodb.

In class TestSourcePartitionedScheduler, method testDynamicFiltersUnblockedOnBlockedBuildSource.

/**
 * Runs a scheduler over a blocked split source with one registered dynamic filter.
 *
 * <p>Asserted sequence: the stage is {@code PLANNED} before {@code start()}; after it the stage
 * has exactly one task and is {@code SCHEDULING}; the dynamic filter's blocked future is not done
 * before {@code schedule()} and is done afterwards; and that schedule call places zero splits.
 */
@Test
public void testDynamicFiltersUnblockedOnBlockedBuildSource() {
    PlanFragment fragment = createFragment();
    NodeTaskMap taskMap = new NodeTaskMap(finalizerService);
    StageExecution stage = createStageExecution(fragment, taskMap);

    NodeSchedulerConfig schedulerConfig = new NodeSchedulerConfig().setIncludeCoordinator(false);
    NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory(nodeManager, schedulerConfig, taskMap));

    DynamicFilterService filterService = new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig());
    filterService.registerQuery(
            QUERY_ID,
            TEST_SESSION,
            ImmutableSet.of(DYNAMIC_FILTER_ID),
            ImmutableSet.of(DYNAMIC_FILTER_ID),
            ImmutableSet.of(DYNAMIC_FILTER_ID));

    ConnectorAwareSplitSource blockedSplits = new ConnectorAwareSplitSource(CONNECTOR_ID, createBlockedSplitSource());
    DynamicSplitPlacementPolicy placement = new DynamicSplitPlacementPolicy(
            nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)),
            stage::getAllTasks);
    StageScheduler scheduler = newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            blockedSplits,
            placement,
            2,
            filterService,
            new TableExecuteContextManager(),
            () -> true);

    SymbolAllocator allocator = new SymbolAllocator();
    Symbol probeSymbol = allocator.newSymbol("DF_SYMBOL1", BIGINT);
    DynamicFilter filter = filterService.createDynamicFilter(
            QUERY_ID,
            ImmutableList.of(new DynamicFilters.Descriptor(DYNAMIC_FILTER_ID, probeSymbol.toSymbolReference())),
            ImmutableMap.of(probeSymbol, new TestingColumnHandle("probeColumnA")),
            allocator.getTypes());

    // starting the scheduler must immediately create the dynamic filter collecting task
    assertEquals(stage.getState(), PLANNED);
    scheduler.start();
    assertEquals(stage.getAllTasks().size(), 1);
    assertEquals(stage.getState(), SCHEDULING);

    // before any scheduling round the dynamic filter is still blocked
    assertFalse(filter.isBlocked().isDone());

    // scheduling against the blocked build side source tasks must unblock the dynamic filter
    ScheduleResult result = scheduler.schedule();
    assertTrue(filter.isBlocked().isDone());

    // and no new probe splits may have been scheduled
    assertEquals(result.getSplitsScheduled(), 0);
}
Also used : SymbolAllocator(io.trino.sql.planner.SymbolAllocator) NodeTaskMap(io.trino.execution.NodeTaskMap) PipelinedStageExecution.createPipelinedStageExecution(io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution) DynamicFilter(io.trino.spi.connector.DynamicFilter) Symbol(io.trino.sql.planner.Symbol) PlanFragment(io.trino.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.trino.split.ConnectorAwareSplitSource) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) TestingColumnHandle(io.trino.testing.TestingMetadata.TestingColumnHandle) TableExecuteContextManager(io.trino.execution.TableExecuteContextManager) DynamicFilterService(io.trino.server.DynamicFilterService) DynamicFilterConfig(io.trino.execution.DynamicFilterConfig) Test(org.testng.annotations.Test)

Aggregations

DynamicFilterService (io.trino.server.DynamicFilterService)6 Duration (io.airlift.units.Duration)5 Test (org.testng.annotations.Test)5 DynamicFilterConfig (io.trino.execution.DynamicFilterConfig)4 RemoteTask (io.trino.execution.RemoteTask)4 TableExecuteContextManager (io.trino.execution.TableExecuteContextManager)4 ConnectorAwareSplitSource (io.trino.split.ConnectorAwareSplitSource)4 NodeTaskMap (io.trino.execution.NodeTaskMap)3 PipelinedStageExecution.createPipelinedStageExecution (io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution)3 SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler (io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler)3 DynamicFilter (io.trino.spi.connector.DynamicFilter)3 PlanFragment (io.trino.sql.planner.PlanFragment)3 Symbol (io.trino.sql.planner.Symbol)3 SymbolAllocator (io.trino.sql.planner.SymbolAllocator)3 MockRemoteTask (io.trino.execution.MockRemoteTaskFactory.MockRemoteTask)2 PartitionedSplitsInfo (io.trino.execution.PartitionedSplitsInfo)2 StageId (io.trino.execution.StageId)2 InMemoryNodeManager (io.trino.metadata.InMemoryNodeManager)2 InternalNode (io.trino.metadata.InternalNode)2 HttpRemoteTaskFactory (io.trino.server.HttpRemoteTaskFactory)2