
Example 11 with PlanFragment

Use of io.trino.sql.planner.PlanFragment in project trino by trinodb.

From the class TestSourcePartitionedScheduler, the method testNewTaskScheduledWhenChildStageBufferIsUnderutilized:

@Test
public void testNewTaskScheduledWhenChildStageBufferIsUnderutilized() {
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    // use private node manager so we can add a node later
    InMemoryNodeManager nodeManager = new InMemoryNodeManager();
    nodeManager.addNode(
            CONNECTOR_ID,
            new InternalNode("other1", URI.create("http://127.0.0.1:11"), NodeVersion.UNKNOWN, false),
            new InternalNode("other2", URI.create("http://127.0.0.1:12"), NodeVersion.UNKNOWN, false),
            new InternalNode("other3", URI.create("http://127.0.0.1:13"), NodeVersion.UNKNOWN, false));
    NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory(
            nodeManager,
            new NodeSchedulerConfig().setIncludeCoordinator(false),
            nodeTaskMap,
            new Duration(0, SECONDS)));
    PlanFragment plan = createFragment();
    StageExecution stage = createStageExecution(plan, nodeTaskMap);
    // simulate an underutilized child output buffer: the supplier passed as the last argument returns false
    StageScheduler scheduler = newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(500, TestingSplit::createRemoteSplit)),
            new DynamicSplitPlacementPolicy(nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)), stage::getAllTasks),
            500,
            new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig()),
            new TableExecuteContextManager(),
            () -> false);
    // the queues of 3 running nodes should be full
    ScheduleResult scheduleResult = scheduler.schedule();
    assertEquals(scheduleResult.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(scheduleResult.getNewTasks().size(), 3);
    assertEquals(scheduleResult.getSplitsScheduled(), 300);
    for (RemoteTask remoteTask : scheduleResult.getNewTasks()) {
        PartitionedSplitsInfo splitsInfo = remoteTask.getPartitionedSplitsInfo();
        assertEquals(splitsInfo.getCount(), 100);
    }
    // new node added - the pending splits should go to it since the child tasks are not blocked
    nodeManager.addNode(CONNECTOR_ID, new InternalNode("other4", URI.create("http://127.0.0.4:14"), NodeVersion.UNKNOWN, false));
    scheduleResult = scheduler.schedule();
    // the split queues are still full, but creation of a new source task is not blocked
    assertEquals(scheduleResult.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(scheduleResult.getNewTasks().size(), 1);
    assertEquals(scheduleResult.getSplitsScheduled(), 100);
}
Also used : NodeTaskMap(io.trino.execution.NodeTaskMap) PipelinedStageExecution.createPipelinedStageExecution(io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution) PartitionedSplitsInfo(io.trino.execution.PartitionedSplitsInfo) MockRemoteTask(io.trino.execution.MockRemoteTaskFactory.MockRemoteTask) RemoteTask(io.trino.execution.RemoteTask) Duration(io.airlift.units.Duration) PlanFragment(io.trino.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.trino.split.ConnectorAwareSplitSource) InMemoryNodeManager(io.trino.metadata.InMemoryNodeManager) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) TableExecuteContextManager(io.trino.execution.TableExecuteContextManager) InternalNode(io.trino.metadata.InternalNode) DynamicFilterService(io.trino.server.DynamicFilterService) TestingSplit(io.trino.testing.TestingSplit) DynamicFilterConfig(io.trino.execution.DynamicFilterConfig) Test(org.testng.annotations.Test)
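
Note: the only functional differences between this test and the overutilized variant in Example 14 below are the split count and the boolean supplier passed as the last argument to newSourcePartitionedSchedulerAsStageScheduler (() -> false here, () -> true there). A minimal sketch of how that shared setup could be factored into a helper, assuming the same test-class fields (metadata, functionManager, typeOperators, session, TABLE_SCAN_NODE_ID, CONNECTOR_ID) are in scope and that the last parameter is a plain java.util.function.BooleanSupplier, as the lambdas suggest; the helper and parameter names are hypothetical:

private StageScheduler createSchedulerWithChildBufferState(StageExecution stage, NodeScheduler nodeScheduler, int splitCount, BooleanSupplier anySourceTaskBlocked) {
    // () -> false models underutilized child output buffers, () -> true models overutilized ones
    return newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(splitCount, TestingSplit::createRemoteSplit)),
            new DynamicSplitPlacementPolicy(nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)), stage::getAllTasks),
            splitCount,
            new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig()),
            new TableExecuteContextManager(),
            anySourceTaskBlocked);
}

With such a helper, the call above would reduce to createSchedulerWithChildBufferState(stage, nodeScheduler, 500, () -> false).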

Example 12 with PlanFragment

Use of io.trino.sql.planner.PlanFragment in project trino by trinodb.

From the class TestSourcePartitionedScheduler, the helper method createStageExecution:

private StageExecution createStageExecution(PlanFragment fragment, NodeTaskMap nodeTaskMap) {
    StageId stageId = new StageId(QUERY_ID, 0);
    SqlStage stage = SqlStage.createSqlStage(
            stageId,
            fragment,
            ImmutableMap.of(TABLE_SCAN_NODE_ID, new TableInfo(new QualifiedObjectName("test", "test", "test"), TupleDomain.all())),
            new MockRemoteTaskFactory(queryExecutor, scheduledExecutor),
            TEST_SESSION,
            true,
            nodeTaskMap,
            queryExecutor,
            new SplitSchedulerStats());
    ImmutableMap.Builder<PlanFragmentId, OutputBufferManager> outputBuffers = ImmutableMap.builder();
    outputBuffers.put(fragment.getId(), new PartitionedOutputBufferManager(FIXED_HASH_DISTRIBUTION, 1));
    fragment.getRemoteSourceNodes().stream()
            .flatMap(node -> node.getSourceFragmentIds().stream())
            .forEach(fragmentId -> outputBuffers.put(fragmentId, new PartitionedOutputBufferManager(FIXED_HASH_DISTRIBUTION, 10)));
    return createPipelinedStageExecution(stage, outputBuffers.buildOrThrow(), TaskLifecycleListener.NO_OP, new NoOpFailureDetector(), queryExecutor, Optional.of(new int[] { 0 }), 0);
}
Also used : PlanFragment(io.trino.sql.planner.PlanFragment) PartitionedSplitsInfo(io.trino.execution.PartitionedSplitsInfo) FIXED_HASH_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.FIXED_HASH_DISTRIBUTION) Test(org.testng.annotations.Test) TypeOperators(io.trino.spi.type.TypeOperators) TrinoExceptionAssert.assertTrinoExceptionThrownBy(io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) RemoteSourceNode(io.trino.sql.planner.plan.RemoteSourceNode) CatalogName(io.trino.connector.CatalogName) FinalizerService(io.trino.util.FinalizerService) ConnectorPartitionHandle(io.trino.spi.connector.ConnectorPartitionHandle) DynamicFilters(io.trino.sql.DynamicFilters) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) TEST_SESSION(io.trino.SessionTestUtils.TEST_SESSION) Assert.assertFalse(org.testng.Assert.assertFalse) TableScanNode(io.trino.sql.planner.plan.TableScanNode) SplitsBalancingPolicy(io.trino.execution.scheduler.NodeSchedulerConfig.SplitsBalancingPolicy) NOT_PARTITIONED(io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) InMemoryNodeManager(io.trino.metadata.InMemoryNodeManager) NODE(io.trino.execution.scheduler.NodeSchedulerConfig.SplitsBalancingPolicy.NODE) TestingSession(io.trino.testing.TestingSession) PlanFragmentId(io.trino.sql.planner.plan.PlanFragmentId) Session(io.trino.Session) InternalNodeManager(io.trino.metadata.InternalNodeManager) StageId(io.trino.execution.StageId) ConnectorAwareSplitSource(io.trino.split.ConnectorAwareSplitSource) Supplier(java.util.function.Supplier) REPLICATE(io.trino.sql.planner.plan.ExchangeNode.Type.REPLICATE) StatsAndCosts(io.trino.cost.StatsAndCosts) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) ArrayList(java.util.ArrayList) STAGE(io.trino.execution.scheduler.NodeSchedulerConfig.SplitsBalancingPolicy.STAGE) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TestingSplit(io.trino.testing.TestingSplit) StageExecutionDescriptor.ungroupedExecution(io.trino.operator.StageExecutionDescriptor.ungroupedExecution) NodeVersion(io.trino.client.NodeVersion) MockRemoteTaskFactory(io.trino.execution.MockRemoteTaskFactory) NoOpFailureDetector(io.trino.failuredetector.NoOpFailureDetector) Integer.min(java.lang.Integer.min) AfterClass(org.testng.annotations.AfterClass) SqlStage(io.trino.execution.SqlStage) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) MockRemoteTask(io.trino.execution.MockRemoteTaskFactory.MockRemoteTask) QualifiedObjectName(io.trino.metadata.QualifiedObjectName) QueryId(io.trino.spi.QueryId) NodeTaskMap(io.trino.execution.NodeTaskMap) SymbolAllocator(io.trino.sql.planner.SymbolAllocator) SCHEDULING(io.trino.execution.scheduler.StageExecution.State.SCHEDULING) FilterNode(io.trino.sql.planner.plan.FilterNode) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) URI(java.net.URI) SINGLE_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION) JoinNode(io.trino.sql.planner.plan.JoinNode) DynamicFilterService(io.trino.server.DynamicFilterService) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) 
ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) SPLIT_QUEUES_FULL(io.trino.execution.scheduler.ScheduleResult.BlockedReason.SPLIT_QUEUES_FULL) BeforeClass(org.testng.annotations.BeforeClass) DynamicFilters.createDynamicFilterExpression(io.trino.sql.DynamicFilters.createDynamicFilterExpression) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) TableInfo(io.trino.execution.TableInfo) PLANNED(io.trino.execution.scheduler.StageExecution.State.PLANNED) DynamicFilter(io.trino.spi.connector.DynamicFilter) MetadataManager.createTestMetadataManager(io.trino.metadata.MetadataManager.createTestMetadataManager) Optional(java.util.Optional) NO_NODES_AVAILABLE(io.trino.spi.StandardErrorCode.NO_NODES_AVAILABLE) TestingColumnHandle(io.trino.testing.TestingMetadata.TestingColumnHandle) INNER(io.trino.sql.planner.plan.JoinNode.Type.INNER) Assert.assertEquals(org.testng.Assert.assertEquals) CompletableFuture(java.util.concurrent.CompletableFuture) RetryPolicy(io.trino.operator.RetryPolicy) Partitioning(io.trino.sql.planner.Partitioning) PartitioningScheme(io.trino.sql.planner.PartitioningScheme) FunctionManager.createTestingFunctionManager(io.trino.metadata.FunctionManager.createTestingFunctionManager) ImmutableList(com.google.common.collect.ImmutableList) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) Objects.requireNonNull(java.util.Objects.requireNonNull) DynamicFilterConfig(io.trino.execution.DynamicFilterConfig) ExecutorService(java.util.concurrent.ExecutorService) Symbol(io.trino.sql.planner.Symbol) TableExecuteContextManager(io.trino.execution.TableExecuteContextManager) RemoteTask(io.trino.execution.RemoteTask) TupleDomain(io.trino.spi.predicate.TupleDomain) FunctionManager(io.trino.metadata.FunctionManager) InternalNode(io.trino.metadata.InternalNode) TEST_TABLE_HANDLE(io.trino.testing.TestingHandles.TEST_TABLE_HANDLE) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) PipelinedStageExecution.createPipelinedStageExecution(io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution) SOURCE_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.SOURCE_DISTRIBUTION) Metadata(io.trino.metadata.Metadata) Assert.assertTrue(org.testng.Assert.assertTrue) SECONDS(java.util.concurrent.TimeUnit.SECONDS)
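
The stream pipeline above registers one PartitionedOutputBufferManager per remote source fragment that feeds this stage. As a readability sketch only (no behavioral change, and using only types already imported in this class), the same wiring written with explicit loops would be:

for (RemoteSourceNode remoteSource : fragment.getRemoteSourceNodes()) {
    for (PlanFragmentId sourceFragmentId : remoteSource.getSourceFragmentIds()) {
        // each child fragment writing into this stage gets a partitioned output buffer with 10 partitions
        outputBuffers.put(sourceFragmentId, new PartitionedOutputBufferManager(FIXED_HASH_DISTRIBUTION, 10));
    }
}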

Example 13 with PlanFragment

Use of io.trino.sql.planner.PlanFragment in project trino by trinodb.

From the class TestSourcePartitionedScheduler, the method testScheduleNoSplits:

@Test
public void testScheduleNoSplits() {
    PlanFragment plan = createFragment();
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    StageExecution stage = createStageExecution(plan, nodeTaskMap);
    StageScheduler scheduler = getSourcePartitionedScheduler(createFixedSplitSource(0, TestingSplit::createRemoteSplit), stage, nodeManager, nodeTaskMap, 1, STAGE);
    ScheduleResult scheduleResult = scheduler.schedule();
    assertEquals(scheduleResult.getNewTasks().size(), 1);
    assertEffectivelyFinished(scheduleResult, scheduler);
    stage.abort();
}
Also used : NodeTaskMap(io.trino.execution.NodeTaskMap) PipelinedStageExecution.createPipelinedStageExecution(io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution) PlanFragment(io.trino.sql.planner.PlanFragment) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) Test(org.testng.annotations.Test)

Example 14 with PlanFragment

Use of io.trino.sql.planner.PlanFragment in project trino by trinodb.

From the class TestSourcePartitionedScheduler, the method testNoNewTaskScheduledWhenChildStageBufferIsOverutilized:

@Test
public void testNoNewTaskScheduledWhenChildStageBufferIsOverutilized() {
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    // use private node manager so we can add a node later
    InMemoryNodeManager nodeManager = new InMemoryNodeManager();
    nodeManager.addNode(
            CONNECTOR_ID,
            new InternalNode("other1", URI.create("http://127.0.0.1:11"), NodeVersion.UNKNOWN, false),
            new InternalNode("other2", URI.create("http://127.0.0.1:12"), NodeVersion.UNKNOWN, false),
            new InternalNode("other3", URI.create("http://127.0.0.1:13"), NodeVersion.UNKNOWN, false));
    NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory(
            nodeManager,
            new NodeSchedulerConfig().setIncludeCoordinator(false),
            nodeTaskMap,
            new Duration(0, SECONDS)));
    PlanFragment plan = createFragment();
    StageExecution stage = createStageExecution(plan, nodeTaskMap);
    // simulate an overutilized child output buffer: the supplier passed as the last argument returns true
    StageScheduler scheduler = newSourcePartitionedSchedulerAsStageScheduler(
            stage,
            TABLE_SCAN_NODE_ID,
            new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(400, TestingSplit::createRemoteSplit)),
            new DynamicSplitPlacementPolicy(nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID)), stage::getAllTasks),
            400,
            new DynamicFilterService(metadata, functionManager, typeOperators, new DynamicFilterConfig()),
            new TableExecuteContextManager(),
            () -> true);
    // the queues of 3 running nodes should be full
    ScheduleResult scheduleResult = scheduler.schedule();
    assertEquals(scheduleResult.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(scheduleResult.getNewTasks().size(), 3);
    assertEquals(scheduleResult.getSplitsScheduled(), 300);
    for (RemoteTask remoteTask : scheduleResult.getNewTasks()) {
        PartitionedSplitsInfo splitsInfo = remoteTask.getPartitionedSplitsInfo();
        assertEquals(splitsInfo.getCount(), 100);
    }
    // a new node is added, but a child's output buffer is overutilized, so task creation is locked down and no new tasks or splits are scheduled
    nodeManager.addNode(CONNECTOR_ID, new InternalNode("other4", URI.create("http://127.0.0.4:14"), NodeVersion.UNKNOWN, false));
    scheduleResult = scheduler.schedule();
    assertEquals(scheduleResult.getBlockedReason().get(), SPLIT_QUEUES_FULL);
    assertEquals(scheduleResult.getNewTasks().size(), 0);
    assertEquals(scheduleResult.getSplitsScheduled(), 0);
}
Also used : NodeTaskMap(io.trino.execution.NodeTaskMap) PipelinedStageExecution.createPipelinedStageExecution(io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution) PartitionedSplitsInfo(io.trino.execution.PartitionedSplitsInfo) MockRemoteTask(io.trino.execution.MockRemoteTaskFactory.MockRemoteTask) RemoteTask(io.trino.execution.RemoteTask) Duration(io.airlift.units.Duration) PlanFragment(io.trino.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.trino.split.ConnectorAwareSplitSource) InMemoryNodeManager(io.trino.metadata.InMemoryNodeManager) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) TableExecuteContextManager(io.trino.execution.TableExecuteContextManager) InternalNode(io.trino.metadata.InternalNode) DynamicFilterService(io.trino.server.DynamicFilterService) TestingSplit(io.trino.testing.TestingSplit) DynamicFilterConfig(io.trino.execution.DynamicFilterConfig) Test(org.testng.annotations.Test)

Example 15 with PlanFragment

Use of io.trino.sql.planner.PlanFragment in project trino by trinodb.

From the class TestSourcePartitionedScheduler, the helper method createFragment:

private static PlanFragment createFragment() {
    Symbol symbol = new Symbol("column");
    Symbol buildSymbol = new Symbol("buildColumn");
    // table scan; the number of splits is determined by the split source supplied in each test
    TableScanNode tableScan = TableScanNode.newInstance(
            TABLE_SCAN_NODE_ID,
            TEST_TABLE_HANDLE,
            ImmutableList.of(symbol),
            ImmutableMap.of(symbol, new TestingColumnHandle("column")),
            false,
            Optional.empty());
    FilterNode filterNode = new FilterNode(
            new PlanNodeId("filter_node_id"),
            tableScan,
            createDynamicFilterExpression(TEST_SESSION, createTestMetadataManager(), DYNAMIC_FILTER_ID, VARCHAR, symbol.toSymbolReference()));
    RemoteSourceNode remote = new RemoteSourceNode(
            new PlanNodeId("remote_id"),
            new PlanFragmentId("plan_fragment_id"),
            ImmutableList.of(buildSymbol),
            Optional.empty(),
            REPLICATE,
            RetryPolicy.NONE);
    return new PlanFragment(
            new PlanFragmentId("plan_id"),
            new JoinNode(
                    new PlanNodeId("join_id"),
                    INNER,
                    filterNode,
                    remote,
                    ImmutableList.of(),
                    tableScan.getOutputSymbols(),
                    remote.getOutputSymbols(),
                    false,
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty(),
                    ImmutableMap.of(DYNAMIC_FILTER_ID, buildSymbol),
                    Optional.empty()),
            ImmutableMap.of(symbol, VARCHAR),
            SOURCE_DISTRIBUTION,
            ImmutableList.of(TABLE_SCAN_NODE_ID),
            new PartitioningScheme(Partitioning.create(SINGLE_DISTRIBUTION, ImmutableList.of()), ImmutableList.of(symbol)),
            ungroupedExecution(),
            StatsAndCosts.empty(),
            Optional.empty());
}
Also used : PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) TestingColumnHandle(io.trino.testing.TestingMetadata.TestingColumnHandle) RemoteSourceNode(io.trino.sql.planner.plan.RemoteSourceNode) TableScanNode(io.trino.sql.planner.plan.TableScanNode) Symbol(io.trino.sql.planner.Symbol) JoinNode(io.trino.sql.planner.plan.JoinNode) PartitioningScheme(io.trino.sql.planner.PartitioningScheme) FilterNode(io.trino.sql.planner.plan.FilterNode) PlanFragmentId(io.trino.sql.planner.plan.PlanFragmentId) PlanFragment(io.trino.sql.planner.PlanFragment)
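
createFragment builds a source-distributed fragment whose root is an inner join between a dynamically filtered table scan (the probe side, scheduled via TABLE_SCAN_NODE_ID) and a replicated remote source (the build side). A small sketch of how a test could sanity-check the resulting fragment, using only accessors already exercised elsewhere in this class and assuming PlanFragmentId compares by value, as its use as a map key in createStageExecution suggests:

PlanFragment fragment = createFragment();
// the fragment id is the one passed to the constructor
assertEquals(fragment.getId(), new PlanFragmentId("plan_id"));
// exactly one remote source node (the replicated build side of the join) feeds this fragment
assertEquals(fragment.getRemoteSourceNodes().size(), 1);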

Aggregations

PlanFragment (io.trino.sql.planner.PlanFragment): 41 usages
Test (org.testng.annotations.Test): 22 usages
PlanFragmentId (io.trino.sql.planner.plan.PlanFragmentId): 16 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 12 usages
NodeTaskMap (io.trino.execution.NodeTaskMap): 11 usages
PipelinedStageExecution.createPipelinedStageExecution (io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution): 11 usages
SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler (io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler): 11 usages
PlanUtils.createBroadcastJoinPlanFragment (io.trino.execution.scheduler.policy.PlanUtils.createBroadcastJoinPlanFragment): 11 usages
PlanUtils.createJoinPlanFragment (io.trino.execution.scheduler.policy.PlanUtils.createJoinPlanFragment): 11 usages
PlanUtils.createTableScanPlanFragment (io.trino.execution.scheduler.policy.PlanUtils.createTableScanPlanFragment): 11 usages
Set (java.util.Set): 11 usages
Symbol (io.trino.sql.planner.Symbol): 10 usages
PlanNodeId (io.trino.sql.planner.plan.PlanNodeId): 10 usages
PartitioningScheme (io.trino.sql.planner.PartitioningScheme): 9 usages
MockRemoteTask (io.trino.execution.MockRemoteTaskFactory.MockRemoteTask): 8 usages
PartitionedSplitsInfo (io.trino.execution.PartitionedSplitsInfo): 8 usages
RemoteTask (io.trino.execution.RemoteTask): 8 usages
RemoteSourceNode (io.trino.sql.planner.plan.RemoteSourceNode): 8 usages
JoinNode (io.trino.sql.planner.plan.JoinNode): 7 usages
Duration (io.airlift.units.Duration): 6 usages