Search in sources :

Example 26 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class AbstractOperatorBenchmark method createTableScanOperator.

/**
 * Creates an {@link OperatorFactory} that reads the requested columns of a table through a
 * {@link PageSourceOperator}.
 * <p>
 * The table is resolved against the catalog and schema of the benchmark session, and the single
 * split returned by {@code getLocalQuerySplit} is used as the data source for every operator the
 * factory creates.
 *
 * @param operatorId id under which each created operator registers its operator context
 * @param planNodeId plan node id under which each created operator registers its operator context
 * @param tableName unqualified name of the table to scan
 * @param columnNames names of the columns to read, in output order
 * @return a factory producing table-scan operators; {@code duplicate()} is not supported
 * @throws IllegalArgumentException if the session has no catalog or schema, the table does not
 *         exist, or one of the columns is not part of the table
 */
protected final OperatorFactory createTableScanOperator(int operatorId, PlanNodeId planNodeId, String tableName, String... columnNames) {
    checkArgument(session.getCatalog().isPresent(), "catalog not set");
    checkArgument(session.getSchema().isPresent(), "schema not set");

    // resolve the table within the session's catalog and schema
    Metadata metadata = localQueryRunner.getMetadata();
    QualifiedObjectName qualifiedName = new QualifiedObjectName(session.getCatalog().get(), session.getSchema().get(), tableName);
    TableHandle table = metadata.getTableHandle(session, qualifiedName).orElse(null);
    checkArgument(table != null, "Table '%s' does not exist", qualifiedName);

    // resolve each requested column, preserving the caller's ordering
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, table);
    ImmutableList.Builder<ColumnHandle> selected = ImmutableList.builder();
    for (int index = 0; index < columnNames.length; index++) {
        String name = columnNames[index];
        ColumnHandle handle = handlesByName.get(name);
        checkArgument(handle != null, "Table '%s' does not have a column '%s'", tableName, name);
        selected.add(handle);
    }
    List<ColumnHandle> scanColumns = selected.build();

    // the one split that backs this table in a local query
    Split tableSplit = getLocalQuerySplit(session, table);

    return new OperatorFactory() {

        @Override
        public Operator createOperator(DriverContext driverContext) {
            OperatorContext context = driverContext.addOperatorContext(operatorId, planNodeId, "BenchmarkSource");
            ConnectorPageSource source = localQueryRunner.getPageSourceManager()
                    .createPageSource(session, tableSplit, table, scanColumns, DynamicFilter.EMPTY);
            return new PageSourceOperator(source, context);
        }

        @Override
        public void noMoreOperators() {
            // nothing to release
        }

        @Override
        public OperatorFactory duplicate() {
            throw new UnsupportedOperationException();
        }
    };
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) DriverContext(io.trino.operator.DriverContext) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) AggregationMetadata(io.trino.operator.aggregation.AggregationMetadata) Metadata(io.trino.metadata.Metadata) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) QualifiedObjectName(io.trino.metadata.QualifiedObjectName) PageSourceOperator(io.trino.operator.PageSourceOperator) OperatorFactory(io.trino.operator.OperatorFactory) OperatorContext(io.trino.operator.OperatorContext) TableHandle(io.trino.metadata.TableHandle) Split(io.trino.metadata.Split)

Example 27 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class AbstractOperatorBenchmark method getLocalQuerySplit.

/**
 * Drains the split source for the given table and returns its only split.
 *
 * @param session session used to enumerate the splits
 * @param handle table whose splits are requested
 * @return the single split produced for the table
 * @throws IllegalArgumentException if the table yields zero splits or more than one split
 */
private Split getLocalQuerySplit(Session session, TableHandle handle) {
    SplitSource source = localQueryRunner.getSplitManager().getSplits(session, handle, UNGROUPED_SCHEDULING, EMPTY, alwaysTrue());

    // keep pulling batches until the source reports that it is exhausted
    List<Split> collected = new ArrayList<>();
    while (!source.isFinished()) {
        List<Split> batch = getNextBatch(source);
        collected.addAll(batch);
    }

    int splitCount = collected.size();
    checkArgument(splitCount == 1, "Expected only one split for a local query, but got %s splits", splitCount);
    return collected.get(0);
}
Also used : ArrayList(java.util.ArrayList) SplitSource(io.trino.split.SplitSource) Split(io.trino.metadata.Split)

Example 28 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class TestNodeScheduler method testTopologyAwareScheduling.

/**
 * Exercises a node selector created by {@link TopologyAwareNodeSelectorFactory} against three
 * nodes: {@code host1.rack1}, {@code host2.rack1} and {@code host3.rack2}.
 * <p>
 * The test runs in phases, each building on the task state left by the previous one:
 * <ol>
 * <li>splits addressed to {@code data.other_rack} are assigned in two rounds, after which exactly
 * 3 of them must remain unassigned;</li>
 * <li>splits addressed to {@code data.rack1} (12) and {@code data.rack2} (6) are assigned in two
 * rounds, after which 3 must remain unassigned: 2 located in rack1 and 1 in rack2;</li>
 * <li>one split per node host is offered and all three must be assigned, to three distinct nodes.</li>
 * </ol>
 */
@Test(timeOut = 60 * 1000)
public void testTopologyAwareScheduling() {
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    InMemoryNodeManager nodeManager = new InMemoryNodeManager();
    // Two nodes in rack1 and one node in rack2 (rack taken from the host name suffix;
    // NOTE(review): the actual mapping lives in TestNetworkTopology, which is not visible here)
    ImmutableList.Builder<InternalNode> nodeBuilder = ImmutableList.builder();
    nodeBuilder.add(new InternalNode("node1", URI.create("http://host1.rack1:11"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node2", URI.create("http://host2.rack1:12"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node3", URI.create("http://host3.rack2:13"), NodeVersion.UNKNOWN, false));
    ImmutableList<InternalNode> nodes = nodeBuilder.build();
    nodeManager.addNode(CONNECTOR_ID, nodes);
    // contents of taskMap indicate the node-task map for the current stage
    Map<InternalNode, RemoteTask> taskMap = new HashMap<>();
    // Limits under test: 25 splits per node, 20 pending splits per task, coordinator excluded
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(25).setIncludeCoordinator(false).setMaxPendingSplitsPerTask(20);
    TestNetworkTopology topology = new TestNetworkTopology();
    NodeSelectorFactory nodeSelectorFactory = new TopologyAwareNodeSelectorFactory(topology, nodeManager, nodeSchedulerConfig, nodeTaskMap, getNetworkTopologyConfig());
    NodeScheduler nodeScheduler = new NodeScheduler(nodeSelectorFactory);
    NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, Optional.of(CONNECTOR_ID));
    // Fill up the nodes with non-local data
    ImmutableSet.Builder<Split> nonRackLocalBuilder = ImmutableSet.builder();
    // (25 + 11) * 3 = 108 splits; 25 matches maxSplitsPerNode and 3 the node count.
    // NOTE(review): the origin of the 11 is not evident from this method - confirm against the selector
    for (int i = 0; i < (25 + 11) * 3; i++) {
        nonRackLocalBuilder.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("data.other_rack", 1)), Lifespan.taskWide()));
    }
    Set<Split> nonRackLocalSplits = nonRackLocalBuilder.build();
    Multimap<InternalNode, Split> assignments = nodeSelector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(taskMap.values())).getAssignments();
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    // Create one task per node that received splits, and register it in both nodeTaskMap and taskMap
    int task = 0;
    for (InternalNode node : assignments.keySet()) {
        TaskId taskId = new TaskId(new StageId("test", 1), task, 0);
        task++;
        MockRemoteTaskFactory.MockRemoteTask remoteTask = remoteTaskFactory.createTableScanTask(taskId, node, ImmutableList.copyOf(assignments.get(node)), nodeTaskMap.createPartitionedSplitCountTracker(node, taskId));
        // presumably moves 25 of the task's splits from pending to running - confirm in MockRemoteTask
        remoteTask.startSplits(25);
        nodeTaskMap.addTask(node, remoteTask);
        taskMap.put(node, remoteTask);
    }
    // Continue assigning to fill up part of the queue
    // (only the splits that the first round left unassigned are offered again)
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    assignments = nodeSelector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    // Check that 3 of the splits were rejected, since they're non-local
    assertEquals(nonRackLocalSplits.size(), 3);
    // Assign rack-local splits
    // (12 splits addressed to data.rack1 and 6 addressed to data.rack2)
    ImmutableSet.Builder<Split> rackLocalSplits = ImmutableSet.builder();
    HostAddress dataHost1 = HostAddress.fromParts("data.rack1", 1);
    HostAddress dataHost2 = HostAddress.fromParts("data.rack2", 1);
    for (int i = 0; i < 6 * 2; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost1), Lifespan.taskWide()));
    }
    for (int i = 0; i < 6; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost2), Lifespan.taskWide()));
    }
    assignments = nodeSelector.computeAssignments(rackLocalSplits.build(), ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    // Rack-local splits that the first round did not place
    Set<Split> unassigned = Sets.difference(rackLocalSplits.build(), new HashSet<>(assignments.values()));
    // Compute the assignments a second time to account for the fact that some splits may not have been assigned due to asynchronous
    // loading of the NetworkLocationCache
    assignments = nodeSelector.computeAssignments(unassigned, ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    unassigned = Sets.difference(unassigned, new HashSet<>(assignments.values()));
    assertEquals(unassigned.size(), 3);
    // Tally the leftover splits per rack, using the first address of each split and the
    // first segment of the network location the topology reports for it
    int rack1 = 0;
    int rack2 = 0;
    for (Split split : unassigned) {
        String rack = topology.locate(split.getAddresses().get(0)).getSegments().get(0);
        switch(rack) {
            case "rack1":
                rack1++;
                break;
            case "rack2":
                rack2++;
                break;
            default:
                throw new AssertionError("Unexpected rack: " + rack);
        }
    }
    assertEquals(rack1, 2);
    assertEquals(rack2, 1);
    // Assign local splits
    // (one split addressed to the host of each of the three nodes)
    ImmutableSet.Builder<Split> localSplits = ImmutableSet.builder();
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host1.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host2.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host3.rack2", 1)), Lifespan.taskWide()));
    assignments = nodeSelector.computeAssignments(localSplits.build(), ImmutableList.copyOf(taskMap.values())).getAssignments();
    // All three local splits must be placed, and on three distinct nodes
    assertEquals(assignments.size(), 3);
    assertEquals(assignments.keySet().size(), 3);
}
Also used : HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) TopologyAwareNodeSelectorFactory(io.trino.execution.scheduler.TopologyAwareNodeSelectorFactory) NodeSelectorFactory(io.trino.execution.scheduler.NodeSelectorFactory) UniformNodeSelectorFactory(io.trino.execution.scheduler.UniformNodeSelectorFactory) NodeSchedulerConfig(io.trino.execution.scheduler.NodeSchedulerConfig) HostAddress(io.trino.spi.HostAddress) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) ImmutableSet(com.google.common.collect.ImmutableSet) NodeScheduler(io.trino.execution.scheduler.NodeScheduler) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) InMemoryNodeManager(io.trino.metadata.InMemoryNodeManager) InternalNode(io.trino.metadata.InternalNode) NodeSelector(io.trino.execution.scheduler.NodeSelector) UniformNodeSelector(io.trino.execution.scheduler.UniformNodeSelector) Split(io.trino.metadata.Split) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) TopologyAwareNodeSelectorFactory(io.trino.execution.scheduler.TopologyAwareNodeSelectorFactory) Test(org.testng.annotations.Test)

Example 29 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class TestNodeScheduler method testScheduleRemote.

/**
 * Verifies that a single {@code TestSplitRemote} split offered to the node selector results in
 * exactly one assignment.
 */
@Test
public void testScheduleRemote() {
    setUpNodes();

    // a single remote split is the whole workload
    Split remoteSplit = new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide());
    Set<Split> pending = new HashSet<>();
    pending.add(remoteSplit);

    List<RemoteTask> existingTasks = ImmutableList.copyOf(taskMap.values());
    Multimap<InternalNode, Split> assigned = nodeSelector.computeAssignments(pending, existingTasks).getAssignments();

    assertEquals(assigned.size(), 1);
}
Also used : InternalNode(io.trino.metadata.InternalNode) Split(io.trino.metadata.Split) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Test(org.testng.annotations.Test)

Example 30 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class TestNodeScheduler method testMaxSplitsPerNodePerTask.

/**
 * Verifies that a node whose task already holds its full quota of splits receives no further
 * assignments, and that aborting every task leaves that node with zero partitioned splits.
 */
@Test
public void testMaxSplitsPerNodePerTask() {
    setUpNodes();

    // register one extra node next to the ones created by setUpNodes()
    InternalNode newNode = new InternalNode("other4", URI.create("http://10.0.0.1:14"), NodeVersion.UNKNOWN, false);
    nodeManager.addNode(CONNECTOR_ID, newNode);

    // 20 splits handed to every task created below
    ImmutableList.Builder<Split> initialSplitsBuilder = ImmutableList.builder();
    for (int count = 0; count < 20; count++) {
        initialSplitsBuilder.add(new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide()));
    }
    List<Split> initialSplits = initialSplitsBuilder.build();

    List<RemoteTask> createdTasks = new ArrayList<>();
    MockRemoteTaskFactory taskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    for (InternalNode activeNode : nodeManager.getActiveConnectorNodes(CONNECTOR_ID)) {
        // Max out number of splits on node
        TaskId nodeTaskId = new TaskId(new StageId("test", 1), 1, 0);
        RemoteTask nodeTask = taskFactory.createTableScanTask(nodeTaskId, activeNode, initialSplits, nodeTaskMap.createPartitionedSplitCountTracker(activeNode, nodeTaskId));
        nodeTaskMap.addTask(activeNode, nodeTask);
        createdTasks.add(nodeTask);
    }

    // Max out pending splits on new node
    TaskId newNodeTaskId = new TaskId(new StageId("test", 1), 2, 0);
    RemoteTask newNodeTask = taskFactory.createTableScanTask(newNodeTaskId, newNode, initialSplits, nodeTaskMap.createPartitionedSplitCountTracker(newNode, newNodeTaskId));
    taskMap.put(newNode, newNodeTask);
    nodeTaskMap.addTask(newNode, newNodeTask);
    createdTasks.add(newNodeTask);

    // five more splits to place
    Set<Split> extraSplits = new HashSet<>();
    for (int count = 0; count < 5; count++) {
        extraSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide()));
    }
    List<RemoteTask> existingTasks = ImmutableList.copyOf(taskMap.values());
    Multimap<InternalNode, Split> assigned = nodeSelector.computeAssignments(extraSplits, existingTasks).getAssignments();

    // no split should be assigned to the newNode, as it already has
    // maxSplitsPerNode + maxSplitsPerNodePerTask assigned to it
    // Splits should be scheduled on the other three nodes
    assertEquals(assigned.keySet().size(), 3);
    // No splits scheduled on the maxed out node
    assertFalse(assigned.containsKey(newNode));

    createdTasks.forEach(RemoteTask::abort);
    assertEquals(nodeTaskMap.getPartitionedSplitsOnNode(newNode), PartitionedSplitsInfo.forZeroSplits());
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) InternalNode(io.trino.metadata.InternalNode) Split(io.trino.metadata.Split) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Test(org.testng.annotations.Test)

Aggregations

Split (io.trino.metadata.Split)56 Test (org.testng.annotations.Test)32 InternalNode (io.trino.metadata.InternalNode)26 ConnectorSplit (io.trino.spi.connector.ConnectorSplit)24 PlanNodeId (io.trino.sql.planner.plan.PlanNodeId)22 LinkedHashSet (java.util.LinkedHashSet)17 ImmutableList (com.google.common.collect.ImmutableList)16 HashSet (java.util.HashSet)16 CatalogName (io.trino.connector.CatalogName)14 Page (io.trino.spi.Page)12 List (java.util.List)11 Optional (java.util.Optional)11 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)9 ImmutableSet (com.google.common.collect.ImmutableSet)9 ArrayList (java.util.ArrayList)9 Lifespan (io.trino.execution.Lifespan)8 RemoteTask (io.trino.execution.RemoteTask)8 TestingSplit (io.trino.testing.TestingSplit)8 Objects.requireNonNull (java.util.Objects.requireNonNull)8 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)6