Search in sources :

Example 16 with Split

Use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From the class TestNodeScheduler, method testTopologyAwareScheduling.

/**
 * Exercises split assignment through a {@link NodeSelector} created with the "test" network topology.
 *
 * <p>Three worker nodes are registered (two with {@code rack1} host names, one with a {@code rack2}
 * host name). The test then offers splits in four waves and checks how many remain unassigned:
 * <ol>
 *   <li>{@code (25 + 11) * 3} splits addressed to {@code data.other_rack}: after two assignment rounds 3 are left over;</li>
 *   <li>12 splits addressed to {@code data.rack1} and 6 addressed to {@code data.rack2}: 3 are left over
 *       (2 for rack1, 1 for rack2);</li>
 *   <li>one split addressed to each worker host: all 3 are assigned, each to a different node.</li>
 * </ol>
 *
 * @throws Exception declared because the test sleeps while polling the location cache
 */
@Test(timeOut = 60 * 1000)
public void testTopologyAwareScheduling() throws Exception {
    NodeTaskMap nodeMap = new NodeTaskMap(finalizerService);
    InMemoryNodeManager memoryNodeManager = new InMemoryNodeManager();
    // Register three workers: host1 and host2 carry a "rack1" suffix, host3 a "rack2" suffix
    ImmutableList.Builder<InternalNode> nodeBuilder = ImmutableList.builder();
    nodeBuilder.add(new InternalNode("node1", URI.create("http://host1.rack1:11"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node2", URI.create("http://host2.rack1:12"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node3", URI.create("http://host3.rack2:13"), NodeVersion.UNKNOWN, false));
    ImmutableList<InternalNode> nodes = nodeBuilder.build();
    memoryNodeManager.addNode(CONNECTOR_ID, nodes);
    // contents of nodeRemoteTaskHashMap indicate the node-task map for the current stage
    Map<InternalNode, RemoteTask> nodeRemoteTaskHashMap = new HashMap<>();
    // Limits used by the arithmetic below: 25 splits per node, 20 pending splits per task
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(25).setIncludeCoordinator(false).setNetworkTopology("test").setMaxPendingSplitsPerTask(20);
    TestNetworkTopology topology = new TestNetworkTopology();
    // Location cache that resolves worker hosts synchronously and defers every other host to the real cache
    NetworkLocationCache locationCache = new NetworkLocationCache(topology) {

        @Override
        public NetworkLocation get(HostAddress host) {
            // Bypass the cache for workers, since we only look them up once and they would all be unresolved otherwise
            if (host.getHostText().startsWith("host")) {
                return topology.locate(host);
            } else {
                return super.get(host);
            }
        }
    };
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, memoryNodeManager, nodeSchedulerConfig, nodeMap);
    NodeSelector selector = nodeScheduler.createNodeSelector(CONNECTOR_ID, false, null);
    // Fill up the nodes with non-local data
    // (25 + 11) * 3 = 108 splits, all addressed to "data.other_rack"
    // NOTE(review): presumably TestNetworkTopology places that host on a rack shared with no worker - confirm
    ImmutableSet.Builder<Split> nonRackLocalBuilder = ImmutableSet.builder();
    for (int i = 0; i < (25 + 11) * 3; i++) {
        nonRackLocalBuilder.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("data.other_rack", 1)), Lifespan.taskWide()));
    }
    Set<Split> nonRackLocalSplits = nonRackLocalBuilder.build();
    // First round: no tasks exist yet, so the list of existing tasks passed in is empty
    Multimap<InternalNode, Split> assignments = selector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    // Create one mock task per node that received splits, and register it in both task maps
    int task = 0;
    for (InternalNode node : assignments.keySet()) {
        TaskId taskId = new TaskId("test", 1, task);
        task++;
        MockRemoteTaskFactory.MockRemoteTask remoteTask = remoteTaskFactory.createTableScanTask(taskId, node, ImmutableList.copyOf(assignments.get(node)), nodeMap.createPartitionedSplitCountTracker(node, taskId));
        // Start 25 splits on the mock task (same figure as maxSplitsPerNode above)
        remoteTask.startSplits(25);
        nodeMap.addTask(node, remoteTask);
        nodeRemoteTaskHashMap.put(node, remoteTask);
    }
    // Continue assigning to fill up part of the queue
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    assignments = selector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    // Hand the second-round assignments to the tasks created above
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    // Check that 3 of the splits were rejected, since they're non-local
    assertEquals(nonRackLocalSplits.size(), 3);
    // Assign rack-local splits
    // 12 splits are addressed to a rack1 data host and 6 to a rack2 data host
    ImmutableSet.Builder<Split> rackLocalSplits = ImmutableSet.builder();
    HostAddress dataHost1 = HostAddress.fromParts("data.rack1", 1);
    HostAddress dataHost2 = HostAddress.fromParts("data.rack2", 1);
    for (int i = 0; i < 6 * 2; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost1), Lifespan.taskWide()));
    }
    for (int i = 0; i < 6; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost2), Lifespan.taskWide()));
    }
    assignments = selector.computeAssignments(rackLocalSplits.build(), ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    Set<Split> unassigned = Sets.difference(rackLocalSplits.build(), new HashSet<>(assignments.values()));
    // Compute the assignments a second time to account for the fact that some splits may not have been assigned due to asynchronous
    // loading of the NetworkLocationCache
    // Poll every 10 ms until neither data host resolves to ROOT_LOCATION any more; the method-level timeOut bounds this loop
    boolean cacheRefreshed = false;
    while (!cacheRefreshed) {
        cacheRefreshed = true;
        if (locationCache.get(dataHost1).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        if (locationCache.get(dataHost2).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        MILLISECONDS.sleep(10);
    }
    assignments = selector.computeAssignments(unassigned, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    unassigned = Sets.difference(unassigned, new HashSet<>(assignments.values()));
    // Exactly 3 rack-local splits are expected to remain unassigned
    assertEquals(unassigned.size(), 3);
    // Count the leftover splits per rack, using the first segment of each split's resolved location
    int rack1 = 0;
    int rack2 = 0;
    for (Split split : unassigned) {
        String rack = topology.locate(split.getAddresses().get(0)).getSegments().get(0);
        switch(rack) {
            case "rack1":
                rack1++;
                break;
            case "rack2":
                rack2++;
                break;
            default:
                // Any other rack name means a split of unexpected origin was left over
                fail();
        }
    }
    // Expect 2 leftover splits for rack1 and 1 for rack2
    assertEquals(rack1, 2);
    assertEquals(rack2, 1);
    // Assign local splits
    // One split per worker host name; port 1 differs from the workers' own ports (11, 12, 13)
    ImmutableSet.Builder<Split> localSplits = ImmutableSet.builder();
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host1.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host2.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host3.rack2", 1)), Lifespan.taskWide()));
    assignments = selector.computeAssignments(localSplits.build(), ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    // All three local splits must be assigned, and to three distinct nodes
    assertEquals(assignments.size(), 3);
    assertEquals(assignments.keySet().size(), 3);
}
Also used : TaskId(io.prestosql.execution.TaskId) HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) HostAddress(io.prestosql.spi.HostAddress) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) ImmutableSet(com.google.common.collect.ImmutableSet) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) NodeTaskMap(io.prestosql.execution.NodeTaskMap) RemoteTask(io.prestosql.execution.RemoteTask) InMemoryNodeManager(io.prestosql.metadata.InMemoryNodeManager) InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) Test(org.testng.annotations.Test)

Example 17 with Split

Use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From the class TestNodeScheduler, method testScheduleRemote.

/**
 * Offers a single split wrapping a {@code TestSplitRemote} to the shared node selector
 * and expects exactly one resulting assignment entry.
 */
@Test
public void testScheduleRemote() {
    setUpNodes();

    // The only split handed to the selector
    Split loneSplit = new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide());
    Set<Split> pending = new HashSet<>();
    pending.add(loneSplit);

    Multimap<InternalNode, Split> result = nodeSelector
            .computeAssignments(pending, ImmutableList.copyOf(taskMap.values()), Optional.empty())
            .getAssignments();

    assertEquals(result.size(), 1);
}
Also used : InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Test(org.testng.annotations.Test)

Example 18 with Split

Use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From the class TestNodeScheduler, method testRedistributeSplit.

/**
 * Checks that {@code SimpleNodeSelector.redistributeSplit} moves exactly one split from the
 * fuller node to the other node, and that the moved split is a {@code TestSplitRemote}
 * rather than a {@code TestSplitLocal}.
 *
 * <p>Setup: node1 starts with 12 splits (6 {@code TestSplitLocal}, 6 {@code TestSplitRemote}),
 * node2 starts with 10 {@code TestSplitRemote} splits. Both nodes share the host 10.0.0.1.
 */
@Test
public void testRedistributeSplit() {
    InternalNode node1 = new InternalNode("node1", URI.create("http://10.0.0.1:11"), NodeVersion.UNKNOWN, false);
    nodeManager.addNode(CONNECTOR_ID, node1);
    InternalNode node2 = new InternalNode("node2", URI.create("http://10.0.0.1:12"), NodeVersion.UNKNOWN, false);
    nodeManager.addNode(CONNECTOR_ID, node2);
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    Set<Split> splitsAssignedToNode1 = new LinkedHashSet<>();
    // Node1 to be assigned 12 splits out of which 6 are local to it
    for (int i = 0; i < 6; i++) {
        splitsAssignedToNode1.add(new Split(CONNECTOR_ID, new TestSplitLocal(), Lifespan.taskWide()));
        splitsAssignedToNode1.add(new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide()));
    }
    assignment.putAll(node1, splitsAssignedToNode1);
    Set<Split> splitsAssignedToNode2 = new LinkedHashSet<>();
    // Node2 to be assigned 10 splits
    for (int i = 0; i < 10; i++) {
        splitsAssignedToNode2.add(new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide()));
    }
    assignment.putAll(node2, splitsAssignedToNode2);
    assertEquals(assignment.get(node1).size(), 12);
    assertEquals(assignment.get(node2).size(), 10);
    ImmutableSetMultimap.Builder<InetAddress, InternalNode> nodesByHost = ImmutableSetMultimap.builder();
    try {
        nodesByHost.put(InetAddress.getByName(node1.getInternalUri().getHost()), node1);
        nodesByHost.put(InetAddress.getByName(node2.getInternalUri().getHost()), node2);
    } catch (UnknownHostException e) {
        // Fail fast: continuing with an incomplete host map would only surface later as a misleading assertion failure
        throw new AssertionError("Could not convert the address", e);
    }
    // Redistribute 1 split from Node 1 to Node 2
    SimpleNodeSelector.redistributeSplit(assignment, node1, node2, nodesByHost.build());
    assertEquals(assignment.get(node1).size(), 11);
    assertEquals(assignment.get(node2).size(), 11);
    // The split that moved is whatever node2 now holds beyond its original 10
    Set<Split> redistributedSplit = Sets.difference(new HashSet<>(assignment.get(node2)), splitsAssignedToNode2);
    assertEquals(redistributedSplit.size(), 1);
    // Assert that the redistributed split is not a local split in Node 1. This test ensures that redistributeSplit() prioritizes the transfer of a non-local split
    assertTrue(redistributedSplit.iterator().next().getConnectorSplit() instanceof TestSplitRemote);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) UnknownHostException(java.net.UnknownHostException) ImmutableSetMultimap(com.google.common.collect.ImmutableSetMultimap) InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) InetAddress(java.net.InetAddress) Test(org.testng.annotations.Test)

Example 19 with Split

Use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From the class TestNodeScheduler, method testTaskCompletion.

/**
 * Creates a one-split mock task on the first active connector node, then checks that the
 * partitioned split count reported for that node goes from 1 to 0 once the task is aborted,
 * and stays at 0 when the task is aborted a second time.
 *
 * @throws Exception declared because the test sleeps between the abort and the check
 */
@Test
public void testTaskCompletion() throws Exception {
    setUpNodes();
    MockRemoteTaskFactory taskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    InternalNode targetNode = Iterables.get(nodeManager.getActiveConnectorNodes(CONNECTOR_ID), 0);
    TaskId id = new TaskId("test", 1, 1);

    // Single split carried by the task under test
    Split onlySplit = new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide());
    RemoteTask scanTask = taskFactory.createTableScanTask(
            id,
            targetNode,
            ImmutableList.of(onlySplit),
            nodeTaskMap.createPartitionedSplitCountTracker(targetNode, id));
    nodeTaskMap.addTask(targetNode, scanTask);
    assertEquals(nodeTaskMap.getPartitionedSplitsOnNode(targetNode), 1);

    scanTask.abort();
    // Wait for the cache to expire before re-reading the count
    MILLISECONDS.sleep(100);
    assertEquals(nodeTaskMap.getPartitionedSplitsOnNode(targetNode), 0);

    // A second abort must leave the count untouched
    scanTask.abort();
    assertEquals(nodeTaskMap.getPartitionedSplitsOnNode(targetNode), 0);
}
Also used : TaskId(io.prestosql.execution.TaskId) RemoteTask(io.prestosql.execution.RemoteTask) InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) Test(org.testng.annotations.Test)

Example 20 with Split

Use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From the class TestNodeScheduler, method testMaxSplitsPerNodePerTask.

/**
 * Adds a fourth node ("other4"), gives every active node a task with 20 splits, and gives the
 * new node a second 20-split task that is also recorded in {@code taskMap}. Five further splits
 * are then offered to the selector; they are expected to land on three nodes, none of which is
 * the new node. Finally all tasks are aborted and the new node's split count must be 0.
 */
@Test
public void testMaxSplitsPerNodePerTask() {
    setUpNodes();
    InternalNode extraNode = new InternalNode("other4", URI.create("http://10.0.0.1:14"), NodeVersion.UNKNOWN, false);
    nodeManager.addNode(CONNECTOR_ID, extraNode);

    // 20 splits that every task below is seeded with
    ImmutableList.Builder<Split> seedBuilder = ImmutableList.builder();
    for (int remaining = 20; remaining > 0; remaining--) {
        seedBuilder.add(new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide()));
    }
    ImmutableList<Split> seedSplits = seedBuilder.build();

    List<RemoteTask> createdTasks = new ArrayList<>();
    MockRemoteTaskFactory taskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);

    // Max out number of splits on each active node
    for (InternalNode activeNode : nodeManager.getActiveConnectorNodes(CONNECTOR_ID)) {
        TaskId perNodeTaskId = new TaskId("test", 1, 1);
        RemoteTask perNodeTask = taskFactory.createTableScanTask(
                perNodeTaskId,
                activeNode,
                seedSplits,
                nodeTaskMap.createPartitionedSplitCountTracker(activeNode, perNodeTaskId));
        nodeTaskMap.addTask(activeNode, perNodeTask);
        createdTasks.add(perNodeTask);
    }

    // Max out pending splits on the new node with a second task
    TaskId extraTaskId = new TaskId("test", 1, 2);
    RemoteTask extraTask = taskFactory.createTableScanTask(
            extraTaskId,
            extraNode,
            seedSplits,
            nodeTaskMap.createPartitionedSplitCountTracker(extraNode, extraTaskId));
    taskMap.put(extraNode, extraTask);
    nodeTaskMap.addTask(extraNode, extraTask);
    createdTasks.add(extraTask);

    // Five more splits to be placed by the selector
    Set<Split> incoming = new HashSet<>();
    for (int remaining = 5; remaining > 0; remaining--) {
        incoming.add(new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide()));
    }
    Multimap<InternalNode, Split> result = nodeSelector
            .computeAssignments(incoming, ImmutableList.copyOf(taskMap.values()), Optional.empty())
            .getAssignments();

    // The new node already holds maxSplitsPerNode + maxSplitsPerNodePerTask splits, so it must
    // receive nothing; the splits should be scheduled on the other three nodes instead
    Set<InternalNode> assignedNodes = result.keySet();
    assertEquals(assignedNodes.size(), 3);
    assertFalse(assignedNodes.contains(extraNode));

    createdTasks.forEach(RemoteTask::abort);
    assertEquals(nodeTaskMap.getPartitionedSplitsOnNode(extraNode), 0);
}
Also used : TaskId(io.prestosql.execution.TaskId) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) RemoteTask(io.prestosql.execution.RemoteTask) InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Test(org.testng.annotations.Test)

Aggregations

Split (io.prestosql.metadata.Split)69 Test (org.testng.annotations.Test)35 InternalNode (io.prestosql.metadata.InternalNode)34 PlanNodeId (io.prestosql.spi.plan.PlanNodeId)33 TestingSplit (io.prestosql.testing.TestingSplit)32 ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)31 MockSplit (io.prestosql.MockSplit)20 CatalogName (io.prestosql.spi.connector.CatalogName)18 ArrayList (java.util.ArrayList)18 ImmutableList (com.google.common.collect.ImmutableList)17 UUID (java.util.UUID)17 HashMap (java.util.HashMap)15 MockRemoteTaskFactory (io.prestosql.execution.MockRemoteTaskFactory)14 NodeTaskMap (io.prestosql.execution.NodeTaskMap)13 RemoteTask (io.prestosql.execution.RemoteTask)13 QualifiedObjectName (io.prestosql.spi.connector.QualifiedObjectName)13 SqlStageExecution (io.prestosql.execution.SqlStageExecution)12 HashSet (java.util.HashSet)12 Optional (java.util.Optional)12 Lifespan (io.prestosql.execution.Lifespan)11