Search in sources :

Example 1 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestNodeScheduler method testTopologyAwareScheduling.

@Test(timeOut = 60 * 1000)
public void testTopologyAwareScheduling() throws Exception {
    NodeTaskMap nodeMap = new NodeTaskMap(finalizerService);
    InMemoryNodeManager memoryNodeManager = new InMemoryNodeManager();
    ImmutableList.Builder<InternalNode> nodeBuilder = ImmutableList.builder();
    nodeBuilder.add(new InternalNode("node1", URI.create("http://host1.rack1:11"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node2", URI.create("http://host2.rack1:12"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node3", URI.create("http://host3.rack2:13"), NodeVersion.UNKNOWN, false));
    ImmutableList<InternalNode> nodes = nodeBuilder.build();
    memoryNodeManager.addNode(CONNECTOR_ID, nodes);
    // contents of taskMap indicate the node-task map for the current stage
    Map<InternalNode, RemoteTask> nodeRemoteTaskHashMap = new HashMap<>();
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(25).setIncludeCoordinator(false).setNetworkTopology("test").setMaxPendingSplitsPerTask(20);
    TestNetworkTopology topology = new TestNetworkTopology();
    NetworkLocationCache locationCache = new NetworkLocationCache(topology) {

        @Override
        public NetworkLocation get(HostAddress host) {
            // Bypass the cache for workers, since we only look them up once and they would all be unresolved otherwise
            if (host.getHostText().startsWith("host")) {
                return topology.locate(host);
            } else {
                return super.get(host);
            }
        }
    };
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, memoryNodeManager, nodeSchedulerConfig, nodeMap);
    NodeSelector selector = nodeScheduler.createNodeSelector(CONNECTOR_ID, false, null);
    // Fill up the nodes with non-local data
    ImmutableSet.Builder<Split> nonRackLocalBuilder = ImmutableSet.builder();
    for (int i = 0; i < (25 + 11) * 3; i++) {
        nonRackLocalBuilder.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("data.other_rack", 1)), Lifespan.taskWide()));
    }
    Set<Split> nonRackLocalSplits = nonRackLocalBuilder.build();
    Multimap<InternalNode, Split> assignments = selector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    int task = 0;
    for (InternalNode node : assignments.keySet()) {
        TaskId taskId = new TaskId("test", 1, task);
        task++;
        MockRemoteTaskFactory.MockRemoteTask remoteTask = remoteTaskFactory.createTableScanTask(taskId, node, ImmutableList.copyOf(assignments.get(node)), nodeMap.createPartitionedSplitCountTracker(node, taskId));
        remoteTask.startSplits(25);
        nodeMap.addTask(node, remoteTask);
        nodeRemoteTaskHashMap.put(node, remoteTask);
    }
    // Continue assigning to fill up part of the queue
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    assignments = selector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    // Check that 3 of the splits were rejected, since they're non-local
    assertEquals(nonRackLocalSplits.size(), 3);
    // Assign rack-local splits
    ImmutableSet.Builder<Split> rackLocalSplits = ImmutableSet.builder();
    HostAddress dataHost1 = HostAddress.fromParts("data.rack1", 1);
    HostAddress dataHost2 = HostAddress.fromParts("data.rack2", 1);
    for (int i = 0; i < 6 * 2; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost1), Lifespan.taskWide()));
    }
    for (int i = 0; i < 6; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost2), Lifespan.taskWide()));
    }
    assignments = selector.computeAssignments(rackLocalSplits.build(), ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    Set<Split> unassigned = Sets.difference(rackLocalSplits.build(), new HashSet<>(assignments.values()));
    // Compute the assignments a second time to account for the fact that some splits may not have been assigned due to asynchronous
    // loading of the NetworkLocationCache
    boolean cacheRefreshed = false;
    while (!cacheRefreshed) {
        cacheRefreshed = true;
        if (locationCache.get(dataHost1).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        if (locationCache.get(dataHost2).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        MILLISECONDS.sleep(10);
    }
    assignments = selector.computeAssignments(unassigned, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    unassigned = Sets.difference(unassigned, new HashSet<>(assignments.values()));
    assertEquals(unassigned.size(), 3);
    int rack1 = 0;
    int rack2 = 0;
    for (Split split : unassigned) {
        String rack = topology.locate(split.getAddresses().get(0)).getSegments().get(0);
        switch(rack) {
            case "rack1":
                rack1++;
                break;
            case "rack2":
                rack2++;
                break;
            default:
                fail();
        }
    }
    assertEquals(rack1, 2);
    assertEquals(rack2, 1);
    // Assign local splits
    ImmutableSet.Builder<Split> localSplits = ImmutableSet.builder();
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host1.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host2.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host3.rack2", 1)), Lifespan.taskWide()));
    assignments = selector.computeAssignments(localSplits.build(), ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    assertEquals(assignments.size(), 3);
    assertEquals(assignments.keySet().size(), 3);
}
Also used : TaskId(io.prestosql.execution.TaskId) HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) HostAddress(io.prestosql.spi.HostAddress) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) ImmutableSet(com.google.common.collect.ImmutableSet) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) NodeTaskMap(io.prestosql.execution.NodeTaskMap) RemoteTask(io.prestosql.execution.RemoteTask) InMemoryNodeManager(io.prestosql.metadata.InMemoryNodeManager) InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) Test(org.testng.annotations.Test)

Example 2 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestNodeScheduler method setUp.

@BeforeMethod
public void setUp() {
    PropertyService.setProperty(HetuConstant.SPLIT_CACHE_MAP_ENABLED, false);
    finalizerService = new FinalizerService();
    nodeTaskMap = new NodeTaskMap(finalizerService);
    nodeManager = new InMemoryNodeManager();
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(20).setIncludeCoordinator(false).setMaxPendingSplitsPerTask(10);
    NodeScheduler nodeScheduler = new NodeScheduler(new LegacyNetworkTopology(), nodeManager, nodeSchedulerConfig, nodeTaskMap);
    // contents of taskMap indicate the node-task map for the current stage
    taskMap = new HashMap<>();
    nodeSelector = nodeScheduler.createNodeSelector(CONNECTOR_ID, false, null);
    remoteTaskExecutor = newCachedThreadPool(daemonThreadsNamed("remoteTaskExecutor-%s"));
    remoteTaskScheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed("remoteTaskScheduledExecutor-%s"));
    finalizerService.start();
}
Also used : FinalizerService(io.prestosql.util.FinalizerService) NodeTaskMap(io.prestosql.execution.NodeTaskMap) InMemoryNodeManager(io.prestosql.metadata.InMemoryNodeManager) BeforeMethod(org.testng.annotations.BeforeMethod)

Example 3 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestSourcePartitionedScheduler method testScheduleSplitsBlock.

@Test
public void testScheduleSplitsBlock() {
    StageExecutionPlan plan = createPlan(createFixedSplitSource(80, TestingSplit::createRemoteSplit));
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    SqlStageExecution stage = createSqlStageExecution(plan, nodeTaskMap);
    StageScheduler scheduler = getSourcePartitionedScheduler(plan, stage, nodeManager, nodeTaskMap, 1);
    // schedule first 60 splits, which will cause the scheduler to block
    for (int i = 0; i <= 60; i++) {
        ScheduleResult scheduleResult = scheduler.schedule();
        assertFalse(scheduleResult.isFinished());
        // blocks at 20 per node
        assertEquals(scheduleResult.getBlocked().isDone(), i != 60);
        // first three splits create new tasks
        assertEquals(scheduleResult.getNewTasks().size(), i < 3 ? 1 : 0);
        assertEquals(stage.getAllTasks().size(), i < 3 ? i + 1 : 3);
        assertPartitionedSplitCount(stage, min(i + 1, 60));
    }
    for (RemoteTask remoteTask : stage.getAllTasks()) {
        assertEquals(remoteTask.getPartitionedSplitCount(), 20);
    }
    // todo rewrite MockRemoteTask to fire a tate transition when splits are cleared, and then validate blocked future completes
    // drop the 20 splits from one node
    ((MockRemoteTask) stage.getAllTasks().get(0)).clearSplits();
    // schedule remaining 20 splits
    for (int i = 0; i < 20; i++) {
        ScheduleResult scheduleResult = scheduler.schedule();
        // finishes when last split is fetched
        if (i == 19) {
            assertEffectivelyFinished(scheduleResult, scheduler);
        } else {
            assertFalse(scheduleResult.isFinished());
        }
        // does not block again
        assertTrue(scheduleResult.getBlocked().isDone());
        // no additional tasks will be created
        assertEquals(scheduleResult.getNewTasks().size(), 0);
        assertEquals(stage.getAllTasks().size(), 3);
        // we dropped 20 splits so start at 40 and count to 60
        assertPartitionedSplitCount(stage, min(i + 41, 60));
    }
    for (RemoteTask remoteTask : stage.getAllTasks()) {
        assertEquals(remoteTask.getPartitionedSplitCount(), 20);
    }
    stage.abort();
}
Also used : NodeTaskMap(io.prestosql.execution.NodeTaskMap) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) MockRemoteTask(io.prestosql.execution.MockRemoteTaskFactory.MockRemoteTask) MockRemoteTask(io.prestosql.execution.MockRemoteTaskFactory.MockRemoteTask) RemoteTask(io.prestosql.execution.RemoteTask) SqlStageExecution(io.prestosql.execution.SqlStageExecution) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.prestosql.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) Test(org.testng.annotations.Test)

Example 4 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestSourcePartitionedScheduler method testScheduleSplitsOneAtATime.

@Test
public void testScheduleSplitsOneAtATime() {
    StageExecutionPlan plan = createPlan(createFixedSplitSource(60, TestingSplit::createRemoteSplit));
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    SqlStageExecution stage = createSqlStageExecution(plan, nodeTaskMap);
    try (StageScheduler scheduler = getSourcePartitionedScheduler(plan, stage, nodeManager, nodeTaskMap, 1)) {
        for (int i = 0; i < 60; i++) {
            ScheduleResult scheduleResult = scheduler.schedule();
            // only finishes when last split is fetched
            if (i == 59) {
                assertEffectivelyFinished(scheduleResult, scheduler);
            } else {
                assertFalse(scheduleResult.isFinished());
            }
            // never blocks
            assertTrue(scheduleResult.getBlocked().isDone());
            // first three splits create new tasks
            assertEquals(scheduleResult.getNewTasks().size(), i < 3 ? 1 : 0);
            assertEquals(stage.getAllTasks().size(), i < 3 ? i + 1 : 3);
            assertPartitionedSplitCount(stage, min(i + 1, 60));
        }
        for (RemoteTask remoteTask : stage.getAllTasks()) {
            assertEquals(remoteTask.getPartitionedSplitCount(), 20);
        }
        stage.abort();
    }
}
Also used : NodeTaskMap(io.prestosql.execution.NodeTaskMap) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) MockRemoteTask(io.prestosql.execution.MockRemoteTaskFactory.MockRemoteTask) RemoteTask(io.prestosql.execution.RemoteTask) SqlStageExecution(io.prestosql.execution.SqlStageExecution) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.prestosql.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) Test(org.testng.annotations.Test)

Example 5 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestSourcePartitionedScheduler method testScheduleNoSplits.

@Test
public void testScheduleNoSplits() {
    StageExecutionPlan plan = createPlan(createFixedSplitSource(0, TestingSplit::createRemoteSplit));
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    SqlStageExecution stage = createSqlStageExecution(plan, nodeTaskMap);
    StageScheduler scheduler = getSourcePartitionedScheduler(plan, stage, nodeManager, nodeTaskMap, 1);
    ScheduleResult scheduleResult = scheduler.schedule();
    assertEquals(scheduleResult.getNewTasks().size(), 1);
    assertEffectivelyFinished(scheduleResult, scheduler);
    stage.abort();
}
Also used : NodeTaskMap(io.prestosql.execution.NodeTaskMap) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) SqlStageExecution(io.prestosql.execution.SqlStageExecution) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.prestosql.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) Test(org.testng.annotations.Test)

Aggregations

NodeTaskMap (io.prestosql.execution.NodeTaskMap)17 SqlStageExecution (io.prestosql.execution.SqlStageExecution)14 Test (org.testng.annotations.Test)13 StageExecutionPlan (io.prestosql.sql.planner.StageExecutionPlan)12 RemoteTask (io.prestosql.execution.RemoteTask)9 InternalNode (io.prestosql.metadata.InternalNode)9 SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler (io.prestosql.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler)8 Split (io.prestosql.metadata.Split)8 PlanNodeId (io.prestosql.spi.plan.PlanNodeId)8 TestingSplit (io.prestosql.testing.TestingSplit)7 HashMap (java.util.HashMap)6 DynamicFilterService (io.prestosql.dynamicfilter.DynamicFilterService)5 MockRemoteTaskFactory (io.prestosql.execution.MockRemoteTaskFactory)5 MockRemoteTask (io.prestosql.execution.MockRemoteTaskFactory.MockRemoteTask)5 SplitKey (io.prestosql.execution.SplitKey)5 SqlStageExecution.createSqlStageExecution (io.prestosql.execution.SqlStageExecution.createSqlStageExecution)5 StageId (io.prestosql.execution.StageId)5 QuerySnapshotManager (io.prestosql.snapshot.QuerySnapshotManager)5 ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)5 Map (java.util.Map)5