Search in sources :

Example 11 with NodeHeartbeatResponse

use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.

the class TestResourceTrackerService method testInvalidNMUnregistration.

/**
 * Drives {@code unRegisterNodeManager} through three scenarios and checks the
 * shutdown / decommissioned NM counts after each one:
 * an id that was never registered, a registered node that heartbeats after the
 * include file has been rewritten with a different host, and a registered node
 * that is unregistered right after such a rewrite without heartbeating.
 */
@Test
public void testInvalidNMUnregistration() throws Exception {
    Configuration conf = new Configuration();
    rm = new MockRM(conf);
    rm.start();
    ResourceTrackerService tracker = rm.getResourceTrackerService();
    // Baseline taken before any node is touched; later checks compare against it.
    int decommissionedBaseline = ClusterMetrics.getMetrics().getNumDecommisionedNMs();

    // Scenario 1: unregister a node id that was never registered.
    UnRegisterNodeManagerRequest unregisterRequest = Records.newRecord(UnRegisterNodeManagerRequest.class);
    unregisterRequest.setNodeId(BuilderUtils.newNodeId("host", 1234));
    tracker.unRegisterNodeManager(unregisterRequest);
    checkShutdownNMCount(rm, 0);
    checkDecommissionedNMCount(rm, 0);

    // Scenario 2:
    //   a. register the node manager
    //   b. rewrite the include file so that it lists a different host
    //   c. heartbeat from the node manager
    //   d. unregister the node manager
    MockNM firstNode = new MockNM("host1:1234", 5120, tracker);
    RegisterNodeManagerResponse firstRegistration = firstNode.registerNode();
    Assert.assertEquals(NodeAction.NORMAL, firstRegistration.getNodeAction());
    int expectedShutdownNMs = ClusterMetrics.getMetrics().getNumShutdownNMs();
    writeToHostsFile("host2");
    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath());
    rm.getNodesListManager().refreshNodes(conf);
    NodeHeartbeatResponse heartbeat = firstNode.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.SHUTDOWN, heartbeat.getNodeAction());
    checkDecommissionedNMCount(rm, decommissionedBaseline);
    unregisterRequest.setNodeId(firstNode.getNodeId());
    tracker.unRegisterNodeManager(unregisterRequest);
    // One more shutdown NM is expected; the decommissioned count stays at the baseline.
    expectedShutdownNMs++;
    checkShutdownNMCount(rm, expectedShutdownNMs);
    checkDecommissionedNMCount(rm, decommissionedBaseline);

    // Scenario 3:
    //   a. register the node manager
    //   b. rewrite the include file so that it lists a different host
    //   c. unregister the node manager (no heartbeat in between)
    MockNM secondNode = new MockNM("host2:1234", 5120, tracker);
    RegisterNodeManagerResponse secondRegistration = secondNode.registerNode();
    Assert.assertEquals(NodeAction.NORMAL, secondRegistration.getNodeAction());
    writeToHostsFile("host1");
    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath());
    rm.getNodesListManager().refreshNodes(conf);
    unregisterRequest.setNodeId(secondNode.getNodeId());
    tracker.unRegisterNodeManager(unregisterRequest);
    expectedShutdownNMs++;
    checkShutdownNMCount(rm, expectedShutdownNMs);
    checkDecommissionedNMCount(rm, decommissionedBaseline);

    rm.stop();
}
Also used : UnRegisterNodeManagerRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerRequest) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) RegisterNodeManagerResponse(org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse) Test(org.junit.Test)

Example 12 with NodeHeartbeatResponse

use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.

the class TestResourceTrackerService method testNMUnregistration.

/**
 * Registers a node, unregisters it, and verifies that the shutdown NM count
 * goes up by one and that the node's next heartbeat is answered with
 * {@code NodeAction.RESYNC}.
 */
@Test
public void testNMUnregistration() throws Exception {
    Configuration conf = new Configuration();
    rm = new MockRM(conf);
    rm.start();
    ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService();
    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    // Baseline taken after registration so the later check is relative to it.
    int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs();
    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    // assertEquals (rather than assertTrue on equals) reports the actual action on failure.
    Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
    UnRegisterNodeManagerRequest request = Records.newRecord(UnRegisterNodeManagerRequest.class);
    request.setNodeId(nm1.getNodeId());
    resourceTrackerService.unRegisterNodeManager(request);
    checkShutdownNMCount(rm, ++shutdownNMsCount);
    // The RM should remove the node after unregistration, hence the next
    // heartbeat from it is answered with a RESYNC command.
    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, nodeHeartbeat.getNodeAction());
}
Also used : UnRegisterNodeManagerRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerRequest) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) Test(org.junit.Test)

Example 13 with NodeHeartbeatResponse

use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.

the class TestSignalContainer method testSignalRequestDeliveryToNM.

/**
 * Allocates {@code request} containers for an application, asks the RM to
 * signal each of them with {@code OUTPUT_THREAD_DUMP}, and verifies that the
 * node manager's heartbeat responses carry the same number of signal requests.
 */
@Test
public void testSignalRequestDeliveryToNM() throws Exception {
    Logger rootLogger = LogManager.getRootLogger();
    rootLogger.setLevel(Level.DEBUG);
    MockRM rm = new MockRM();
    rm.start();
    MockNM nm1 = rm.registerNode("h1:1234", 5000);
    RMApp app = rm.submitApp(2000);
    //kick the scheduling
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();
    //request for containers
    final int request = 2;
    am.allocate("h1", 1000, request, new ArrayList<ContainerId>());
    //kick the scheduler
    nm1.nodeHeartbeat(true);
    // Accumulate containers across allocate calls: they may arrive in separate
    // responses, and every one of them has to be signalled below. Keeping only
    // the last response would drop containers delivered earlier.
    List<Container> conts = new ArrayList<Container>();
    int waitCount = 0;
    while (conts.size() < request && waitCount++ < 200) {
        LOG.info("Got " + conts.size() + " containers. Waiting to get " + request);
        Thread.sleep(100);
        List<Container> allocated = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
        conts.addAll(allocated);
    }
    Assert.assertEquals(request, conts.size());
    for (Container container : conts) {
        rm.signalToContainer(container.getId(), SignalContainerCommand.OUTPUT_THREAD_DUMP);
    }
    // Poll heartbeats until the expected number of signal requests has been seen
    // or the retry budget (200 iterations with a 100 ms sleep each) runs out.
    int signaledConts = 0;
    waitCount = 0;
    while (signaledConts < request && waitCount++ < 200) {
        LOG.info("Waiting to get signalcontainer events.. signaledConts: " + signaledConts);
        NodeHeartbeatResponse resp = nm1.nodeHeartbeat(true);
        List<SignalContainerRequest> contsToSignal = resp.getContainersToSignalList();
        signaledConts += contsToSignal.size();
        Thread.sleep(100);
    }
    // Verify NM receives the expected number of signal container requests.
    Assert.assertEquals(request, signaledConts);
    am.unregisterAppAttempt();
    nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1, ContainerState.COMPLETE);
    rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
    rm.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) SignalContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest) Logger(org.apache.log4j.Logger) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Test(org.junit.Test)

Example 14 with NodeHeartbeatResponse

use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.

the class TestRMNodeTransitions method getMockRMNodeStatusEvent.

/**
 * Builds a mocked {@code RMNodeStatusEvent} of type {@code STATUS_UPDATE}
 * whose health status reports healthy and whose latest response is a mocked
 * {@code NodeHeartbeatResponse}.
 *
 * @param containerStatus container statuses the event should return from
 *        {@code getContainers()}; when {@code null}, that method is left unstubbed
 * @return the configured mock event
 */
private RMNodeStatusEvent getMockRMNodeStatusEvent(List<ContainerStatus> containerStatus) {
    NodeHeartbeatResponse response = mock(NodeHeartbeatResponse.class);
    NodeHealthStatus healthStatus = mock(NodeHealthStatus.class);
    // Boolean.TRUE instead of the deprecated Boolean(boolean) constructor.
    doReturn(Boolean.TRUE).when(healthStatus).getIsNodeHealthy();
    RMNodeStatusEvent event = mock(RMNodeStatusEvent.class);
    doReturn(healthStatus).when(event).getNodeHealthStatus();
    doReturn(response).when(event).getLatestResponse();
    doReturn(RMNodeEventType.STATUS_UPDATE).when(event).getType();
    if (containerStatus != null) {
        doReturn(containerStatus).when(event).getContainers();
    }
    return event;
}
Also used : NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) NodeHealthStatus(org.apache.hadoop.yarn.server.api.records.NodeHealthStatus)

Example 15 with NodeHeartbeatResponse

use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.

the class TestRMNodeTransitions method testForHandlingDuplicatedCompltedContainers.

/**
 * Delivers the same completed-container status event to the node twice and
 * checks that the queue size stays at one, then sends a "finished containers
 * pulled by AM" event and checks that the container shows up in the heartbeat
 * response's to-be-removed list and is gone from the node's completed set.
 */
@Test
public void testForHandlingDuplicatedCompltedContainers() {
    // Bring the node up.
    node.handle(new RMNodeStartedEvent(null, null, null));
    // With next-heartbeat disabled, updates are expected to be queued first.
    node.setNextHeartBeat(false);

    ContainerId finishedId = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(0, 0), 0), 0);
    RMNodeStatusEvent statusUpdate = getMockRMNodeStatusEvent(null);
    ContainerStatus finishedStatus = mock(ContainerStatus.class);
    doReturn(finishedId).when(finishedStatus).getContainerId();
    doReturn(Collections.singletonList(finishedStatus)).when(statusUpdate).getContainers();

    // NOTE(review): exactly one scheduler event is expected before the status
    // update, presumably triggered by the node start above - confirm.
    verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
    node.handle(statusUpdate);
    // Still one: handling the status update did not reach the scheduler.
    verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
    Assert.assertEquals(1, node.getQueueSize());
    Assert.assertEquals(1, node.getCompletedContainers().size());

    // Replaying the same event must not add a second queue entry.
    node.handle(statusUpdate);
    Assert.assertEquals(1, node.getQueueSize());

    // Report the container as pulled by the AM, then build the cleanup response.
    node.handle(new RMNodeFinishedContainersPulledByAMEvent(node.getNodeID(), Collections.singletonList(finishedId)));
    NodeHeartbeatResponse cleanupResponse = Records.newRecord(NodeHeartbeatResponse.class);
    node.updateNodeHeartbeatResponseForCleanup(cleanupResponse);
    Assert.assertEquals(1, cleanupResponse.getContainersToBeRemovedFromNM().size());
    Assert.assertEquals(0, node.getCompletedContainers().size());
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RMNodeFinishedContainersPulledByAMEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent) Test(org.junit.Test)

Aggregations

NodeHeartbeatResponse (org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse): 49
Test (org.junit.Test): 33
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 23
Configuration (org.apache.hadoop.conf.Configuration): 21
RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 16
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 13
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 12
ArrayList (java.util.ArrayList): 10
NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus): 9
MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM): 8
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 7
NodeHeartbeatRequest (org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest): 7
MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore): 7
RMNodeStatusEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent): 7
Container (org.apache.hadoop.yarn.api.records.Container): 6
Resource (org.apache.hadoop.yarn.api.records.Resource): 6
DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher): 6
NodeHealthStatus (org.apache.hadoop.yarn.server.api.records.NodeHealthStatus): 6
ByteBuffer (java.nio.ByteBuffer): 5
ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus): 5