Search in sources :

Example 6 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class TestRMNodeTransitions method testContainerUpdate.

/**
 * Verifies which completed containers end up in {@code completedContainers}
 * after node status events are handled: first a single event on {@code node},
 * then two consecutive events on a second, locally created node.
 */
@Test(timeout = 5000)
public void testContainerUpdate() throws InterruptedException {
    // Start the shared node, then create and start a second node.
    node.handle(new RMNodeStartedEvent(null, null, null));
    NodeId secondNodeId = BuilderUtils.newNodeId("localhost:1", 1);
    RMNodeImpl secondNode = new RMNodeImpl(secondNodeId, rmContext, null, 0, 0, null, null, null);
    secondNode.handle(new RMNodeStartedEvent(null, null, null));

    // One container id for the first node, two (same attempt) for the second.
    ApplicationId firstApp = BuilderUtils.newApplicationId(0, 0);
    ApplicationId secondApp = BuilderUtils.newApplicationId(1, 1);
    ContainerId doneOnNode1 = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(firstApp, 0), 0);
    ContainerId firstDoneOnNode2 = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(secondApp, 1), 1);
    ContainerId secondDoneOnNode2 = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(secondApp, 1), 2);

    // Both applications must be registered in the RM context.
    rmContext.getRMApps().put(firstApp, mock(RMApp.class));
    rmContext.getRMApps().put(secondApp, mock(RMApp.class));

    // Mocked status events and the container statuses they will carry.
    RMNodeStatusEvent eventForNode1 = getMockRMNodeStatusEvent(null);
    RMNodeStatusEvent firstEventForNode2 = getMockRMNodeStatusEvent(null);
    RMNodeStatusEvent secondEventForNode2 = getMockRMNodeStatusEvent(null);
    ContainerStatus statusForNode1 = mock(ContainerStatus.class);
    ContainerStatus firstStatusForNode2 = mock(ContainerStatus.class);
    ContainerStatus secondStatusForNode2 = mock(ContainerStatus.class);

    // First node: a single status event yields exactly one completed container.
    doReturn(doneOnNode1).when(statusForNode1).getContainerId();
    doReturn(Collections.singletonList(statusForNode1)).when(eventForNode1).getContainers();
    node.handle(eventForNode1);
    Assert.assertEquals(1, completedContainers.size());
    Assert.assertEquals(doneOnNode1, completedContainers.get(0).getContainerId());
    completedContainers.clear();

    // Second node: two status events, each carrying one container status.
    doReturn(firstDoneOnNode2).when(firstStatusForNode2).getContainerId();
    doReturn(Collections.singletonList(firstStatusForNode2)).when(firstEventForNode2).getContainers();
    doReturn(secondDoneOnNode2).when(secondStatusForNode2).getContainerId();
    doReturn(Collections.singletonList(secondStatusForNode2)).when(secondEventForNode2).getContainers();

    // NOTE(review): presumably setNextHeartBeat(false) holds back the update
    // for the first event so both containers are delivered together once it
    // is switched back to true - confirm against RMNodeImpl.
    secondNode.setNextHeartBeat(false);
    secondNode.handle(firstEventForNode2);
    secondNode.setNextHeartBeat(true);
    secondNode.handle(secondEventForNode2);

    // Both containers are expected, in the order they were reported.
    Assert.assertEquals(2, completedContainers.size());
    Assert.assertEquals(firstDoneOnNode2, completedContainers.get(0).getContainerId());
    Assert.assertEquals(secondDoneOnNode2, completedContainers.get(1).getContainerId());
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 7 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class TestRMNodeTransitions method testExpiredContainer.

/**
 * Expires a container on a started node, then reports that same container
 * in a status event and checks how many times the scheduler was invoked.
 */
@Test(timeout = 5000)
public void testExpiredContainer() {
    // Bring the node up; the scheduler should have been handed an event for it.
    node.handle(new RMNodeStartedEvent(null, null, null));
    verify(scheduler).handle(any(NodeAddedSchedulerEvent.class));

    // Mark one container for clean-up (i.e. expire it).
    ContainerId expiredContainerId = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(0, 0), 0), 0);
    node.handle(new RMNodeCleanContainerEvent(null, expiredContainerId));
    Assert.assertEquals(1, node.getContainersToCleanUp().size());

    // Report the expired container in a status event; the scheduler must not
    // be notified about it as a completed container.
    RMNodeStatusEvent heartbeat = getMockRMNodeStatusEvent(null);
    ContainerStatus reportedStatus = mock(ContainerStatus.class);
    doReturn(expiredContainerId).when(reportedStatus).getContainerId();
    doReturn(Collections.singletonList(reportedStatus)).when(heartbeat).getContainers();
    node.handle(heartbeat);

    // The scheduler's handle method is expected to have been called twice:
    //   1. for the add-node event when the node went from NEW to RUNNING,
    //   2. for the node update event triggered by the status event above.
    // NOTE(review): this count relies on any(Class) also matching the
    // add-node event - confirm against the Mockito version in use.
    verify(scheduler, times(2)).handle(any(NodeUpdateSchedulerEvent.class));
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RMNodeCleanContainerEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent) Test(org.junit.Test)

Example 8 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class TestRMNodeTransitions method getRebootedNode.

/**
 * Builds a node on {@code localhost:0}, starts it and then sends it a
 * REBOOTING event, asserting the RUNNING and REBOOTED states along the way.
 *
 * @return the node, in the {@code REBOOTED} state
 */
private RMNodeImpl getRebootedNode() {
    NodeId rebootedId = BuilderUtils.newNodeId("localhost", 0);
    RMNodeImpl rebooted = new RMNodeImpl(rebootedId, rmContext, null, 0, 0, null, Resource.newInstance(4096, 4), null);

    // NEW -> RUNNING
    rebooted.handle(new RMNodeStartedEvent(rebooted.getNodeID(), null, null));
    Assert.assertEquals(NodeState.RUNNING, rebooted.getState());

    // RUNNING -> REBOOTED
    rebooted.handle(new RMNodeEvent(rebooted.getNodeID(), RMNodeEventType.REBOOTING));
    Assert.assertEquals(NodeState.REBOOTED, rebooted.getState());

    return rebooted;
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent)

Example 9 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class MockRM method sendNodeStarted.

/**
 * Looks up the RM-side node registered for the given mock node manager,
 * delivers a node-started event to it, and then drains pending events.
 *
 * @param nm the mock node manager whose node should be started
 * @throws Exception declared for callers; see {@code drainEventsImplicitly()}
 */
public void sendNodeStarted(MockNM nm) throws Exception {
    RMNodeImpl rmNode = (RMNodeImpl) getRMContext().getRMNodes().get(nm.getNodeId());
    RMNodeStartedEvent startedEvent = new RMNodeStartedEvent(nm.getNodeId(), null, null);
    rmNode.handle(startedEvent);
    drainEventsImplicitly();
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)

Example 10 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class TestRMAppLogAggregationStatus method testLogAggregationStatus.

/**
 * Drives an application running on two nodes through a sequence of log
 * aggregation reports and checks the per-node reports kept by the app:
 * initial NOT_START, RUNNING with accumulated diagnostics, TIME_OUT after the
 * app is killed and the configured timeout elapses, SUCCEEDED for node1, and
 * finally FAILED for node2 (after which only the failed report is kept).
 *
 * @throws Exception if any step of the scenario fails unexpectedly
 */
@Test
public void testLogAggregationStatus() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
    conf.setLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS, 1500);
    RMApp rmApp = createRMApp(conf);
    this.rmContext.getRMApps().put(appId, rmApp);
    rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.START));
    rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_NEW_SAVED));
    rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_ACCEPTED));
    // This application will be running on two nodes
    NodeId nodeId1 = NodeId.newInstance("localhost", 1234);
    Resource capability = Resource.newInstance(4096, 4);
    RMNodeImpl node1 = new RMNodeImpl(nodeId1, rmContext, null, 0, 0, null, capability, null);
    node1.handle(new RMNodeStartedEvent(nodeId1, null, null));
    rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId1));
    NodeId nodeId2 = NodeId.newInstance("localhost", 2345);
    RMNodeImpl node2 = new RMNodeImpl(nodeId2, rmContext, null, 0, 0, null, capability, null);
    node2.handle(new RMNodeStartedEvent(node2.getNodeID(), null, null));
    rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId2));
    // The initial log aggregation status for these two nodes
    // should be NOT_START
    Map<NodeId, LogAggregationReport> logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    assertReportsForBothNodes(logAggregationStatus, nodeId1, nodeId2);
    assertAllReportsHaveStatus(LogAggregationStatus.NOT_START, logAggregationStatus);
    List<LogAggregationReport> node1ReportForApp = new ArrayList<LogAggregationReport>();
    String messageForNode1_1 = "node1 logAggregation status updated at " + System.currentTimeMillis();
    LogAggregationReport report1 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_1);
    node1ReportForApp.add(report1);
    NodeStatus nodeStatus1 = NodeStatus.newInstance(node1.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
    node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp));
    List<LogAggregationReport> node2ReportForApp = new ArrayList<LogAggregationReport>();
    String messageForNode2_1 = "node2 logAggregation status updated at " + System.currentTimeMillis();
    LogAggregationReport report2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode2_1);
    node2ReportForApp.add(report2);
    NodeStatus nodeStatus2 = NodeStatus.newInstance(node2.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
    node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp));
    // node1 and node2 have updated their log aggregation status;
    // verify that the log aggregation status for node1 and node2
    // has been changed
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    assertReportsForBothNodes(logAggregationStatus, nodeId1, nodeId2);
    for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
        if (report.getKey().equals(node1.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
            Assert.assertEquals(messageForNode1_1, report.getValue().getDiagnosticMessage());
        } else if (report.getKey().equals(node2.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
            Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
        } else {
            // should not contain log aggregation report for other nodes
            Assert.fail("should not contain log aggregation report for other nodes");
        }
    }
    // node1 updates its log aggregation status again
    List<LogAggregationReport> node1ReportForApp2 = new ArrayList<LogAggregationReport>();
    String messageForNode1_2 = "node1 logAggregation status updated at " + System.currentTimeMillis();
    LogAggregationReport report1_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_2);
    node1ReportForApp2.add(report1_2);
    node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp2));
    // verify that the log aggregation status for node1
    // has been changed
    // verify that the log aggregation status for node2
    // does not change
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    assertReportsForBothNodes(logAggregationStatus, nodeId1, nodeId2);
    for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
        if (report.getKey().equals(node1.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
            // diagnostics from successive reports are joined with newlines
            Assert.assertEquals(messageForNode1_1 + "\n" + messageForNode1_2, report.getValue().getDiagnosticMessage());
        } else if (report.getKey().equals(node2.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
            Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
        } else {
            // should not contain log aggregation report for other nodes
            Assert.fail("should not contain log aggregation report for other nodes");
        }
    }
    // kill the application
    rmApp.handle(new RMAppEvent(appId, RMAppEventType.KILL));
    rmApp.handle(new RMAppEvent(appId, RMAppEventType.ATTEMPT_KILLED));
    rmApp.handle(new RMAppEvent(appId, RMAppEventType.APP_UPDATE_SAVED));
    Assert.assertEquals(RMAppState.KILLED, rmApp.getState());
    // wait for 1500 ms, i.e. the LOG_AGGREGATION_STATUS_TIME_OUT_MS set above
    Thread.sleep(1500);
    // the log aggregation status for both nodes should be changed
    // to TIME_OUT
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    assertReportsForBothNodes(logAggregationStatus, nodeId1, nodeId2);
    assertAllReportsHaveStatus(LogAggregationStatus.TIME_OUT, logAggregationStatus);
    // Finally, node1 finished its log aggregation and sent out its final
    // log aggregation status. The log aggregation status for node1 should
    // be changed from TIME_OUT to SUCCEEDED
    List<LogAggregationReport> node1ReportForApp3 = new ArrayList<LogAggregationReport>();
    for (int i = 0; i < 10; i++) {
        node1ReportForApp3.add(LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, "test_message_" + i));
    }
    node1ReportForApp3.add(LogAggregationReport.newInstance(appId, LogAggregationStatus.SUCCEEDED, ""));
    // For every logAggregationReport cached in memory, we can only save at most
    // 10 diagnostic messages/failure messages
    node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp3));
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    assertReportsForBothNodes(logAggregationStatus, nodeId1, nodeId2);
    for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
        if (report.getKey().equals(node1.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.SUCCEEDED, report.getValue().getLogAggregationStatus());
            // expected diagnostics: test_message_0 .. test_message_9, newline-separated
            StringBuilder builder = new StringBuilder();
            for (int i = 0; i < 9; i++) {
                builder.append("test_message_").append(i).append("\n");
            }
            builder.append("test_message_").append(9);
            Assert.assertEquals(builder.toString(), report.getValue().getDiagnosticMessage());
        } else if (report.getKey().equals(node2.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue().getLogAggregationStatus());
        } else {
            // should not contain log aggregation report for other nodes
            Assert.fail("should not contain log aggregation report for other nodes");
        }
    }
    // update log aggregationStatus for node2 as FAILED,
    // so the log aggregation status for the App will become FAILED,
    // and we only keep the log aggregation reports whose status is FAILED,
    // so the log aggregation report for node1 will be removed.
    List<LogAggregationReport> node2ReportForApp2 = new ArrayList<LogAggregationReport>();
    LogAggregationReport report2_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING_WITH_FAILURE, "Fail_Message");
    LogAggregationReport report2_3 = LogAggregationReport.newInstance(appId, LogAggregationStatus.FAILED, "");
    node2ReportForApp2.add(report2_2);
    node2ReportForApp2.add(report2_3);
    node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp2));
    Assert.assertEquals(LogAggregationStatus.FAILED, rmApp.getLogAggregationStatusForAppReport());
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    // assertEquals/assertFalse report the actual value when the check fails
    Assert.assertEquals(1, logAggregationStatus.size());
    Assert.assertTrue(logAggregationStatus.containsKey(node2.getNodeID()));
    Assert.assertFalse(logAggregationStatus.containsKey(node1.getNodeID()));
    Assert.assertEquals("Fail_Message", ((RMAppImpl) rmApp).getLogAggregationFailureMessagesForNM(nodeId2));
}

/**
 * Asserts that the report map holds exactly two entries, one keyed by each
 * of the given node ids.
 */
private static void assertReportsForBothNodes(Map<NodeId, LogAggregationReport> reports, NodeId first, NodeId second) {
    Assert.assertEquals(2, reports.size());
    Assert.assertTrue(reports.containsKey(first));
    Assert.assertTrue(reports.containsKey(second));
}

/**
 * Asserts that every report in the map carries the expected log aggregation status.
 */
private static void assertAllReportsHaveStatus(LogAggregationStatus expected, Map<NodeId, LogAggregationReport> reports) {
    for (LogAggregationReport report : reports.values()) {
        Assert.assertEquals(expected, report.getLogAggregationStatus());
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) RMAppRunningOnNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent) Resource(org.apache.hadoop.yarn.api.records.Resource) ArrayList(java.util.ArrayList) RMAppEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent) RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) LogAggregationReport(org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport) NodeId(org.apache.hadoop.yarn.api.records.NodeId) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) NodeStatus(org.apache.hadoop.yarn.server.api.records.NodeStatus) Test(org.junit.Test)

Aggregations

- RMNodeStartedEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent): 10
- RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl): 7
- Test (org.junit.Test): 6
- ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus): 5
- NodeId (org.apache.hadoop.yarn.api.records.NodeId): 5
- RMNodeStatusEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent): 5
- ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 4
- Resource (org.apache.hadoop.yarn.api.records.Resource): 4
- NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent): 3
- RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 2
- RMNodeEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent): 2
- IOException (java.io.IOException): 1
- ArrayList (java.util.ArrayList): 1
- ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 1
- YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 1
- LogAggregationReport (org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport): 1
- NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus): 1
- NodeHeartbeatResponse (org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse): 1
- RegisterNodeManagerResponse (org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse): 1
- UnRegisterNodeManagerResponse (org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerResponse): 1