
Example 76 with ContainerStatus

use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.

the class MockNM method nodeHeartbeat.

public NodeHeartbeatResponse nodeHeartbeat(ApplicationAttemptId attemptId, long containerId, ContainerState containerState) throws Exception {
    ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
        BuilderUtils.newContainerId(attemptId, containerId), containerState,
        "Success", 0, BuilderUtils.newResource(memory, vCores));
    ArrayList<ContainerStatus> containerStatusList = new ArrayList<ContainerStatus>(1);
    containerStatusList.add(containerStatus);
    Log.getLog().info("ContainerStatus: " + containerStatus);
    return nodeHeartbeat(containerStatusList, Collections.<Container>emptyList(), true, ++responseId);
}
Also used : NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ArrayList(java.util.ArrayList)
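
A typical way to use this helper in a ResourceManager test is to report a specific container of the current attempt as finished, so the next AM allocate cycle sees the completion. The sketch below is illustrative only: it reuses MockRM/MockNM calls that appear in the other examples on this page, and container id 2 is an assumed value from an earlier allocation.

// Hedged usage sketch (not project code): report container 2 of the current
// attempt as COMPLETE; the container id is an assumption for illustration.
ApplicationAttemptId attemptId = app.getCurrentAppAttempt().getAppAttemptId();
nm1.nodeHeartbeat(attemptId, 2, ContainerState.COMPLETE);
// On its next allocate() call the AM can observe the completed container status.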

Example 77 with ContainerStatus

use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.

the class TestApplicationCleanup method testContainerCleanup.

@SuppressWarnings("resource")
@Test
public void testContainerCleanup() throws Exception {
    Logger rootLogger = LogManager.getRootLogger();
    rootLogger.setLevel(Level.DEBUG);
    final DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm = new MockRM() {

        @Override
        protected Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    rm.start();
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5000);
    RMApp app = rm.submitApp(2000);
    //kick the scheduling
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();
    //request for containers
    int request = 2;
    am.allocate("127.0.0.1", 1000, request, new ArrayList<ContainerId>());
    dispatcher.await();
    //kick the scheduler
    nm1.nodeHeartbeat(true);
    List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    int contReceived = conts.size();
    int waitCount = 0;
    while (contReceived < request && waitCount++ < 200) {
        LOG.info("Got " + contReceived + " containers. Waiting to get " + request);
        Thread.sleep(100);
        conts = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
        dispatcher.await();
        contReceived += conts.size();
        nm1.nodeHeartbeat(true);
    }
    Assert.assertEquals(request, contReceived);
    // Release a container.
    ArrayList<ContainerId> release = new ArrayList<ContainerId>();
    release.add(conts.get(0).getId());
    am.allocate(new ArrayList<ResourceRequest>(), release);
    dispatcher.await();
    // Send one more heartbeat with a fake running container. This is to
    // simulate the situation that can happen if the NM reports that container
    // is running in the same heartbeat when the RM asks it to clean it up.
    Map<ApplicationId, List<ContainerStatus>> containerStatuses = new HashMap<ApplicationId, List<ContainerStatus>>();
    ArrayList<ContainerStatus> containerStatusList = new ArrayList<ContainerStatus>();
    containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0).getId(), ContainerState.RUNNING, "nothing", 0, conts.get(0).getResource()));
    containerStatuses.put(app.getApplicationId(), containerStatusList);
    NodeHeartbeatResponse resp = nm1.nodeHeartbeat(containerStatuses, true);
    waitForContainerCleanup(dispatcher, nm1, resp);
    // Now to test the case when RM already gave cleanup, and NM suddenly
    // realizes that the container is running.
    LOG.info("Testing container launch much after release and " + "NM getting cleanup");
    containerStatuses.clear();
    containerStatusList.clear();
    containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0).getId(), ContainerState.RUNNING, "nothing", 0, conts.get(0).getResource()));
    containerStatuses.put(app.getApplicationId(), containerStatusList);
    resp = nm1.nodeHeartbeat(containerStatuses, true);
    // The cleanup list won't be instantaneous as it is given out by scheduler
    // and not RMNodeImpl.
    waitForContainerCleanup(dispatcher, nm1, resp);
    rm.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) Container(org.apache.hadoop.yarn.api.records.Container) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ArrayList(java.util.ArrayList) List(java.util.List) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)
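
The test above relies on a waitForContainerCleanup helper that is not included on this page. Below is a minimal sketch of what such a helper could look like, using only calls visible in these examples (MockNM.nodeHeartbeat(boolean), DrainDispatcher.await(), NodeHeartbeatResponse.getContainersToCleanup()); the retry count and sleep interval are assumptions, not the project's exact values.

// Hedged sketch of the cleanup-wait helper (illustrative, not the project's exact code).
private void waitForContainerCleanup(DrainDispatcher dispatcher, MockNM nm,
    NodeHeartbeatResponse resp) throws Exception {
    int waitCount = 0;
    List<ContainerId> contsToClean = resp.getContainersToCleanup();
    // Keep heartbeating until the RM tells the NM to clean up at least one container.
    while (contsToClean.isEmpty() && waitCount++ < 200) {
        LOG.info("Waiting for containers to be cleaned up, attempt " + waitCount);
        Thread.sleep(100);
        dispatcher.await();
        resp = nm.nodeHeartbeat(true);
        contsToClean = resp.getContainersToCleanup();
    }
    Assert.assertFalse("Timed out waiting for container cleanup", contsToClean.isEmpty());
}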

Example 78 with ContainerStatus

use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.

the class AbstractYarnScheduler method nodeUpdate.

/**
   * Process a heartbeat update from a node.
   * @param nm The RMNode corresponding to the NodeManager
   */
protected void nodeUpdate(RMNode nm) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + getClusterResource());
    }
    // Process new container information
    List<ContainerStatus> completedContainers = updateNewContainerInfo(nm);
    // Process completed containers
    Resource releasedResources = Resource.newInstance(0, 0);
    int releasedContainers = updateCompletedContainers(completedContainers, releasedResources, nm.getNodeID());
    // If the node is decommissioning, shrink its advertised total resource to
    // what is currently allocated, so nothing new is scheduled on it while the
    // update is propagated.
    if (nm.getState() == NodeState.DECOMMISSIONING) {
        this.rmContext.getDispatcher().getEventHandler().handle(
            new RMNodeResourceUpdateEvent(nm.getNodeID(),
                ResourceOption.newInstance(
                    getSchedulerNode(nm.getNodeID()).getAllocatedResource(), 0)));
    }
    updateSchedulerHealthInformation(releasedResources, releasedContainers);
    updateNodeResourceUtilization(nm);
    // Now node data structures are up-to-date and ready for scheduling.
    if (LOG.isDebugEnabled()) {
        SchedulerNode node = getNode(nm.getNodeID());
        LOG.debug("Node being looked for scheduling " + nm + " availableResource: " + node.getUnallocatedResource());
    }
}
Also used : NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) Resource(org.apache.hadoop.yarn.api.records.Resource) RMNodeResourceUpdateEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeResourceUpdateEvent)
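
This nodeUpdate only performs per-heartbeat bookkeeping (completed containers, decommissioning resource capping, health and utilization updates); concrete schedulers such as the capacity and fair schedulers override it and, after calling super, run an allocation pass on the freshly updated node. A minimal sketch of that pattern follows; triggerSchedulingOnNode is a hypothetical stand-in for the scheduler-specific allocation step.

// Hedged sketch: how a concrete scheduler might extend nodeUpdate.
// triggerSchedulingOnNode is a hypothetical helper, not a real Hadoop method.
@Override
protected void nodeUpdate(RMNode rmNode) {
    // Let AbstractYarnScheduler refresh node and container bookkeeping first.
    super.nodeUpdate(rmNode);
    // Then try to place outstanding requests on the freshly updated node.
    triggerSchedulingOnNode(getNode(rmNode.getNodeID()));
}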

Example 79 with ContainerStatus

use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.

the class AbstractYarnScheduler method updateNewContainerInfo.

/**
   * Get lists of new containers from NodeManager and process them.
   * @param nm The RMNode corresponding to the NodeManager
   * @return list of completed containers
   */
protected List<ContainerStatus> updateNewContainerInfo(RMNode nm) {
    SchedulerNode node = getNode(nm.getNodeID());
    List<UpdatedContainerInfo> containerInfoList = nm.pullContainerUpdates();
    List<ContainerStatus> newlyLaunchedContainers = new ArrayList<>();
    List<ContainerStatus> completedContainers = new ArrayList<>();
    for (UpdatedContainerInfo containerInfo : containerInfoList) {
        newlyLaunchedContainers.addAll(containerInfo.getNewlyLaunchedContainers());
        completedContainers.addAll(containerInfo.getCompletedContainers());
    }
    // Processing the newly launched containers
    for (ContainerStatus launchedContainer : newlyLaunchedContainers) {
        containerLaunchedOnNode(launchedContainer.getContainerId(), node);
    }
    // Processing the newly increased containers
    List<Container> newlyIncreasedContainers = nm.pullNewlyIncreasedContainers();
    for (Container container : newlyIncreasedContainers) {
        containerIncreasedOnNode(container.getId(), node, container);
    }
    return completedContainers;
}
Also used : NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) UpdatedContainerInfo(org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo) ArrayList(java.util.ArrayList)
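
The completed-container list returned here is what nodeUpdate (Example 78) passes to updateCompletedContainers. That method is not shown on this page; the sketch below only illustrates the shape of the consumption step, matching the call-site signature, and releaseContainerOnNode is a hypothetical hook standing in for the scheduler's actual release logic.

// Hedged sketch of consuming the completed containers (illustrative only;
// releaseContainerOnNode is an assumed helper, not the project's API).
protected int updateCompletedContainers(List<ContainerStatus> completedContainers,
    Resource releasedResources, NodeId nodeId) {
    int releasedCount = 0;
    for (ContainerStatus completed : completedContainers) {
        ContainerId containerId = completed.getContainerId();
        LOG.debug("Container FINISHED: " + containerId);
        // Hypothetical hook: mark the container released and add its
        // resource back into releasedResources.
        releaseContainerOnNode(containerId, completed, nodeId, releasedResources);
        releasedCount++;
    }
    return releasedCount;
}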

Example 80 with ContainerStatus

use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.

the class TestRMAppLogAggregationStatus method testLogAggregationStatus.

@Test
public void testLogAggregationStatus() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
    conf.setLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS, 1500);
    RMApp rmApp = createRMApp(conf);
    this.rmContext.getRMApps().put(appId, rmApp);
    rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.START));
    rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_NEW_SAVED));
    rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_ACCEPTED));
    // This application will be running on two nodes
    NodeId nodeId1 = NodeId.newInstance("localhost", 1234);
    Resource capability = Resource.newInstance(4096, 4);
    RMNodeImpl node1 = new RMNodeImpl(nodeId1, rmContext, null, 0, 0, null, capability, null);
    node1.handle(new RMNodeStartedEvent(nodeId1, null, null));
    rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId1));
    NodeId nodeId2 = NodeId.newInstance("localhost", 2345);
    RMNodeImpl node2 = new RMNodeImpl(nodeId2, rmContext, null, 0, 0, null, capability, null);
    node2.handle(new RMNodeStartedEvent(node2.getNodeID(), null, null));
    rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId2));
    // The initial log aggregation status for these two nodes
    // should be NOT_STARTED
    Map<NodeId, LogAggregationReport> logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    Assert.assertEquals(2, logAggregationStatus.size());
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
    for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
        Assert.assertEquals(LogAggregationStatus.NOT_START, report.getValue().getLogAggregationStatus());
    }
    List<LogAggregationReport> node1ReportForApp = new ArrayList<LogAggregationReport>();
    String messageForNode1_1 = "node1 logAggregation status updated at " + System.currentTimeMillis();
    LogAggregationReport report1 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_1);
    node1ReportForApp.add(report1);
    NodeStatus nodeStatus1 = NodeStatus.newInstance(node1.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
    node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp));
    List<LogAggregationReport> node2ReportForApp = new ArrayList<LogAggregationReport>();
    String messageForNode2_1 = "node2 logAggregation status updated at " + System.currentTimeMillis();
    LogAggregationReport report2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode2_1);
    node2ReportForApp.add(report2);
    NodeStatus nodeStatus2 = NodeStatus.newInstance(node2.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
    node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp));
    // node1 and node2 have updated their log aggregation status
    // verify that the log aggregation status for node1, node2
    // has been changed
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    Assert.assertEquals(2, logAggregationStatus.size());
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
    for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
        if (report.getKey().equals(node1.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
            Assert.assertEquals(messageForNode1_1, report.getValue().getDiagnosticMessage());
        } else if (report.getKey().equals(node2.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
            Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
        } else {
            // should not contain log aggregation report for other nodes
            Assert.fail("should not contain log aggregation report for other nodes");
        }
    }
    // node1 updates its log aggregation status again
    List<LogAggregationReport> node1ReportForApp2 = new ArrayList<LogAggregationReport>();
    String messageForNode1_2 = "node1 logAggregation status updated at " + System.currentTimeMillis();
    LogAggregationReport report1_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_2);
    node1ReportForApp2.add(report1_2);
    node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp2));
    // verify that the log aggregation status for node1
    // has been changed
    // verify that the log aggregation status for node2
    // does not change
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    Assert.assertEquals(2, logAggregationStatus.size());
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
    for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
        if (report.getKey().equals(node1.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
            Assert.assertEquals(messageForNode1_1 + "\n" + messageForNode1_2, report.getValue().getDiagnosticMessage());
        } else if (report.getKey().equals(node2.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
            Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
        } else {
            // should not contain log aggregation report for other nodes
            Assert.fail("should not contain log aggregation report for other nodes");
        }
    }
    // kill the application
    rmApp.handle(new RMAppEvent(appId, RMAppEventType.KILL));
    rmApp.handle(new RMAppEvent(appId, RMAppEventType.ATTEMPT_KILLED));
    rmApp.handle(new RMAppEvent(appId, RMAppEventType.APP_UPDATE_SAVED));
    Assert.assertEquals(RMAppState.KILLED, rmApp.getState());
    // wait for 1500 ms
    Thread.sleep(1500);
    // the log aggregation status for both nodes should be changed
    // to TIME_OUT
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    Assert.assertEquals(2, logAggregationStatus.size());
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
    for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
        Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue().getLogAggregationStatus());
    }
    // Finally, node1 finished its log aggregation and sent out its final
    // log aggregation status. The log aggregation status for node1 should
    // be changed from TIME_OUT to SUCCEEDED
    List<LogAggregationReport> node1ReportForApp3 = new ArrayList<LogAggregationReport>();
    LogAggregationReport report1_3;
    for (int i = 0; i < 10; i++) {
        report1_3 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, "test_message_" + i);
        node1ReportForApp3.add(report1_3);
    }
    node1ReportForApp3.add(LogAggregationReport.newInstance(appId, LogAggregationStatus.SUCCEEDED, ""));
    // For every logAggregationReport cached in memory, we can only save at most
    // 10 diagnostic messages/failure messages
    node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp3));
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    Assert.assertEquals(2, logAggregationStatus.size());
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
    Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
    for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
        if (report.getKey().equals(node1.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.SUCCEEDED, report.getValue().getLogAggregationStatus());
            StringBuilder builder = new StringBuilder();
            for (int i = 0; i < 9; i++) {
                builder.append("test_message_" + i);
                builder.append("\n");
            }
            builder.append("test_message_" + 9);
            Assert.assertEquals(builder.toString(), report.getValue().getDiagnosticMessage());
        } else if (report.getKey().equals(node2.getNodeID())) {
            Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue().getLogAggregationStatus());
        } else {
            // should not contain log aggregation report for other nodes
            Assert.fail("should not contain log aggregation report for other nodes");
        }
    }
    // update log aggregationStatus for node2 as FAILED,
    // so the log aggregation status for the App will become FAILED,
    // and we only keep the log aggregation reports whose status is FAILED,
    // so the log aggregation report for node1 will be removed.
    List<LogAggregationReport> node2ReportForApp2 = new ArrayList<LogAggregationReport>();
    LogAggregationReport report2_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING_WITH_FAILURE, "Fail_Message");
    LogAggregationReport report2_3 = LogAggregationReport.newInstance(appId, LogAggregationStatus.FAILED, "");
    node2ReportForApp2.add(report2_2);
    node2ReportForApp2.add(report2_3);
    node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp2));
    Assert.assertEquals(LogAggregationStatus.FAILED, rmApp.getLogAggregationStatusForAppReport());
    logAggregationStatus = rmApp.getLogAggregationReportsForApp();
    Assert.assertTrue(logAggregationStatus.size() == 1);
    Assert.assertTrue(logAggregationStatus.containsKey(node2.getNodeID()));
    Assert.assertTrue(!logAggregationStatus.containsKey(node1.getNodeID()));
    Assert.assertEquals("Fail_Message", ((RMAppImpl) rmApp).getLogAggregationFailureMessagesForNM(nodeId2));
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) RMAppRunningOnNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent) Resource(org.apache.hadoop.yarn.api.records.Resource) ArrayList(java.util.ArrayList) RMAppEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent) RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) LogAggregationReport(org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport) NodeId(org.apache.hadoop.yarn.api.records.NodeId) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) NodeStatus(org.apache.hadoop.yarn.server.api.records.NodeStatus) Test(org.junit.Test)
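
Beyond the per-node report map exercised throughout this test, callers usually also want the app-level roll-up from getLogAggregationStatusForAppReport(). Below is a compact, hedged sketch of pushing one more report and reading back both views; every call also appears in the test above, and only the packaging (Collections.singletonList, which needs java.util.Collections) and the message text are illustrative.

// Hedged sketch (illustrative): send a single report for node2 and read back
// both the per-node map and the rolled-up, app-level status.
LogAggregationReport oneReport = LogAggregationReport.newInstance(
    appId, LogAggregationStatus.RUNNING, "illustrative message");
node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null,
    Collections.singletonList(oneReport)));
Map<NodeId, LogAggregationReport> perNode = rmApp.getLogAggregationReportsForApp();
LogAggregationStatus appLevel = rmApp.getLogAggregationStatusForAppReport();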

Aggregations

ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus): 144
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 76
Test (org.junit.Test): 75
ArrayList (java.util.ArrayList): 58
Container (org.apache.hadoop.yarn.api.records.Container): 40
NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus): 28
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 26
HashMap (java.util.HashMap): 25
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 25
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 23
Configuration (org.apache.hadoop.conf.Configuration): 21
ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext): 21
Resource (org.apache.hadoop.yarn.api.records.Resource): 21
GetContainerStatusesRequest (org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest): 20
RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 20
StartContainerRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest): 19
StartContainersRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest): 18
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 18
AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse): 17
RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 14