Use of org.apache.hadoop.yarn.api.records.ContainerStatus in the Apache Hadoop project.
Class MockNM, method nodeHeartbeat.
/**
 * Sends a node heartbeat that reports a single container in the given state.
 *
 * <p>The reported status is built through {@code BuilderUtils} from the
 * container id derived from {@code attemptId} and {@code containerId}, the
 * supplied state, the literal text {@code "Success"}, the value {@code 0} and
 * a resource created from this node's {@code memory} and {@code vCores}. The
 * status is logged and then handed to the list-based {@code nodeHeartbeat}
 * overload together with an empty container list, {@code true}, and the
 * pre-incremented {@code responseId}.
 *
 * @param attemptId application attempt the container belongs to
 * @param containerId numeric id of the container within the attempt
 * @param containerState state to report for the container
 * @return the response produced by the delegated heartbeat call
 * @throws Exception if the delegated heartbeat call fails
 */
public NodeHeartbeatResponse nodeHeartbeat(ApplicationAttemptId attemptId, long containerId, ContainerState containerState) throws Exception {
  final ContainerStatus reported = BuilderUtils.newContainerStatus(
      BuilderUtils.newContainerId(attemptId, containerId),
      containerState,
      "Success",
      0,
      BuilderUtils.newResource(memory, vCores));
  Log.getLog().info("ContainerStatus: " + reported);

  // The heartbeat carries exactly one status, hence the initial capacity of 1.
  final ArrayList<ContainerStatus> statuses = new ArrayList<>(1);
  statuses.add(reported);

  // responseId is advanced as part of the call, exactly once per heartbeat.
  return nodeHeartbeat(statuses, Collections.<Container>emptyList(), true, ++responseId);
}
Use of org.apache.hadoop.yarn.api.records.ContainerStatus in the Apache Hadoop project.
Class TestApplicationCleanup, method testContainerCleanup.
/**
 * Exercises container cleanup after the AM releases a container.
 *
 * <p>The test starts a {@code MockRM} backed by a {@code DrainDispatcher},
 * registers one node, submits an application, obtains two containers and
 * releases the first one. It then covers two situations, each followed by
 * {@code waitForContainerCleanup}:
 * <ol>
 *   <li>the NM reports the released container as RUNNING in the same
 *       heartbeat in which the RM asks for its cleanup;</li>
 *   <li>the NM reports the container as RUNNING again after the RM has
 *       already handed out the cleanup.</li>
 * </ol>
 *
 * <p>The RM is stopped in a {@code finally} block so that a failed assertion
 * or an exception does not leave the started RM running.
 *
 * @throws Exception if any RM/NM/AM interaction fails
 */
@SuppressWarnings("resource")
@Test
public void testContainerCleanup() throws Exception {
  Logger rootLogger = LogManager.getRootLogger();
  rootLogger.setLevel(Level.DEBUG);
  final DrainDispatcher dispatcher = new DrainDispatcher();
  // Swap in the drainable dispatcher so the test can wait for event processing.
  MockRM rm = new MockRM() {
    @Override
    protected Dispatcher createDispatcher() {
      return dispatcher;
    }
  };
  rm.start();
  try {
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5000);
    RMApp app = rm.submitApp(2000);
    //kick the scheduling
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();
    //request for containers
    int request = 2;
    am.allocate("127.0.0.1", 1000, request, new ArrayList<ContainerId>());
    dispatcher.await();
    //kick the scheduler
    nm1.nodeHeartbeat(true);
    List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    int contReceived = conts.size();
    int waitCount = 0;
    // Poll (at most 200 times, 100 ms apart) until both containers arrive.
    // Note that conts always holds only the most recent allocation batch.
    while (contReceived < request && waitCount++ < 200) {
      LOG.info("Got " + contReceived + " containers. Waiting to get " + request);
      Thread.sleep(100);
      conts = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
      dispatcher.await();
      contReceived += conts.size();
      nm1.nodeHeartbeat(true);
    }
    Assert.assertEquals(request, contReceived);
    // Release a container.
    ArrayList<ContainerId> release = new ArrayList<>();
    release.add(conts.get(0).getId());
    am.allocate(new ArrayList<ResourceRequest>(), release);
    dispatcher.await();
    // Send one more heartbeat with a fake running container. This is to
    // simulate the situation that can happen if the NM reports that container
    // is running in the same heartbeat when the RM asks it to clean it up.
    Map<ApplicationId, List<ContainerStatus>> containerStatuses = new HashMap<>();
    ArrayList<ContainerStatus> containerStatusList = new ArrayList<>();
    containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0).getId(), ContainerState.RUNNING, "nothing", 0, conts.get(0).getResource()));
    containerStatuses.put(app.getApplicationId(), containerStatusList);
    NodeHeartbeatResponse resp = nm1.nodeHeartbeat(containerStatuses, true);
    waitForContainerCleanup(dispatcher, nm1, resp);
    // Now to test the case when RM already gave cleanup, and NM suddenly
    // realizes that the container is running.
    LOG.info("Testing container launch much after release and " + "NM getting cleanup");
    containerStatuses.clear();
    containerStatusList.clear();
    containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0).getId(), ContainerState.RUNNING, "nothing", 0, conts.get(0).getResource()));
    containerStatuses.put(app.getApplicationId(), containerStatusList);
    resp = nm1.nodeHeartbeat(containerStatuses, true);
    // The cleanup list won't be instantaneous as it is given out by scheduler
    // and not RMNodeImpl.
    waitForContainerCleanup(dispatcher, nm1, resp);
  } finally {
    // Always stop the RM, even when an assertion above fails.
    rm.stop();
  }
}
Use of org.apache.hadoop.yarn.api.records.ContainerStatus in the Apache Hadoop project.
Class AbstractYarnScheduler, method nodeUpdate.
/**
 * Process a heartbeat update from a node.
 *
 * <p>In order, this: applies newly reported container information, processes
 * the completed containers, publishes a resource update event when the node
 * is in the DECOMMISSIONING state, and finally refreshes the scheduler health
 * information and the node's resource utilization.
 *
 * @param nm The RMNode corresponding to the NodeManager
 */
protected void nodeUpdate(RMNode nm) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + getClusterResource());
  }
  // Process new container information
  List<ContainerStatus> completedContainers = updateNewContainerInfo(nm);
  // Process completed containers
  Resource releasedResources = Resource.newInstance(0, 0);
  int releasedContainers = updateCompletedContainers(completedContainers, releasedResources, nm.getNodeID());
  // For a decommissioning node, publish a resource update whose resource
  // option is built from the resource currently allocated on the node.
  if (nm.getState() == NodeState.DECOMMISSIONING) {
    SchedulerNode decommissioningNode = getSchedulerNode(nm.getNodeID());
    // NOTE(review): assumes the lookup can return null when the node is no
    // longer tracked by the scheduler - confirm. Skipping the event in that
    // case avoids a NullPointerException in the heartbeat path.
    if (decommissioningNode != null) {
      this.rmContext.getDispatcher().getEventHandler().handle(new RMNodeResourceUpdateEvent(nm.getNodeID(), ResourceOption.newInstance(decommissioningNode.getAllocatedResource(), 0)));
    }
  }
  updateSchedulerHealthInformation(releasedResources, releasedContainers);
  updateNodeResourceUtilization(nm);
  // Now node data structures are up-to-date and ready for scheduling.
  if (LOG.isDebugEnabled()) {
    SchedulerNode node = getNode(nm.getNodeID());
    // A debug statement must never fail the update, so tolerate a missing node.
    LOG.debug("Node being looked for scheduling " + nm + " availableResource: " + (node == null ? "unknown (node not tracked)" : node.getUnallocatedResource()));
  }
}
Use of org.apache.hadoop.yarn.api.records.ContainerStatus in the Apache Hadoop project.
Class AbstractYarnScheduler, method updateNewContainerInfo.
/**
 * Pulls the pending container updates from a node and applies them.
 *
 * <p>Every pulled update is split into its newly launched and its completed
 * containers. Launched containers are passed to
 * {@code containerLaunchedOnNode}; afterwards the node's newly increased
 * containers are pulled and passed to {@code containerIncreasedOnNode}.
 * Completed containers are not processed here; they are only collected.
 *
 * @param nm The RMNode corresponding to the NodeManager
 * @return the completed containers gathered from all pulled updates
 */
protected List<ContainerStatus> updateNewContainerInfo(RMNode nm) {
  SchedulerNode schedulerNode = getNode(nm.getNodeID());
  List<UpdatedContainerInfo> pendingUpdates = nm.pullContainerUpdates();

  // Flatten all pulled updates into one "completed" and one "launched" list.
  List<ContainerStatus> finished = new ArrayList<>();
  List<ContainerStatus> launched = new ArrayList<>();
  for (UpdatedContainerInfo update : pendingUpdates) {
    launched.addAll(update.getNewlyLaunchedContainers());
    finished.addAll(update.getCompletedContainers());
  }

  // Report each launched container against the scheduler node.
  for (ContainerStatus status : launched) {
    containerLaunchedOnNode(status.getContainerId(), schedulerNode);
  }

  // Containers whose increase was reported by the node are handled last.
  List<Container> increasedContainers = nm.pullNewlyIncreasedContainers();
  for (Container increased : increasedContainers) {
    containerIncreasedOnNode(increased.getId(), schedulerNode, increased);
  }

  return finished;
}
Use of org.apache.hadoop.yarn.api.records.ContainerStatus in the Apache Hadoop project.
Class TestRMAppLogAggregationStatus, method testLogAggregationStatus.
/**
 * Drives an application running on two nodes through a sequence of log
 * aggregation reports and checks the per-node reports held by the app after
 * each step:
 * <ol>
 *   <li>both nodes start as NOT_START;</li>
 *   <li>both nodes report RUNNING with a diagnostic message;</li>
 *   <li>node1 reports RUNNING again and its messages are joined with a
 *       newline, while node2 is unchanged;</li>
 *   <li>after the app is killed and the configured 1500 ms have passed, both
 *       nodes are TIME_OUT;</li>
 *   <li>node1 reports ten RUNNING messages followed by SUCCEEDED;</li>
 *   <li>node2 reports RUNNING_WITH_FAILURE followed by FAILED, after which
 *       only node2's report remains and the app-level status is FAILED.</li>
 * </ol>
 *
 * @throws Exception if event handling or the sleep is interrupted
 */
@Test
public void testLogAggregationStatus() throws Exception {
  YarnConfiguration conf = new YarnConfiguration();
  conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
  conf.setLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS, 1500);
  RMApp rmApp = createRMApp(conf);
  this.rmContext.getRMApps().put(appId, rmApp);
  rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.START));
  rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_NEW_SAVED));
  rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_ACCEPTED));
  // This application will be running on two nodes
  NodeId nodeId1 = NodeId.newInstance("localhost", 1234);
  Resource capability = Resource.newInstance(4096, 4);
  RMNodeImpl node1 = new RMNodeImpl(nodeId1, rmContext, null, 0, 0, null, capability, null);
  node1.handle(new RMNodeStartedEvent(nodeId1, null, null));
  rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId1));
  NodeId nodeId2 = NodeId.newInstance("localhost", 2345);
  RMNodeImpl node2 = new RMNodeImpl(nodeId2, rmContext, null, 0, 0, null, capability, null);
  node2.handle(new RMNodeStartedEvent(node2.getNodeID(), null, null));
  rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId2));
  // The initial log aggregation status for these two nodes
  // should be NOT_STARTED
  Map<NodeId, LogAggregationReport> logAggregationStatus = rmApp.getLogAggregationReportsForApp();
  Assert.assertEquals(2, logAggregationStatus.size());
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
  for (LogAggregationReport report : logAggregationStatus.values()) {
    Assert.assertEquals(LogAggregationStatus.NOT_START, report.getLogAggregationStatus());
  }
  List<LogAggregationReport> node1ReportForApp = new ArrayList<>();
  String messageForNode1_1 = "node1 logAggregation status updated at " + System.currentTimeMillis();
  LogAggregationReport report1 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_1);
  node1ReportForApp.add(report1);
  NodeStatus nodeStatus1 = NodeStatus.newInstance(node1.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
  node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp));
  List<LogAggregationReport> node2ReportForApp = new ArrayList<>();
  String messageForNode2_1 = "node2 logAggregation status updated at " + System.currentTimeMillis();
  LogAggregationReport report2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode2_1);
  node2ReportForApp.add(report2);
  NodeStatus nodeStatus2 = NodeStatus.newInstance(node2.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
  node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp));
  // node1 and node2 have updated their log aggregation status;
  // verify that the log aggregation status for node1 and node2
  // has been changed
  logAggregationStatus = rmApp.getLogAggregationReportsForApp();
  Assert.assertEquals(2, logAggregationStatus.size());
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
  for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
    if (report.getKey().equals(node1.getNodeID())) {
      Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
      Assert.assertEquals(messageForNode1_1, report.getValue().getDiagnosticMessage());
    } else if (report.getKey().equals(node2.getNodeID())) {
      Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
      Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
    } else {
      // should not contain log aggregation report for other nodes
      Assert.fail("should not contain log aggregation report for other nodes");
    }
  }
  // node1 updates its log aggregation status again
  List<LogAggregationReport> node1ReportForApp2 = new ArrayList<>();
  String messageForNode1_2 = "node1 logAggregation status updated at " + System.currentTimeMillis();
  LogAggregationReport report1_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_2);
  node1ReportForApp2.add(report1_2);
  node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp2));
  // verify that the log aggregation status for node1
  // has been changed
  // verify that the log aggregation status for node2
  // does not change
  logAggregationStatus = rmApp.getLogAggregationReportsForApp();
  Assert.assertEquals(2, logAggregationStatus.size());
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
  for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
    if (report.getKey().equals(node1.getNodeID())) {
      Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
      // Successive diagnostic messages from one node are joined by a newline.
      Assert.assertEquals(messageForNode1_1 + "\n" + messageForNode1_2, report.getValue().getDiagnosticMessage());
    } else if (report.getKey().equals(node2.getNodeID())) {
      Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
      Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
    } else {
      // should not contain log aggregation report for other nodes
      Assert.fail("should not contain log aggregation report for other nodes");
    }
  }
  // kill the application
  rmApp.handle(new RMAppEvent(appId, RMAppEventType.KILL));
  rmApp.handle(new RMAppEvent(appId, RMAppEventType.ATTEMPT_KILLED));
  rmApp.handle(new RMAppEvent(appId, RMAppEventType.APP_UPDATE_SAVED));
  Assert.assertEquals(RMAppState.KILLED, rmApp.getState());
  // wait for 1500 ms
  // NOTE(review): the sleep equals the configured time-out exactly; if the
  // time-out comparison is strict this may be timing-sensitive - confirm.
  Thread.sleep(1500);
  // the log aggregation status for both nodes should be changed
  // to TIME_OUT
  logAggregationStatus = rmApp.getLogAggregationReportsForApp();
  Assert.assertEquals(2, logAggregationStatus.size());
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
  for (LogAggregationReport report : logAggregationStatus.values()) {
    Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getLogAggregationStatus());
  }
  // Finally, node1 finished its log aggregation and sent out its final
  // log aggregation status. The log aggregation status for node1 should
  // be changed from TIME_OUT to SUCCEEDED
  List<LogAggregationReport> node1ReportForApp3 = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    node1ReportForApp3.add(LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, "test_message_" + i));
  }
  node1ReportForApp3.add(LogAggregationReport.newInstance(appId, LogAggregationStatus.SUCCEEDED, ""));
  // For every logAggregationReport cached in memory, we can only save at most
  // 10 diagnostic messages/failure messages
  node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp3));
  logAggregationStatus = rmApp.getLogAggregationReportsForApp();
  Assert.assertEquals(2, logAggregationStatus.size());
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
  Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
  for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
    if (report.getKey().equals(node1.getNodeID())) {
      Assert.assertEquals(LogAggregationStatus.SUCCEEDED, report.getValue().getLogAggregationStatus());
      // Expected diagnostics: test_message_0 .. test_message_9, newline-separated,
      // with no trailing newline.
      StringBuilder builder = new StringBuilder();
      for (int i = 0; i < 9; i++) {
        builder.append("test_message_").append(i).append("\n");
      }
      builder.append("test_message_").append(9);
      Assert.assertEquals(builder.toString(), report.getValue().getDiagnosticMessage());
    } else if (report.getKey().equals(node2.getNodeID())) {
      Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue().getLogAggregationStatus());
    } else {
      // should not contain log aggregation report for other nodes
      Assert.fail("should not contain log aggregation report for other nodes");
    }
  }
  // update log aggregationStatus for node2 as FAILED,
  // so the log aggregation status for the App will become FAILED,
  // and we only keep the log aggregation reports whose status is FAILED,
  // so the log aggregation report for node1 will be removed.
  List<LogAggregationReport> node2ReportForApp2 = new ArrayList<>();
  LogAggregationReport report2_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING_WITH_FAILURE, "Fail_Message");
  LogAggregationReport report2_3 = LogAggregationReport.newInstance(appId, LogAggregationStatus.FAILED, "");
  node2ReportForApp2.add(report2_2);
  node2ReportForApp2.add(report2_3);
  node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp2));
  Assert.assertEquals(LogAggregationStatus.FAILED, rmApp.getLogAggregationStatusForAppReport());
  logAggregationStatus = rmApp.getLogAggregationReportsForApp();
  // assertEquals/assertFalse report the actual value on failure, unlike
  // assertTrue over a boolean expression.
  Assert.assertEquals(1, logAggregationStatus.size());
  Assert.assertTrue(logAggregationStatus.containsKey(node2.getNodeID()));
  Assert.assertFalse(logAggregationStatus.containsKey(node1.getNodeID()));
  Assert.assertEquals("Fail_Message", ((RMAppImpl) rmApp).getLogAggregationFailureMessagesForNM(nodeId2));
}
Aggregations