use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.
the class MockNM method registerNode.
public RegisterNodeManagerResponse registerNode(List<NMContainerStatus> containerReports, List<ApplicationId> runningApplications) throws Exception {
RegisterNodeManagerRequest req = Records.newRecord(RegisterNodeManagerRequest.class);
req.setNodeId(nodeId);
req.setHttpPort(httpPort);
Resource resource = BuilderUtils.newResource(memory, vCores);
req.setResource(resource);
req.setContainerStatuses(containerReports);
req.setNMVersion(version);
req.setRunningApplications(runningApplications);
RegisterNodeManagerResponse registrationResponse = resourceTracker.registerNodeManager(req);
this.currentContainerTokenMasterKey = registrationResponse.getContainerTokenMasterKey();
this.currentNMTokenMasterKey = registrationResponse.getNMTokenMasterKey();
Resource newResource = registrationResponse.getResource();
if (newResource != null) {
memory = (int) newResource.getMemorySize();
vCores = newResource.getVirtualCores();
}
containerStats.clear();
if (containerReports != null) {
for (NMContainerStatus report : containerReports) {
if (report.getContainerState() != ContainerState.COMPLETE) {
containerStats.put(report.getContainerId(), ContainerStatus.newInstance(report.getContainerId(), report.getContainerState(), report.getDiagnostics(), report.getContainerExitStatus()));
}
}
}
return registrationResponse;
}
use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.
the class TestApplicationCleanup method createNMContainerStatusForApp.
public static List<NMContainerStatus> createNMContainerStatusForApp(MockAM am) {
List<NMContainerStatus> list = new ArrayList<NMContainerStatus>();
NMContainerStatus amContainer = createNMContainerStatus(am.getApplicationAttemptId(), 1, ContainerState.RUNNING, 1024);
NMContainerStatus completedContainer = createNMContainerStatus(am.getApplicationAttemptId(), 2, ContainerState.COMPLETE, 2048);
list.add(amContainer);
list.add(completedContainer);
return list;
}
use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.
the class TestAMRestart method testRMRestartOrFailoverNotCountedForAMFailures.
// Test regular RM restart/failover, new RM should not count
// AM failure towards the max-retry-account and should be able to
// re-launch the AM.
@Test(timeout = 50000)
public void testRMRestartOrFailoverNotCountedForAMFailures() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
// explicitly set max-am-retry count as 1.
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app1 = rm1.submitApp(200);
// AM should be restarted even though max-am-attempt is 1.
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
// Restart rm.
MockRM rm2 = new MockRM(conf, memStore);
rm2.start();
ApplicationStateData appState = memStore.getState().getApplicationState().get(app1.getApplicationId());
// re-register the NM
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
NMContainerStatus status = Records.newRecord(NMContainerStatus.class);
status.setContainerExitStatus(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
status.setContainerId(attempt1.getMasterContainer().getId());
status.setContainerState(ContainerState.COMPLETE);
status.setDiagnostics("");
nm1.registerNode(Collections.singletonList(status), null);
rm2.waitForState(attempt1.getAppAttemptId(), RMAppAttemptState.FAILED);
Assert.assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, appState.getAttempt(am1.getApplicationAttemptId()).getAMContainerExitStatus());
// Will automatically start a new AppAttempt in rm2
rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
MockAM am2 = rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
MockRM.finishAMAndVerifyAppState(app1, rm2, nm1, am2);
RMAppAttempt attempt3 = rm2.getRMContext().getRMApps().get(app1.getApplicationId()).getCurrentAppAttempt();
Assert.assertTrue(attempt3.shouldCountTowardsMaxAttemptRetry());
Assert.assertEquals(ContainerExitStatus.INVALID, appState.getAttempt(am2.getApplicationAttemptId()).getAMContainerExitStatus());
rm1.stop();
rm2.stop();
}
use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.
the class TestWorkPreservingRMRestart method testContainerCompleteMsgNotLostAfterAMFailedAndRMRestart.
@Test(timeout = 20000)
public void testContainerCompleteMsgNotLostAfterAMFailedAndRMRestart() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
nm1.registerNode();
// submit app with keepContainersAcrossApplicationAttempts true
Resource resource = Records.newRecord(Resource.class);
resource.setMemorySize(200);
RMApp app0 = rm1.submitApp(resource, "", UserGroupInformation.getCurrentUser().getShortUserName(), null, false, null, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS, null, null, true, true, false, null, 0, null, true, null);
MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);
am0.allocate("127.0.0.1", 1000, 2, new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
List<Container> conts = am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
while (conts.size() == 0) {
nm1.nodeHeartbeat(true);
conts.addAll(am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(500);
}
// am failed,and relaunch it
nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
MockAM am1 = MockRM.launchAndRegisterAM(app0, rm1, nm1);
// rm failover
rm2 = new MockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// container launched by first am completed
NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am0.getApplicationAttemptId(), 1, ContainerState.RUNNING);
NMContainerStatus completedContainer = TestRMRestart.createNMContainerStatus(am0.getApplicationAttemptId(), 2, ContainerState.COMPLETE);
NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am0.getApplicationAttemptId(), 3, ContainerState.RUNNING);
nm1.registerNode(Arrays.asList(amContainer, runningContainer, completedContainer), null);
Thread.sleep(200);
// check whether current am could get containerCompleteMsg
RMApp recoveredApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
RMAppAttempt loadedAttempt1 = recoveredApp0.getCurrentAppAttempt();
assertEquals(1, loadedAttempt1.getJustFinishedContainers().size());
}
use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.
the class TestWorkPreservingRMRestart method testContainersNotRecoveredForCompletedApps.
// Apps already completed before RM restart. Restarted RM scheduler should not
// recover containers for completed apps.
@Test(timeout = 20000)
public void testContainersNotRecoveredForCompletedApps() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app1 = rm1.submitApp(200);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
MockRM.finishAMAndVerifyAppState(app1, rm1, nm1, am1);
rm2 = new MockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
NMContainerStatus completedContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(runningContainer, completedContainer), null);
RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
assertEquals(RMAppState.FINISHED, recoveredApp1.getState());
// Wait for RM to settle down on recovering containers;
Thread.sleep(3000);
YarnScheduler scheduler = rm2.getResourceScheduler();
// scheduler should not recover containers for finished apps.
assertNull(scheduler.getRMContainer(runningContainer.getContainerId()));
assertNull(scheduler.getRMContainer(completedContainer.getContainerId()));
}
Aggregations