use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
the class AbstractYarnScheduler method getTransferredContainers.
/**
 * Returns the live containers of the current scheduler attempt of the
 * application that owns {@code currentAttempt}, excluding the AM container.
 *
 * <p>An empty list is returned when the application is not known to the RM
 * context, is not known to the scheduler, or runs an unmanaged AM.
 *
 * <p>YARN-3136 removed the synchronized lock for this method for performance
 * purposes. Because the method runs without that lock, the RMApp is looked up
 * once and the same reference is reused instead of querying the map twice.
 *
 * @param currentAttempt attempt id used to locate the owning application
 * @return a newly created, possibly empty, list of containers
 */
public List<Container> getTransferredContainers(ApplicationAttemptId currentAttempt) {
    ApplicationId appId = currentAttempt.getApplicationId();
    SchedulerApplication<T> app = applications.get(appId);
    List<Container> containerList = new ArrayList<Container>();
    RMApp appImpl = this.rmContext.getRMApps().get(appId);
    // Either lookup may miss; dereferencing a missing RMApp would throw a
    // NullPointerException, so treat both cases as "nothing to transfer".
    if (appImpl == null || app == null) {
        return containerList;
    }
    if (appImpl.getApplicationSubmissionContext().getUnmanagedAM()) {
        return containerList;
    }
    Collection<RMContainer> liveContainers = app.getCurrentAppAttempt().getLiveContainers();
    // Reuse appImpl rather than looking the app up again: a second map lookup
    // could return null if the app is removed in between.
    ContainerId amContainerId = appImpl.getCurrentAppAttempt().getMasterContainer().getId();
    for (RMContainer rmContainer : liveContainers) {
        // Skip the AM's own container; every other live container is returned.
        if (!rmContainer.getContainerId().equals(amContainerId)) {
            containerList.add(rmContainer.getContainer());
        }
    }
    return containerList;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
the class RMAppLifetimeMonitor method expire.
/**
 * Handles expiry of a monitored application: if the application is still
 * present in the RM context, a KILL event carrying a diagnostics message is
 * sent to the dispatcher's event handler. Does nothing when the application
 * is no longer present.
 *
 * @param monitoredAppKey key identifying the application whose lifetime expired
 */
@SuppressWarnings("unchecked")
@Override
protected synchronized void expire(RMAppToMonitor monitoredAppKey) {
    final ApplicationId expiredAppId = monitoredAppKey.getApplicationId();
    if (rmContext.getRMApps().get(expiredAppId) == null) {
        // The application is not tracked by the RM context; nothing to kill.
        return;
    }
    final String diagnostics = "Application is killed by ResourceManager as it" + " has exceeded the lifetime period.";
    RMAppEvent killEvent = new RMAppEvent(expiredAppId, RMAppEventType.KILL, diagnostics);
    rmContext.getDispatcher().getEventHandler().handle(killEvent);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
the class TestRMRestart method testRMRestartAppRunningAMFailed.
/**
 * Fails an AM before it registers, checks that the FAILED attempt state is in
 * the state store while no app state has been saved, then starts a second RM
 * on the same store and waits for the failed attempt to be loaded back.
 */
@Test(timeout = 60000)
public void testRMRestartAppRunningAMFailed() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);
    Map<ApplicationId, ApplicationStateData> storedApps = stateStore.getState().getApplicationState();

    // Bring up the first RM with a single registered node.
    MockRM firstRm = createMockRM(conf, stateStore);
    firstRm.start();
    MockNM node = new MockNM("127.0.0.1:1234", 15120, firstRm.getResourceTrackerService());
    node.registerNode();

    // Submit an application and launch its AM.
    RMApp app = firstRm.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", true, true);
    MockAM failedAm = launchAM(app, firstRm, node);

    // Fail the AM by reporting its container as COMPLETE before it registers.
    node.nodeHeartbeat(failedAm.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    firstRm.waitForState(failedAm.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    ApplicationStateData storedApp = storedApps.get(app.getApplicationId());

    // The failed attempt state must have been saved ...
    Assert.assertEquals(RMAppAttemptState.FAILED, storedApp.getAttempt(failedAm.getApplicationAttemptId()).getState());
    // ... while the app state itself must not have been saved.
    Assert.assertNull(storedApps.get(app.getApplicationId()).getState());

    // A new AM is started but not registered, so the app stays in ACCEPTED state.
    firstRm.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);

    // Start a second RM on top of the same state store.
    MockRM secondRm = createMockRM(conf, stateStore);
    secondRm.start();

    // The previous AM's FAILED state must be loaded back on recovery.
    secondRm.waitForState(failedAm.getApplicationAttemptId(), RMAppAttemptState.FAILED);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
the class TestRMRestart method testQueueMetricsOnRMRestart.
/**
 * Checks the root queue metrics across an RM restart.
 *
 * <p>Phase 1 runs an application on rm1 until its AM has been allocated a
 * container. Phase 2 creates rm2 on the same MemoryRMStateStore, reports the
 * old AM container as COMPLETE so that a second attempt is created, drives
 * that attempt to completion and asserts the queue metrics at each step.
 *
 * <p>NOTE(review): the four numeric arguments of assertQueueMetrics are
 * presumably (submitted, pending, running, completed) application counts;
 * confirm against the helper's definition.
 */
@SuppressWarnings("resource")
@Test(timeout = 60000)
public void testQueueMetricsOnRMRestart() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// PHASE 1: create state in an RM
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// Start from zeroed root queue metrics so the assertions below are absolute.
QueueMetrics qm1 = rm1.getResourceScheduler().getRootQueueMetrics();
resetQueueMetrics(qm1);
assertQueueMetrics(qm1, 0, 0, 0, 0);
// create app that gets launched and does allocate before RM restart
RMApp app1 = rm1.submitApp(200);
// Need to wait first for AppAttempt to be started (RMAppState.ACCEPTED)
// and then for it to reach RMAppAttemptState.SCHEDULED
// in order to ensure the appsPending metric is incremented
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
rm1.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
assertQueueMetrics(qm1, 1, 1, 0, 0);
// A node heartbeat is sent before waiting for the attempt to become ALLOCATED.
nm1.nodeHeartbeat(true);
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
am1.allocate("127.0.0.1", 1000, 1, new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
// Keep heartbeating and polling until at least one container has been allocated.
while (conts.size() == 0) {
nm1.nodeHeartbeat(true);
conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(500);
}
assertQueueMetrics(qm1, 1, 0, 1, 0);
// PHASE 2: create new RM and start from old state
// create new RM to represent restart and recover state
MockRM rm2 = createMockRM(conf, memStore);
// Metrics of the new RM are reset and checked before rm2 is started.
QueueMetrics qm2 = rm2.getResourceScheduler().getRootQueueMetrics();
resetQueueMetrics(qm2);
assertQueueMetrics(qm2, 0, 0, 0, 0);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// recover app
RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
nm1.nodeHeartbeat(true);
// Replace nm1 with a fresh MockNM bound to rm2 and register it while reporting
// container 1 of the loaded current attempt as COMPLETE.
nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
NMContainerStatus status = TestRMRestart.createNMContainerStatus(loadedApp1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(status), null);
// Wait until the recovered app has a second attempt.
while (loadedApp1.getAppAttempts().size() != 2) {
Thread.sleep(200);
}
// From here on attempt1/attemptId1 refer to the app's new current attempt.
attempt1 = loadedApp1.getCurrentAppAttempt();
attemptId1 = attempt1.getAppAttemptId();
rm2.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
assertQueueMetrics(qm2, 1, 1, 0, 0);
nm1.nodeHeartbeat(true);
rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
assertQueueMetrics(qm2, 1, 0, 1, 0);
am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
am1.allocate("127.0.0.1", 1000, 3, new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
// Same polling loop as in phase 1, now against rm2.
while (conts.size() == 0) {
nm1.nodeHeartbeat(true);
conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(500);
}
// finish the AMs
finishApplicationMaster(loadedApp1, rm2, nm1, am1);
// now AppAttempt and App become FINISHED;
// we should also make sure the APP_ATTEMPT_REMOVE/APP_REMOVE events
// have been processed by the scheduler
rm2.waitForAppRemovedFromScheduler(loadedApp1.getApplicationId());
assertQueueMetrics(qm2, 1, 0, 0, 1);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
the class TestRMRestart method testClientRetryOnKillingApplication.
// Verifies that killing an application can be retried by the client until the
// app reaches the killed state, and checks that the attempt state is saved
// before the app state is saved.
@Test(timeout = 60000)
public void testClientRetryOnKillingApplication() throws Exception {
    MemoryRMStateStore memStore = new TestMemoryRMStateStore();
    memStore.init(conf);

    // Bring up the RM with a single registered node.
    MockRM rm = createMockRM(conf, memStore);
    rm.start();
    MockNM node = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
    node.registerNode();

    RMApp app = rm.submitApp(200, "name", "user", null, false, "default", 1, null, "myType");
    MockAM am = launchAM(app, rm, node);

    // Re-issue the kill request every 100 ms until the RM reports it completed,
    // counting how many requests came back as not yet completed.
    int incompleteKills = 0;
    KillApplicationResponse response = rm.killApp(app.getApplicationId());
    while (!response.getIsKillCompleted()) {
        Thread.sleep(100);
        incompleteKills++;
        response = rm.killApp(app.getApplicationId());
    }

    // At least two killApp calls are expected, since the first killApp always
    // returns false.
    Assert.assertTrue(incompleteKills >= 1);
    rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.KILLED);
    rm.waitForState(app.getApplicationId(), RMAppState.KILLED);

    TestMemoryRMStateStore countingStore = (TestMemoryRMStateStore) memStore;
    Assert.assertEquals(1, countingStore.updateAttempt);
    Assert.assertEquals(2, countingStore.updateApp);
}
Aggregations