Example use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in the Apache Hadoop project.
From the class TestRMRestart, method testRMRestartTimelineCollectorContext.
/**
 * Checks that the timeline collector context registered for an application
 * compares equal before and after a second RM is started on the same
 * in-memory state store, with the timeline service enabled at version 2.0.
 *
 * <p>The timeline-service-enabled flag is switched back to {@code false} and
 * both RMs are closed in the {@code finally} block.
 */
@Test(timeout = 60000)
public void testRMRestartTimelineCollectorContext() throws Exception {
  conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
  conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 2.0f);

  MemoryRMStateStore stateStore = new MemoryRMStateStore();
  stateStore.init(conf);
  Map<ApplicationId, ApplicationStateData> savedApps =
      stateStore.getState().getApplicationState();

  MockRM firstRM = null;
  MockRM secondRM = null;
  try {
    firstRM = createMockRM(conf, stateStore);
    firstRM.start();
    MockNM node =
        new MockNM("127.0.0.1:1234", 15120, firstRM.getResourceTrackerService());
    node.registerNode();

    // Submit a single application to the first RM.
    Map<ApplicationAccessType, String> acls =
        new HashMap<ApplicationAccessType, String>();
    RMApp submitted =
        firstRM.submitApp(200, "name", "user", acls, false, "default", -1, null);
    ApplicationId appId = submitted.getApplicationId();

    // The state store must already hold the app, with no attempt counted yet.
    ApplicationStateData savedApp = savedApps.get(appId);
    Assert.assertNotNull(savedApp);
    Assert.assertEquals(0, savedApp.getAttemptCount());
    Assert.assertEquals(
        savedApp.getApplicationSubmissionContext().getApplicationId(),
        submitted.getApplicationSubmissionContext().getApplicationId());

    // Heartbeat the node so the AM gets allocated.
    node.nodeHeartbeat(true);
    ApplicationAttemptId attemptId =
        submitted.getCurrentAppAttempt().getAppAttemptId();
    firstRM.waitForState(attemptId, RMAppAttemptState.ALLOCATED);

    TimelineCollectorContext beforeRestart = firstRM.getRMContext()
        .getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();

    // Restart: bring up a second RM backed by the same state store.
    secondRM = createMockRM(conf, stateStore);
    secondRM.start();
    Assert.assertEquals(1, secondRM.getRMContext().getRMApps().size());
    secondRM.waitForState(appId, RMAppState.ACCEPTED);

    TimelineCollectorContext afterRestart = secondRM.getRMContext()
        .getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();
    Assert.assertEquals(
        "Collector contexts for an app should be same " + "across restarts",
        beforeRestart, afterRestart);
  } finally {
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, false);
    if (firstRM != null) {
      firstRM.close();
    }
    if (secondRM != null) {
      secondRM.close();
    }
  }
}
Example use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in the Apache Hadoop project.
From the class TestRMRestart, method testRMRestartOnMaxAppAttempts.
/**
 * Submits two applications to a first RM, one with maxAppAttempts set to 1
 * and one with -1, then starts a second RM on the same in-memory state store.
 *
 * <p>After the restart the test expects both apps to be loaded back, the
 * first app to reach FAILED (per the original comment, because it is hitting
 * max-retry), the second to reach ACCEPTED with a max-attempts value of 2, and
 * only the first app to have a final state saved in the store.
 */
@Test(timeout = 60000)
public void testRMRestartOnMaxAppAttempts() throws Exception {
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
      YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);

  MemoryRMStateStore stateStore = new MemoryRMStateStore();
  stateStore.init(conf);
  Map<ApplicationId, ApplicationStateData> savedApps =
      stateStore.getState().getApplicationState();

  MockRM firstRM = createMockRM(conf, stateStore);
  firstRM.start();
  MockNM node =
      new MockNM("127.0.0.1:1234", 15120, firstRM.getResourceTrackerService());
  node.registerNode();

  // First app: maxAppAttempts equals 1.
  RMApp singleAttemptApp = firstRM.submitApp(200, "name", "user",
      new HashMap<ApplicationAccessType, String>(), false, "default", 1, null);
  // Second app: maxAppAttempts equals -1.
  RMApp defaultAttemptsApp = firstRM.submitApp(200, "name", "user",
      new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
  ApplicationId singleAttemptAppId = singleAttemptApp.getApplicationId();
  ApplicationId defaultAttemptsAppId = defaultAttemptsApp.getApplicationId();

  // The first app's info must already be saved, with no attempt counted yet.
  ApplicationStateData savedFirstApp = savedApps.get(singleAttemptAppId);
  Assert.assertNotNull(savedFirstApp);
  Assert.assertEquals(0, savedFirstApp.getAttemptCount());
  Assert.assertEquals(
      savedFirstApp.getApplicationSubmissionContext().getApplicationId(),
      singleAttemptApp.getApplicationSubmissionContext().getApplicationId());

  // Heartbeat the node so the AM gets allocated.
  node.nodeHeartbeat(true);
  ApplicationAttemptId firstAttemptId =
      singleAttemptApp.getCurrentAppAttempt().getAppAttemptId();
  firstRM.waitForState(firstAttemptId, RMAppAttemptState.ALLOCATED);
  Assert.assertEquals(1, savedFirstApp.getAttemptCount());

  ApplicationAttemptStateData savedAttempt =
      savedFirstApp.getAttempt(firstAttemptId);
  Assert.assertNotNull(savedAttempt);
  Assert.assertEquals(BuilderUtils.newContainerId(firstAttemptId, 1),
      savedAttempt.getMasterContainer().getId());

  // Setting AMLivelinessMonitor interval to be 3 Secs.
  conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);

  // Start the new RM on the same state store.
  MockRM secondRM = createMockRM(conf, stateStore);
  secondRM.start();

  // The second app's maxAppAttempts must have been set to the global value.
  Assert.assertEquals(2, secondRM.getRMContext().getRMApps()
      .get(defaultAttemptsAppId).getMaxAppAttempts());

  // Both apps are loaded back, but the first one failed because it's
  // hitting max-retry.
  Assert.assertEquals(2, secondRM.getRMContext().getRMApps().size());
  secondRM.waitForState(singleAttemptAppId, RMAppState.FAILED);
  secondRM.waitForState(defaultAttemptsAppId, RMAppState.ACCEPTED);

  // The first app's FAILED state is saved in the state store; the second
  // app's final saved state is not determined yet.
  Assert.assertEquals(RMAppState.FAILED,
      savedApps.get(singleAttemptAppId).getState());
  Assert.assertNull(savedApps.get(defaultAttemptsAppId).getState());
}
Example use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in the Apache Hadoop project.
From the class TestRM, method testNMTokenSentForNormalContainer.
/**
 * Test even if AM container is allocated with containerId not equal to 1, the
 * following allocate requests from AM should be able to retrieve the
 * corresponding NM Token.
 *
 * <p>The RM is stopped in a {@code finally} block so that a failed assertion
 * or wait does not leave it running.
 */
@Test(timeout = 20000)
public void testNMTokenSentForNormalContainer() throws Exception {
  conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getCanonicalName());
  MockRM rm = new MockRM(conf);
  rm.start();
  try {
    MockNM nm1 = rm.registerNode("h1:1234", 5120);
    RMApp app = rm.submitApp(2000);
    RMAppAttempt attempt = app.getCurrentAppAttempt();

    // Call getNewContainerId to increase container Id so that the AM
    // container Id doesn't equal to one.
    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
    cs.getApplicationAttempt(attempt.getAppAttemptId()).getNewContainerId();

    MockAM am = MockRM.launchAM(app, rm, nm1);
    // am container Id not equal to 1.
    Assert.assertTrue(attempt.getMasterContainer().getId().getContainerId() != 1);
    // NMSecretManager doesn't record the node on which the am is allocated.
    Assert.assertFalse(rm.getRMContext().getNMTokenSecretManager()
        .isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm1.getNodeId()));
    am.registerAppAttempt();
    rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);

    final int numContainers = 1;
    List<Container> containers = new ArrayList<Container>();
    // expectedNMTokens keeps track of all the nmTokens issued in the
    // allocate calls.
    List<NMToken> expectedNMTokens = new ArrayList<NMToken>();

    // am allocates 1 container on nm1. The loop has no bound of its own; it
    // is cut off by the @Test timeout if the container never arrives.
    while (true) {
      AllocateResponse response =
          am.allocate("127.0.0.1", 2000, numContainers, new ArrayList<ContainerId>());
      nm1.nodeHeartbeat(true);
      containers.addAll(response.getAllocatedContainers());
      expectedNMTokens.addAll(response.getNMTokens());
      if (containers.size() == numContainers) {
        break;
      }
      Thread.sleep(200);
      System.out.println("Waiting for container to be allocated.");
    }

    // Fail with a clear message instead of an IndexOutOfBoundsException when
    // no token came back with the allocation.
    Assert.assertFalse("No NMToken was returned for the allocated container",
        expectedNMTokens.isEmpty());
    NodeId nodeId = expectedNMTokens.get(0).getNodeId();
    // NMToken is sent for the allocated container.
    Assert.assertEquals(nm1.getNodeId(), nodeId);
  } finally {
    rm.stop();
  }
}
Example use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in the Apache Hadoop project.
From the class TestRM, method testActivatingApplicationAfterAddingNM.
/**
 * Submits two applications before any node manager is registered, then
 * registers two nodes and heartbeats them one at a time, expecting the first
 * attempt to move from SCHEDULED to ALLOCATED after the first heartbeat and
 * the second attempt after the second heartbeat.
 *
 * <p>The RM is stopped in a {@code finally} block so that it is shut down
 * even when a wait or assertion fails part-way through.
 */
@Test(timeout = 300000)
public void testActivatingApplicationAfterAddingNM() throws Exception {
  MockRM rm1 = new MockRM(conf);
  // start like normal because state is empty
  rm1.start();
  try {
    // app that gets launched
    RMApp app1 = rm1.submitApp(200);
    // app that does not get launched
    RMApp app2 = rm1.submitApp(200);

    // app1 and app2 should be scheduled, but because no resource is
    // available, they are not activated.
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
    RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
    ApplicationAttemptId attemptId2 = attempt2.getAppAttemptId();
    rm1.waitForState(attemptId2, RMAppAttemptState.SCHEDULED);

    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    MockNM nm2 = new MockNM("h2:5678", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    nm2.registerNode();

    // kick the scheduling
    nm1.nodeHeartbeat(true);
    // app1 should be allocated now
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    rm1.waitForState(attemptId2, RMAppAttemptState.SCHEDULED);

    nm2.nodeHeartbeat(true);
    // app2 should be allocated now
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    rm1.waitForState(attemptId2, RMAppAttemptState.ALLOCATED);
  } finally {
    rm1.stop();
  }
}
Example use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in the Apache Hadoop project.
From the class TestRMRestart, method launchAM.
/**
 * Launches and registers an AM for the given application, then waits for the
 * application to reach {@code RMAppState.RUNNING}.
 *
 * <p>Steps, in order: wait for the app's attempt via
 * {@code MockRM.waitForAttemptScheduled}, send one node heartbeat, call
 * {@code sendAMLaunched} for that attempt, register the attempt, and wait for
 * the RUNNING state.
 *
 * @param app the application whose AM is launched
 * @param rm the RM the application was submitted to
 * @param nm the node manager that is heartbeated once
 * @return the {@code MockAM} returned by {@code sendAMLaunched}, after
 *         {@code registerAppAttempt()} has been called on it
 * @throws Exception if any of the underlying calls throws
 */
private MockAM launchAM(RMApp app, MockRM rm, MockNM nm) throws Exception {
  final RMAppAttempt scheduledAttempt = MockRM.waitForAttemptScheduled(app, rm);

  nm.nodeHeartbeat(true);

  final MockAM launchedAM = rm.sendAMLaunched(scheduledAttempt.getAppAttemptId());
  launchedAM.registerAppAttempt();

  rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
  return launchedAM;
}
Aggregations