use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class TestApplicationCleanup method testContainerCleanup.
@SuppressWarnings("resource")
@Test
public void testContainerCleanup() throws Exception {
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
final DrainDispatcher dispatcher = new DrainDispatcher();
MockRM rm = new MockRM() {
@Override
protected Dispatcher createDispatcher() {
return dispatcher;
}
};
rm.start();
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5000);
RMApp app = rm.submitApp(2000);
//kick the scheduling
nm1.nodeHeartbeat(true);
RMAppAttempt attempt = app.getCurrentAppAttempt();
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
am.registerAppAttempt();
//request for containers
int request = 2;
am.allocate("127.0.0.1", 1000, request, new ArrayList<ContainerId>());
dispatcher.await();
//kick the scheduler
nm1.nodeHeartbeat(true);
List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
int contReceived = conts.size();
int waitCount = 0;
while (contReceived < request && waitCount++ < 200) {
LOG.info("Got " + contReceived + " containers. Waiting to get " + request);
Thread.sleep(100);
conts = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
dispatcher.await();
contReceived += conts.size();
nm1.nodeHeartbeat(true);
}
Assert.assertEquals(request, contReceived);
// Release a container.
ArrayList<ContainerId> release = new ArrayList<ContainerId>();
release.add(conts.get(0).getId());
am.allocate(new ArrayList<ResourceRequest>(), release);
dispatcher.await();
// Send one more heartbeat with a fake running container. This is to
// simulate the situation that can happen if the NM reports that container
// is running in the same heartbeat when the RM asks it to clean it up.
Map<ApplicationId, List<ContainerStatus>> containerStatuses = new HashMap<ApplicationId, List<ContainerStatus>>();
ArrayList<ContainerStatus> containerStatusList = new ArrayList<ContainerStatus>();
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0).getId(), ContainerState.RUNNING, "nothing", 0, conts.get(0).getResource()));
containerStatuses.put(app.getApplicationId(), containerStatusList);
NodeHeartbeatResponse resp = nm1.nodeHeartbeat(containerStatuses, true);
waitForContainerCleanup(dispatcher, nm1, resp);
// Now to test the case when RM already gave cleanup, and NM suddenly
// realizes that the container is running.
LOG.info("Testing container launch much after release and " + "NM getting cleanup");
containerStatuses.clear();
containerStatusList.clear();
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0).getId(), ContainerState.RUNNING, "nothing", 0, conts.get(0).getResource()));
containerStatuses.put(app.getApplicationId(), containerStatusList);
resp = nm1.nodeHeartbeat(containerStatuses, true);
// The cleanup list won't be instantaneous as it is given out by scheduler
// and not RMNodeImpl.
waitForContainerCleanup(dispatcher, nm1, resp);
rm.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class TestApplicationCleanup method launchAM.
private MockAM launchAM(RMApp app, MockRM rm, MockNM nm) throws Exception {
RMAppAttempt attempt = app.getCurrentAppAttempt();
nm.nodeHeartbeat(true);
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
am.registerAppAttempt();
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
return am;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class TestApplicationMasterLauncher method testallocateBeforeAMRegistration.
@SuppressWarnings("unused")
@Test(timeout = 100000)
public void testallocateBeforeAMRegistration() throws Exception {
Logger rootLogger = LogManager.getRootLogger();
boolean thrown = false;
rootLogger.setLevel(Level.DEBUG);
MockRM rm = new MockRM();
rm.start();
MockNM nm1 = rm.registerNode("h1:1234", 5000);
RMApp app = rm.submitApp(2000);
// kick the scheduling
nm1.nodeHeartbeat(true);
RMAppAttempt attempt = app.getCurrentAppAttempt();
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
// request for containers
int request = 2;
AllocateResponse ar = null;
try {
ar = am.allocate("h1", 1000, request, new ArrayList<ContainerId>());
Assert.fail();
} catch (ApplicationMasterNotRegisteredException e) {
}
// kick the scheduler
nm1.nodeHeartbeat(true);
AllocateResponse amrs = null;
try {
amrs = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
Assert.fail();
} catch (ApplicationMasterNotRegisteredException e) {
}
am.registerAppAttempt();
try {
am.registerAppAttempt(false);
Assert.fail();
} catch (Exception e) {
Assert.assertEquals("Application Master is already registered : " + attempt.getAppAttemptId().getApplicationId(), e.getMessage());
}
// Simulate an AM that was disconnected and app attempt was removed
// (responseMap does not contain attemptid)
am.unregisterAppAttempt();
nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1, ContainerState.COMPLETE);
rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
try {
amrs = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
Assert.fail();
} catch (ApplicationAttemptNotFoundException e) {
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class TestApplicationMasterLauncher method testAMLaunchAndCleanup.
@Test
public void testAMLaunchAndCleanup() throws Exception {
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
MyContainerManagerImpl containerManager = new MyContainerManagerImpl();
MockRMWithCustomAMLauncher rm = new MockRMWithCustomAMLauncher(containerManager);
rm.start();
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5120);
RMApp app = rm.submitApp(2000);
// kick the scheduling
nm1.nodeHeartbeat(true);
int waitCount = 0;
while (containerManager.launched == false && waitCount++ < 20) {
LOG.info("Waiting for AM Launch to happen..");
Thread.sleep(1000);
}
Assert.assertTrue(containerManager.launched);
RMAppAttempt attempt = app.getCurrentAppAttempt();
ApplicationAttemptId appAttemptId = attempt.getAppAttemptId();
Assert.assertEquals(appAttemptId.toString(), containerManager.attemptIdAtContainerManager);
Assert.assertEquals(app.getSubmitTime(), containerManager.submitTimeAtContainerManager);
Assert.assertEquals(app.getRMAppAttempt(appAttemptId).getMasterContainer().getId().toString(), containerManager.containerIdAtContainerManager);
Assert.assertEquals(nm1.getNodeId().toString(), containerManager.nmHostAtContainerManager);
Assert.assertEquals(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS, containerManager.maxAppAttempts);
MockAM am = new MockAM(rm.getRMContext(), rm.getApplicationMasterService(), appAttemptId);
am.registerAppAttempt();
am.unregisterAppAttempt();
//complete the AM container to finish the app normally
nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1, ContainerState.COMPLETE);
rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
waitCount = 0;
while (containerManager.cleanedup == false && waitCount++ < 20) {
LOG.info("Waiting for AM Cleanup to happen..");
Thread.sleep(1000);
}
Assert.assertTrue(containerManager.cleanedup);
rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
rm.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class TestApplicationMasterLauncher method testRetriesOnFailures.
@Test
public void testRetriesOnFailures() throws Exception {
final ContainerManagementProtocol mockProxy = mock(ContainerManagementProtocol.class);
final StartContainersResponse mockResponse = mock(StartContainersResponse.class);
when(mockProxy.startContainers(any(StartContainersRequest.class))).thenThrow(new NMNotYetReadyException("foo")).thenReturn(mockResponse);
Configuration conf = new Configuration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
conf.setInt(YarnConfiguration.CLIENT_NM_CONNECT_RETRY_INTERVAL_MS, 1);
final DrainDispatcher dispatcher = new DrainDispatcher();
MockRM rm = new MockRMWithCustomAMLauncher(conf, null) {
@Override
protected ApplicationMasterLauncher createAMLauncher() {
return new ApplicationMasterLauncher(getRMContext()) {
@Override
protected Runnable createRunnableLauncher(RMAppAttempt application, AMLauncherEventType event) {
return new AMLauncher(context, application, event, getConfig()) {
@Override
protected YarnRPC getYarnRPC() {
YarnRPC mockRpc = mock(YarnRPC.class);
when(mockRpc.getProxy(any(Class.class), any(InetSocketAddress.class), any(Configuration.class))).thenReturn(mockProxy);
return mockRpc;
}
};
}
};
}
@Override
protected Dispatcher createDispatcher() {
return dispatcher;
}
};
rm.start();
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5120);
RMApp app = rm.submitApp(2000);
// kick the scheduling
nm1.nodeHeartbeat(true);
dispatcher.await();
MockRM.waitForState(app.getCurrentAppAttempt(), RMAppAttemptState.LAUNCHED, 500);
}
Aggregations