use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestDelegationTokenRenewer method testRenewTokenUsingTokenConfProvidedByApp.
// Test DelegationTokenRenewer uses the tokenConf provided by application
// for token renewal.
@Test
public void testRenewTokenUsingTokenConfProvidedByApp() throws Exception {
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
UserGroupInformation.setConfiguration(conf);
MockRM rm = new TestSecurityMockRM(conf, null);
rm.start();
final MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
nm1.registerNode();
// create a token
Text userText1 = new Text("user1");
DelegationTokenIdentifier dtId1 = new DelegationTokenIdentifier(userText1, new Text("renewer1"), userText1);
final Token<DelegationTokenIdentifier> token1 = new Token<DelegationTokenIdentifier>(dtId1.getBytes(), "password1".getBytes(), dtId1.getKind(), new Text("service1"));
Credentials credentials = new Credentials();
credentials.addToken(userText1, token1);
// create token conf for renewal
Configuration appConf = new Configuration(false);
appConf.set("dfs.nameservices", "mycluster1,mycluster2");
appConf.set("dfs.namenode.rpc-address.mycluster2.nn1", "123.0.0.1");
appConf.set("dfs.namenode.rpc-address.mycluster2.nn2", "123.0.0.2");
appConf.set("dfs.ha.namenodes.mycluster2", "nn1,nn2");
appConf.set("dfs.client.failover.proxy.provider.mycluster2", "provider");
DataOutputBuffer dob = new DataOutputBuffer();
appConf.write(dob);
ByteBuffer tokenConf = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
final int confSize = appConf.size();
// submit app
RMApp app = rm.submitApp(credentials, tokenConf);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
public Boolean get() {
DelegationTokenToRenew toRenew = rm.getRMContext().getDelegationTokenRenewer().getAllTokens().get(token1);
// the specific config we added.
return toRenew != null && toRenew.conf != null && toRenew.conf.size() == confSize && toRenew.conf.get("dfs.namenode.rpc-address.mycluster2.nn1").equals("123.0.0.1");
}
}, 200, 10000);
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestAMRestart method testAMRestartNotLostContainerCompleteMsg.
@Test(timeout = 30000)
public void testAMRestartNotLostContainerCompleteMsg() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
MockRM rm1 = new MockRM(conf);
rm1.start();
RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false, true);
MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.getResourceTrackerService());
nm1.registerNode();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
allocateContainers(nm1, am1, 1);
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
// container complete
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.COMPLETE);
rm1.waitForState(nm1, containerId2, RMContainerState.COMPLETED);
// before this msg pass to AM, AM may crash
while (true) {
AllocateResponse response = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
List<ContainerStatus> containerStatuses = response.getCompletedContainersStatuses();
if (isContainerIdInContainerStatus(containerStatuses, containerId2) == false) {
Thread.sleep(100);
continue;
}
// is containerId still in justFinishedContainer?
containerStatuses = app1.getCurrentAppAttempt().getJustFinishedContainers();
if (isContainerIdInContainerStatus(containerStatuses, containerId2)) {
Assert.fail();
}
break;
}
// fail the AM by sending CONTAINER_FINISHED event without registering.
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
// wait for app to start a new attempt.
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
// assert this is a new AM.
ApplicationAttemptId newAttemptId = app1.getCurrentAppAttempt().getAppAttemptId();
Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));
// launch the new AM
RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
MockAM am2 = rm1.launchAndRegisterAM(app1, rm1, nm1);
// whether new AM could get container complete msg
AllocateResponse allocateResponse = am2.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
List<ContainerStatus> containerStatuses = allocateResponse.getCompletedContainersStatuses();
if (isContainerIdInContainerStatus(containerStatuses, containerId2) == false) {
Assert.fail();
}
containerStatuses = attempt2.getJustFinishedContainers();
if (isContainerIdInContainerStatus(containerStatuses, containerId2)) {
Assert.fail();
}
// the second allocate should not get container complete msg
allocateResponse = am2.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
containerStatuses = allocateResponse.getCompletedContainersStatuses();
if (isContainerIdInContainerStatus(containerStatuses, containerId2)) {
Assert.fail();
}
rm1.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestAMRestart method testRMAppAttemptFailuresValidityInterval.
@Test(timeout = 120000)
public void testRMAppAttemptFailuresValidityInterval() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
// explicitly set max-am-retry count as 2.
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
nm1.registerNode();
// set window size to a larger number : 60s
// we will verify the app should be failed if
// two continuous attempts failed in 60s.
RMApp app = rm1.submitApp(200, 60000, false);
MockAM am = MockRM.launchAM(app, rm1, nm1);
// Fail current attempt normally
nm1.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FAILED);
// launch the second attempt
rm1.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
Assert.assertEquals(2, app.getAppAttempts().size());
MockAM am_2 = MockRM.launchAndRegisterAM(app, rm1, nm1);
rm1.waitForState(am_2.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
nm1.nodeHeartbeat(am_2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am_2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
// current app should be failed.
rm1.waitForState(app.getApplicationId(), RMAppState.FAILED);
ControlledClock clock = new ControlledClock();
// set window size to 10s
RMAppImpl app1 = (RMAppImpl) rm1.submitApp(200, 10000, false);
app1.setSystemClock(clock);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// Fail attempt1 normally
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
//Wait to make sure attempt1 be removed in State Store
//TODO explore a better way than sleeping for a while (YARN-4929)
Thread.sleep(15 * 1000);
// launch the second attempt
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
Assert.assertEquals(2, app1.getAppAttempts().size());
RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
// wait for 10 seconds
clock.setTime(System.currentTimeMillis() + 10 * 1000);
// Fail attempt2 normally
nm1.nodeHeartbeat(am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
// can launch the third attempt successfully
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
Assert.assertEquals(3, app1.getAppAttempts().size());
RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
clock.reset();
MockAM am3 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
rm1.waitForState(am3.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
// Restart rm.
@SuppressWarnings("resource") MockRM rm2 = new MockRM(conf, memStore);
rm2.start();
ApplicationStateData app1State = memStore.getState().getApplicationState().get(app1.getApplicationId());
Assert.assertEquals(1, app1State.getFirstAttemptId());
// re-register the NM
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
NMContainerStatus status = Records.newRecord(NMContainerStatus.class);
status.setContainerExitStatus(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
status.setContainerId(attempt3.getMasterContainer().getId());
status.setContainerState(ContainerState.COMPLETE);
status.setDiagnostics("");
nm1.registerNode(Collections.singletonList(status), null);
rm2.waitForState(attempt3.getAppAttemptId(), RMAppAttemptState.FAILED);
//Wait to make sure attempt3 be removed in State Store
//TODO explore a better way than sleeping for a while (YARN-4929)
Thread.sleep(15 * 1000);
Assert.assertEquals(2, app1State.getAttemptCount());
rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
// Lauch Attempt 4
MockAM am4 = rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1);
// wait for 10 seconds
clock.setTime(System.currentTimeMillis() + 10 * 1000);
// Fail attempt4 normally
nm1.nodeHeartbeat(am4.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm2.waitForState(am4.getApplicationAttemptId(), RMAppAttemptState.FAILED);
Assert.assertEquals(2, app1State.getAttemptCount());
// can launch the 5th attempt successfully
rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
MockAM am5 = rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 5, nm1);
clock.reset();
rm2.waitForState(am5.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
// Fail attempt5 normally
nm1.nodeHeartbeat(am5.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm2.waitForState(am5.getApplicationAttemptId(), RMAppAttemptState.FAILED);
Assert.assertEquals(2, app1State.getAttemptCount());
rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
rm1.stop();
rm2.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestAMRestart method testPreemptedAMRestartOnRMRestart.
// Test RM restarts after AM container is preempted, new RM should not count
// AM preemption failure towards the max-retry-account and should be able to
// re-launch the AM.
@Test(timeout = 20000)
public void testPreemptedAMRestartOnRMRestart() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
// explicitly set max-am-retry count as 1.
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app1 = rm1.submitApp(200);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
CapacityScheduler scheduler = (CapacityScheduler) rm1.getResourceScheduler();
ContainerId amContainer = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
// Forcibly preempt the am container;
scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer));
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
Assert.assertTrue(!attempt1.shouldCountTowardsMaxAttemptRetry());
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
// state store has 1 attempt stored.
ApplicationStateData appState = memStore.getState().getApplicationState().get(app1.getApplicationId());
Assert.assertEquals(1, appState.getAttemptCount());
// attempt stored has the preempted container exit status.
Assert.assertEquals(ContainerExitStatus.PREEMPTED, appState.getAttempt(am1.getApplicationAttemptId()).getAMContainerExitStatus());
// Restart rm.
MockRM rm2 = new MockRM(conf, memStore);
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
nm1.registerNode();
rm2.start();
// Restarted RM should re-launch the am.
MockAM am2 = rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
MockRM.finishAMAndVerifyAppState(app1, rm2, nm1, am2);
RMAppAttempt attempt2 = rm2.getRMContext().getRMApps().get(app1.getApplicationId()).getCurrentAppAttempt();
Assert.assertTrue(attempt2.shouldCountTowardsMaxAttemptRetry());
Assert.assertEquals(ContainerExitStatus.INVALID, appState.getAttempt(am2.getApplicationAttemptId()).getAMContainerExitStatus());
rm1.stop();
rm2.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestAMRestart method testShouldNotCountFailureToMaxAttemptRetry.
// AM container preempted, nm disk failure
// should not be counted towards AM max retry count.
@Test(timeout = 100000)
public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
// explicitly set max-am-retry count as 1.
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app1 = rm1.submitApp(200);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
CapacityScheduler scheduler = (CapacityScheduler) rm1.getResourceScheduler();
ContainerId amContainer = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
// Preempt the first attempt;
scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer));
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am1.getApplicationAttemptId());
Assert.assertTrue(!attempt1.shouldCountTowardsMaxAttemptRetry());
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
ApplicationStateData appState = memStore.getState().getApplicationState().get(app1.getApplicationId());
// AM should be restarted even though max-am-attempt is 1.
MockAM am2 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
// Preempt the second attempt.
ContainerId amContainer2 = ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer2));
rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am2.getApplicationAttemptId());
Assert.assertTrue(!attempt2.shouldCountTowardsMaxAttemptRetry());
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
MockAM am3 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 3, nm1);
RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
// mimic NM disk_failure
ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class);
containerStatus.setContainerId(attempt3.getMasterContainer().getId());
containerStatus.setDiagnostics("mimic NM disk_failure");
containerStatus.setState(ContainerState.COMPLETE);
containerStatus.setExitStatus(ContainerExitStatus.DISKS_FAILED);
Map<ApplicationId, List<ContainerStatus>> conts = new HashMap<ApplicationId, List<ContainerStatus>>();
conts.put(app1.getApplicationId(), Collections.singletonList(containerStatus));
nm1.nodeHeartbeat(conts, true);
rm1.waitForState(am3.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am3.getApplicationAttemptId());
Assert.assertTrue(!attempt3.shouldCountTowardsMaxAttemptRetry());
Assert.assertEquals(ContainerExitStatus.DISKS_FAILED, appState.getAttempt(am3.getApplicationAttemptId()).getAMContainerExitStatus());
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
MockAM am4 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1);
RMAppAttempt attempt4 = app1.getCurrentAppAttempt();
// create second NM, and register to rm1
MockNM nm2 = new MockNM("127.0.0.1:2234", 8000, rm1.getResourceTrackerService());
nm2.registerNode();
// nm1 heartbeats to report unhealthy
// This will mimic ContainerExitStatus.ABORT
nm1.nodeHeartbeat(false);
rm1.waitForState(am4.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am4.getApplicationAttemptId());
Assert.assertTrue(!attempt4.shouldCountTowardsMaxAttemptRetry());
Assert.assertEquals(ContainerExitStatus.ABORTED, appState.getAttempt(am4.getApplicationAttemptId()).getAMContainerExitStatus());
// launch next AM in nm2
MockAM am5 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 5, nm2);
RMAppAttempt attempt5 = app1.getCurrentAppAttempt();
// fail the AM normally
nm2.nodeHeartbeat(am5.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am5.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am5.getApplicationAttemptId());
Assert.assertTrue(attempt5.shouldCountTowardsMaxAttemptRetry());
// AM should not be restarted.
rm1.waitForState(app1.getApplicationId(), RMAppState.FAILED);
Assert.assertEquals(5, app1.getAppAttempts().size());
rm1.stop();
}
Aggregations