use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestCapacityScheduler method testParentQueueMaxCapsAreRespected.
@Test
public void testParentQueueMaxCapsAreRespected() throws Exception {
/*
* Queue tree:
* Root
* / \
* A B
* / \
* A1 A2
*/
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
csConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { "a", "b" });
csConf.setCapacity(A, 50);
csConf.setMaximumCapacity(A, 50);
csConf.setCapacity(B, 50);
// Define 2nd-level queues
csConf.setQueues(A, new String[] { "a1", "a2" });
csConf.setCapacity(A1, 50);
csConf.setUserLimitFactor(A1, 100.0f);
csConf.setCapacity(A2, 50);
csConf.setUserLimitFactor(A2, 100.0f);
csConf.setCapacity(B1, B1_CAPACITY);
csConf.setUserLimitFactor(B1, 100.0f);
YarnConfiguration conf = new YarnConfiguration(csConf);
conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 24 * GB, rm1.getResourceTrackerService());
nm1.registerNode();
// Launch app1 in a1, resource usage is 1GB (am) + 4GB * 2 = 9GB
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a1");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
waitContainerAllocated(am1, 4 * GB, 2, 2, rm1, nm1);
// Try to launch app2 in a2, asked 2GB, should success
RMApp app2 = rm1.submitApp(2 * GB, "app", "user", null, "a2");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
try {
// Try to allocate a container, a's usage=11G/max=12
// a1's usage=9G/max=12
// a2's usage=2G/max=12
// In this case, if a2 asked 2G, should fail.
waitContainerAllocated(am2, 2 * GB, 1, 2, rm1, nm1);
} catch (AssertionError failure) {
// Expected, return;
return;
}
Assert.fail("Shouldn't successfully allocate containers for am2, " + "queue-a's max capacity will be violated if container allocated");
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestApplicationPriority method testRMRestartWithChangeInPriority.
@Test(timeout = 180000)
public void testRMRestartWithChangeInPriority() throws Exception {
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
conf.setInt(YarnConfiguration.MAX_CLUSTER_LEVEL_APPLICATION_PRIORITY, 10);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
// PHASE 1: create state in an RM
// start RM
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
Priority appPriority1 = Priority.newInstance(5);
RMApp app1 = rm1.submitApp(1 * GB, appPriority1);
// kick the scheduler, 1 GB given to AM1, remaining 15GB on nm1
MockAM am1 = MockRM.launchAM(app1, rm1, nm1);
am1.registerAppAttempt();
// get scheduler
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
// Change the priority of App1 to 8
Priority appPriority2 = Priority.newInstance(8);
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(app1.getUser());
cs.updateApplicationPriority(appPriority2, app1.getApplicationId(), null, ugi);
// let things settle down
Thread.sleep(1000);
// create new RM to represent restart and recover state
MockRM rm2 = new MockRM(conf, memStore);
// start new RM
rm2.start();
// change NM to point to new RM
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// Verify RM Apps after this restart
Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
// get scheduler app
RMApp loadedApp = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
// Verify whether priority 15 is reset to 10
Assert.assertEquals(appPriority2, loadedApp.getApplicationPriority());
rm2.stop();
rm1.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestApplicationPriority method testOrderOfActivatingThePriorityApplicationOnRMRestart.
/**
* <p>
* Test case verifies the order of applications activated after RM Restart.
* </p>
* <li>App-1 and app-2 submitted and scheduled and running with a priority
* 5 and 6 Respectively</li>
* <li>App-3 submitted and scheduled with a priority 7. This
* is not activated since AMResourceLimit is reached</li>
* <li>RM restarted</li>
* <li>App-1 get activated nevertheless of AMResourceLimit</li>
* <li>App-2 and app-3 put in pendingOrderingPolicy</li>
* <li>After NM registration, app-3 is activated</li>
* <p>
* Expected Output : App-2 must get activated since app-2 was running earlier
* </p>
* @throws Exception
*/
@Test
public void testOrderOfActivatingThePriorityApplicationOnRMRestart() throws Exception {
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
conf.setInt(YarnConfiguration.MAX_CLUSTER_LEVEL_APPLICATION_PRIORITY, 10);
final DrainDispatcher dispatcher = new DrainDispatcher();
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm1 = new MockRM(conf, memStore) {
@Override
protected Dispatcher createDispatcher() {
return dispatcher;
}
};
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 16384, rm1.getResourceTrackerService());
nm1.registerNode();
dispatcher.await();
ResourceScheduler scheduler = rm1.getRMContext().getScheduler();
LeafQueue defaultQueue = (LeafQueue) ((CapacityScheduler) scheduler).getQueue("default");
int memory = (int) (defaultQueue.getAMResourceLimit().getMemorySize() / 2);
// App-1 with priority 5 submitted and running
Priority appPriority1 = Priority.newInstance(5);
RMApp app1 = rm1.submitApp(memory, appPriority1);
MockAM am1 = MockRM.launchAM(app1, rm1, nm1);
am1.registerAppAttempt();
// App-2 with priority 6 submitted and running
Priority appPriority2 = Priority.newInstance(6);
RMApp app2 = rm1.submitApp(memory, appPriority2);
MockAM am2 = MockRM.launchAM(app2, rm1, nm1);
am2.registerAppAttempt();
dispatcher.await();
Assert.assertEquals(2, defaultQueue.getNumActiveApplications());
Assert.assertEquals(0, defaultQueue.getNumPendingApplications());
// App-3 with priority 7 submitted and scheduled. But not activated since
// AMResourceLimit threshold
Priority appPriority3 = Priority.newInstance(7);
RMApp app3 = rm1.submitApp(memory, appPriority3);
dispatcher.await();
Assert.assertEquals(2, defaultQueue.getNumActiveApplications());
Assert.assertEquals(1, defaultQueue.getNumPendingApplications());
Iterator<FiCaSchedulerApp> iterator = defaultQueue.getOrderingPolicy().getSchedulableEntities().iterator();
FiCaSchedulerApp fcApp2 = iterator.next();
Assert.assertEquals(app2.getCurrentAppAttempt().getAppAttemptId(), fcApp2.getApplicationAttemptId());
FiCaSchedulerApp fcApp1 = iterator.next();
Assert.assertEquals(app1.getCurrentAppAttempt().getAppAttemptId(), fcApp1.getApplicationAttemptId());
iterator = defaultQueue.getPendingApplications().iterator();
FiCaSchedulerApp fcApp3 = iterator.next();
Assert.assertEquals(app3.getCurrentAppAttempt().getAppAttemptId(), fcApp3.getApplicationAttemptId());
final DrainDispatcher dispatcher1 = new DrainDispatcher();
// create new RM to represent restart and recover state
MockRM rm2 = new MockRM(conf, memStore) {
@Override
protected Dispatcher createDispatcher() {
return dispatcher1;
}
};
// start new RM
rm2.start();
// change NM to point to new RM
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// Verify RM Apps after this restart
Assert.assertEquals(3, rm2.getRMContext().getRMApps().size());
dispatcher1.await();
scheduler = rm2.getRMContext().getScheduler();
defaultQueue = (LeafQueue) ((CapacityScheduler) scheduler).getQueue("default");
// wait for all applications to get added to scheduler
int count = 50;
while (count-- > 0) {
if (defaultQueue.getNumPendingApplications() == 3) {
break;
}
Thread.sleep(50);
}
// Before NM registration, AMResourceLimit threshold is 0. So no
// applications get activated.
Assert.assertEquals(0, defaultQueue.getNumActiveApplications());
Assert.assertEquals(3, defaultQueue.getNumPendingApplications());
// NM resync to new RM
nm1.registerNode();
dispatcher1.await();
// wait for activating applications
count = 50;
while (count-- > 0) {
if (defaultQueue.getNumActiveApplications() == 2) {
break;
}
Thread.sleep(50);
}
Assert.assertEquals(2, defaultQueue.getNumActiveApplications());
Assert.assertEquals(1, defaultQueue.getNumPendingApplications());
// verify for order of activated applications iterator
iterator = defaultQueue.getOrderingPolicy().getSchedulableEntities().iterator();
fcApp2 = iterator.next();
Assert.assertEquals(app2.getCurrentAppAttempt().getAppAttemptId(), fcApp2.getApplicationAttemptId());
fcApp1 = iterator.next();
Assert.assertEquals(app1.getCurrentAppAttempt().getAppAttemptId(), fcApp1.getApplicationAttemptId());
// verify for pending application iterator. It should be app-3 attempt
iterator = defaultQueue.getPendingApplications().iterator();
fcApp3 = iterator.next();
Assert.assertEquals(app3.getCurrentAppAttempt().getAppAttemptId(), fcApp3.getApplicationAttemptId());
rm2.stop();
rm1.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestCapacityScheduler method testPendingResourceUpdatedAccordingToIncreaseRequestChanges.
@Test
public void testPendingResourceUpdatedAccordingToIncreaseRequestChanges() throws Exception {
Configuration conf = TestUtils.getConfigurationWithQueueLabels(new Configuration(false));
conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
final RMNodeLabelsManager mgr = new NullRMNodeLabelsManager();
mgr.init(conf);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm = new MockRM(conf, memStore) {
protected RMNodeLabelsManager createNodeLabelManager() {
return mgr;
}
};
rm.start();
// label = ""
MockNM nm1 = new MockNM("h1:1234", 200 * GB, rm.getResourceTrackerService());
nm1.registerNode();
// Launch app1 in queue=a1
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "a1");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
// Allocate two more containers
am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(2 * GB), 2)), null);
ContainerId containerId1 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
ContainerId containerId3 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
Assert.assertTrue(rm.waitForState(nm1, containerId3, RMContainerState.ALLOCATED));
// Acquire them
am1.allocate(null, null);
sentRMContainerLaunched(rm, ContainerId.newContainerId(am1.getApplicationAttemptId(), 1L));
sentRMContainerLaunched(rm, ContainerId.newContainerId(am1.getApplicationAttemptId(), 2L));
sentRMContainerLaunched(rm, ContainerId.newContainerId(am1.getApplicationAttemptId(), 3L));
// am1 asks to change its AM container from 1GB to 3GB
am1.sendContainerResizingRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, containerId1, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(3 * GB), null)));
FiCaSchedulerApp app = getFiCaSchedulerApp(rm, app1.getApplicationId());
Assert.assertEquals(2 * GB, app.getAppAttemptResourceUsage().getPending().getMemorySize());
checkPendingResource(rm, "a1", 2 * GB, null);
checkPendingResource(rm, "a", 2 * GB, null);
checkPendingResource(rm, "root", 2 * GB, null);
// am1 asks to change containerId2 (2G -> 3G) and containerId3 (2G -> 5G)
am1.sendContainerResizingRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, containerId2, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(3 * GB), null), UpdateContainerRequest.newInstance(0, containerId3, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(5 * GB), null)));
Assert.assertEquals(6 * GB, app.getAppAttemptResourceUsage().getPending().getMemorySize());
checkPendingResource(rm, "a1", 6 * GB, null);
checkPendingResource(rm, "a", 6 * GB, null);
checkPendingResource(rm, "root", 6 * GB, null);
// am1 asks to change containerId1 (1G->3G), containerId2 (2G -> 4G) and
// containerId3 (2G -> 2G)
am1.sendContainerResizingRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, containerId1, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(3 * GB), null), UpdateContainerRequest.newInstance(0, containerId2, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(4 * GB), null), UpdateContainerRequest.newInstance(0, containerId3, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(2 * GB), null)));
Assert.assertEquals(4 * GB, app.getAppAttemptResourceUsage().getPending().getMemorySize());
checkPendingResource(rm, "a1", 4 * GB, null);
checkPendingResource(rm, "a", 4 * GB, null);
checkPendingResource(rm, "root", 4 * GB, null);
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestAMRMClientOnRMRestart method testAMRMClientOnAMRMTokenRollOverOnRMRestart.
// Test verify for AM issued with rolled-over AMRMToken
// is still able to communicate with restarted RM.
@Test(timeout = 30000)
public void testAMRMClientOnAMRMTokenRollOverOnRMRestart() throws Exception {
conf.setLong(YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS, rolling_interval_sec);
conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, am_expire_ms);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// start first RM
MyResourceManager2 rm1 = new MyResourceManager2(conf, memStore);
rm1.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
Long startTime = System.currentTimeMillis();
// Submit the application
RMApp app = rm1.submitApp(1024);
dispatcher.await();
MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// Node heartbeat
nm1.nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm1.sendAMLaunched(appAttemptId);
dispatcher.await();
AMRMTokenSecretManager amrmTokenSecretManagerForRM1 = rm1.getRMContext().getAMRMTokenSecretManager();
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> token = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
ugi.addTokenIdentifier(token.decodeIdentifier());
AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
amClient.init(conf);
amClient.start();
amClient.registerApplicationMaster("h1", 10000, "");
amClient.allocate(0.1f);
// At mean time, the old AMRMToken should continue to work
while (System.currentTimeMillis() - startTime < rolling_interval_sec * 1000) {
amClient.allocate(0.1f);
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// DO NOTHING
}
}
Assert.assertTrue(amrmTokenSecretManagerForRM1.getMasterKey().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId());
amClient.allocate(0.1f);
// active the nextMasterKey, and replace the currentMasterKey
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> newToken = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
int waitCount = 0;
while (waitCount++ <= 50) {
if (amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId()) {
break;
}
try {
amClient.allocate(0.1f);
} catch (Exception ex) {
break;
}
Thread.sleep(500);
}
Assert.assertTrue(amrmTokenSecretManagerForRM1.getNextMasterKeyData() == null);
Assert.assertTrue(amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
// start 2nd RM
conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(45020, 10));
final MyResourceManager2 rm2 = new MyResourceManager2(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
((MyAMRMClientImpl) amClient).updateRMProxy(rm2);
dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
AMRMTokenSecretManager amrmTokenSecretManagerForRM2 = rm2.getRMContext().getAMRMTokenSecretManager();
Assert.assertTrue(amrmTokenSecretManagerForRM2.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
Assert.assertTrue(amrmTokenSecretManagerForRM2.getNextMasterKeyData() == null);
try {
UserGroupInformation testUser = UserGroupInformation.createRemoteUser("testUser");
SecurityUtil.setTokenService(token, rm2.getApplicationMasterService().getBindAddress());
testUser.addToken(token);
testUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {
@Override
public ApplicationMasterProtocol run() {
return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, rm2.getApplicationMasterService().getBindAddress(), conf);
}
}).allocate(Records.newRecord(AllocateRequest.class));
Assert.fail("The old Token should not work");
} catch (Exception ex) {
Assert.assertTrue(ex instanceof InvalidToken);
Assert.assertTrue(ex.getMessage().contains("Invalid AMRMToken from " + token.decodeIdentifier().getApplicationAttemptId()));
}
// make sure the recovered AMRMToken works for new RM
amClient.allocate(0.1f);
amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
amClient.stop();
rm1.stop();
rm2.stop();
}
Aggregations