Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
The class TestAMRMTokens, method testAMRMMasterKeysUpdate.
@Test(timeout = 20000)
public void testAMRMMasterKeysUpdate() throws Exception {
  final AtomicReference<AMRMTokenSecretManager> spySecretMgrRef =
      new AtomicReference<AMRMTokenSecretManager>();
  MockRM rm = new MockRM(conf) {

    @Override
    protected void doSecureLogin() throws IOException {
      // Skip the login.
    }

    @Override
    protected RMSecretManagerService createRMSecretManagerService() {
      return new RMSecretManagerService(conf, rmContext) {

        @Override
        protected AMRMTokenSecretManager createAMRMTokenSecretManager(
            Configuration conf, RMContext rmContext) {
          AMRMTokenSecretManager spySecretMgr =
              spy(super.createAMRMTokenSecretManager(conf, rmContext));
          spySecretMgrRef.set(spySecretMgr);
          return spySecretMgr;
        }
      };
    }
  };
  rm.start();
  MockNM nm = rm.registerNode("127.0.0.1:1234", 8000);
  RMApp app = rm.submitApp(200);
  MockAM am = MockRM.launchAndRegisterAM(app, rm, nm);
  AMRMTokenSecretManager spySecretMgr = spySecretMgrRef.get();
  // Do allocate. This should not update the AMRMToken.
  AllocateResponse response = am.allocate(Records.newRecord(AllocateRequest.class));
  Assert.assertNull(response.getAMRMToken());
  Token<AMRMTokenIdentifier> oldToken = rm.getRMContext().getRMApps()
      .get(app.getApplicationId())
      .getRMAppAttempt(am.getApplicationAttemptId()).getAMRMToken();
  // Roll over the master key, then allocate again. The AM should get the
  // latest AMRMToken.
  rm.getRMContext().getAMRMTokenSecretManager().rollMasterKey();
  response = am.allocate(Records.newRecord(AllocateRequest.class));
  Assert.assertNotNull(response.getAMRMToken());
  Token<AMRMTokenIdentifier> amrmToken = ConverterUtils.convertFromYarn(
      response.getAMRMToken(), new Text(response.getAMRMToken().getService()));
  Assert.assertEquals(amrmToken.decodeIdentifier().getKeyId(),
      rm.getRMContext().getAMRMTokenSecretManager().getMasterKey()
          .getMasterKey().getKeyId());
  // Do allocate again with the same old token and verify the RM sends
  // back the last generated token instead of generating it again.
  reset(spySecretMgr);
  UserGroupInformation ugi = UserGroupInformation.createUserForTesting(
      am.getApplicationAttemptId().toString(), new String[0]);
  ugi.addTokenIdentifier(oldToken.decodeIdentifier());
  response = am.doAllocateAs(ugi, Records.newRecord(AllocateRequest.class));
  Assert.assertNotNull(response.getAMRMToken());
  verify(spySecretMgr, never()).createAndGetAMRMToken(isA(ApplicationAttemptId.class));
  // Do allocate again with the updated token and verify we do not
  // receive a new token to use.
  response = am.allocate(Records.newRecord(AllocateRequest.class));
  Assert.assertNull(response.getAMRMToken());
  // Activate the next master key. The AM already holds a token for the
  // latest key, so its AMRMToken is not updated again.
  rm.getRMContext().getAMRMTokenSecretManager().activateNextMasterKey();
  response = am.allocate(Records.newRecord(AllocateRequest.class));
  Assert.assertNull(response.getAMRMToken());
  rm.stop();
}
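A production AM would typically install the refreshed token into its current UGI so that subsequent allocate calls authenticate against the rolled-over master key. Below is a minimal sketch of that step, reusing the conversion shown in the test; the helper name and the serviceAddr parameter are illustrative assumptions, not part of TestAMRMTokens (imports elided, as in the snippets above).

  // Hypothetical AM-side helper: applies a refreshed AMRMToken, if any,
  // from an AllocateResponse to the current user's credentials.
  private static void applyRefreshedAMRMToken(AllocateResponse response,
      String serviceAddr) throws IOException {
    org.apache.hadoop.yarn.api.records.Token yarnToken = response.getAMRMToken();
    if (yarnToken == null) {
      return; // No master-key roll-over; keep using the current token.
    }
    Token<AMRMTokenIdentifier> amrmToken =
        ConverterUtils.convertFromYarn(yarnToken, new Text(serviceAddr));
    // Later RPCs to the RM pick the token up from the UGI's credentials.
    UserGroupInformation.getCurrentUser().addToken(amrmToken);
  }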
Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
The class TestFifoScheduler, method testFifoScheduling.
@Test(timeout = 60000)
public void testFifoScheduling() throws Exception {
  Logger rootLogger = LogManager.getRootLogger();
  rootLogger.setLevel(Level.DEBUG);
  MockRM rm = new MockRM(conf);
  rm.start();
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB);
  MockNM nm2 = rm.registerNode("127.0.0.2:5678", 4 * GB);
  RMApp app1 = rm.submitApp(2048);
  // Kick the scheduling: 2 GB given to AM1, remaining 4 GB on nm1.
  nm1.nodeHeartbeat(true);
  RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
  MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
  am1.registerAppAttempt();
  SchedulerNodeReport report_nm1 =
      rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
  RMApp app2 = rm.submitApp(2048);
  // Kick the scheduling: 2 GB given to AM2, remaining 2 GB on nm2.
  nm2.nodeHeartbeat(true);
  RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
  MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId());
  am2.registerAppAttempt();
  SchedulerNodeReport report_nm2 =
      rm.getResourceScheduler().getNodeReport(nm2.getNodeId());
  Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemorySize());
  // Add requests for containers and send them.
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, GB, 1, 1);
  AllocateResponse alloc1Response = am1.schedule();
  am2.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 3 * GB, 0, 1);
  AllocateResponse alloc2Response = am2.schedule();
  // Kick the scheduler: 1 GB and 3 GB given to AM1 and AM2, remaining 0.
  nm1.nodeHeartbeat(true);
  while (alloc1Response.getAllocatedContainers().size() < 1) {
    LOG.info("Waiting for containers to be created for app 1...");
    Thread.sleep(1000);
    alloc1Response = am1.schedule();
  }
  while (alloc2Response.getAllocatedContainers().size() < 1) {
    LOG.info("Waiting for containers to be created for app 2...");
    Thread.sleep(1000);
    alloc2Response = am2.schedule();
  }
  // Kick the scheduler: nothing given, remaining 2 GB on nm2.
  nm2.nodeHeartbeat(true);
  List<Container> allocated1 = alloc1Response.getAllocatedContainers();
  Assert.assertEquals(1, allocated1.size());
  Assert.assertEquals(1 * GB, allocated1.get(0).getResource().getMemorySize());
  Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
  List<Container> allocated2 = alloc2Response.getAllocatedContainers();
  Assert.assertEquals(1, allocated2.size());
  Assert.assertEquals(3 * GB, allocated2.get(0).getResource().getMemorySize());
  Assert.assertEquals(nm1.getNodeId(), allocated2.get(0).getNodeId());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  report_nm2 = rm.getResourceScheduler().getNodeReport(nm2.getNodeId());
  Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
  Assert.assertEquals(2 * GB, report_nm2.getAvailableResource().getMemorySize());
  Assert.assertEquals(6 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemorySize());
  Container c1 = allocated1.get(0);
  Assert.assertEquals(GB, c1.getResource().getMemorySize());
  // Complete the container on nm1 and wait for the attempt to see it.
  ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
      c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
  nm1.containerStatus(containerStatus);
  int waitCount = 0;
  while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
    LOG.info("Waiting for containers to be finished for app 1... Tried "
        + waitCount + " times already..");
    Thread.sleep(1000);
  }
  Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
  Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(5 * GB, report_nm1.getUsedResource().getMemorySize());
  rm.stop();
}
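The heartbeat-then-poll pattern above recurs in every test in this file: a node heartbeat drives the scheduler, and the AM's next allocate call picks up whatever was assigned. A small helper capturing it, as a sketch only; the helper itself is not part of TestFifoScheduler, and imports are elided as in the snippets above.

  // Drives the scheduler with node heartbeats and polls allocate() until the
  // expected number of containers has been handed to the AM.
  private static List<Container> waitForAllocation(MockAM am, MockNM nm,
      int expectedContainers) throws Exception {
    List<Container> allocated = new ArrayList<Container>();
    while (allocated.size() < expectedContainers) {
      nm.nodeHeartbeat(true); // heartbeats trigger scheduling on this node
      AllocateResponse response = am.schedule();
      allocated.addAll(response.getAllocatedContainers());
      Thread.sleep(100);
    }
    return allocated;
  }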
Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
The class TestFifoScheduler, method testResourceOverCommit.
@Test(timeout = 60000)
public void testResourceOverCommit() throws Exception {
  int waitCount;
  MockRM rm = new MockRM(conf);
  rm.start();
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
  RMApp app1 = rm.submitApp(2048);
  // Kick the scheduling: 2 GB given to AM1, remaining 2 GB on nm1.
  nm1.nodeHeartbeat(true);
  RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
  MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
  am1.registerAppAttempt();
  SchedulerNodeReport report_nm1 =
      rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  // Check the node report: 2 GB used and 2 GB available.
  Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());
  // Add a request for containers and send it.
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
  AllocateResponse alloc1Response = am1.schedule();
  // Kick the scheduler: 2 GB given to AM1, remaining 0.
  nm1.nodeHeartbeat(true);
  while (alloc1Response.getAllocatedContainers().size() < 1) {
    LOG.info("Waiting for containers to be created for app 1...");
    Thread.sleep(1000);
    alloc1Response = am1.schedule();
  }
  List<Container> allocated1 = alloc1Response.getAllocatedContainers();
  Assert.assertEquals(1, allocated1.size());
  Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
  Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  // Check the node report: 4 GB used and 0 GB available.
  Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
  Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
  // Check that the container is assigned 2 GB.
  Container c1 = allocated1.get(0);
  Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());
  // Update the node resource to 2 GB, so the node is now over-committed.
  Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<NodeId, ResourceOption>();
  nodeResourceMap.put(nm1.getNodeId(),
      ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
  UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
  rm.getAdminService().updateNodeResource(request);
  waitCount = 0;
  while (waitCount++ != 20) {
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    if (null != report_nm1 && report_nm1.getAvailableResource().getMemorySize() != 0) {
      break;
    }
    LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried "
        + waitCount + " times already..");
    Thread.sleep(1000);
  }
  // The used resource is still 4 GB, so the available resource is now negative.
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());
  // Check that the container can complete successfully despite the
  // resource over-commitment.
  ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
      c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
  nm1.containerStatus(containerStatus);
  waitCount = 0;
  while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
    LOG.info("Waiting for containers to be finished for app 1... Tried "
        + waitCount + " times already..");
    Thread.sleep(100);
  }
  Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
  Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
  // As the container returned its 2 GB, the available resource becomes 0 again.
  Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
  rm.stop();
}
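Note the -1 passed to ResourceOption.newInstance above: it is the over-commitment timeout, and a negative value means running containers are never preempted when the node shrinks; they simply drive the available resource negative until they finish. A sketch of a reusable wait for the node report to reflect such an admin update follows; the helper is an assumption that mirrors the loop in the test rather than part of TestFifoScheduler.

  // Polls the scheduler until the node report reflects the resource update
  // (available memory moves away from 0), or gives up after 20 tries.
  private static SchedulerNodeReport waitForNodeResourceUpdate(MockRM rm, MockNM nm)
      throws InterruptedException {
    SchedulerNodeReport report = null;
    for (int tries = 0; tries < 20; tries++) {
      report = rm.getResourceScheduler().getNodeReport(nm.getNodeId());
      if (report != null && report.getAvailableResource().getMemorySize() != 0) {
        break;
      }
      Thread.sleep(1000);
    }
    return report;
  }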
Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
The class TestAMRestart, method testAMRestartNotLostContainerCompleteMsg.
@Test(timeout = 30000)
public void testAMRestartNotLostContainerCompleteMsg() throws Exception {
  YarnConfiguration conf = new YarnConfiguration();
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
  MockRM rm1 = new MockRM(conf);
  rm1.start();
  RMApp app1 = rm1.submitApp(200, "name", "user",
      new HashMap<ApplicationAccessType, String>(), false, "default", -1, null,
      "MAPREDUCE", false, true);
  MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.getResourceTrackerService());
  nm1.registerNode();
  MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
  allocateContainers(nm1, am1, 1);
  nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
  ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
  rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
  // Complete the container.
  nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.COMPLETE);
  rm1.waitForState(nm1, containerId2, RMContainerState.COMPLETED);
  // The AM may crash before this message reaches it, so poll until it arrives.
  while (true) {
    AllocateResponse response = am1.allocate(
        new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
    List<ContainerStatus> containerStatuses = response.getCompletedContainersStatuses();
    if (!isContainerIdInContainerStatus(containerStatuses, containerId2)) {
      Thread.sleep(100);
      continue;
    }
    // Is containerId2 still in justFinishedContainers? It should not be.
    containerStatuses = app1.getCurrentAppAttempt().getJustFinishedContainers();
    if (isContainerIdInContainerStatus(containerStatuses, containerId2)) {
      Assert.fail();
    }
    break;
  }
  // Fail the AM by sending a CONTAINER_FINISHED event for its container
  // without unregistering.
  nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
  rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
  // Wait for the app to start a new attempt.
  rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
  // Assert this is a new AM.
  ApplicationAttemptId newAttemptId = app1.getCurrentAppAttempt().getAppAttemptId();
  Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));
  // Launch the new AM.
  RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
  MockAM am2 = rm1.launchAndRegisterAM(app1, rm1, nm1);
  // Verify that the new AM receives the container-complete message.
  AllocateResponse allocateResponse = am2.allocate(
      new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
  List<ContainerStatus> containerStatuses =
      allocateResponse.getCompletedContainersStatuses();
  if (!isContainerIdInContainerStatus(containerStatuses, containerId2)) {
    Assert.fail();
  }
  containerStatuses = attempt2.getJustFinishedContainers();
  if (isContainerIdInContainerStatus(containerStatuses, containerId2)) {
    Assert.fail();
  }
  // A second allocate should not deliver the container-complete message again.
  allocateResponse = am2.allocate(
      new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
  containerStatuses = allocateResponse.getCompletedContainersStatuses();
  if (isContainerIdInContainerStatus(containerStatuses, containerId2)) {
    Assert.fail();
  }
  rm1.stop();
}
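The test calls a private helper, isContainerIdInContainerStatus, that is not shown in this excerpt. A plausible implementation, reconstructed from how it is used above:

  // Returns true if any status in the list refers to the given container.
  private boolean isContainerIdInContainerStatus(
      List<ContainerStatus> containerStatuses, ContainerId containerId) {
    for (ContainerStatus status : containerStatuses) {
      if (status.getContainerId().equals(containerId)) {
        return true;
      }
    }
    return false;
  }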
Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
The class TestCapacityScheduler, method testAppReservationWithDominantResourceCalculator.
// Test to ensure that we don't carry out reservations on nodes that have
// no CPU available when using the DominantResourceCalculator.
@Test(timeout = 30000)
public void testAppReservationWithDominantResourceCalculator() throws Exception {
  CapacitySchedulerConfiguration csconf = new CapacitySchedulerConfiguration();
  csconf.setResourceComparator(DominantResourceCalculator.class);
  YarnConfiguration conf = new YarnConfiguration(csconf);
  conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
      ResourceScheduler.class);
  MockRM rm = new MockRM(conf);
  rm.start();
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10 * GB, 1);
  // Register extra nodes to bump up the cluster resource.
  MockNM nm2 = rm.registerNode("127.0.0.1:1235", 10 * GB, 4);
  rm.registerNode("127.0.0.1:1236", 10 * GB, 4);
  RMApp app1 = rm.submitApp(1024);
  // Kick the scheduling.
  nm1.nodeHeartbeat(true);
  RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
  MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
  am1.registerAppAttempt();
  SchedulerNodeReport report_nm1 =
      rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  // Check the node report.
  Assert.assertEquals(1 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(9 * GB, report_nm1.getAvailableResource().getMemorySize());
  // Add a request for containers and send it.
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 1 * GB, 1, 1);
  am1.schedule();
  // Kick the scheduler; no container reservation should happen on nm1.
  nm1.nodeHeartbeat(true);
  Thread.sleep(1000);
  AllocateResponse allocResponse = am1.schedule();
  ApplicationResourceUsageReport report =
      rm.getResourceScheduler().getAppResourceUsageReport(attempt1.getAppAttemptId());
  Assert.assertEquals(0, allocResponse.getAllocatedContainers().size());
  Assert.assertEquals(0, report.getNumReservedContainers());
  // The container should get allocated on nm2 instead.
  nm2.nodeHeartbeat(true);
  while (allocResponse.getAllocatedContainers().size() == 0) {
    Thread.sleep(100);
    allocResponse = am1.schedule();
  }
  report = rm.getResourceScheduler().getAppResourceUsageReport(attempt1.getAppAttemptId());
  Assert.assertEquals(1, allocResponse.getAllocatedContainers().size());
  Assert.assertEquals(0, report.getNumReservedContainers());
  rm.stop();
}
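Why nm1 is skipped: the default resource calculator accounts only for memory, while the DominantResourceCalculator also accounts for vcores, and nm1's single vcore is already held by the AM. A rough, self-contained illustration of the per-dimension fit check; the values are taken from the test, and the fitsIn call is from org.apache.hadoop.yarn.util.resource.Resources.

  // nm1 after the AM starts: 9 GB of memory free, but zero vcores free.
  Resource availableOnNm1 = Resource.newInstance(9 * 1024, 0);
  Resource request = Resource.newInstance(1024, 1); // 1 GB, 1 vcore
  // Memory fits but vcores do not, so a CPU-aware scheduler neither
  // allocates nor reserves on nm1 and waits for nm2 instead.
  boolean fits = Resources.fitsIn(request, availableOnNm1); // false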