use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class TestRM method testNMTokenSentForNormalContainer.
// Test even if AM container is allocated with containerId not equal to 1, the
// following allocate requests from AM should be able to retrieve the
// corresponding NM Token.
@Test(timeout = 20000)
public void testNMTokenSentForNormalContainer() throws Exception {
conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getCanonicalName());
MockRM rm = new MockRM(conf);
rm.start();
MockNM nm1 = rm.registerNode("h1:1234", 5120);
RMApp app = rm.submitApp(2000);
RMAppAttempt attempt = app.getCurrentAppAttempt();
// Call getNewContainerId to increase container Id so that the AM container
// Id doesn't equal to one.
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
cs.getApplicationAttempt(attempt.getAppAttemptId()).getNewContainerId();
MockAM am = MockRM.launchAM(app, rm, nm1);
// am container Id not equal to 1.
Assert.assertTrue(attempt.getMasterContainer().getId().getContainerId() != 1);
// NMSecretManager doesn't record the node on which the am is allocated.
Assert.assertFalse(rm.getRMContext().getNMTokenSecretManager().isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm1.getNodeId()));
am.registerAppAttempt();
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
int NUM_CONTAINERS = 1;
List<Container> containers = new ArrayList<Container>();
// nmTokens keeps track of all the nmTokens issued in the allocate call.
List<NMToken> expectedNMTokens = new ArrayList<NMToken>();
// am1 allocate 1 container on nm1.
while (true) {
AllocateResponse response = am.allocate("127.0.0.1", 2000, NUM_CONTAINERS, new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
containers.addAll(response.getAllocatedContainers());
expectedNMTokens.addAll(response.getNMTokens());
if (containers.size() == NUM_CONTAINERS) {
break;
}
Thread.sleep(200);
System.out.println("Waiting for container to be allocated.");
}
NodeId nodeId = expectedNMTokens.get(0).getNodeId();
// NMToken is sent for the allocated container.
Assert.assertEquals(nm1.getNodeId(), nodeId);
}
use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class RMContainerAllocator method getResources.
@SuppressWarnings("unchecked")
private List<Container> getResources() throws Exception {
applyConcurrentTaskLimits();
// will be null the first time
Resource headRoom = Resources.clone(getAvailableResources());
AllocateResponse response;
/*
* If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
* milliseconds before aborting. During this interval, AM will still try
* to contact the RM.
*/
try {
response = makeRemoteRequest();
// Reset retry count if no exception occurred.
retrystartTime = System.currentTimeMillis();
} catch (ApplicationAttemptNotFoundException e) {
// This can happen if the RM has been restarted. If it is in that state,
// this application must clean itself up.
eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
throw new RMContainerAllocationException("Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationAttemptId(), e);
} catch (ApplicationMasterNotRegisteredException e) {
LOG.info("ApplicationMaster is out of sync with ResourceManager," + " hence resync and send outstanding requests.");
// RM may have restarted, re-register with RM.
lastResponseID = 0;
register();
addOutstandingRequestOnResync();
return null;
} catch (InvalidLabelResourceRequestException e) {
// If Invalid label exception is received means the requested label doesnt
// have access so killing job in this case.
String diagMsg = "Requested node-label-expression is invalid: " + StringUtils.stringifyException(e);
LOG.info(diagMsg);
JobId jobId = this.getJob().getID();
eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
throw e;
} catch (Exception e) {
// re-trying until the retryInterval has expired.
if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
LOG.error("Could not contact RM after " + retryInterval + " milliseconds.");
eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
throw new RMContainerAllocationException("Could not contact RM after " + retryInterval + " milliseconds.");
}
// continue to attempt to contact the RM.
throw e;
}
Resource newHeadRoom = getAvailableResources();
List<Container> newContainers = response.getAllocatedContainers();
// Setting NMTokens
if (response.getNMTokens() != null) {
for (NMToken nmToken : response.getNMTokens()) {
NMTokenCache.setNMToken(nmToken.getNodeId().toString(), nmToken.getToken());
}
}
// Setting AMRMToken
if (response.getAMRMToken() != null) {
updateAMRMToken(response.getAMRMToken());
}
List<ContainerStatus> finishedContainers = response.getCompletedContainersStatuses();
// propagate preemption requests
final PreemptionMessage preemptReq = response.getPreemptionMessage();
if (preemptReq != null) {
preemptionPolicy.preempt(new PreemptionContext(assignedRequests), preemptReq);
}
if (newContainers.size() + finishedContainers.size() > 0 || !headRoom.equals(newHeadRoom)) {
//something changed
recalculateReduceSchedule = true;
if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) {
LOG.debug("headroom=" + newHeadRoom);
}
}
if (LOG.isDebugEnabled()) {
for (Container cont : newContainers) {
LOG.debug("Received new Container :" + cont);
}
}
//Called on each allocation. Will know about newly blacklisted/added hosts.
computeIgnoreBlacklisting();
handleUpdatedNodes(response);
handleJobPriorityChange(response);
// handle receiving the timeline collector address for this app
String collectorAddr = response.getCollectorAddr();
MRAppMaster.RunningAppContext appContext = (MRAppMaster.RunningAppContext) this.getContext();
if (collectorAddr != null && !collectorAddr.isEmpty() && appContext.getTimelineV2Client() != null) {
appContext.getTimelineV2Client().setTimelineServiceAddress(response.getCollectorAddr());
}
for (ContainerStatus cont : finishedContainers) {
processFinishedContainer(cont);
}
return newContainers;
}
use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class TestRM method allocateContainersAndValidateNMTokens.
protected void allocateContainersAndValidateNMTokens(MockAM am, ArrayList<Container> containersReceived, int totalContainerRequested, HashMap<String, Token> nmTokens, MockNM nm) throws Exception, InterruptedException {
ArrayList<ContainerId> releaseContainerList = new ArrayList<ContainerId>();
AllocateResponse response;
ArrayList<ResourceRequest> resourceRequest = new ArrayList<ResourceRequest>();
while (containersReceived.size() < totalContainerRequested) {
nm.nodeHeartbeat(true);
LOG.info("requesting containers..");
response = am.allocate(resourceRequest, releaseContainerList);
containersReceived.addAll(response.getAllocatedContainers());
if (!response.getNMTokens().isEmpty()) {
for (NMToken nmToken : response.getNMTokens()) {
String nodeId = nmToken.getNodeId().toString();
if (nmTokens.containsKey(nodeId)) {
Assert.fail("Duplicate NMToken received for : " + nodeId);
}
nmTokens.put(nodeId, nmToken.getToken());
}
}
LOG.info("Got " + containersReceived.size() + " containers. Waiting to get " + totalContainerRequested);
Thread.sleep(WAIT_SLEEP_MS);
}
}
use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class TestRM method testNMToken.
@Test(timeout = 40000)
public void testNMToken() throws Exception {
MockRM rm = new MockRM(conf);
try {
rm.start();
MockNM nm1 = rm.registerNode("h1:1234", 10000);
NMTokenSecretManagerInRM nmTokenSecretManager = rm.getRMContext().getNMTokenSecretManager();
// submitting new application
RMApp app = rm.submitApp(1000);
// start scheduling.
nm1.nodeHeartbeat(true);
// Starting application attempt and launching
// It should get registered with NMTokenSecretManager.
RMAppAttempt attempt = app.getCurrentAppAttempt();
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
Assert.assertTrue(nmTokenSecretManager.isApplicationAttemptRegistered(attempt.getAppAttemptId()));
// This will register application master.
am.registerAppAttempt();
ArrayList<Container> containersReceivedForNM1 = new ArrayList<Container>();
List<ContainerId> releaseContainerList = new ArrayList<ContainerId>();
HashMap<String, Token> nmTokens = new HashMap<String, Token>();
// initially requesting 2 containers.
AllocateResponse response = am.allocate("h1", 1000, 2, releaseContainerList);
Assert.assertEquals(0, response.getAllocatedContainers().size());
allocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 2, nmTokens, nm1);
Assert.assertEquals(1, nmTokens.size());
// requesting 2 more containers.
response = am.allocate("h1", 1000, 2, releaseContainerList);
Assert.assertEquals(0, response.getAllocatedContainers().size());
allocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 4, nmTokens, nm1);
Assert.assertEquals(1, nmTokens.size());
// We will be simulating NM restart so restarting newly added h2:1234
// NM 2 now registers.
MockNM nm2 = rm.registerNode("h2:1234", 10000);
nm2.nodeHeartbeat(true);
ArrayList<Container> containersReceivedForNM2 = new ArrayList<Container>();
response = am.allocate("h2", 1000, 2, releaseContainerList);
Assert.assertEquals(0, response.getAllocatedContainers().size());
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 2, nmTokens, nm2);
Assert.assertEquals(2, nmTokens.size());
// Simulating NM-2 restart.
nm2 = rm.registerNode("h2:1234", 10000);
// Wait for reconnect to make it through the RM and create a new RMNode
Map<NodeId, RMNode> nodes = rm.getRMContext().getRMNodes();
while (nodes.get(nm2.getNodeId()).getLastNodeHeartBeatResponse().getResponseId() > 0) {
Thread.sleep(WAIT_SLEEP_MS);
}
int interval = 40;
// Wait for nm Token to be cleared.
while (nmTokenSecretManager.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm2.getNodeId()) && interval-- > 0) {
LOG.info("waiting for nmToken to be cleared for : " + nm2.getNodeId());
Thread.sleep(WAIT_SLEEP_MS);
}
Assert.assertTrue(nmTokenSecretManager.isApplicationAttemptRegistered(attempt.getAppAttemptId()));
// removing NMToken for h2:1234
nmTokens.remove(nm2.getNodeId().toString());
Assert.assertEquals(1, nmTokens.size());
// We should again receive the NMToken.
response = am.allocate("h2", 1000, 2, releaseContainerList);
Assert.assertEquals(0, response.getAllocatedContainers().size());
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 4, nmTokens, nm2);
Assert.assertEquals(2, nmTokens.size());
// Now rolling over NMToken masterKey. it should resend the NMToken in
// next allocate call.
Assert.assertTrue(nmTokenSecretManager.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm1.getNodeId()));
Assert.assertTrue(nmTokenSecretManager.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm2.getNodeId()));
nmTokenSecretManager.rollMasterKey();
nmTokenSecretManager.activateNextMasterKey();
Assert.assertFalse(nmTokenSecretManager.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm1.getNodeId()));
Assert.assertFalse(nmTokenSecretManager.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm2.getNodeId()));
// It should not remove application attempt entry.
Assert.assertTrue(nmTokenSecretManager.isApplicationAttemptRegistered(attempt.getAppAttemptId()));
nmTokens.clear();
Assert.assertEquals(0, nmTokens.size());
// We should again receive the NMToken.
response = am.allocate("h2", 1000, 1, releaseContainerList);
Assert.assertEquals(0, response.getAllocatedContainers().size());
allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 5, nmTokens, nm2);
Assert.assertEquals(1, nmTokens.size());
Assert.assertTrue(nmTokenSecretManager.isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm2.getNodeId()));
// After AM is finished making sure that nmtoken entry for app
Assert.assertTrue(nmTokenSecretManager.isApplicationAttemptRegistered(attempt.getAppAttemptId()));
am.unregisterAppAttempt();
// marking all the containers as finished.
for (Container container : containersReceivedForNM1) {
nm1.nodeHeartbeat(attempt.getAppAttemptId(), container.getId().getContainerId(), ContainerState.COMPLETE);
}
for (Container container : containersReceivedForNM2) {
nm2.nodeHeartbeat(attempt.getAppAttemptId(), container.getId().getContainerId(), ContainerState.COMPLETE);
}
nm1.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
Assert.assertFalse(nmTokenSecretManager.isApplicationAttemptRegistered(attempt.getAppAttemptId()));
} finally {
rm.stop();
}
}
use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class TestAMRestart method testNMTokensRebindOnAMRestart.
@Test(timeout = 30000)
public void testNMTokensRebindOnAMRestart() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 3);
// To prevent test from blacklisting nm1 for AM, we sit threshold to half
// of 2 nodes which is 1
conf.setFloat(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD, 0.5f);
MockRM rm1 = new MockRM(conf);
rm1.start();
RMApp app1 = rm1.submitApp(200, "myname", "myuser", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false, true);
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
nm1.registerNode();
MockNM nm2 = new MockNM("127.1.1.1:4321", 8000, rm1.getResourceTrackerService());
nm2.registerNode();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
List<Container> containers = new ArrayList<Container>();
// nmTokens keeps track of all the nmTokens issued in the allocate call.
List<NMToken> expectedNMTokens = new ArrayList<NMToken>();
// first container
while (true) {
AllocateResponse response = am1.allocate("127.0.0.1", 2000, 2, new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
containers.addAll(response.getAllocatedContainers());
expectedNMTokens.addAll(response.getNMTokens());
if (containers.size() == 2) {
break;
}
Thread.sleep(200);
System.out.println("Waiting for container to be allocated.");
}
// launch the container-2
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
// launch the container-3
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.RUNNING);
ContainerId containerId3 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
rm1.waitForState(nm1, containerId3, RMContainerState.RUNNING);
// fail am1
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
// restart the am
MockAM am2 = MockRM.launchAM(app1, rm1, nm1);
RegisterApplicationMasterResponse registerResponse = am2.registerAppAttempt();
rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
// check am2 get the nm token from am1.
Assert.assertEquals(expectedNMTokens.size(), registerResponse.getNMTokensFromPreviousAttempts().size());
for (int i = 0; i < expectedNMTokens.size(); i++) {
Assert.assertTrue(expectedNMTokens.get(i).equals(registerResponse.getNMTokensFromPreviousAttempts().get(i)));
}
// am2 allocate 1 container on nm2
containers = new ArrayList<Container>();
while (true) {
AllocateResponse allocateResponse = am2.allocate("127.1.1.1", 4000, 1, new ArrayList<ContainerId>());
nm2.nodeHeartbeat(true);
containers.addAll(allocateResponse.getAllocatedContainers());
expectedNMTokens.addAll(allocateResponse.getNMTokens());
if (containers.size() == 1) {
break;
}
Thread.sleep(200);
System.out.println("Waiting for container to be allocated.");
}
nm1.nodeHeartbeat(am2.getApplicationAttemptId(), 2, ContainerState.RUNNING);
ContainerId am2ContainerId2 = ContainerId.newContainerId(am2.getApplicationAttemptId(), 2);
rm1.waitForState(nm1, am2ContainerId2, RMContainerState.RUNNING);
// fail am2.
nm1.nodeHeartbeat(am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
// restart am
MockAM am3 = MockRM.launchAM(app1, rm1, nm1);
registerResponse = am3.registerAppAttempt();
rm1.waitForState(am3.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
// check am3 get the NM token from both am1 and am2;
List<NMToken> transferredTokens = registerResponse.getNMTokensFromPreviousAttempts();
Assert.assertEquals(2, transferredTokens.size());
Assert.assertTrue(transferredTokens.containsAll(expectedNMTokens));
rm1.stop();
}
Aggregations