use of org.apache.hadoop.yarn.api.records.ResourceRequest in project hadoop by apache.
the class MockResourceManagerFacade method allocate.
@SuppressWarnings("deprecation")
@Override
public AllocateResponse allocate(AllocateRequest request) throws YarnException, IOException {
if (request.getAskList() != null && request.getAskList().size() > 0 && request.getReleaseList() != null && request.getReleaseList().size() > 0) {
Assert.fail("The mock RM implementation does not support receiving " + "askList and releaseList in the same heartbeat");
}
String amrmToken = getAppIdentifier();
ArrayList<Container> containerList = new ArrayList<Container>();
if (request.getAskList() != null) {
for (ResourceRequest rr : request.getAskList()) {
for (int i = 0; i < rr.getNumContainers(); i++) {
ContainerId containerId = ContainerId.newInstance(getApplicationAttemptId(1), containerIndex.incrementAndGet());
Container container = Records.newRecord(Container.class);
container.setId(containerId);
container.setPriority(rr.getPriority());
// We don't use the node for running containers in the test cases. So
// it is OK to hard code it to some dummy value
NodeId nodeId = NodeId.newInstance(!Strings.isNullOrEmpty(rr.getResourceName()) ? rr.getResourceName() : "dummy", 1000);
container.setNodeId(nodeId);
container.setResource(rr.getCapability());
containerList.add(container);
synchronized (applicationContainerIdMap) {
// Keep track of the containers returned to this application. We
// will need it in future
Assert.assertTrue("The application id is Not registered before allocate(): " + amrmToken, applicationContainerIdMap.containsKey(amrmToken));
List<ContainerId> ids = applicationContainerIdMap.get(amrmToken);
ids.add(containerId);
this.allocatedContainerMap.put(containerId, container);
}
}
}
}
if (request.getReleaseList() != null && request.getReleaseList().size() > 0) {
Log.getLog().info("Releasing containers: " + request.getReleaseList().size());
synchronized (applicationContainerIdMap) {
Assert.assertTrue("The application id is not registered before allocate(): " + amrmToken, applicationContainerIdMap.containsKey(amrmToken));
List<ContainerId> ids = applicationContainerIdMap.get(amrmToken);
for (ContainerId id : request.getReleaseList()) {
boolean found = false;
for (ContainerId c : ids) {
if (c.equals(id)) {
found = true;
break;
}
}
Assert.assertTrue("ContainerId " + id + " being released is not valid for application: " + conf.get("AMRMTOKEN"), found);
ids.remove(id);
// Return the released container back to the AM with new fake Ids. The
// test case does not care about the IDs. The IDs are faked because
// otherwise the LRM will throw duplication identifier exception. This
// returning of fake containers is ONLY done for testing purpose - for
// the test code to get confirmation that the sub-cluster resource
// managers received the release request
ContainerId fakeContainerId = ContainerId.newInstance(getApplicationAttemptId(1), containerIndex.incrementAndGet());
Container fakeContainer = allocatedContainerMap.get(id);
fakeContainer.setId(fakeContainerId);
containerList.add(fakeContainer);
}
}
}
Log.getLog().info("Allocating containers: " + containerList.size() + " for application attempt: " + conf.get("AMRMTOKEN"));
// Always issue a new AMRMToken as if RM rolled master key
Token newAMRMToken = Token.newInstance(new byte[0], "", new byte[0], "");
return AllocateResponse.newInstance(0, new ArrayList<ContainerStatus>(), containerList, new ArrayList<NodeReport>(), null, AMCommand.AM_RESYNC, 1, null, new ArrayList<NMToken>(), newAMRMToken, new ArrayList<UpdatedContainer>());
}
use of org.apache.hadoop.yarn.api.records.ResourceRequest in project hadoop by apache.
the class TestAMRMProxyService method getContainersAndAssert.
private List<Container> getContainersAndAssert(int appId, int numberOfResourceRequests) throws Exception {
AllocateRequest allocateRequest = Records.newRecord(AllocateRequest.class);
allocateRequest.setResponseId(1);
List<Container> containers = new ArrayList<Container>(numberOfResourceRequests);
List<ResourceRequest> askList = new ArrayList<ResourceRequest>(numberOfResourceRequests);
for (int testAppId = 0; testAppId < numberOfResourceRequests; testAppId++) {
askList.add(createResourceRequest("test-node-" + Integer.toString(testAppId), 6000, 2, testAppId % 5, 1));
}
allocateRequest.setAskList(askList);
AllocateResponse allocateResponse = allocate(appId, allocateRequest);
Assert.assertNotNull("allocate() returned null response", allocateResponse);
Assert.assertNull("new AMRMToken from RM should have been nulled by AMRMProxyService", allocateResponse.getAMRMToken());
containers.addAll(allocateResponse.getAllocatedContainers());
// Send max 10 heart beats to receive all the containers. If not, we will
// fail the test
int numHeartbeat = 0;
while (containers.size() < askList.size() && numHeartbeat++ < 10) {
allocateResponse = allocate(appId, Records.newRecord(AllocateRequest.class));
Assert.assertNotNull("allocate() returned null response", allocateResponse);
Assert.assertNull("new AMRMToken from RM should have been nulled by AMRMProxyService", allocateResponse.getAMRMToken());
containers.addAll(allocateResponse.getAllocatedContainers());
LOG.info("Number of allocated containers in this request: " + Integer.toString(allocateResponse.getAllocatedContainers().size()));
LOG.info("Total number of allocated containers: " + Integer.toString(containers.size()));
Thread.sleep(10);
}
// We broadcast the request, the number of containers we received will be
// higher than we ask
Assert.assertTrue("The asklist count is not same as response", askList.size() <= containers.size());
return containers;
}
use of org.apache.hadoop.yarn.api.records.ResourceRequest in project hadoop by apache.
the class TestRMRestart method testQueueMetricsOnRMRestart.
@SuppressWarnings("resource")
@Test(timeout = 60000)
public void testQueueMetricsOnRMRestart() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// PHASE 1: create state in an RM
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
QueueMetrics qm1 = rm1.getResourceScheduler().getRootQueueMetrics();
resetQueueMetrics(qm1);
assertQueueMetrics(qm1, 0, 0, 0, 0);
// create app that gets launched and does allocate before RM restart
RMApp app1 = rm1.submitApp(200);
// Need to wait first for AppAttempt to be started (RMAppState.ACCEPTED)
// and then for it to reach RMAppAttemptState.SCHEDULED
// inorder to ensure appsPending metric is incremented
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
rm1.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
assertQueueMetrics(qm1, 1, 1, 0, 0);
nm1.nodeHeartbeat(true);
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
am1.allocate("127.0.0.1", 1000, 1, new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
while (conts.size() == 0) {
nm1.nodeHeartbeat(true);
conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(500);
}
assertQueueMetrics(qm1, 1, 0, 1, 0);
// PHASE 2: create new RM and start from old state
// create new RM to represent restart and recover state
MockRM rm2 = createMockRM(conf, memStore);
QueueMetrics qm2 = rm2.getResourceScheduler().getRootQueueMetrics();
resetQueueMetrics(qm2);
assertQueueMetrics(qm2, 0, 0, 0, 0);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// recover app
RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
nm1.nodeHeartbeat(true);
nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
NMContainerStatus status = TestRMRestart.createNMContainerStatus(loadedApp1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(status), null);
while (loadedApp1.getAppAttempts().size() != 2) {
Thread.sleep(200);
}
attempt1 = loadedApp1.getCurrentAppAttempt();
attemptId1 = attempt1.getAppAttemptId();
rm2.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
assertQueueMetrics(qm2, 1, 1, 0, 0);
nm1.nodeHeartbeat(true);
rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
assertQueueMetrics(qm2, 1, 0, 1, 0);
am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
am1.allocate("127.0.0.1", 1000, 3, new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
while (conts.size() == 0) {
nm1.nodeHeartbeat(true);
conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(500);
}
// finish the AMs
finishApplicationMaster(loadedApp1, rm2, nm1, am1);
// now AppAttempt and App becomes FINISHED,
// we should also grant APP_ATTEMPT_REMOVE/APP_REMOVE event
// had processed by scheduler
rm2.waitForAppRemovedFromScheduler(loadedApp1.getApplicationId());
assertQueueMetrics(qm2, 1, 0, 0, 1);
}
use of org.apache.hadoop.yarn.api.records.ResourceRequest in project hadoop by apache.
the class TestRMRestart method testRMRestart.
@SuppressWarnings("rawtypes")
@Test(timeout = 180000)
public void testRMRestart() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
// PHASE 1: create state in an RM
// start RM
MockRM rm1 = createMockRM(conf, memStore);
// start like normal because state is empty
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
MockNM nm2 = new MockNM("127.0.0.2:5678", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// nm2 will not heartbeat with RM1
nm2.registerNode();
// create app that will finish and the final state should be saved.
RMApp app0 = rm1.submitApp(200);
RMAppAttempt attempt0 = app0.getCurrentAppAttempt();
// spot check that app is saved
Assert.assertEquals(1, rmAppState.size());
nm1.nodeHeartbeat(true);
MockAM am0 = rm1.sendAMLaunched(attempt0.getAppAttemptId());
am0.registerAppAttempt();
finishApplicationMaster(app0, rm1, nm1, am0);
// create app that gets launched and does allocate before RM restart
RMApp app1 = rm1.submitApp(200);
// assert app1 info is saved
ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
Assert.assertNotNull(appState);
Assert.assertEquals(0, appState.getAttemptCount());
Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app1.getApplicationSubmissionContext().getApplicationId());
//kick the scheduling to allocate AM container
nm1.nodeHeartbeat(true);
// assert app1 attempt is saved
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
Assert.assertEquals(1, appState.getAttemptCount());
ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
Assert.assertNotNull(attemptState);
Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
// launch the AM
MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
// AM request for containers
am1.allocate("127.0.0.1", 1000, 1, new ArrayList<ContainerId>());
// kick the scheduler
nm1.nodeHeartbeat(true);
List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
while (conts.size() == 0) {
nm1.nodeHeartbeat(true);
conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(500);
}
// create app that does not get launched by RM before RM restart
RMApp app2 = rm1.submitApp(200);
// assert app2 info is saved
appState = rmAppState.get(app2.getApplicationId());
Assert.assertNotNull(appState);
Assert.assertEquals(0, appState.getAttemptCount());
Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app2.getApplicationSubmissionContext().getApplicationId());
// create unmanaged app
RMApp appUnmanaged = rm1.submitApp(200, "someApp", "someUser", null, true, null, conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null);
ApplicationAttemptId unmanagedAttemptId = appUnmanaged.getCurrentAppAttempt().getAppAttemptId();
// assert appUnmanaged info is saved
ApplicationId unmanagedAppId = appUnmanaged.getApplicationId();
appState = rmAppState.get(unmanagedAppId);
Assert.assertNotNull(appState);
// wait for attempt to reach LAUNCHED state
rm1.waitForState(unmanagedAttemptId, RMAppAttemptState.LAUNCHED);
rm1.waitForState(unmanagedAppId, RMAppState.ACCEPTED);
// assert unmanaged attempt info is saved
Assert.assertEquals(1, appState.getAttemptCount());
Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), appUnmanaged.getApplicationSubmissionContext().getApplicationId());
// PHASE 2: create new RM and start from old state
// create new RM to represent restart and recover state
MockRM rm2 = createMockRM(conf, memStore);
// start new RM
rm2.start();
// change NM to point to new RM
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
nm2.setResourceTrackerService(rm2.getResourceTrackerService());
// verify load of old state
// 4 apps are loaded.
// FINISHED app and attempt is also loaded back.
// Unmanaged app state is still loaded back but it cannot be restarted by
// the RM. this will change with work preserving RM restart in which AMs/NMs
// are not rebooted.
Assert.assertEquals(4, rm2.getRMContext().getRMApps().size());
// check that earlier finished app and attempt is also loaded back and move
// to finished state.
rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
// verify correct number of attempts and other data
RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
Assert.assertNotNull(loadedApp1);
Assert.assertEquals(1, loadedApp1.getAppAttempts().size());
Assert.assertEquals(app1.getApplicationSubmissionContext().getApplicationId(), loadedApp1.getApplicationSubmissionContext().getApplicationId());
RMApp loadedApp2 = rm2.getRMContext().getRMApps().get(app2.getApplicationId());
Assert.assertNotNull(loadedApp2);
//Assert.assertEquals(0, loadedApp2.getAppAttempts().size());
Assert.assertEquals(app2.getApplicationSubmissionContext().getApplicationId(), loadedApp2.getApplicationSubmissionContext().getApplicationId());
// verify state machine kicked into expected states
rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
rm2.waitForState(loadedApp2.getApplicationId(), RMAppState.ACCEPTED);
// verify attempts for apps
// The app for which AM was started will wait for previous am
// container finish event to arrive. However for an application for which
// no am container was running will start new application attempt.
Assert.assertEquals(1, loadedApp1.getAppAttempts().size());
Assert.assertEquals(1, loadedApp2.getAppAttempts().size());
// verify old AM is not accepted
// change running AM to talk to new RM
am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
try {
am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
Assert.fail();
} catch (ApplicationAttemptNotFoundException e) {
Assert.assertTrue(e instanceof ApplicationAttemptNotFoundException);
}
// NM should be rebooted on heartbeat, even first heartbeat for nm2
NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
hbResponse = nm2.nodeHeartbeat(true);
Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
// new NM to represent NM re-register
nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
nm2 = new MockNM("127.0.0.2:5678", 15120, rm2.getResourceTrackerService());
NMContainerStatus status = TestRMRestart.createNMContainerStatus(loadedApp1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(status), null);
nm2.registerNode();
rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
// wait for the 2nd attempt to be started.
int timeoutSecs = 0;
while (loadedApp1.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
;
Thread.sleep(200);
}
// verify no more reboot response sent
hbResponse = nm1.nodeHeartbeat(true);
Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction());
hbResponse = nm2.nodeHeartbeat(true);
Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction());
// assert app1 attempt is saved
attempt1 = loadedApp1.getCurrentAppAttempt();
attemptId1 = attempt1.getAppAttemptId();
rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
appState = rmAppState.get(loadedApp1.getApplicationId());
attemptState = appState.getAttempt(attemptId1);
Assert.assertNotNull(attemptState);
Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
// Nodes on which the AM's run
MockNM am1Node = nm1;
if (attemptState.getMasterContainer().getNodeId().toString().contains("127.0.0.2")) {
am1Node = nm2;
}
// assert app2 attempt is saved
RMAppAttempt attempt2 = loadedApp2.getCurrentAppAttempt();
ApplicationAttemptId attemptId2 = attempt2.getAppAttemptId();
rm2.waitForState(attemptId2, RMAppAttemptState.ALLOCATED);
appState = rmAppState.get(loadedApp2.getApplicationId());
attemptState = appState.getAttempt(attemptId2);
Assert.assertNotNull(attemptState);
Assert.assertEquals(BuilderUtils.newContainerId(attemptId2, 1), attemptState.getMasterContainer().getId());
MockNM am2Node = nm1;
if (attemptState.getMasterContainer().getNodeId().toString().contains("127.0.0.2")) {
am2Node = nm2;
}
// start the AM's
am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
MockAM am2 = rm2.sendAMLaunched(attempt2.getAppAttemptId());
am2.registerAppAttempt();
//request for containers
am1.allocate("127.0.0.1", 1000, 3, new ArrayList<ContainerId>());
am2.allocate("127.0.0.2", 1000, 1, new ArrayList<ContainerId>());
// verify container allocate continues to work
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
while (conts.size() == 0) {
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(500);
}
// finish the AMs
finishApplicationMaster(loadedApp1, rm2, am1Node, am1);
finishApplicationMaster(loadedApp2, rm2, am2Node, am2);
// stop RM's
rm2.stop();
rm1.stop();
// completed apps are not removed immediately after app finish
// And finished app is also loaded back.
Assert.assertEquals(4, rmAppState.size());
}
use of org.apache.hadoop.yarn.api.records.ResourceRequest in project hadoop by apache.
the class TestSystemMetricsPublisher method createRMApp.
private static RMApp createRMApp(ApplicationId appId) {
RMApp app = mock(RMAppImpl.class);
when(app.getApplicationId()).thenReturn(appId);
when(app.getName()).thenReturn("test app");
when(app.getApplicationType()).thenReturn("test app type");
when(app.getUser()).thenReturn("test user");
when(app.getQueue()).thenReturn("test queue");
when(app.getSubmitTime()).thenReturn(Integer.MAX_VALUE + 1L);
when(app.getStartTime()).thenReturn(Integer.MAX_VALUE + 2L);
when(app.getFinishTime()).thenReturn(Integer.MAX_VALUE + 3L);
when(app.getDiagnostics()).thenReturn(new StringBuilder("test diagnostics info"));
RMAppAttempt appAttempt = mock(RMAppAttempt.class);
when(appAttempt.getAppAttemptId()).thenReturn(ApplicationAttemptId.newInstance(appId, 1));
when(app.getCurrentAppAttempt()).thenReturn(appAttempt);
when(app.getFinalApplicationStatus()).thenReturn(FinalApplicationStatus.UNDEFINED);
when(app.getRMAppMetrics()).thenReturn(new RMAppMetrics(null, 0, 0, Integer.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE));
Set<String> appTags = new HashSet<String>();
appTags.add("test");
appTags.add("tags");
when(app.getApplicationTags()).thenReturn(appTags);
ApplicationSubmissionContext asc = mock(ApplicationSubmissionContext.class);
when(asc.getUnmanagedAM()).thenReturn(false);
when(asc.getPriority()).thenReturn(Priority.newInstance(10));
when(asc.getNodeLabelExpression()).thenReturn("high-cpu");
ContainerLaunchContext containerLaunchContext = mock(ContainerLaunchContext.class);
when(containerLaunchContext.getCommands()).thenReturn(Collections.singletonList("java -Xmx1024m"));
when(asc.getAMContainerSpec()).thenReturn(containerLaunchContext);
when(app.getApplicationSubmissionContext()).thenReturn(asc);
when(app.getAppNodeLabelExpression()).thenCallRealMethod();
ResourceRequest amReq = mock(ResourceRequest.class);
when(amReq.getNodeLabelExpression()).thenReturn("high-mem");
when(app.getAMResourceRequest()).thenReturn(amReq);
when(app.getAmNodeLabelExpression()).thenCallRealMethod();
when(app.getApplicationPriority()).thenReturn(Priority.newInstance(10));
when(app.getCallerContext()).thenReturn(new CallerContext.Builder("context").build());
return app;
}
Aggregations