use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.
the class TestAMRestart method testAMRestartWithExistingContainers.
@Test(timeout = 30000)
public void testAMRestartWithExistingContainers() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
MockRM rm1 = new MockRM(conf);
rm1.start();
RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false, true);
MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.getResourceTrackerService());
nm1.registerNode();
MockNM nm2 = new MockNM("127.0.0.1:2351", 4089, rm1.getResourceTrackerService());
nm2.registerNode();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
int NUM_CONTAINERS = 3;
allocateContainers(nm1, am1, NUM_CONTAINERS);
// launch the 2nd container, for testing running container transferred.
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
// launch the 3rd container, for testing container allocated by previous
// attempt is completed by the next new attempt/
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.RUNNING);
ContainerId containerId3 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
rm1.waitForState(nm1, containerId3, RMContainerState.RUNNING);
// 4th container still in AQUIRED state. for testing Acquired container is
// always killed.
ContainerId containerId4 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 4);
rm1.waitForState(nm1, containerId4, RMContainerState.ACQUIRED);
// 5th container is in Allocated state. for testing allocated container is
// always killed.
am1.allocate("127.0.0.1", 1024, 1, new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
ContainerId containerId5 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 5);
rm1.waitForState(nm1, containerId5, RMContainerState.ALLOCATED);
// 6th container is in Reserved state.
am1.allocate("127.0.0.1", 6000, 1, new ArrayList<ContainerId>());
ContainerId containerId6 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 6);
nm1.nodeHeartbeat(true);
SchedulerApplicationAttempt schedulerAttempt = ((AbstractYarnScheduler) rm1.getResourceScheduler()).getCurrentAttemptForContainer(containerId6);
while (schedulerAttempt.getReservedContainers().isEmpty()) {
System.out.println("Waiting for container " + containerId6 + " to be reserved.");
nm1.nodeHeartbeat(true);
Thread.sleep(200);
}
// assert containerId6 is reserved.
Assert.assertEquals(containerId6, schedulerAttempt.getReservedContainers().get(0).getContainerId());
// fail the AM by sending CONTAINER_FINISHED event without registering.
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
// wait for some time. previous AM's running containers should still remain
// in scheduler even though am failed
Thread.sleep(3000);
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
// acquired/allocated containers are cleaned up.
Assert.assertNull(rm1.getResourceScheduler().getRMContainer(containerId4));
Assert.assertNull(rm1.getResourceScheduler().getRMContainer(containerId5));
// wait for app to start a new attempt.
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
// assert this is a new AM.
ApplicationAttemptId newAttemptId = app1.getCurrentAppAttempt().getAppAttemptId();
Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));
// launch the new AM
MockAM am2 = rm1.launchAM(app1, rm1, nm1);
RegisterApplicationMasterResponse registerResponse = am2.registerAppAttempt();
// Assert two containers are running: container2 and container3;
Assert.assertEquals(2, registerResponse.getContainersFromPreviousAttempts().size());
boolean containerId2Exists = false, containerId3Exists = false;
for (Container container : registerResponse.getContainersFromPreviousAttempts()) {
if (container.getId().equals(containerId2)) {
containerId2Exists = true;
}
if (container.getId().equals(containerId3)) {
containerId3Exists = true;
}
}
Assert.assertTrue(containerId2Exists && containerId3Exists);
rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
// complete container by sending the container complete event which has earlier
// attempt's attemptId
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
// Even though the completed container containerId3 event was sent to the
// earlier failed attempt, new RMAppAttempt can also capture this container
// info.
// completed containerId4 is also transferred to the new attempt.
RMAppAttempt newAttempt = app1.getRMAppAttempt(am2.getApplicationAttemptId());
// 4 containers finished, acquired/allocated/reserved/completed.
waitForContainersToFinish(4, newAttempt);
boolean container3Exists = false, container4Exists = false, container5Exists = false, container6Exists = false;
for (ContainerStatus status : newAttempt.getJustFinishedContainers()) {
if (status.getContainerId().equals(containerId3)) {
// containerId3 is the container ran by previous attempt but finished by the
// new attempt.
container3Exists = true;
}
if (status.getContainerId().equals(containerId4)) {
// containerId4 is the Acquired Container killed by the previous attempt,
// it's now inside new attempt's finished container list.
container4Exists = true;
}
if (status.getContainerId().equals(containerId5)) {
// containerId5 is the Allocated container killed by previous failed attempt.
container5Exists = true;
}
if (status.getContainerId().equals(containerId6)) {
// containerId6 is the reserved container killed by previous failed attempt.
container6Exists = true;
}
}
Assert.assertTrue(container3Exists && container4Exists && container5Exists && container6Exists);
// New SchedulerApplicationAttempt also has the containers info.
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
// record the scheduler attempt for testing.
SchedulerApplicationAttempt schedulerNewAttempt = ((AbstractYarnScheduler) rm1.getResourceScheduler()).getCurrentAttemptForContainer(containerId2);
// finish this application
MockRM.finishAMAndVerifyAppState(app1, rm1, nm1, am2);
// the 2nd attempt released the 1st attempt's running container, when the
// 2nd attempt finishes.
Assert.assertFalse(schedulerNewAttempt.getLiveContainers().contains(containerId2));
// all 4 normal containers finished.
System.out.println("New attempt's just finished containers: " + newAttempt.getJustFinishedContainers());
waitForContainersToFinish(5, newAttempt);
rm1.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.
the class TestCapacityScheduler method testAddAndRemoveAppFromCapacityScheduler.
@Test
public void testAddAndRemoveAppFromCapacityScheduler() throws Exception {
CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
setupQueueConfiguration(conf);
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
MockRM rm = new MockRM(conf);
@SuppressWarnings("unchecked") AbstractYarnScheduler<SchedulerApplicationAttempt, SchedulerNode> cs = (AbstractYarnScheduler<SchedulerApplicationAttempt, SchedulerNode>) rm.getResourceScheduler();
SchedulerApplication<SchedulerApplicationAttempt> app = TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(cs.getSchedulerApplications(), cs, "a1");
Assert.assertEquals("a1", app.getQueue().getQueueName());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.
the class TestWorkPreservingRMRestart method waitForNumContainersToRecover.
public static void waitForNumContainersToRecover(int num, MockRM rm, ApplicationAttemptId attemptId) throws Exception {
AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm.getResourceScheduler();
SchedulerApplicationAttempt attempt = scheduler.getApplicationAttempt(attemptId);
while (attempt == null) {
System.out.println("Wait for scheduler attempt " + attemptId + " to be created");
Thread.sleep(200);
attempt = scheduler.getApplicationAttempt(attemptId);
}
while (attempt.getLiveContainers().size() < num) {
System.out.println("Wait for " + num + " containers to recover. currently: " + attempt.getLiveContainers().size());
Thread.sleep(200);
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.
the class TestWorkPreservingRMRestartForNodeLabel method waitForNumContainersToRecover.
@SuppressWarnings("rawtypes")
public static void waitForNumContainersToRecover(int num, MockRM rm, ApplicationAttemptId attemptId) throws Exception {
AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm.getResourceScheduler();
SchedulerApplicationAttempt attempt = scheduler.getApplicationAttempt(attemptId);
while (attempt == null) {
System.out.println("Wait for scheduler attempt " + attemptId + " to be created");
Thread.sleep(200);
attempt = scheduler.getApplicationAttempt(attemptId);
}
while (attempt.getLiveContainers().size() < num) {
System.out.println("Wait for " + num + " containers to recover. currently: " + attempt.getLiveContainers().size());
Thread.sleep(200);
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.
the class OpportunisticContainerAllocatorAMService method allocateInternal.
@Override
protected void allocateInternal(ApplicationAttemptId appAttemptId, AllocateRequest request, AllocateResponse allocateResponse) throws YarnException {
// Partition requests to GUARANTEED and OPPORTUNISTIC.
OpportunisticContainerAllocator.PartitionedResourceRequests partitionedAsks = oppContainerAllocator.partitionAskList(request.getAskList());
// Allocate OPPORTUNISTIC containers.
SchedulerApplicationAttempt appAttempt = ((AbstractYarnScheduler) rmContext.getScheduler()).getApplicationAttempt(appAttemptId);
OpportunisticContainerContext oppCtx = appAttempt.getOpportunisticContainerContext();
oppCtx.updateNodeList(getLeastLoadedNodes());
List<Container> oppContainers = oppContainerAllocator.allocateContainers(request.getResourceBlacklistRequest(), partitionedAsks.getOpportunistic(), appAttemptId, oppCtx, ResourceManager.getClusterTimeStamp(), appAttempt.getUser());
// Create RMContainers and update the NMTokens.
if (!oppContainers.isEmpty()) {
handleNewContainers(oppContainers, false);
appAttempt.updateNMTokens(oppContainers);
addToAllocatedContainers(allocateResponse, oppContainers);
}
// Allocate GUARANTEED containers.
request.setAskList(partitionedAsks.getGuaranteed());
super.allocateInternal(appAttemptId, request, allocateResponse);
}
Aggregations