Use of org.apache.hadoop.yarn.util.ControlledClock in project hadoop by apache: class TestRMContainerAllocator, method testUpdateAskOnRampDownAllReduces.
@Test
public void testUpdateAskOnRampDownAllReduces() throws Exception {
  LOG.info("Running testUpdateAskOnRampDownAllReduces");
  Configuration conf = new Configuration();
  MyResourceManager rm = new MyResourceManager(conf);
  rm.start();
  DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
  // Submit the application
  RMApp app = rm.submitApp(1024);
  dispatcher.await();
  MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
  amNodeManager.nodeHeartbeat(true);
  dispatcher.await();
  ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
  rm.sendAMLaunched(appAttemptId);
  dispatcher.await();
  JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
  Job mockJob = mock(Job.class);
  when(mockJob.getReport()).thenReturn(
      MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING,
          0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
  MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
  // Use a controlled clock to advance time for the test.
  ControlledClock clock = (ControlledClock) allocator.getContext().getClock();
  clock.setTime(System.currentTimeMillis());
  // Register nodes to RM.
  MockNM nodeManager = rm.registerNode("h1:1234", 1024);
  dispatcher.await();
  // Request 2 maps and 1 reducer (some on nodes which are not registered).
  ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
  allocator.sendRequest(event1);
  ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
  allocator.sendRequest(event2);
  ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h2" }, false, true);
  allocator.sendRequest(event3);
  // This will tell the scheduler about the requests but there will be no
  // allocations as nodes are not added.
  allocator.schedule();
  dispatcher.await();
  // Advance the clock so that maps can be considered as hanging.
  clock.setTime(System.currentTimeMillis() + 500000L);
  // Request another reducer on h3, which has not registered.
  ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
  allocator.sendRequest(event4);
  allocator.schedule();
  dispatcher.await();
  // Update resources in the scheduler through a node heartbeat from h1.
  nodeManager.nodeHeartbeat(true);
  dispatcher.await();
  rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
  allocator.schedule();
  dispatcher.await();
  // One map is assigned.
  Assert.assertEquals(1, allocator.getAssignedRequests().maps.size());
  // Send a deallocate request for the map so that no maps are assigned after this.
  ContainerAllocatorEvent deallocate = createDeallocateEvent(jobId, 1, false);
  allocator.sendDeallocate(deallocate);
  // Now one reducer should be scheduled and one should be pending.
  Assert.assertEquals(1, allocator.getScheduledRequests().reduces.size());
  Assert.assertEquals(1, allocator.getNumOfPendingReduces());
  // No map should be assigned and one should be scheduled.
  Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
  Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
  Assert.assertEquals(6, allocator.getAsk().size());
  for (ResourceRequest req : allocator.getAsk()) {
    boolean isReduce = req.getPriority().equals(RMContainerAllocator.PRIORITY_REDUCE);
    if (isReduce) {
      // 1 reducer each asked on h2, * and default-rack
      Assert.assertTrue((req.getResourceName().equals("*")
          || req.getResourceName().equals("/default-rack")
          || req.getResourceName().equals("h2"))
          && req.getNumContainers() == 1);
    } else {
      // map
      // 0 mappers asked on h1 and 1 each on * and default-rack
      Assert.assertTrue(((req.getResourceName().equals("*")
          || req.getResourceName().equals("/default-rack"))
          && req.getNumContainers() == 1)
          || (req.getResourceName().equals("h1") && req.getNumContainers() == 0));
    }
  }
  // On the next allocate request to the scheduler, the reported headroom will be 0.
  rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(0, 0));
  allocator.schedule();
  dispatcher.await();
  // After the allocate response from the scheduler, all scheduled reduces are ramped
  // down and moved to pending. 3 asks are also updated with 0 containers to
  // indicate the ramp-down of reduces to the scheduler.
  Assert.assertEquals(0, allocator.getScheduledRequests().reduces.size());
  Assert.assertEquals(2, allocator.getNumOfPendingReduces());
  Assert.assertEquals(3, allocator.getAsk().size());
  for (ResourceRequest req : allocator.getAsk()) {
    Assert.assertEquals(RMContainerAllocator.PRIORITY_REDUCE, req.getPriority());
    Assert.assertTrue(req.getResourceName().equals("*")
        || req.getResourceName().equals("/default-rack")
        || req.getResourceName().equals("h2"));
    Assert.assertEquals(Resource.newInstance(1024, 1), req.getCapability());
    Assert.assertEquals(0, req.getNumContainers());
  }
}
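The pattern this test relies on is simple: swap the allocator's wall clock for a ControlledClock and move time forward explicitly instead of waiting. Below is a minimal, self-contained sketch of just that clock handling, using only the ControlledClock methods that appear in the snippets on this page; the class name ControlledClockSketch and the printed output are illustrative, not part of Hadoop.

import org.apache.hadoop.yarn.util.ControlledClock;

public class ControlledClockSketch {
  public static void main(String[] args) {
    // Start the controlled clock at the current wall-clock time,
    // as the test does before sending any container requests.
    ControlledClock clock = new ControlledClock();
    clock.setTime(System.currentTimeMillis());
    long before = clock.getTime();

    // Jump far enough ahead that pending map requests would be
    // treated as hanging (the test above uses a 500000 ms advance).
    clock.setTime(before + 500000L);

    // tickSec advances the clock by whole seconds; the fair scheduler
    // test further down uses it between scheduling requests.
    clock.tickSec(10);

    System.out.println("advanced by " + (clock.getTime() - before) + " ms");
  }
}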
Use of org.apache.hadoop.yarn.util.ControlledClock in project hadoop by apache: class TestRMContainerAllocator, method testNonAggressivelyPreemptReducers.
@Test(timeout = 30000)
public void testNonAggressivelyPreemptReducers() throws Exception {
  LOG.info("Running testNonAggressivelyPreemptReducers");
  // seconds
  final int preemptThreshold = 2;
  Configuration conf = new Configuration();
  conf.setInt(MRJobConfig.MR_JOB_REDUCER_PREEMPT_DELAY_SEC, preemptThreshold);
  MyResourceManager rm = new MyResourceManager(conf);
  rm.start();
  DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
  // Submit the application
  RMApp app = rm.submitApp(1024);
  dispatcher.await();
  MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
  amNodeManager.nodeHeartbeat(true);
  dispatcher.await();
  ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
  rm.sendAMLaunched(appAttemptId);
  dispatcher.await();
  JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
  Job mockJob = mock(Job.class);
  when(mockJob.getReport()).thenReturn(
      MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING,
          0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
  ControlledClock clock = new ControlledClock(null);
  clock.setTime(1);
  MyContainerAllocator allocator =
      new MyContainerAllocator(rm, conf, appAttemptId, mockJob, clock);
  allocator.setMapResourceRequest(BuilderUtils.newResource(1024, 1));
  allocator.setReduceResourceRequest(BuilderUtils.newResource(1024, 1));
  RMContainerAllocator.AssignedRequests assignedRequests = allocator.getAssignedRequests();
  RMContainerAllocator.ScheduledRequests scheduledRequests = allocator.getScheduledRequests();
  ContainerRequestEvent event1 = createReq(jobId, 1, 2048, new String[] { "h1" }, false, false);
  scheduledRequests.maps.put(mock(TaskAttemptId.class),
      new RMContainerRequestor.ContainerRequest(event1, null, clock.getTime()));
  assignedRequests.reduces.put(mock(TaskAttemptId.class), mock(Container.class));
  clock.setTime(clock.getTime() + 1);
  allocator.preemptReducesIfNeeded();
  Assert.assertEquals("The reducer is aggressively preempted", 0,
      assignedRequests.preemptionWaitingReduces.size());
  clock.setTime(clock.getTime() + (preemptThreshold) * 1000);
  allocator.preemptReducesIfNeeded();
  Assert.assertEquals("The reducer is not preempted", 1,
      assignedRequests.preemptionWaitingReduces.size());
}
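A note on the timing in this test: MR_JOB_REDUCER_PREEMPT_DELAY_SEC is configured in seconds while ControlledClock works in milliseconds, so the test multiplies the threshold by 1000 before advancing the clock past it. A rough sketch of only that interplay follows; the configuration key is the one used above, but the class PreemptDelaySketch and its variable names are illustrative, and this is not the full preemption logic.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.yarn.util.ControlledClock;

public class PreemptDelaySketch {
  public static void main(String[] args) {
    // Reducer preemption delay, in seconds, as configured by the test above.
    final int preemptThresholdSec = 2;
    Configuration conf = new Configuration();
    conf.setInt(MRJobConfig.MR_JOB_REDUCER_PREEMPT_DELAY_SEC, preemptThresholdSec);

    ControlledClock clock = new ControlledClock();
    clock.setTime(1);

    // Advancing by a single millisecond stays under the threshold,
    // so no reducer should be marked for preemption yet ...
    clock.setTime(clock.getTime() + 1);

    // ... while advancing past the threshold (seconds converted to
    // milliseconds) is what allows preemption to kick in.
    clock.setTime(clock.getTime() + preemptThresholdSec * 1000L);

    System.out.println("clock now at " + clock.getTime() + " ms");
  }
}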
Use of org.apache.hadoop.yarn.util.ControlledClock in project hadoop by apache: class TestRMContainerAllocator, method testAvoidAskMoreReducersWhenReducerPreemptionIsRequired.
@Test
public void testAvoidAskMoreReducersWhenReducerPreemptionIsRequired() throws Exception {
  LOG.info("Running testAvoidAskMoreReducersWhenReducerPreemptionIsRequired");
  Configuration conf = new Configuration();
  MyResourceManager rm = new MyResourceManager(conf);
  rm.start();
  DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
  // Submit the application
  RMApp app = rm.submitApp(1024);
  dispatcher.await();
  MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
  amNodeManager.nodeHeartbeat(true);
  dispatcher.await();
  ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
  rm.sendAMLaunched(appAttemptId);
  dispatcher.await();
  JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
  Job mockJob = mock(Job.class);
  when(mockJob.getReport()).thenReturn(
      MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING,
          0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
  MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
  // Use a controlled clock to advance time for the test.
  ControlledClock clock = (ControlledClock) allocator.getContext().getClock();
  clock.setTime(System.currentTimeMillis());
  // Register nodes to RM.
  MockNM nodeManager = rm.registerNode("h1:1234", 1024);
  dispatcher.await();
  // Request 2 maps and 1 reducer (some on nodes which are not registered).
  ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
  allocator.sendRequest(event1);
  ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
  allocator.sendRequest(event2);
  ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h2" }, false, true);
  allocator.sendRequest(event3);
  // This will tell the scheduler about the requests but there will be no
  // allocations as nodes are not added.
  allocator.schedule();
  dispatcher.await();
  // Advance the clock so that maps can be considered as hanging.
  clock.setTime(System.currentTimeMillis() + 500000L);
  // Request another reducer on h3, which has not registered.
  ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
  allocator.sendRequest(event4);
  allocator.schedule();
  dispatcher.await();
  // Update resources in the scheduler through a node heartbeat from h1.
  nodeManager.nodeHeartbeat(true);
  dispatcher.await();
  rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
  allocator.schedule();
  dispatcher.await();
  // One map is assigned.
  Assert.assertEquals(1, allocator.getAssignedRequests().maps.size());
  // Send a deallocate request for the map so that no maps are assigned after this.
  ContainerAllocatorEvent deallocate = createDeallocateEvent(jobId, 1, false);
  allocator.sendDeallocate(deallocate);
  // Now one reducer should be scheduled and one should be pending.
  Assert.assertEquals(1, allocator.getScheduledRequests().reduces.size());
  Assert.assertEquals(1, allocator.getNumOfPendingReduces());
  // No map should be assigned and one should be scheduled.
  Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
  Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
  Assert.assertEquals(6, allocator.getAsk().size());
  for (ResourceRequest req : allocator.getAsk()) {
    boolean isReduce = req.getPriority().equals(RMContainerAllocator.PRIORITY_REDUCE);
    if (isReduce) {
      // 1 reducer each asked on h2, * and default-rack
      Assert.assertTrue((req.getResourceName().equals("*")
          || req.getResourceName().equals("/default-rack")
          || req.getResourceName().equals("h2"))
          && req.getNumContainers() == 1);
    } else {
      // map
      // 0 mappers asked on h1 and 1 each on * and default-rack
      Assert.assertTrue(((req.getResourceName().equals("*")
          || req.getResourceName().equals("/default-rack"))
          && req.getNumContainers() == 1)
          || (req.getResourceName().equals("h1") && req.getNumContainers() == 0));
    }
  }
  clock.setTime(System.currentTimeMillis() + 500000L + 10 * 60 * 1000);
  // On the next allocate request to the scheduler, the reported headroom will be 2048.
  rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 0));
  allocator.schedule();
  dispatcher.await();
  // After the allocate response from the scheduler, all scheduled reduces are ramped
  // down and moved to pending. 3 asks are also updated with 0 containers to
  // indicate the ramp-down of reduces to the scheduler.
  Assert.assertEquals(0, allocator.getScheduledRequests().reduces.size());
  Assert.assertEquals(2, allocator.getNumOfPendingReduces());
  Assert.assertEquals(3, allocator.getAsk().size());
  for (ResourceRequest req : allocator.getAsk()) {
    Assert.assertEquals(RMContainerAllocator.PRIORITY_REDUCE, req.getPriority());
    Assert.assertTrue(req.getResourceName().equals("*")
        || req.getResourceName().equals("/default-rack")
        || req.getResourceName().equals("h2"));
    Assert.assertEquals(Resource.newInstance(1024, 1), req.getCapability());
    Assert.assertEquals(0, req.getNumContainers());
  }
}
Use of org.apache.hadoop.yarn.util.ControlledClock in project hadoop by apache: class TestFairScheduler, method testMaxRunningAppsHierarchicalQueues.
@Test
public void testMaxRunningAppsHierarchicalQueues() throws Exception {
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
  ControlledClock clock = new ControlledClock();
  scheduler.setClock(clock);
  PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
  out.println("<?xml version=\"1.0\"?>");
  out.println("<allocations>");
  out.println("<queue name=\"queue1\">");
  out.println(" <maxRunningApps>3</maxRunningApps>");
  out.println(" <queue name=\"sub1\"></queue>");
  out.println(" <queue name=\"sub2\"></queue>");
  out.println(" <queue name=\"sub3\">");
  out.println(" <maxRunningApps>1</maxRunningApps>");
  out.println(" </queue>");
  out.println("</queue>");
  out.println("</allocations>");
  out.close();
  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());
  // exceeds no limits
  ApplicationAttemptId attId1 = createSchedulingRequest(1024, "queue1.sub1", "user1");
  verifyAppRunnable(attId1, true);
  verifyQueueNumRunnable("queue1.sub1", 1, 0);
  clock.tickSec(10);
  // exceeds no limits
  ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1.sub3", "user1");
  verifyAppRunnable(attId2, true);
  verifyQueueNumRunnable("queue1.sub3", 1, 0);
  clock.tickSec(10);
  // exceeds no limits
  ApplicationAttemptId attId3 = createSchedulingRequest(1024, "queue1.sub2", "user1");
  verifyAppRunnable(attId3, true);
  verifyQueueNumRunnable("queue1.sub2", 1, 0);
  clock.tickSec(10);
  // exceeds queue1 limit
  ApplicationAttemptId attId4 = createSchedulingRequest(1024, "queue1.sub2", "user1");
  verifyAppRunnable(attId4, false);
  verifyQueueNumRunnable("queue1.sub2", 1, 1);
  clock.tickSec(10);
  // exceeds sub3 limit
  ApplicationAttemptId attId5 = createSchedulingRequest(1024, "queue1.sub3", "user1");
  verifyAppRunnable(attId5, false);
  verifyQueueNumRunnable("queue1.sub3", 1, 1);
  clock.tickSec(10);
  // Even though the app was removed from sub3, the app from sub2 gets to go
  // because it came in first
  AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
      new AppAttemptRemovedSchedulerEvent(attId2, RMAppAttemptState.FINISHED, false);
  scheduler.handle(appRemovedEvent1);
  verifyAppRunnable(attId4, true);
  verifyQueueNumRunnable("queue1.sub2", 2, 0);
  verifyAppRunnable(attId5, false);
  verifyQueueNumRunnable("queue1.sub3", 0, 1);
  // Now test removal of a non-runnable app
  AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
      new AppAttemptRemovedSchedulerEvent(attId5, RMAppAttemptState.KILLED, true);
  scheduler.handle(appRemovedEvent2);
  assertEquals(0, scheduler.maxRunningEnforcer.usersNonRunnableApps.get("user1").size());
  // verify app gone in queue accounting
  verifyQueueNumRunnable("queue1.sub3", 0, 0);
  // verify it doesn't become runnable when there would be space for it
  AppAttemptRemovedSchedulerEvent appRemovedEvent3 =
      new AppAttemptRemovedSchedulerEvent(attId4, RMAppAttemptState.FINISHED, true);
  scheduler.handle(appRemovedEvent3);
  verifyQueueNumRunnable("queue1.sub2", 1, 0);
  verifyQueueNumRunnable("queue1.sub3", 0, 0);
}
Use of org.apache.hadoop.yarn.util.ControlledClock in project hadoop by apache: class TestWorkPreservingRMRestart, method testNewContainersNotAllocatedDuringSchedulerRecovery.
@Test(timeout = 20000)
public void testNewContainersNotAllocatedDuringSchedulerRecovery() throws Exception {
  conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 4000);
  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(conf);
  rm1 = new MockRM(conf, memStore);
  rm1.start();
  MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
  nm1.registerNode();
  RMApp app1 = rm1.submitApp(200);
  MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
  // Restart the RM
  rm2 = new MockRM(conf, memStore);
  rm2.start();
  nm1.setResourceTrackerService(rm2.getResourceTrackerService());
  nm1.registerNode();
  ControlledClock clock = new ControlledClock();
  long startTime = System.currentTimeMillis();
  ((RMContextImpl) rm2.getRMContext()).setSystemClock(clock);
  am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
  am1.registerAppAttempt(true);
  rm2.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
  // The AM requests new containers.
  am1.allocate("127.0.0.1", 1000, 1, new ArrayList<ContainerId>());
  List<Container> containers = new ArrayList<Container>();
  clock.setTime(startTime + 2000);
  nm1.nodeHeartbeat(true);
  // Sleep some time as allocation happens asynchronously.
  Thread.sleep(3000);
  containers.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
      new ArrayList<ContainerId>()).getAllocatedContainers());
  // No container is allocated during scheduling recovery.
  Assert.assertTrue(containers.isEmpty());
  clock.setTime(startTime + 8000);
  nm1.nodeHeartbeat(true);
  // The container is created after recovery is done.
  while (containers.isEmpty()) {
    containers.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>()).getAllocatedContainers());
    Thread.sleep(500);
  }
}