use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestRMContainerAllocator method testExcludeSchedReducesFromHeadroom.
/**
* Tests whether scheduled reducers are excluded from headroom while
* calculating headroom.
*/
@Test
public void testExcludeSchedReducesFromHeadroom() throws Exception {
LOG.info("Running testExcludeSchedReducesFromHeadroom");
Configuration conf = new Configuration();
conf.setInt(MRJobConfig.MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC, -1);
MyResourceManager rm = new MyResourceManager(conf);
rm.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
// Submit the application
RMApp app = rm.submitApp(1024);
dispatcher.await();
MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
amNodeManager.nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm.sendAMLaunched(appAttemptId);
dispatcher.await();
JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
Job mockJob = mock(Job.class);
when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
Task mockTask = mock(Task.class);
TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
when(mockJob.getTask((TaskId) any())).thenReturn(mockTask);
when(mockTask.getAttempt((TaskAttemptId) any())).thenReturn(mockTaskAttempt);
when(mockTaskAttempt.getProgress()).thenReturn(0.01f);
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
MockNM nodeManager = rm.registerNode("h1:1234", 4096);
dispatcher.await();
// Register nodes to RM.
MockNM nodeManager2 = rm.registerNode("h2:1234", 1024);
dispatcher.await();
// Request 2 maps and 1 reducer(sone on nodes which are not registered).
ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
allocator.sendRequest(event1);
ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
allocator.sendRequest(event2);
ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1" }, false, true);
allocator.sendRequest(event3);
// This will tell the scheduler about the requests but there will be no
// allocations as nodes are not added.
allocator.schedule();
dispatcher.await();
// Request for another reducer on h3 which has not registered.
ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
allocator.sendRequest(event4);
allocator.schedule();
dispatcher.await();
// Update resources in scheduler through node heartbeat from h1.
nodeManager.nodeHeartbeat(true);
dispatcher.await();
rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(3072, 3));
allocator.schedule();
dispatcher.await();
// Two maps are assigned.
Assert.assertEquals(2, allocator.getAssignedRequests().maps.size());
// Send deallocate request for map so that no maps are assigned after this.
ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
allocator.sendDeallocate(deallocate1);
ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
allocator.sendDeallocate(deallocate2);
// No map should be assigned.
Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
nodeManager.nodeHeartbeat(true);
dispatcher.await();
rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
allocator.schedule();
dispatcher.await();
// h2 heartbeats.
nodeManager2.nodeHeartbeat(true);
dispatcher.await();
// Send request for one more mapper.
ContainerRequestEvent event5 = createReq(jobId, 5, 1024, new String[] { "h1" });
allocator.sendRequest(event5);
rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 2));
allocator.schedule();
dispatcher.await();
// One reducer is assigned and one map is scheduled
Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
Assert.assertEquals(1, allocator.getAssignedRequests().reduces.size());
// Headroom enough to run a mapper if headroom is taken as it is but wont be
// enough if scheduled reducers resources are deducted.
rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1260, 2));
allocator.schedule();
dispatcher.await();
// After allocate response, the one assigned reducer is preempted and killed
Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
Assert.assertEquals(RMContainerAllocator.RAMPDOWN_DIAGNOSTIC, MyContainerAllocator.getTaskAttemptKillEvents().get(0).getMessage());
Assert.assertEquals(1, allocator.getNumOfPendingReduces());
}
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestRMContainerAllocator method testUpdatedNodes.
@Test
public void testUpdatedNodes() throws Exception {
Configuration conf = new Configuration();
MyResourceManager rm = new MyResourceManager(conf);
rm.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
// Submit the application
RMApp app = rm.submitApp(1024);
dispatcher.await();
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
amNodeManager.nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm.sendAMLaunched(appAttemptId);
dispatcher.await();
JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
Job mockJob = mock(Job.class);
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
// add resources to scheduler
MockNM nm1 = rm.registerNode("h1:1234", 10240);
MockNM nm2 = rm.registerNode("h2:1234", 10240);
dispatcher.await();
// create the map container request
ContainerRequestEvent event = createReq(jobId, 1, 1024, new String[] { "h1" });
allocator.sendRequest(event);
TaskAttemptId attemptId = event.getAttemptID();
TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
when(mockTaskAttempt.getNodeId()).thenReturn(nm1.getNodeId());
Task mockTask = mock(Task.class);
when(mockTask.getAttempt(attemptId)).thenReturn(mockTaskAttempt);
when(mockJob.getTask(attemptId.getTaskId())).thenReturn(mockTask);
// this tells the scheduler about the requests
List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
dispatcher.await();
nm1.nodeHeartbeat(true);
dispatcher.await();
Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
Assert.assertEquals(3, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
allocator.getJobUpdatedNodeEvents().clear();
// get the assignment
assigned = allocator.schedule();
dispatcher.await();
Assert.assertEquals(1, assigned.size());
Assert.assertEquals(nm1.getNodeId(), assigned.get(0).getContainer().getNodeId());
// no updated nodes reported
Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
// mark nodes bad
nm1.nodeHeartbeat(false);
nm2.nodeHeartbeat(false);
dispatcher.await();
// schedule response returns updated nodes
assigned = allocator.schedule();
dispatcher.await();
Assert.assertEquals(0, assigned.size());
// updated nodes are reported
Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
Assert.assertEquals(1, allocator.getTaskAttemptKillEvents().size());
Assert.assertEquals(2, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
Assert.assertEquals(attemptId, allocator.getTaskAttemptKillEvents().get(0).getTaskAttemptID());
allocator.getJobUpdatedNodeEvents().clear();
allocator.getTaskAttemptKillEvents().clear();
assigned = allocator.schedule();
dispatcher.await();
Assert.assertEquals(0, assigned.size());
// no updated nodes reported
Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
}
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestTaskImpl method testFailedTransitions.
@Test
public void testFailedTransitions() {
mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, startCount, metrics, appContext, TaskType.MAP) {
@Override
protected int getMaxAttempts() {
return 1;
}
};
TaskId taskId = getNewTaskID();
scheduleTaskAttempt(taskId);
launchTaskAttempt(getLastAttempt().getAttemptId());
// add three more speculative attempts
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT));
launchTaskAttempt(getLastAttempt().getAttemptId());
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT));
launchTaskAttempt(getLastAttempt().getAttemptId());
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT));
launchTaskAttempt(getLastAttempt().getAttemptId());
assertEquals(4, taskAttempts.size());
// have the first attempt fail, verify task failed due to no retries
MockTaskAttemptImpl taskAttempt = taskAttempts.get(0);
taskAttempt.setState(TaskAttemptState.FAILED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED));
assertEquals(TaskState.FAILED, mockTask.getState());
// verify task can no longer be killed
mockTask.handle(new TaskEvent(taskId, TaskEventType.T_KILL));
assertEquals(TaskState.FAILED, mockTask.getState());
// verify speculative doesn't launch new tasks
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT));
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ATTEMPT_LAUNCHED));
assertEquals(TaskState.FAILED, mockTask.getState());
assertEquals(4, taskAttempts.size());
// verify attempt events from active tasks don't knock task out of FAILED
taskAttempt = taskAttempts.get(1);
taskAttempt.setState(TaskAttemptState.COMMIT_PENDING);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_COMMIT_PENDING));
assertEquals(TaskState.FAILED, mockTask.getState());
taskAttempt.setState(TaskAttemptState.FAILED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED));
assertEquals(TaskState.FAILED, mockTask.getState());
taskAttempt = taskAttempts.get(2);
taskAttempt.setState(TaskAttemptState.SUCCEEDED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_SUCCEEDED));
assertEquals(TaskState.FAILED, mockTask.getState());
taskAttempt = taskAttempts.get(3);
taskAttempt.setState(TaskAttemptState.KILLED);
mockTask.handle(new TaskTAttemptKilledEvent(taskAttempt.getAttemptId(), false));
assertEquals(TaskState.FAILED, mockTask.getState());
}
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestTaskImpl method testFailedTransitionWithHangingSpeculativeMap.
@Test
public void testFailedTransitionWithHangingSpeculativeMap() {
mockTask = new MockTaskImpl(jobId, partition, new PartialAttemptEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, startCount, metrics, appContext, TaskType.MAP) {
@Override
protected int getMaxAttempts() {
return 4;
}
};
// start a new task, schedule and launch a new attempt
TaskId taskId = getNewTaskID();
scheduleTaskAttempt(taskId);
launchTaskAttempt(getLastAttempt().getAttemptId());
// add a speculative attempt(#2), but not launch it
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT));
// have the first attempt(#1) fail, verify task still running since the
// max attempts is 4
MockTaskAttemptImpl taskAttempt = taskAttempts.get(0);
taskAttempt.setState(TaskAttemptState.FAILED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED));
assertEquals(TaskState.RUNNING, mockTask.getState());
// verify a new attempt(#3) added because the speculative attempt(#2)
// is hanging
assertEquals(3, taskAttempts.size());
// verify the speculative attempt(#2) is not a rescheduled attempt
assertEquals(false, taskAttempts.get(1).getRescheduled());
// verify the third attempt is a rescheduled attempt
assertEquals(true, taskAttempts.get(2).getRescheduled());
// now launch the latest attempt(#3) and set the internal state to running
launchTaskAttempt(getLastAttempt().getAttemptId());
// have the speculative attempt(#2) fail, verify task still since it
// hasn't reach the max attempts which is 4
MockTaskAttemptImpl taskAttempt1 = taskAttempts.get(1);
taskAttempt1.setState(TaskAttemptState.FAILED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt1.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED));
assertEquals(TaskState.RUNNING, mockTask.getState());
// verify there's no new attempt added because of the running attempt(#3)
assertEquals(3, taskAttempts.size());
}
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestFail method testFailTask.
@Test
public //The job succeeds.
void testFailTask() throws Exception {
MRApp app = new MockFirstFailingAttemptMRApp(1, 0);
Configuration conf = new Configuration();
// this test requires two task attempts, but uberization overrides max to 1
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
Job job = app.submit(conf);
app.waitForState(job, JobState.SUCCEEDED);
Map<TaskId, Task> tasks = job.getTasks();
Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
Task task = tasks.values().iterator().next();
Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, task.getReport().getTaskState());
Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts();
Assert.assertEquals("Num attempts is not correct", 2, attempts.size());
//one attempt must be failed
//and another must have succeeded
Iterator<TaskAttempt> it = attempts.values().iterator();
Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, it.next().getReport().getTaskAttemptState());
Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, it.next().getReport().getTaskAttemptState());
}
Aggregations