Search in sources :

Example 26 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TestRMContainerAllocator method testExcludeSchedReducesFromHeadroom.

/**
 * Tests whether scheduled reducers are excluded from headroom while
 * calculating headroom.
 *
 * <p>Outline of the scenario: two maps and two reducers are requested, the
 * maps are assigned and then deallocated, one reducer gets assigned while a
 * new map is still only scheduled, and finally the resource limit is forced
 * to 1260. The test then expects exactly one task attempt kill event carrying
 * {@code RMContainerAllocator.RAMPDOWN_DIAGNOSTIC} and one pending reduce.
 */
@Test
public void testExcludeSchedReducesFromHeadroom() throws Exception {
    LOG.info("Running testExcludeSchedReducesFromHeadroom");
    Configuration conf = new Configuration();
    // NOTE(review): -1 presumably disables the unconditional reducer
    // preemption delay so only headroom-based ramp-down is exercised - confirm.
    conf.setInt(MRJobConfig.MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    // Node used to host the AM; it heartbeats before the AM is launched.
    MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    // Mock a running job whose every task/attempt lookup resolves to the same
    // mock attempt reporting 1% progress.
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    Task mockTask = mock(Task.class);
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockJob.getTask((TaskId) any())).thenReturn(mockTask);
    when(mockTask.getAttempt((TaskAttemptId) any())).thenReturn(mockTaskAttempt);
    when(mockTaskAttempt.getProgress()).thenReturn(0.01f);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // Register the worker nodes with the RM: h1 (4096) first ...
    MockNM nodeManager = rm.registerNode("h1:1234", 4096);
    dispatcher.await();
    // ... then h2 (1024).
    MockNM nodeManager2 = rm.registerNode("h2:1234", 1024);
    dispatcher.await();
    // Request 2 maps (on h1 and h2) and 1 reducer (on h1). Both hosts are
    // registered above but neither has sent a heartbeat yet.
    // NOTE(review): the trailing (false, true) arguments presumably mark the
    // request as a reducer - confirm against createReq.
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
    allocator.sendRequest(event2);
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1" }, false, true);
    allocator.sendRequest(event3);
    // This will tell the scheduler about the requests but there will be no
    // allocations as nodes are not added.
    allocator.schedule();
    dispatcher.await();
    // Request for another reducer on h3 which has not registered.
    ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
    allocator.sendRequest(event4);
    allocator.schedule();
    dispatcher.await();
    // Update resources in scheduler through node heartbeat from h1.
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    // Force the resource limit reported by the scheduler to (3072, 3).
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(3072, 3));
    allocator.schedule();
    dispatcher.await();
    // Two maps are assigned.
    Assert.assertEquals(2, allocator.getAssignedRequests().maps.size());
    // Send deallocate request for map so that no maps are assigned after this.
    ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate1);
    ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
    allocator.sendDeallocate(deallocate2);
    // No map should be assigned.
    Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    // Shrink the forced resource limit to (1024, 1) before the next schedule.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
    allocator.schedule();
    dispatcher.await();
    // h2 heartbeats.
    nodeManager2.nodeHeartbeat(true);
    dispatcher.await();
    // Send request for one more mapper.
    ContainerRequestEvent event5 = createReq(jobId, 5, 1024, new String[] { "h1" });
    allocator.sendRequest(event5);
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 2));
    allocator.schedule();
    dispatcher.await();
    // One reducer is assigned and one map is scheduled
    Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
    Assert.assertEquals(1, allocator.getAssignedRequests().reduces.size());
    // Headroom enough to run a mapper if headroom is taken as it is but wont be
    // enough if scheduled reducers resources are deducted.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1260, 2));
    allocator.schedule();
    dispatcher.await();
    // After allocate response, the one assigned reducer is preempted and killed
    Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(RMContainerAllocator.RAMPDOWN_DIAGNOSTIC, MyContainerAllocator.getTaskAttemptKillEvents().get(0).getMessage());
    // The killed reducer goes back to pending.
    Assert.assertEquals(1, allocator.getNumOfPendingReduces());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 27 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TestRMContainerAllocator method testUpdatedNodes.

/**
 * Verifies that node updates returned in the allocate response are forwarded
 * as job-updated-node events, and that an attempt assigned to a node that
 * later reports itself unhealthy gets a kill event.
 *
 * <p>The kill-event list is read through
 * {@code MyContainerAllocator.getTaskAttemptKillEvents()}, qualified by the
 * class name rather than the {@code allocator} instance, since the accessor
 * is static.
 */
@Test
public void testUpdatedNodes() throws Exception {
    Configuration conf = new Configuration();
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // add resources to scheduler
    MockNM nm1 = rm.registerNode("h1:1234", 10240);
    MockNM nm2 = rm.registerNode("h2:1234", 10240);
    dispatcher.await();
    // create the map container request
    ContainerRequestEvent event = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event);
    TaskAttemptId attemptId = event.getAttemptID();
    // Make the mocked job resolve the requested attempt to one placed on nm1.
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockTaskAttempt.getNodeId()).thenReturn(nm1.getNodeId());
    Task mockTask = mock(Task.class);
    when(mockTask.getAttempt(attemptId)).thenReturn(mockTaskAttempt);
    when(mockJob.getTask(attemptId.getTaskId())).thenReturn(mockTask);
    // this tells the scheduler about the requests; the returned assignments
    // are not inspected at this point
    allocator.schedule();
    dispatcher.await();
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    // One update event covering three nodes.
    // NOTE(review): presumably amNM, h1 and h2 - confirm.
    Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
    Assert.assertEquals(3, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
    allocator.getJobUpdatedNodeEvents().clear();
    // get the assignment
    List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(1, assigned.size());
    Assert.assertEquals(nm1.getNodeId(), assigned.get(0).getContainer().getNodeId());
    // no updated nodes reported
    Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
    Assert.assertTrue(MyContainerAllocator.getTaskAttemptKillEvents().isEmpty());
    // mark nodes bad
    nm1.nodeHeartbeat(false);
    nm2.nodeHeartbeat(false);
    dispatcher.await();
    // schedule response returns updated nodes
    assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(0, assigned.size());
    // updated nodes are reported, and the attempt placed on nm1 is killed
    Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
    Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(2, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
    Assert.assertEquals(attemptId, MyContainerAllocator.getTaskAttemptKillEvents().get(0).getTaskAttemptID());
    // Reset the recorded events so the next round starts from a clean slate.
    allocator.getJobUpdatedNodeEvents().clear();
    MyContainerAllocator.getTaskAttemptKillEvents().clear();
    assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(0, assigned.size());
    // no updated nodes reported
    Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
    Assert.assertTrue(MyContainerAllocator.getTaskAttemptKillEvents().isEmpty());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 28 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TestTaskImpl method testFailedTransitions.

/**
 * With a single allowed attempt, the first attempt failure must move the task
 * to FAILED, and no later kill, speculation or attempt event may move it out
 * of that state.
 */
@Test
public void testFailedTransitions() {
    mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, startCount, metrics, appContext, TaskType.MAP) {

        @Override
        protected int getMaxAttempts() {
            // a single attempt is allowed, so the first failure is final
            return 1;
        }
    };
    TaskId failingTaskId = getNewTaskID();
    scheduleTaskAttempt(failingTaskId);
    launchTaskAttempt(getLastAttempt().getAttemptId());

    // add three more speculative attempts, launching each right after it is added
    for (int spec = 0; spec < 3; spec++) {
        mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT));
        launchTaskAttempt(getLastAttempt().getAttemptId());
    }
    assertEquals(4, taskAttempts.size());

    // have the first attempt fail, verify task failed due to no retries
    MockTaskAttemptImpl firstAttempt = taskAttempts.get(0);
    firstAttempt.setState(TaskAttemptState.FAILED);
    mockTask.handle(new TaskTAttemptEvent(firstAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED));
    assertEquals(TaskState.FAILED, mockTask.getState());

    // verify task can no longer be killed
    mockTask.handle(new TaskEvent(failingTaskId, TaskEventType.T_KILL));
    assertEquals(TaskState.FAILED, mockTask.getState());

    // verify speculative doesn't launch new tasks
    mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT));
    mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ATTEMPT_LAUNCHED));
    assertEquals(TaskState.FAILED, mockTask.getState());
    assertEquals(4, taskAttempts.size());

    // verify attempt events from active tasks don't knock task out of FAILED:
    // second attempt goes commit-pending and then fails
    MockTaskAttemptImpl secondAttempt = taskAttempts.get(1);
    secondAttempt.setState(TaskAttemptState.COMMIT_PENDING);
    mockTask.handle(new TaskTAttemptEvent(secondAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_COMMIT_PENDING));
    assertEquals(TaskState.FAILED, mockTask.getState());
    secondAttempt.setState(TaskAttemptState.FAILED);
    mockTask.handle(new TaskTAttemptEvent(secondAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED));
    assertEquals(TaskState.FAILED, mockTask.getState());

    // third attempt succeeds
    MockTaskAttemptImpl thirdAttempt = taskAttempts.get(2);
    thirdAttempt.setState(TaskAttemptState.SUCCEEDED);
    mockTask.handle(new TaskTAttemptEvent(thirdAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_SUCCEEDED));
    assertEquals(TaskState.FAILED, mockTask.getState());

    // fourth attempt is killed
    MockTaskAttemptImpl fourthAttempt = taskAttempts.get(3);
    fourthAttempt.setState(TaskAttemptState.KILLED);
    mockTask.handle(new TaskTAttemptKilledEvent(fourthAttempt.getAttemptId(), false));
    assertEquals(TaskState.FAILED, mockTask.getState());
}
Also used : TaskTAttemptKilledEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptKilledEvent) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent) TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) Test(org.junit.Test)

Example 29 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TestTaskImpl method testFailedTransitionWithHangingSpeculativeMap.

/**
 * A map task with up to four attempts: when the first attempt fails while a
 * speculative attempt has been added but never launched, a third (rescheduled)
 * attempt must be added; a later failure of the speculative attempt must not
 * add another one while the third attempt is running.
 */
@Test
public void testFailedTransitionWithHangingSpeculativeMap() {
    mockTask = new MockTaskImpl(jobId, partition, new PartialAttemptEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, startCount, metrics, appContext, TaskType.MAP) {

        @Override
        protected int getMaxAttempts() {
            return 4;
        }
    };

    // create the task, then schedule and launch its first attempt (#1)
    TaskId newTaskId = getNewTaskID();
    scheduleTaskAttempt(newTaskId);
    launchTaskAttempt(getLastAttempt().getAttemptId());

    // request a speculative attempt (#2) and deliberately leave it unlaunched
    TaskTAttemptEvent addSpeculative = new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT);
    mockTask.handle(addSpeculative);

    // fail attempt #1; with max attempts of 4 the task must keep running
    MockTaskAttemptImpl firstAttempt = taskAttempts.get(0);
    firstAttempt.setState(TaskAttemptState.FAILED);
    TaskTAttemptEvent firstFailed = new TaskTAttemptEvent(firstAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED);
    mockTask.handle(firstFailed);
    assertEquals(TaskState.RUNNING, mockTask.getState());

    // attempt #3 must have been added because speculative attempt #2 is hanging
    assertEquals(3, taskAttempts.size());
    // speculative attempt #2 is not flagged as rescheduled ...
    assertEquals(false, taskAttempts.get(1).getRescheduled());
    // ... while attempt #3 is
    assertEquals(true, taskAttempts.get(2).getRescheduled());

    // launch the newest attempt (#3) so that it becomes the running one
    launchTaskAttempt(getLastAttempt().getAttemptId());

    // fail speculative attempt #2; still below the limit of 4, so the task
    // keeps running
    MockTaskAttemptImpl speculativeAttempt = taskAttempts.get(1);
    speculativeAttempt.setState(TaskAttemptState.FAILED);
    TaskTAttemptEvent speculativeFailed = new TaskTAttemptEvent(speculativeAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED);
    mockTask.handle(speculativeFailed);
    assertEquals(TaskState.RUNNING, mockTask.getState());

    // no further attempt is added since attempt #3 is already running
    assertEquals(3, taskAttempts.size());
}
Also used : TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) Test(org.junit.Test)

Example 30 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TestFail method testFailTask.

/**
 * Runs a single-map job on {@code MockFirstFailingAttemptMRApp} and expects
 * the task to end up with two attempts: a FAILED one followed by a SUCCEEDED
 * one. The job succeeds.
 */
@Test
public void testFailTask() throws Exception {
    MRApp mrApp = new MockFirstFailingAttemptMRApp(1, 0);
    Configuration jobConf = new Configuration();
    // this test requires two task attempts, but uberization overrides max to 1
    jobConf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    Job submittedJob = mrApp.submit(jobConf);
    mrApp.waitForState(submittedJob, JobState.SUCCEEDED);

    // exactly one task, and it must have succeeded
    Map<TaskId, Task> taskMap = submittedJob.getTasks();
    Assert.assertEquals("Num tasks is not correct", 1, taskMap.size());
    Task onlyTask = taskMap.values().iterator().next();
    TaskState finalTaskState = onlyTask.getReport().getTaskState();
    Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, finalTaskState);

    // that task must have gone through exactly two attempts
    Map<TaskAttemptId, TaskAttempt> attemptMap = onlyTask.getAttempts();
    Assert.assertEquals("Num attempts is not correct", 2, attemptMap.size());

    // one attempt must be failed and another must have succeeded
    Iterator<TaskAttempt> attemptIter = attemptMap.values().iterator();
    TaskAttempt firstAttempt = attemptIter.next();
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, firstAttempt.getReport().getTaskAttemptState());
    TaskAttempt secondAttempt = attemptIter.next();
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, secondAttempt.getReport().getTaskAttemptState());
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) Test(org.junit.Test)

Aggregations

TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)102 Task (org.apache.hadoop.mapreduce.v2.app.job.Task)86 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)76 Test (org.junit.Test)63 TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId)60 Configuration (org.apache.hadoop.conf.Configuration)45 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)32 TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent)32 TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId)29 ClientResponse (com.sun.jersey.api.client.ClientResponse)18 WebResource (com.sun.jersey.api.client.WebResource)18 JSONObject (org.codehaus.jettison.json.JSONObject)12 TaskAttemptReport (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport)9 IOException (java.io.IOException)8 Path (javax.ws.rs.Path)8 Produces (javax.ws.rs.Produces)8 StringReader (java.io.StringReader)7 HashMap (java.util.HashMap)7 GET (javax.ws.rs.GET)7 DocumentBuilder (javax.xml.parsers.DocumentBuilder)7