Example 11 with TaskAttemptCompletionEvent

use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent in project hadoop by apache.

the class TestMRApp method testUpdatedNodes.

/**
   * The test verifies that the AM re-runs maps that have run on bad nodes. It
   * also verifies that the AM records all success/killed events so that reduces
   * are notified about map output status changes, and that the re-run
   * information is preserved across an AM restart.
   */
@Test
public void testUpdatedNodes() throws Exception {
    int runCount = 0;
    Dispatcher disp = Mockito.spy(new AsyncDispatcher());
    MRApp app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), true, ++runCount, disp);
    Configuration conf = new Configuration();
    // reduces will start once half of the maps have completed
    conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 0.5f);
    // uberization forces full slowstart (1.0), so disable that
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    ContainerAllocEventHandler handler = new ContainerAllocEventHandler();
    disp.register(ContainerAllocator.EventType.class, handler);
    final Job job1 = app.submit(conf);
    app.waitForState(job1, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 4, job1.getTasks().size());
    Iterator<Task> it = job1.getTasks().values().iterator();
    Task mapTask1 = it.next();
    Task mapTask2 = it.next();
    // all maps must be running
    app.waitForState(mapTask1, TaskState.RUNNING);
    app.waitForState(mapTask2, TaskState.RUNNING);
    TaskAttempt task1Attempt = mapTask1.getAttempts().values().iterator().next();
    TaskAttempt task2Attempt = mapTask2.getAttempts().values().iterator().next();
    NodeId node1 = task1Attempt.getNodeId();
    NodeId node2 = task2Attempt.getNodeId();
    Assert.assertEquals(node1, node2);
    // send the done signal to both map task attempts
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt.getID(), TaskAttemptEventType.TA_DONE));
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task2Attempt.getID(), TaskAttemptEventType.TA_DONE));
    // both maps must have succeeded
    app.waitForState(mapTask1, TaskState.SUCCEEDED);
    app.waitForState(mapTask2, TaskState.SUCCEEDED);
    final int checkIntervalMillis = 100;
    final int waitForMillis = 800;
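    // completion events are published through the dispatcher asynchronously,
    // so poll for them instead of asserting immediately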
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 2;
        }
    }, checkIntervalMillis, waitForMillis);
    TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 2 completion events for success", 2, events.length);
    // send updated nodes info
    ArrayList<NodeReport> updatedNodes = new ArrayList<NodeReport>();
    NodeReport nr = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(NodeReport.class);
    nr.setNodeId(node1);
    nr.setNodeState(NodeState.UNHEALTHY);
    updatedNodes.add(nr);
    app.getContext().getEventHandler().handle(new JobUpdatedNodesEvent(job1.getID(), updatedNodes));
    app.waitForState(task1Attempt, TaskAttemptState.KILLED);
    app.waitForState(task2Attempt, TaskAttemptState.KILLED);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 4;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job1.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 2 more completion events for killed", 4, events.length);
    // The 2 map task attempts that were killed above should be re-requested
    // from the container allocator with the previous attempt marked as
    // failed. If this happens, the allocator will request the container for
    // this mapper from the RM at a higher priority of 5 (i.e. with a priority
    // equivalent to that of a fail-fast map).
    handler.waitForFailedMapContainerReqEvents(2);
    // all maps must be back to running
    app.waitForState(mapTask1, TaskState.RUNNING);
    app.waitForState(mapTask2, TaskState.RUNNING);
    Iterator<TaskAttempt> itr = mapTask1.getAttempts().values().iterator();
    itr.next();
    task1Attempt = itr.next();
    // send the done signal to the task
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt.getID(), TaskAttemptEventType.TA_DONE));
    // map1 must have succeeded; map2 must still be running
    app.waitForState(mapTask1, TaskState.SUCCEEDED);
    app.waitForState(mapTask2, TaskState.RUNNING);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 5;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job1.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 1 more completion events for success", 5, events.length);
    // Crash the app again.
    app.stop();
    // rerun: the 1st map will be recovered from the previous run
    app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), false, ++runCount, (Dispatcher) new AsyncDispatcher());
    conf = new Configuration();
    conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    final Job job2 = app.submit(conf);
    app.waitForState(job2, JobState.RUNNING);
    Assert.assertEquals("No of tasks not correct", 4, job2.getTasks().size());
    it = job2.getTasks().values().iterator();
    mapTask1 = it.next();
    mapTask2 = it.next();
    Task reduceTask1 = it.next();
    Task reduceTask2 = it.next();
    // map 1 will be recovered, no need to send done
    app.waitForState(mapTask1, TaskState.SUCCEEDED);
    app.waitForState(mapTask2, TaskState.RUNNING);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 2;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job2.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 2 completion events for killed & success of map1", 2, events.length);
    task2Attempt = mapTask2.getAttempts().values().iterator().next();
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task2Attempt.getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(mapTask2, TaskState.SUCCEEDED);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 3;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job2.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 1 more completion events for success", 3, events.length);
    app.waitForState(reduceTask1, TaskState.RUNNING);
    app.waitForState(reduceTask2, TaskState.RUNNING);
    TaskAttempt task3Attempt = reduceTask1.getAttempts().values().iterator().next();
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task3Attempt.getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(reduceTask1, TaskState.SUCCEEDED);
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task3Attempt.getID(), TaskAttemptEventType.TA_KILL));
    app.waitForState(reduceTask1, TaskState.SUCCEEDED);
    TaskAttempt task4Attempt = reduceTask2.getAttempts().values().iterator().next();
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task4Attempt.getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(reduceTask2, TaskState.SUCCEEDED);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 5;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job2.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 2 more completion events for reduce success", 5, events.length);
    // job succeeds
    app.waitForState(job2, JobState.SUCCEEDED);
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) TaskAttemptCompletionEvent(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent) ContainerAllocator(org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobUpdatedNodesEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobUpdatedNodesEvent) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) Test(org.junit.Test)
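
The test polls the event list with the same anonymous Supplier five times. A small helper along the following lines could factor that out; waitForCompletionEventCount is a hypothetical name, and the sketch assumes the same waitFor(Supplier, checkIntervalMillis, waitForMillis) utility and Job interface used in the test:

// Hypothetical helper: poll until the job has published the expected number
// of task attempt completion events, reusing the test's waitFor utility.
private void waitForCompletionEventCount(final Job job, final int expected,
        int checkIntervalMillis, int waitForMillis) throws Exception {
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            return job.getTaskAttemptCompletionEvents(0, 100).length == expected;
        }
    }, checkIntervalMillis, waitForMillis);
}

With such a helper, each polling site in the test collapses to a single call such as waitForCompletionEventCount(job1, 2, checkIntervalMillis, waitForMillis).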

Example 12 with TaskAttemptCompletionEvent

use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent in project hadoop by apache.

the class GetTaskAttemptCompletionEventsResponsePBImpl method initCompletionEvents.

private void initCompletionEvents() {
    if (this.completionEvents != null) {
        return;
    }
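    // Read from the immutable proto if this record is still backed by one;
    // otherwise read from the mutable builder.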
    GetTaskAttemptCompletionEventsResponseProtoOrBuilder p = viaProto ? proto : builder;
    List<TaskAttemptCompletionEventProto> list = p.getCompletionEventsList();
    this.completionEvents = new ArrayList<TaskAttemptCompletionEvent>();
    for (TaskAttemptCompletionEventProto c : list) {
        this.completionEvents.add(convertFromProtoFormat(c));
    }
}
Also used : TaskAttemptCompletionEventProto(org.apache.hadoop.mapreduce.v2.proto.MRProtos.TaskAttemptCompletionEventProto) TaskAttemptCompletionEvent(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent) GetTaskAttemptCompletionEventsResponseProtoOrBuilder(org.apache.hadoop.mapreduce.v2.proto.MRServiceProtos.GetTaskAttemptCompletionEventsResponseProtoOrBuilder)
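
This is the lazy-deserialization pattern used throughout the MR v2 *PBImpl record classes: the protobuf list is converted into record objects only on first access and then cached. A stripped-down, self-contained sketch of the same pattern, with hypothetical Foo/FooProto types standing in for TaskAttemptCompletionEvent and its proto:

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-ins for TaskAttemptCompletionEventProto / TaskAttemptCompletionEvent.
class FooProto { final int id; FooProto(int id) { this.id = id; } }
class Foo { final int id; Foo(FooProto p) { this.id = p.id; } }

class FooListHolder {
    private final List<FooProto> protoList; // wire-format representation
    private List<Foo> foos;                 // record view, built lazily and cached

    FooListHolder(List<FooProto> protoList) { this.protoList = protoList; }

    List<Foo> getFoos() {
        if (foos == null) {                 // first access: convert once
            foos = new ArrayList<Foo>();
            for (FooProto p : protoList) {
                foos.add(new Foo(p));       // convertFromProtoFormat equivalent
            }
        }
        return foos;
    }
}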

Example 13 with TaskAttemptCompletionEvent

use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent in project hadoop by apache.

the class CompletedJob method constructTaskAttemptCompletionEvents.

private void constructTaskAttemptCompletionEvents() {
    loadAllTasks();
    completionEvents = new LinkedList<TaskAttemptCompletionEvent>();
    List<TaskAttempt> allTaskAttempts = new LinkedList<TaskAttempt>();
    int numMapAttempts = 0;
    for (Map.Entry<TaskId, Task> taskEntry : tasks.entrySet()) {
        Task task = taskEntry.getValue();
        for (Map.Entry<TaskAttemptId, TaskAttempt> taskAttemptEntry : task.getAttempts().entrySet()) {
            TaskAttempt taskAttempt = taskAttemptEntry.getValue();
            allTaskAttempts.add(taskAttempt);
            if (task.getType() == TaskType.MAP) {
                ++numMapAttempts;
            }
        }
    }
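    // Sort so that finished attempts come first in ascending finish-time
    // order, then launched-but-unfinished attempts in ascending launch-time
    // order, with never-launched attempts last (a time of 0 means "not yet").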
    Collections.sort(allTaskAttempts, new Comparator<TaskAttempt>() {

        @Override
        public int compare(TaskAttempt o1, TaskAttempt o2) {
            if (o1.getFinishTime() == 0 || o2.getFinishTime() == 0) {
                if (o1.getFinishTime() == 0 && o2.getFinishTime() == 0) {
                    if (o1.getLaunchTime() == 0 || o2.getLaunchTime() == 0) {
                        if (o1.getLaunchTime() == 0 && o2.getLaunchTime() == 0) {
                            return 0;
                        } else {
                            long res = o1.getLaunchTime() - o2.getLaunchTime();
                            return res > 0 ? -1 : 1;
                        }
                    } else {
                        return (int) (o1.getLaunchTime() - o2.getLaunchTime());
                    }
                } else {
                    long res = o1.getFinishTime() - o2.getFinishTime();
                    return res > 0 ? -1 : 1;
                }
            } else {
                return (int) (o1.getFinishTime() - o2.getFinishTime());
            }
        }
    });
    mapCompletionEvents = new ArrayList<TaskAttemptCompletionEvent>(numMapAttempts);
    int eventId = 0;
    for (TaskAttempt taskAttempt : allTaskAttempts) {
        TaskAttemptCompletionEvent tace = Records.newRecord(TaskAttemptCompletionEvent.class);
        int attemptRunTime = -1;
        if (taskAttempt.getLaunchTime() != 0 && taskAttempt.getFinishTime() != 0) {
            attemptRunTime = (int) (taskAttempt.getFinishTime() - taskAttempt.getLaunchTime());
        }
        // Default to KILLED
        TaskAttemptCompletionEventStatus taceStatus = TaskAttemptCompletionEventStatus.KILLED;
        String taStateString = taskAttempt.getState().toString();
        try {
            taceStatus = TaskAttemptCompletionEventStatus.valueOf(taStateString);
        } catch (Exception e) {
            LOG.warn("Cannot constuct TACEStatus from TaskAtemptState: [" + taStateString + "] for taskAttemptId: [" + taskAttempt.getID() + "]. Defaulting to KILLED");
        }
        tace.setAttemptId(taskAttempt.getID());
        tace.setAttemptRunTime(attemptRunTime);
        tace.setEventId(eventId++);
        tace.setMapOutputServerAddress(taskAttempt.getAssignedContainerMgrAddress());
        tace.setStatus(taceStatus);
        completionEvents.add(tace);
        if (taskAttempt.getID().getTaskId().getTaskType() == TaskType.MAP) {
            mapCompletionEvents.add(tace);
        }
    }
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptCompletionEventStatus(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus) TaskAttemptCompletionEvent(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent) LinkedList(java.util.LinkedList) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) FileNotFoundException(java.io.FileNotFoundException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) HashMap(java.util.HashMap) Map(java.util.Map)
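
The nested comparator above encodes a fairly simple ordering. Under the reading that a time of 0 means "not yet", an equivalent Java 8 formulation might look like the sketch below. This is an untested illustration, not a drop-in replacement; one practical difference is that thenComparingLong uses Long.compare internally, avoiding the original's (int) casts of long differences, which can overflow for large time gaps:

// Sketch: finished attempts first (ascending finish time), then launched but
// unfinished attempts (ascending launch time), then never-launched attempts.
Comparator<TaskAttempt> byCompletion =
    Comparator.comparing((TaskAttempt a) -> a.getFinishTime() == 0) // finished (false) sorts first
        .thenComparing(a -> a.getLaunchTime() == 0)                 // launched sorts before unlaunched
        .thenComparingLong(TaskAttempt::getFinishTime)              // ascending finish time
        .thenComparingLong(TaskAttempt::getLaunchTime);             // ascending launch time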

Example 14 with TaskAttemptCompletionEvent

use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent in project hadoop by apache.

the class NotRunningJob method getTaskAttemptCompletionEvents.

@Override
public GetTaskAttemptCompletionEventsResponse getTaskAttemptCompletionEvents(GetTaskAttemptCompletionEventsRequest request) throws IOException {
    GetTaskAttemptCompletionEventsResponse resp = recordFactory.newRecordInstance(GetTaskAttemptCompletionEventsResponse.class);
    resp.addAllCompletionEvents(new ArrayList<TaskAttemptCompletionEvent>());
    return resp;
}
Also used : GetTaskAttemptCompletionEventsResponse(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskAttemptCompletionEventsResponse) TaskAttemptCompletionEvent(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent)
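
For contrast with the empty response above, the caller side of this RPC typically sets a starting event id and a page size on the request. A sketch of a client call, assuming the record factory and the setJobId/setFromEventId/setMaxEvents setters and getCompletionEventList accessor used elsewhere in this API, with protocol standing for an MRClientProtocol stub:

// Sketch of the caller side: fetch one page of completion events for a job.
GetTaskAttemptCompletionEventsRequest request =
    RecordFactoryProvider.getRecordFactory(null)
        .newRecordInstance(GetTaskAttemptCompletionEventsRequest.class);
request.setJobId(jobId);      // the job whose events we want
request.setFromEventId(0);    // start from the first event
request.setMaxEvents(100);    // page size
GetTaskAttemptCompletionEventsResponse response =
    protocol.getTaskAttemptCompletionEvents(request);
List<TaskAttemptCompletionEvent> events = response.getCompletionEventList();
// Against a NotRunningJob, this list is simply empty, as the override above shows.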

Aggregations

TaskAttemptCompletionEvent (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent) 13
Configuration (org.apache.hadoop.conf.Configuration) 7
Task (org.apache.hadoop.mapreduce.v2.app.job.Task) 7
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) 7
Test (org.junit.Test) 7
Job (org.apache.hadoop.mapreduce.v2.app.job.Job) 6
TaskCompletionEvent (org.apache.hadoop.mapred.TaskCompletionEvent) 5
TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) 5
TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId) 4
HashMap (java.util.HashMap) 3
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId) 3
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) 3
ArrayList (java.util.ArrayList) 2
LinkedList (java.util.LinkedList) 2
Map (java.util.Map) 2
JobTaskAttemptCompletedEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptCompletedEvent) 2
JobUpdatedNodesEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobUpdatedNodesEvent) 2
Dispatcher (org.apache.hadoop.yarn.event.Dispatcher) 2
FileNotFoundException (java.io.FileNotFoundException) 1
IOException (java.io.IOException) 1