Search in sources:

Example 1 with TaskAttemptCompletionEventStatus

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus in project hadoop by apache.

The class TestTaskAttemptListenerImpl, method createTce.

private static TaskAttemptCompletionEvent createTce(int eventId, boolean isMap,
        TaskAttemptCompletionEventStatus status) {
    // Build a synthetic job/task/attempt id chain for the test record.
    JobId jid = MRBuilderUtils.newJobId(12345, 1, 1);
    TaskId tid = MRBuilderUtils.newTaskId(jid, 0,
            isMap ? org.apache.hadoop.mapreduce.v2.api.records.TaskType.MAP
                  : org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE);
    TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(tid, 0);
    // Instantiate the completion event through the YARN record factory and
    // fill in the fields the tests care about.
    RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
    TaskAttemptCompletionEvent tce = recordFactory.newRecordInstance(TaskAttemptCompletionEvent.class);
    tce.setEventId(eventId);
    tce.setAttemptId(attemptId);
    tce.setStatus(status);
    return tce;
}
Also used: TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId), RecordFactory (org.apache.hadoop.yarn.factories.RecordFactory), TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId), TaskAttemptCompletionEvent (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)
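As a quick illustration of how the helper might be exercised, here is a hypothetical JUnit-style snippet (assuming static import of org.junit.Assert.assertEquals and an import of the v2 records TaskType; the usage is illustrative, not taken from the Hadoop test itself):

// Hypothetical usage: build one map and one reduce event, then verify
// the fields the helper populates.
TaskAttemptCompletionEvent mapDone =
    createTce(0, true, TaskAttemptCompletionEventStatus.SUCCEEDED);
TaskAttemptCompletionEvent reduceFailed =
    createTce(1, false, TaskAttemptCompletionEventStatus.FAILED);

assertEquals(0, mapDone.getEventId());
assertEquals(TaskAttemptCompletionEventStatus.SUCCEEDED, mapDone.getStatus());
assertEquals(TaskType.REDUCE,
    reduceFailed.getAttemptId().getTaskId().getTaskType());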

Example 2 with TaskAttemptCompletionEventStatus

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus in project hadoop by apache.

The class TaskImpl, method recover.

/**
   * Recover a completed task from a previous application attempt.
   * @param taskInfo recovered info about the task
   * @param committer the output committer, used if task output is recovered
   * @param recoverTaskOutput whether to recover task outputs
   * @return state of the task after recovery
   */
private TaskStateInternal recover(TaskInfo taskInfo, OutputCommitter committer, boolean recoverTaskOutput) {
    LOG.info("Recovering task " + taskId + " from prior app attempt, status was " + taskInfo.getTaskStatus());
    scheduledTime = taskInfo.getStartTime();
    sendTaskStartedEvent();
    Collection<TaskAttemptInfo> attemptInfos = taskInfo.getAllTaskAttempts().values();
    if (attemptInfos.size() > 0) {
        metrics.launchedTask(this);
    }
    // recover the attempts for this task in the order they finished
    // so task attempt completion events are ordered properly
    int savedNextAttemptNumber = nextAttemptNumber;
    ArrayList<TaskAttemptInfo> taInfos = new ArrayList<TaskAttemptInfo>(taskInfo.getAllTaskAttempts().values());
    Collections.sort(taInfos, TA_INFO_COMPARATOR);
    for (TaskAttemptInfo taInfo : taInfos) {
        nextAttemptNumber = taInfo.getAttemptId().getId();
        TaskAttemptImpl attempt = addAttempt(Avataar.VIRGIN);
        // handle the recovery inline so attempts complete before task does
        attempt.handle(new TaskAttemptRecoverEvent(attempt.getID(), taInfo, committer, recoverTaskOutput));
        finishedAttempts.add(attempt.getID());
        TaskAttemptCompletionEventStatus taces = null;
        TaskAttemptState attemptState = attempt.getState();
        switch(attemptState) {
            case FAILED:
                taces = TaskAttemptCompletionEventStatus.FAILED;
                break;
            case KILLED:
                taces = TaskAttemptCompletionEventStatus.KILLED;
                break;
            case SUCCEEDED:
                taces = TaskAttemptCompletionEventStatus.SUCCEEDED;
                break;
            default:
                throw new IllegalStateException("Unexpected attempt state during recovery: " + attemptState);
        }
        if (attemptState == TaskAttemptState.FAILED) {
            failedAttempts.add(attempt.getID());
            if (failedAttempts.size() >= maxAttempts) {
                taces = TaskAttemptCompletionEventStatus.TIPFAILED;
            }
        }
        // TODO: this shouldn't be necessary after MAPREDUCE-4330
        if (successfulAttempt == null) {
            handleTaskAttemptCompletion(attempt.getID(), taces);
            if (attemptState == TaskAttemptState.SUCCEEDED) {
                successfulAttempt = attempt.getID();
            }
        }
    }
    nextAttemptNumber = savedNextAttemptNumber;
    TaskStateInternal taskState = TaskStateInternal.valueOf(taskInfo.getTaskStatus());
    switch(taskState) {
        case SUCCEEDED:
            if (successfulAttempt != null) {
                sendTaskSucceededEvents();
            } else {
                LOG.info("Missing successful attempt for task " + taskId + ", recovering as RUNNING");
                // there must have been a fetch failure and the retry wasn't complete
                taskState = TaskStateInternal.RUNNING;
                metrics.runningTask(this);
                addAndScheduleAttempt(Avataar.VIRGIN);
            }
            break;
        case FAILED:
        case KILLED:
            {
                if (taskState == TaskStateInternal.KILLED && attemptInfos.size() == 0) {
                    metrics.endWaitingTask(this);
                }
                TaskFailedEvent tfe = new TaskFailedEvent(taskInfo.getTaskId(), taskInfo.getFinishTime(), taskInfo.getTaskType(), taskInfo.getError(), taskInfo.getTaskStatus(), taskInfo.getFailedDueToAttemptId(), taskInfo.getCounters());
                eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe));
                eventHandler.handle(new JobTaskEvent(taskId, getExternalState(taskState)));
                break;
            }
        default:
            throw new java.lang.AssertionError("Unexpected recovered task state: " + taskState);
    }
    return taskState;
}
Also used: TaskStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal), ArrayList (java.util.ArrayList), TaskAttemptCompletionEventStatus (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus), JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent), TaskAttemptState (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState), JobTaskEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent), TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo), TaskFailedEvent (org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent), TaskAttemptRecoverEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent)
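The core of the recovery loop is the mapping from each recovered attempt's terminal TaskAttemptState to the TaskAttemptCompletionEventStatus that gets replayed to the job, with FAILED escalating to TIPFAILED once the task has exhausted its attempts. Here is a minimal sketch of that mapping, extracted into a standalone helper for clarity (the helper itself is illustrative; TaskImpl keeps this logic inline):

// Illustrative helper (not part of TaskImpl): derive the completion-event
// status for a recovered attempt. failedSoFar counts FAILED attempts
// including this one; maxAttempts is the task's retry limit.
private static TaskAttemptCompletionEventStatus statusForRecoveredAttempt(
        TaskAttemptState attemptState, int failedSoFar, int maxAttempts) {
    switch (attemptState) {
        case SUCCEEDED:
            return TaskAttemptCompletionEventStatus.SUCCEEDED;
        case KILLED:
            return TaskAttemptCompletionEventStatus.KILLED;
        case FAILED:
            // once attempts are exhausted, the whole task-in-progress
            // is reported as failed
            return failedSoFar >= maxAttempts
                    ? TaskAttemptCompletionEventStatus.TIPFAILED
                    : TaskAttemptCompletionEventStatus.FAILED;
        default:
            throw new IllegalStateException(
                    "Unexpected attempt state during recovery: " + attemptState);
    }
}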

Example 3 with TaskAttemptCompletionEventStatus

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus in project hadoop by apache.

The class TaskImpl, method handleTaskAttemptCompletion.

// always called inside a transition, in turn inside the Write Lock
private void handleTaskAttemptCompletion(TaskAttemptId attemptId, TaskAttemptCompletionEventStatus status) {
    TaskAttempt attempt = attempts.get(attemptId);
    // raise the completion event only if the container is assigned
    // to nextAttemptNumber
    if (attempt.getNodeHttpAddress() != null) {
        TaskAttemptCompletionEvent tce = recordFactory.newRecordInstance(TaskAttemptCompletionEvent.class);
        tce.setEventId(-1);
        String scheme = (encryptedShuffle) ? "https://" : "http://";
        tce.setMapOutputServerAddress(StringInterner.weakIntern(scheme + attempt.getNodeHttpAddress().split(":")[0] + ":" + attempt.getShufflePort()));
        tce.setStatus(status);
        tce.setAttemptId(attempt.getID());
        int runTime = 0;
        if (attempt.getFinishTime() != 0 && attempt.getLaunchTime() != 0) {
            runTime = (int) (attempt.getFinishTime() - attempt.getLaunchTime());
        }
        tce.setAttemptRunTime(runTime);
        // raise the event to the job so that it adds the completion event
        // to its data structures
        eventHandler.handle(new JobTaskAttemptCompletedEvent(tce));
    }
}
Also used: JobTaskAttemptCompletedEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptCompletedEvent), TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt), TaskAttemptCompletionEvent (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent)
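The map-output server address is composed from the attempt's node host, the shuffle port, and a scheme chosen by whether encrypted shuffle is enabled. A small sketch of just that composition, pulled out into a hypothetical helper (TaskImpl does this inline):

// Illustrative helper: build the URL prefix reducers fetch map output from.
// nodeHttpAddress is "host:port" as reported by the NodeManager; only the
// host part is kept and paired with the shuffle port.
static String mapOutputServerAddress(boolean encryptedShuffle,
        String nodeHttpAddress, int shufflePort) {
    String scheme = encryptedShuffle ? "https://" : "http://";
    String host = nodeHttpAddress.split(":")[0];
    return scheme + host + ":" + shufflePort;
}

For example, mapOutputServerAddress(false, "worker03:8042", 13562) yields "http://worker03:13562".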

Example 4 with TaskAttemptCompletionEventStatus

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus in project hadoop by apache.

The class CompletedJob, method constructTaskAttemptCompletionEvents.

private void constructTaskAttemptCompletionEvents() {
    loadAllTasks();
    completionEvents = new LinkedList<TaskAttemptCompletionEvent>();
    List<TaskAttempt> allTaskAttempts = new LinkedList<TaskAttempt>();
    int numMapAttempts = 0;
    for (Map.Entry<TaskId, Task> taskEntry : tasks.entrySet()) {
        Task task = taskEntry.getValue();
        for (Map.Entry<TaskAttemptId, TaskAttempt> taskAttemptEntry : task.getAttempts().entrySet()) {
            TaskAttempt taskAttempt = taskAttemptEntry.getValue();
            allTaskAttempts.add(taskAttempt);
            if (task.getType() == TaskType.MAP) {
                ++numMapAttempts;
            }
        }
    }
    // Order attempts ascending by finish time, treating a zero (missing)
    // finish time as latest; unfinished attempts fall back to launch time
    // with the same convention.
    Collections.sort(allTaskAttempts, new Comparator<TaskAttempt>() {

        @Override
        public int compare(TaskAttempt o1, TaskAttempt o2) {
            if (o1.getFinishTime() == 0 || o2.getFinishTime() == 0) {
                if (o1.getFinishTime() == 0 && o2.getFinishTime() == 0) {
                    if (o1.getLaunchTime() == 0 || o2.getLaunchTime() == 0) {
                        if (o1.getLaunchTime() == 0 && o2.getLaunchTime() == 0) {
                            return 0;
                        } else {
                            long res = o1.getLaunchTime() - o2.getLaunchTime();
                            return res > 0 ? -1 : 1;
                        }
                    } else {
                        return (int) (o1.getLaunchTime() - o2.getLaunchTime());
                    }
                } else {
                    long res = o1.getFinishTime() - o2.getFinishTime();
                    return res > 0 ? -1 : 1;
                }
            } else {
                return (int) (o1.getFinishTime() - o2.getFinishTime());
            }
        }
    });
    mapCompletionEvents = new ArrayList<TaskAttemptCompletionEvent>(numMapAttempts);
    int eventId = 0;
    for (TaskAttempt taskAttempt : allTaskAttempts) {
        TaskAttemptCompletionEvent tace = Records.newRecord(TaskAttemptCompletionEvent.class);
        int attemptRunTime = -1;
        if (taskAttempt.getLaunchTime() != 0 && taskAttempt.getFinishTime() != 0) {
            attemptRunTime = (int) (taskAttempt.getFinishTime() - taskAttempt.getLaunchTime());
        }
        // Default to KILLED; the valueOf lookup below relies on
        // TaskAttemptCompletionEventStatus sharing constant names with
        // TaskAttemptState.
        TaskAttemptCompletionEventStatus taceStatus = TaskAttemptCompletionEventStatus.KILLED;
        String taStateString = taskAttempt.getState().toString();
        try {
            taceStatus = TaskAttemptCompletionEventStatus.valueOf(taStateString);
        } catch (Exception e) {
            LOG.warn("Cannot construct TACEStatus from TaskAttemptState: [" + taStateString + "] for taskAttemptId: [" + taskAttempt.getID() + "]. Defaulting to KILLED");
        }
        tace.setAttemptId(taskAttempt.getID());
        tace.setAttemptRunTime(attemptRunTime);
        tace.setEventId(eventId++);
        tace.setMapOutputServerAddress(taskAttempt.getAssignedContainerMgrAddress());
        tace.setStatus(taceStatus);
        completionEvents.add(tace);
        if (taskAttempt.getID().getTaskId().getTaskType() == TaskType.MAP) {
            mapCompletionEvents.add(tace);
        }
    }
}
Also used: Task (org.apache.hadoop.mapreduce.v2.app.job.Task), TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId), TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId), TaskAttemptCompletionEventStatus (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus), TaskAttemptCompletionEvent (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent), LinkedList (java.util.LinkedList), IOException (java.io.IOException), UnknownHostException (java.net.UnknownHostException), FileNotFoundException (java.io.FileNotFoundException), YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException), TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt), HashMap (java.util.HashMap), Map (java.util.Map)
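The anonymous comparator above encodes "sort ascending by finish time, treating a zero (missing) timestamp as latest, falling back to launch time for unfinished attempts". For comparison, here is a sketch of nearly the same ordering using Comparator combinators (not the project's code; it additionally breaks finish-time ties by launch time, and by comparing longs it avoids the int-cast overflow the original risks on very large time differences):

// Zero timestamps map to Long.MAX_VALUE so missing values sort last.
Comparator<TaskAttempt> byCompletion = Comparator
        .comparingLong((TaskAttempt ta) ->
                ta.getFinishTime() == 0 ? Long.MAX_VALUE : ta.getFinishTime())
        .thenComparingLong(ta ->
                ta.getLaunchTime() == 0 ? Long.MAX_VALUE : ta.getLaunchTime());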

Aggregations

TaskAttemptCompletionEvent (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent): 3 uses
TaskAttemptCompletionEventStatus (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus): 2 uses
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 2 uses
TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 2 uses
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 2 uses
FileNotFoundException (java.io.FileNotFoundException): 1 use
IOException (java.io.IOException): 1 use
UnknownHostException (java.net.UnknownHostException): 1 use
ArrayList (java.util.ArrayList): 1 use
HashMap (java.util.HashMap): 1 use
LinkedList (java.util.LinkedList): 1 use
Map (java.util.Map): 1 use
JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent): 1 use
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo): 1 use
TaskFailedEvent (org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent): 1 use
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 1 use
TaskAttemptState (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState): 1 use
Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 1 use
TaskStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal): 1 use
JobTaskAttemptCompletedEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptCompletedEvent): 1 use