
Example 1 with TaskFailedEvent

Use of org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent in project hadoop by apache.

From the class TaskImpl, the method recover:

/**
   * Recover a completed task from a previous application attempt.
   * @param taskInfo recovered info about the task
   * @param committer the output committer, used when recovering task output
   * @param recoverTaskOutput whether to recover task outputs
   * @return state of the task after recovery
   */
private TaskStateInternal recover(TaskInfo taskInfo, OutputCommitter committer, boolean recoverTaskOutput) {
    LOG.info("Recovering task " + taskId + " from prior app attempt, status was " + taskInfo.getTaskStatus());
    scheduledTime = taskInfo.getStartTime();
    sendTaskStartedEvent();
    Collection<TaskAttemptInfo> attemptInfos = taskInfo.getAllTaskAttempts().values();
    if (attemptInfos.size() > 0) {
        metrics.launchedTask(this);
    }
    // recover the attempts for this task in the order they finished
    // so task attempt completion events are ordered properly
    int savedNextAttemptNumber = nextAttemptNumber;
    ArrayList<TaskAttemptInfo> taInfos = new ArrayList<TaskAttemptInfo>(taskInfo.getAllTaskAttempts().values());
    Collections.sort(taInfos, TA_INFO_COMPARATOR);
    for (TaskAttemptInfo taInfo : taInfos) {
        nextAttemptNumber = taInfo.getAttemptId().getId();
        TaskAttemptImpl attempt = addAttempt(Avataar.VIRGIN);
        // handle the recovery inline so attempts complete before task does
        attempt.handle(new TaskAttemptRecoverEvent(attempt.getID(), taInfo, committer, recoverTaskOutput));
        finishedAttempts.add(attempt.getID());
        TaskAttemptCompletionEventStatus taces = null;
        TaskAttemptState attemptState = attempt.getState();
        switch (attemptState) {
            case FAILED:
                taces = TaskAttemptCompletionEventStatus.FAILED;
                break;
            case KILLED:
                taces = TaskAttemptCompletionEventStatus.KILLED;
                break;
            case SUCCEEDED:
                taces = TaskAttemptCompletionEventStatus.SUCCEEDED;
                break;
            default:
                throw new IllegalStateException("Unexpected attempt state during recovery: " + attemptState);
        }
        if (attemptState == TaskAttemptState.FAILED) {
            failedAttempts.add(attempt.getID());
            if (failedAttempts.size() >= maxAttempts) {
                taces = TaskAttemptCompletionEventStatus.TIPFAILED;
            }
        }
        // TODO: this shouldn't be necessary after MAPREDUCE-4330
        if (successfulAttempt == null) {
            handleTaskAttemptCompletion(attempt.getID(), taces);
            if (attemptState == TaskAttemptState.SUCCEEDED) {
                successfulAttempt = attempt.getID();
            }
        }
    }
    nextAttemptNumber = savedNextAttemptNumber;
    TaskStateInternal taskState = TaskStateInternal.valueOf(taskInfo.getTaskStatus());
    switch (taskState) {
        case SUCCEEDED:
            if (successfulAttempt != null) {
                sendTaskSucceededEvents();
            } else {
                LOG.info("Missing successful attempt for task " + taskId + ", recovering as RUNNING");
                // there must have been a fetch failure and the retry wasn't complete
                taskState = TaskStateInternal.RUNNING;
                metrics.runningTask(this);
                addAndScheduleAttempt(Avataar.VIRGIN);
            }
            break;
        case FAILED:
        case KILLED:
            {
                if (taskState == TaskStateInternal.KILLED && attemptInfos.size() == 0) {
                    metrics.endWaitingTask(this);
                }
                TaskFailedEvent tfe = new TaskFailedEvent(taskInfo.getTaskId(), taskInfo.getFinishTime(), taskInfo.getTaskType(), taskInfo.getError(), taskInfo.getTaskStatus(), taskInfo.getFailedDueToAttemptId(), taskInfo.getCounters());
                eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe));
                eventHandler.handle(new JobTaskEvent(taskId, getExternalState(taskState)));
                break;
            }
        default:
            throw new AssertionError("Unexpected recovered task state: " + taskState);
    }
    return taskState;
}
Also used: TaskStateInternal(org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal) ArrayList(java.util.ArrayList) TaskAttemptCompletionEventStatus(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) TaskAttemptState(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState) JobTaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent) TaskAttemptInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo) TaskFailedEvent(org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent) TaskAttemptRecoverEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent)
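
The comparator TA_INFO_COMPARATOR referenced above is not shown on this page; the recovery loop only requires that attempts are visited in the order they finished. A minimal sketch of such a comparator, assuming only the getFinishTime() accessor that JobHistoryParser.TaskAttemptInfo provides, could look like this (an illustration, not the actual field from TaskImpl):

import java.util.Comparator;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;

// Orders recovered attempts by original finish time so that task attempt
// completion events are replayed in the order they occurred.
private static final Comparator<TaskAttemptInfo> TA_INFO_COMPARATOR =
        new Comparator<TaskAttemptInfo>() {
            @Override
            public int compare(TaskAttemptInfo a, TaskAttemptInfo b) {
                return Long.compare(a.getFinishTime(), b.getFinishTime());
            }
        };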

Example 2 with TaskFailedEvent

Use of org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent in project hadoop by apache.

From the class TaskImpl, the method createTaskFailedEvent:

private static TaskFailedEvent createTaskFailedEvent(TaskImpl task, List<String> diag, TaskStateInternal taskState, TaskAttemptId taId) {
    StringBuilder errorSb = new StringBuilder();
    if (diag != null) {
        // Concatenate the diagnostics; note that every entry, including the
        // first, is prefixed with ", ".
        for (String d : diag) {
            errorSb.append(", ").append(d);
        }
    }
    TaskFailedEvent taskFailedEvent = new TaskFailedEvent(
            TypeConverter.fromYarn(task.taskId),
            // Hack since getFinishTime needs isFinished to be true and that
            // doesn't happen till after the transition.
            task.getFinishTime(taId),
            TypeConverter.fromYarn(task.getType()),
            errorSb.toString(),
            taskState.toString(),
            taId == null ? null : TypeConverter.fromYarn(taId),
            task.getCounters());
    return taskFailedEvent;
}
Also used: TaskFailedEvent(org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent)
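
Every TaskFailedEvent construction on this page passes the same seven arguments: the task id, finish time, task type, error text, terminal status, the attempt blamed for the failure (which may be null), and the task counters. A minimal, self-contained construction in that style, with placeholder values (mirroring the call in the test below):

import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent;

// Illustrative helper, not part of Hadoop: builds a TaskFailedEvent for
// the first map task of a placeholder job.
public static TaskFailedEvent sampleTaskFailedEvent() {
    TaskID taskId = new TaskID(new JobID("1", 1), TaskType.MAP, 0);
    return new TaskFailedEvent(
            taskId,           // task that failed
            0L,               // finish time in milliseconds
            TaskType.MAP,     // type of the failed task
            "disk error",     // error / diagnostic text
            "FAILED",         // terminal task status
            null,             // attempt blamed for the failure, may be null
            new Counters());  // counters recorded at failure time
}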

Example 3 with TaskFailedEvent

Use of org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent in project hadoop by apache.

From the class TestJobHistoryParsing, the method testMultipleFailedTasks:

@Test
public void testMultipleFailedTasks() throws Exception {
    JobHistoryParser parser = new JobHistoryParser(Mockito.mock(FSDataInputStream.class));
    EventReader reader = Mockito.mock(EventReader.class);
    // Hack!
    final AtomicInteger numEventsRead = new AtomicInteger(0);
    final org.apache.hadoop.mapreduce.TaskType taskType = org.apache.hadoop.mapreduce.TaskType.MAP;
    final TaskID[] tids = new TaskID[2];
    final JobID jid = new JobID("1", 1);
    tids[0] = new TaskID(jid, taskType, 0);
    tids[1] = new TaskID(jid, taskType, 1);
    Mockito.when(reader.getNextEvent()).thenAnswer(new Answer<HistoryEvent>() {

        public HistoryEvent answer(InvocationOnMock invocation) throws IOException {
            // send two task start and two task fail events for tasks 0 and 1
            int eventId = numEventsRead.getAndIncrement();
            TaskID tid = tids[eventId & 0x1];
            if (eventId < 2) {
                return new TaskStartedEvent(tid, 0, taskType, "");
            }
            if (eventId < 4) {
                TaskFailedEvent tfe = new TaskFailedEvent(tid, 0, taskType, "failed", "FAILED", null, new Counters());
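                // Round-trip the Avro datum to exercise the event's
                // setDatum/getDatum serialization path.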
                tfe.setDatum(tfe.getDatum());
                return tfe;
            }
            if (eventId < 5) {
                JobUnsuccessfulCompletionEvent juce = new JobUnsuccessfulCompletionEvent(jid, 100L, 2, 0, "JOB_FAILED", Collections.singletonList("Task failed: " + tids[0].toString()));
                return juce;
            }
            return null;
        }
    });
    JobInfo info = parser.parse(reader);
    assertTrue("Task 0 not implicated", info.getErrorInfo().contains(tids[0].toString()));
}
Also used: EventReader(org.apache.hadoop.mapreduce.jobhistory.EventReader) TaskID(org.apache.hadoop.mapreduce.TaskID) JobUnsuccessfulCompletionEvent(org.apache.hadoop.mapreduce.jobhistory.JobUnsuccessfulCompletionEvent) IOException(java.io.IOException) HistoryEvent(org.apache.hadoop.mapreduce.jobhistory.HistoryEvent) TaskStartedEvent(org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent) JobHistoryParser(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) JobInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo) InvocationOnMock(org.mockito.invocation.InvocationOnMock) TaskFailedEvent(org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Counters(org.apache.hadoop.mapreduce.Counters) JobID(org.apache.hadoop.mapreduce.JobID) Test(org.junit.Test)
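
Outside of tests, the parser is usually pointed at a persisted .jhist file rather than a mocked EventReader. A minimal sketch, assuming JobHistoryParser's (FileSystem, Path) constructor and its no-argument parse() overload, with a hypothetical helper and file path:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;

// Parse a completed job's history file and surface any recorded failure.
// The method name and path argument are illustrative, not Hadoop API.
public static void printJobError(Configuration conf, Path historyFile) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    JobHistoryParser parser = new JobHistoryParser(fs, historyFile);
    JobInfo info = parser.parse();
    if (info.getErrorInfo() != null && !info.getErrorInfo().isEmpty()) {
        System.out.println("Job failed: " + info.getErrorInfo());
    }
}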

Aggregations

TaskFailedEvent (org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent) 3
IOException (java.io.IOException) 1
ArrayList (java.util.ArrayList) 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 1
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 1
Counters (org.apache.hadoop.mapreduce.Counters) 1
JobID (org.apache.hadoop.mapreduce.JobID) 1
TaskID (org.apache.hadoop.mapreduce.TaskID) 1
EventReader (org.apache.hadoop.mapreduce.jobhistory.EventReader) 1
HistoryEvent (org.apache.hadoop.mapreduce.jobhistory.HistoryEvent) 1
JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) 1
JobHistoryParser (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser) 1
JobInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo) 1
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo) 1
JobUnsuccessfulCompletionEvent (org.apache.hadoop.mapreduce.jobhistory.JobUnsuccessfulCompletionEvent) 1
TaskStartedEvent (org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent) 1
TaskAttemptCompletionEventStatus (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus) 1
TaskAttemptState (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState) 1
TaskStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal) 1
JobTaskEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent) 1