Use of org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent in project hadoop by apache.
The class TaskImpl, method recover.
/**
 * Recover a completed task from a previous application attempt
 * @param taskInfo recovered info about the task
 * @param committer the output committer used when recovering task output
 * @param recoverTaskOutput whether to recover task outputs
 * @return state of the task after recovery
 */
private TaskStateInternal recover(TaskInfo taskInfo,
    OutputCommitter committer, boolean recoverTaskOutput) {
  LOG.info("Recovering task " + taskId
      + " from prior app attempt, status was " + taskInfo.getTaskStatus());

  scheduledTime = taskInfo.getStartTime();
  sendTaskStartedEvent();
  Collection<TaskAttemptInfo> attemptInfos =
      taskInfo.getAllTaskAttempts().values();

  if (attemptInfos.size() > 0) {
    metrics.launchedTask(this);
  }

  // recover the attempts for this task in the order they finished
  // so task attempt completion events are ordered properly
  int savedNextAttemptNumber = nextAttemptNumber;
  ArrayList<TaskAttemptInfo> taInfos =
      new ArrayList<TaskAttemptInfo>(taskInfo.getAllTaskAttempts().values());
  Collections.sort(taInfos, TA_INFO_COMPARATOR);
  for (TaskAttemptInfo taInfo : taInfos) {
    nextAttemptNumber = taInfo.getAttemptId().getId();
    TaskAttemptImpl attempt = addAttempt(Avataar.VIRGIN);
    // handle the recovery inline so attempts complete before task does
    attempt.handle(new TaskAttemptRecoverEvent(attempt.getID(), taInfo,
        committer, recoverTaskOutput));
    finishedAttempts.add(attempt.getID());
    TaskAttemptCompletionEventStatus taces = null;
    TaskAttemptState attemptState = attempt.getState();
    switch (attemptState) {
      case FAILED:
        taces = TaskAttemptCompletionEventStatus.FAILED;
        break;
      case KILLED:
        taces = TaskAttemptCompletionEventStatus.KILLED;
        break;
      case SUCCEEDED:
        taces = TaskAttemptCompletionEventStatus.SUCCEEDED;
        break;
      default:
        throw new IllegalStateException(
            "Unexpected attempt state during recovery: " + attemptState);
    }
    if (attemptState == TaskAttemptState.FAILED) {
      failedAttempts.add(attempt.getID());
      if (failedAttempts.size() >= maxAttempts) {
        taces = TaskAttemptCompletionEventStatus.TIPFAILED;
      }
    }

    // TODO: this shouldn't be necessary after MAPREDUCE-4330
    if (successfulAttempt == null) {
      handleTaskAttemptCompletion(attempt.getID(), taces);
      if (attemptState == TaskAttemptState.SUCCEEDED) {
        successfulAttempt = attempt.getID();
      }
    }
  }
  nextAttemptNumber = savedNextAttemptNumber;

  TaskStateInternal taskState =
      TaskStateInternal.valueOf(taskInfo.getTaskStatus());
  switch (taskState) {
    case SUCCEEDED:
      if (successfulAttempt != null) {
        sendTaskSucceededEvents();
      } else {
        LOG.info("Missing successful attempt for task " + taskId
            + ", recovering as RUNNING");
        // there must have been a fetch failure and the retry wasn't complete
        taskState = TaskStateInternal.RUNNING;
        metrics.runningTask(this);
        addAndScheduleAttempt(Avataar.VIRGIN);
      }
      break;
    case FAILED:
    case KILLED: {
      if (taskState == TaskStateInternal.KILLED && attemptInfos.size() == 0) {
        metrics.endWaitingTask(this);
      }
      TaskFailedEvent tfe = new TaskFailedEvent(taskInfo.getTaskId(),
          taskInfo.getFinishTime(), taskInfo.getTaskType(),
          taskInfo.getError(), taskInfo.getTaskStatus(),
          taskInfo.getFailedDueToAttemptId(), taskInfo.getCounters());
      eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe));
      eventHandler.handle(
          new JobTaskEvent(taskId, getExternalState(taskState)));
      break;
    }
    default:
      throw new AssertionError(
          "Unexpected recovered task state: " + taskState);
  }
  return taskState;
}
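The sort above relies on TA_INFO_COMPARATOR, which is not shown in this snippet. A minimal sketch consistent with the "order they finished" comment, assuming TaskAttemptInfo exposes getFinishTime():

// Sketch: order recovered attempts by finish time so that task attempt
// completion events are replayed in the order the attempts completed.
private static final Comparator<TaskAttemptInfo> TA_INFO_COMPARATOR =
    new Comparator<TaskAttemptInfo>() {
      @Override
      public int compare(TaskAttemptInfo a, TaskAttemptInfo b) {
        long diff = a.getFinishTime() - b.getFinishTime();
        return diff == 0 ? 0 : (diff < 0 ? -1 : 1);
      }
    };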
Use of org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent in project hadoop by apache.
The class TaskImpl, method createTaskFailedEvent.
private static TaskFailedEvent createTaskFailedEvent(TaskImpl task,
    List<String> diag, TaskStateInternal taskState, TaskAttemptId taId) {
  StringBuilder errorSb = new StringBuilder();
  if (diag != null) {
    for (String d : diag) {
      errorSb.append(", ").append(d);
    }
  }
  TaskFailedEvent taskFailedEvent = new TaskFailedEvent(
      TypeConverter.fromYarn(task.taskId),
      // Hack since getFinishTime needs isFinished to be true and that
      // doesn't happen till after the transition.
      task.getFinishTime(taId),
      TypeConverter.fromYarn(task.getType()),
      errorSb.toString(),
      taskState.toString(),
      taId == null ? null : TypeConverter.fromYarn(taId),
      task.getCounters());
  return taskFailedEvent;
}
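For context, a hedged sketch of how this constructor can be invoked; the variables failedTaskId, failedAttemptId, counters, eventHandler, and jobId are hypothetical stand-ins for values available at a real call site:

// Hypothetical call site; the argument order matches the constructor above:
// (taskId, finishTime, taskType, error, status, failedDueToAttempt, counters)
TaskFailedEvent event = new TaskFailedEvent(
    failedTaskId,                 // org.apache.hadoop.mapreduce.TaskID
    System.currentTimeMillis(),   // finish time in milliseconds
    TaskType.MAP,                 // type of the failed task
    "disk full on node",          // diagnostic message (made up)
    "FAILED",                     // status string, as in the test below
    failedAttemptId,              // attempt blamed for the failure; may be null
    counters);                    // Counters snapshot at failure time
eventHandler.handle(new JobHistoryEvent(jobId, event));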
Use of org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent in project hadoop by apache.
The class TestJobHistoryParsing, method testMultipleFailedTasks.
@Test
public void testMultipleFailedTasks() throws Exception {
  JobHistoryParser parser =
      new JobHistoryParser(Mockito.mock(FSDataInputStream.class));
  EventReader reader = Mockito.mock(EventReader.class);
  // Hack! shared mutable counter driving the mocked reader below
  final AtomicInteger numEventsRead = new AtomicInteger(0);
  final org.apache.hadoop.mapreduce.TaskType taskType =
      org.apache.hadoop.mapreduce.TaskType.MAP;
  final TaskID[] tids = new TaskID[2];
  final JobID jid = new JobID("1", 1);
  tids[0] = new TaskID(jid, taskType, 0);
  tids[1] = new TaskID(jid, taskType, 1);
  Mockito.when(reader.getNextEvent()).thenAnswer(new Answer<HistoryEvent>() {
    public HistoryEvent answer(InvocationOnMock invocation)
        throws IOException {
      // send two task start and two task fail events for tasks 0 and 1
      int eventId = numEventsRead.getAndIncrement();
      TaskID tid = tids[eventId & 0x1];
      if (eventId < 2) {
        return new TaskStartedEvent(tid, 0, taskType, "");
      }
      if (eventId < 4) {
        TaskFailedEvent tfe = new TaskFailedEvent(tid, 0, taskType,
            "failed", "FAILED", null, new Counters());
        // round-trip the event through its Avro datum, as the parser
        // would when reading events from a real history file
        tfe.setDatum(tfe.getDatum());
        return tfe;
      }
      if (eventId < 5) {
        JobUnsuccessfulCompletionEvent juce =
            new JobUnsuccessfulCompletionEvent(jid, 100L, 2, 0, "JOB_FAILED",
                Collections.singletonList(
                    "Task failed: " + tids[0].toString()));
        return juce;
      }
      return null;
    }
  });
  JobInfo info = parser.parse(reader);
  assertTrue("Task 0 not implicated",
      info.getErrorInfo().contains(tids[0].toString()));
}
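The test drives the parser from a mocked EventReader. Against a real history file, the same JobInfo comes from the file-based constructor instead; a short sketch, where the history file path is hypothetical:

// Parse a real .jhist file rather than a mocked EventReader
// (the path below is hypothetical).
FileSystem fs = FileSystem.get(new Configuration());
JobHistoryParser parser =
    new JobHistoryParser(fs, new Path("/history/job_1_0001.jhist"));
JobInfo info = parser.parse();
System.out.println("Error info: " + info.getErrorInfo());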