
Example 1 with TaskAttemptUnsuccessfulCompletionEvent

Use of org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent in project hadoop by apache.

From the class TaskAttemptImpl, method notifyTaskAttemptFailed.

@SuppressWarnings("unchecked")
private static void notifyTaskAttemptFailed(TaskAttemptImpl taskAttempt) {
    // The attempt failed before it ever launched (launch time unset), so emit
    // a synthetic start event first to keep the job history consistent.
    if (taskAttempt.getLaunchTime() == 0) {
        sendJHStartEventForAssignedFailTask(taskAttempt);
    }
    // set the finish time and update the job-level failure counters
    taskAttempt.setFinishTime();
    taskAttempt.eventHandler.handle(createJobCounterUpdateEventTAFailed(taskAttempt, false));
    // Record the failure in the job history, then notify the owning task
    // that this attempt has failed.
    TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.FAILED);
    taskAttempt.eventHandler.handle(new JobHistoryEvent(taskAttempt.attemptId.getTaskId().getJobId(), tauce));
    taskAttempt.eventHandler.handle(new TaskTAttemptEvent(taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED));
}
Also used:
JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent)
TaskTAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent)
TaskAttemptUnsuccessfulCompletionEvent (org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent)
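The pattern above is pure event dispatch: the attempt never writes history itself, it hands a TaskAttemptUnsuccessfulCompletionEvent (wrapped in a JobHistoryEvent envelope) to the shared event handler. A minimal sketch of a consumer on the other side of that dispatcher, assuming the standard org.apache.hadoop.yarn.event.EventHandler interface and JobHistoryEvent's getHistoryEvent() accessor; the class name UnsuccessfulAttemptCounter is hypothetical:

import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent;
import org.apache.hadoop.yarn.event.EventHandler;

// Illustrative only: counts unsuccessful attempt completions as the
// wrapped history events pass through the dispatcher.
public class UnsuccessfulAttemptCounter implements EventHandler<JobHistoryEvent> {

    private long unsuccessfulAttempts = 0;

    @Override
    public void handle(JobHistoryEvent event) {
        // JobHistoryEvent is an envelope; inspect the wrapped HistoryEvent.
        if (event.getHistoryEvent() instanceof TaskAttemptUnsuccessfulCompletionEvent) {
            unsuccessfulAttempts++;
        }
    }

    public long getUnsuccessfulAttempts() {
        return unsuccessfulAttempts;
    }
}

Decoupling the attempt state machine from history writing this way keeps notifyTaskAttemptFailed non-blocking; whatever handler is registered for JobHistoryEvent decides how the record is persisted.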

Example 2 with TaskAttemptUnsuccessfulCompletionEvent

Use of org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent in project hadoop by apache.

From the class TaskAttemptImpl, method recover.

@SuppressWarnings("unchecked")
public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo, OutputCommitter committer, boolean recoverOutput) {
    ContainerId containerId = taInfo.getContainerId();
    NodeId containerNodeId = NodeId.fromString(taInfo.getHostname() + ":" + taInfo.getPort());
    String nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":" + taInfo.getHttpPort());
    // Resource/Priority/Tokens are only needed while launching the container
    // on an NM; these attempts have already completed, so they are set to null.
    container = Container.newInstance(containerId, containerNodeId, nodeHttpAddress, null, null, null);
    computeRackAndLocality();
    launchTime = taInfo.getStartTime();
    finishTime = (taInfo.getFinishTime() != -1) ? taInfo.getFinishTime() : clock.getTime();
    shufflePort = taInfo.getShufflePort();
    trackerName = taInfo.getHostname();
    httpPort = taInfo.getHttpPort();
    sendLaunchedEvents();
    // Synthesize the final reported status for the recovered attempt; it is
    // already complete, so progress is 1.0 and the phase is CLEANUP.
    reportedStatus.id = attemptId;
    reportedStatus.progress = 1.0f;
    reportedStatus.counters = taInfo.getCounters();
    reportedStatus.stateString = taInfo.getState();
    reportedStatus.phase = Phase.CLEANUP;
    reportedStatus.mapFinishTime = taInfo.getMapFinishTime();
    reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime();
    reportedStatus.sortFinishTime = taInfo.getSortFinishTime();
    addDiagnosticInfo(taInfo.getError());
    boolean needToClean = false;
    String recoveredState = taInfo.getTaskStatus();
    // Only a SUCCEEDED attempt has committed output worth recovering; if the
    // committer cannot recover it, downgrade the attempt to KILLED and clean up.
    if (recoverOutput && TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
        try {
            committer.recoverTask(tac);
            LOG.info("Recovered output from task attempt " + attemptId);
        } catch (Exception e) {
            LOG.error("Unable to recover task attempt " + attemptId, e);
            LOG.info("Task attempt " + attemptId + " will be recovered as KILLED");
            recoveredState = TaskAttemptState.KILLED.toString();
            needToClean = true;
        }
    }
    TaskAttemptStateInternal attemptState;
    if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        attemptState = TaskAttemptStateInternal.SUCCEEDED;
        reportedStatus.taskState = TaskAttemptState.SUCCEEDED;
        eventHandler.handle(createJobCounterUpdateEventTASucceeded(this));
        logAttemptFinishedEvent(attemptState);
    } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) {
        attemptState = TaskAttemptStateInternal.FAILED;
        reportedStatus.taskState = TaskAttemptState.FAILED;
        eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false));
        TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.FAILED);
        eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
    } else {
        if (!TaskAttemptState.KILLED.toString().equals(recoveredState)) {
            if (String.valueOf(recoveredState).isEmpty()) {
                LOG.info("TaskAttempt " + attemptId + " had not completed, recovering as KILLED");
            } else {
                LOG.warn("TaskAttempt " + attemptId + " found in unexpected state " + recoveredState + ", recovering as KILLED");
            }
            addDiagnosticInfo("Killed during application recovery");
            needToClean = true;
        }
        attemptState = TaskAttemptStateInternal.KILLED;
        reportedStatus.taskState = TaskAttemptState.KILLED;
        eventHandler.handle(createJobCounterUpdateEventTAKilled(this, false));
        TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.KILLED);
        eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
    }
    if (needToClean) {
        // Abort any partially committed output for attempts downgraded to KILLED.
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
        try {
            committer.abortTask(tac);
        } catch (Exception e) {
            LOG.warn("Task cleanup failed for attempt " + attemptId, e);
        }
    }
    return attemptState;
}
Also used:
TaskAttemptStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal)
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)
TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl)
NodeId (org.apache.hadoop.yarn.api.records.NodeId)
JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent)
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)
IOException (java.io.IOException)
UnknownHostException (java.net.UnknownHostException)
YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException)
InvalidStateTransitionException (org.apache.hadoop.yarn.state.InvalidStateTransitionException)
TaskAttemptUnsuccessfulCompletionEvent (org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent)
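Most of recover() is bookkeeping; the decision that matters is how the recovered status string maps to an internal state, with KILLED as the catch-all for empty or unexpected values. A minimal sketch of that branch structure as a pure function, using the MRv2 enums from the imports above; the helper name mapRecoveredState is hypothetical:

import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;
import org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal;

// Illustrative only: mirrors the if/else chain in recover(). Anything that
// is not SUCCEEDED or FAILED, including an empty or unknown status, is
// recovered as KILLED.
static TaskAttemptStateInternal mapRecoveredState(String recoveredState) {
    if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        return TaskAttemptStateInternal.SUCCEEDED;
    }
    if (TaskAttemptState.FAILED.toString().equals(recoveredState)) {
        return TaskAttemptStateInternal.FAILED;
    }
    return TaskAttemptStateInternal.KILLED;
}

Note that a SUCCEEDED attempt can still end up KILLED: if committer.recoverTask() throws, recover() downgrades the state and sets needToClean so the partial output is aborted.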

Aggregations

JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent): 2 uses
TaskAttemptUnsuccessfulCompletionEvent (org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent): 2 uses
IOException (java.io.IOException): 1 use
UnknownHostException (java.net.UnknownHostException): 1 use
TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl): 1 use
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 1 use
TaskAttemptStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal): 1 use
TaskTAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent): 1 use
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 1 use
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 1 use
YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException): 1 use
InvalidStateTransitionException (org.apache.hadoop.yarn.state.InvalidStateTransitionException): 1 use