Search in sources:

Example 1 with TaskAttemptStateInternal

Use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal in project hadoop by apache.

The class TaskAttemptImpl, method recover:

@SuppressWarnings("unchecked")
public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo, OutputCommitter committer, boolean recoverOutput) {
    ContainerId containerId = taInfo.getContainerId();
    NodeId containerNodeId = NodeId.fromString(taInfo.getHostname() + ":" + taInfo.getPort());
    String nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":" + taInfo.getHttpPort());
    // Resource/Priority/Tokens are only needed while launching the container
    // on an NM; these attempts have already completed, so set them to null.
    container = Container.newInstance(containerId, containerNodeId, nodeHttpAddress, null, null, null);
    computeRackAndLocality();
    launchTime = taInfo.getStartTime();
    finishTime = (taInfo.getFinishTime() != -1) ? taInfo.getFinishTime() : clock.getTime();
    shufflePort = taInfo.getShufflePort();
    trackerName = taInfo.getHostname();
    httpPort = taInfo.getHttpPort();
    sendLaunchedEvents();
    reportedStatus.id = attemptId;
    reportedStatus.progress = 1.0f;
    reportedStatus.counters = taInfo.getCounters();
    reportedStatus.stateString = taInfo.getState();
    reportedStatus.phase = Phase.CLEANUP;
    reportedStatus.mapFinishTime = taInfo.getMapFinishTime();
    reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime();
    reportedStatus.sortFinishTime = taInfo.getSortFinishTime();
    addDiagnosticInfo(taInfo.getError());
    boolean needToClean = false;
    String recoveredState = taInfo.getTaskStatus();
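    // If the history says the attempt SUCCEEDED, try to recover its committed
    // output; a failed recovery downgrades the attempt to KILLED below.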
    if (recoverOutput && TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
        try {
            committer.recoverTask(tac);
            LOG.info("Recovered output from task attempt " + attemptId);
        } catch (Exception e) {
            LOG.error("Unable to recover task attempt " + attemptId, e);
            LOG.info("Task attempt " + attemptId + " will be recovered as KILLED");
            recoveredState = TaskAttemptState.KILLED.toString();
            needToClean = true;
        }
    }
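    // Translate the recovered status string into an internal state and emit
    // the matching counter-update and job-history events.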
    TaskAttemptStateInternal attemptState;
    if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        attemptState = TaskAttemptStateInternal.SUCCEEDED;
        reportedStatus.taskState = TaskAttemptState.SUCCEEDED;
        eventHandler.handle(createJobCounterUpdateEventTASucceeded(this));
        logAttemptFinishedEvent(attemptState);
    } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) {
        attemptState = TaskAttemptStateInternal.FAILED;
        reportedStatus.taskState = TaskAttemptState.FAILED;
        eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false));
        TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.FAILED);
        eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
    } else {
        if (!TaskAttemptState.KILLED.toString().equals(recoveredState)) {
            if (String.valueOf(recoveredState).isEmpty()) {
                LOG.info("TaskAttempt" + attemptId + " had not completed, recovering as KILLED");
            } else {
                LOG.warn("TaskAttempt " + attemptId + " found in unexpected state " + recoveredState + ", recovering as KILLED");
            }
            addDiagnosticInfo("Killed during application recovery");
            needToClean = true;
        }
        attemptState = TaskAttemptStateInternal.KILLED;
        reportedStatus.taskState = TaskAttemptState.KILLED;
        eventHandler.handle(createJobCounterUpdateEventTAKilled(this, false));
        TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.KILLED);
        eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
    }
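    // If output recovery failed or the recorded state was unexpected, ask the
    // committer to abort the task so no partial output is left behind.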
    if (needToClean) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
        try {
            committer.abortTask(tac);
        } catch (Exception e) {
            LOG.warn("Task cleanup failed for attempt " + attemptId, e);
        }
    }
    return attemptState;
}
Also used: TaskAttemptStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal), ContainerId (org.apache.hadoop.yarn.api.records.ContainerId), TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl), NodeId (org.apache.hadoop.yarn.api.records.NodeId), JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), UnknownHostException (java.net.UnknownHostException), YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException), InvalidStateTransitionException (org.apache.hadoop.yarn.state.InvalidStateTransitionException), TaskAttemptUnsuccessfulCompletionEvent (org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent)
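
The heart of recover() is mapping the status string recorded in the job history onto an internal attempt state. Below is a minimal, self-contained sketch of that decision logic; the State enum and RecoveryMapping class are illustrative stand-ins for Hadoop's TaskAttemptState/TaskAttemptStateInternal, not the real types.

// Illustrative stand-in for the Hadoop state enums; not the real classes.
enum State {
    SUCCEEDED, FAILED, KILLED
}

final class RecoveryMapping {

    // Mirrors recover()'s mapping: a SUCCEEDED history record stays SUCCEEDED
    // only if output recovery worked; FAILED stays FAILED; anything else,
    // including an empty or unexpected status, is downgraded to KILLED.
    static State mapRecoveredState(String recoveredState, boolean outputRecovered) {
        if (State.SUCCEEDED.toString().equals(recoveredState)) {
            // In the real method, a failed committer.recoverTask() call
            // rewrites recoveredState to KILLED before this comparison.
            return outputRecovered ? State.SUCCEEDED : State.KILLED;
        } else if (State.FAILED.toString().equals(recoveredState)) {
            return State.FAILED;
        }
        return State.KILLED;
    }

    public static void main(String[] args) {
        System.out.println(mapRecoveredState("SUCCEEDED", true));   // SUCCEEDED
        System.out.println(mapRecoveredState("SUCCEEDED", false));  // KILLED
        System.out.println(mapRecoveredState("RUNNING", true));     // KILLED
    }
}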

Example 2 with TaskAttemptStateInternal

Use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal in project hadoop by apache.

The class TaskAttemptImpl, method handle:

@SuppressWarnings("unchecked")
@Override
public void handle(TaskAttemptEvent event) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing " + event.getTaskAttemptID() + " of type " + event.getType());
    }
    writeLock.lock();
    try {
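        // Snapshot the state before the transition so a change can be logged below.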
        final TaskAttemptStateInternal oldState = getInternalState();
        try {
            stateMachine.doTransition(event.getType(), event);
        } catch (InvalidStateTransitionException e) {
            LOG.error("Can't handle this event at current state for " + this.attemptId, e);
            eventHandler.handle(new JobDiagnosticsUpdateEvent(this.attemptId.getTaskId().getJobId(), "Invalid event " + event.getType() + " on TaskAttempt " + this.attemptId));
            eventHandler.handle(new JobEvent(this.attemptId.getTaskId().getJobId(), JobEventType.INTERNAL_ERROR));
        }
        if (oldState != getInternalState()) {
            if (getInternalState() == TaskAttemptStateInternal.FAILED) {
                String nodeId = null == this.container ? "Not-assigned" : this.container.getNodeId().toString();
                LOG.info(attemptId + " transitioned from state " + oldState + " to " + getInternalState() + ", event type is " + event.getType() + " and nodeId=" + nodeId);
            } else {
                LOG.info(attemptId + " TaskAttempt Transitioned from " + oldState + " to " + getInternalState());
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used: TaskAttemptStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal), InvalidStateTransitionException (org.apache.hadoop.yarn.state.InvalidStateTransitionException), JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent), JobDiagnosticsUpdateEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent)
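
handle() follows a common concurrency pattern: take the write lock, snapshot the current state, run the state-machine transition, and log only when the state actually changed. A minimal sketch of that pattern with a trivial hand-rolled state machine follows; the enums and transition table are illustrative assumptions, not the YARN StateMachine API.

import java.util.concurrent.locks.ReentrantReadWriteLock;

final class GuardedStateMachine {

    enum State { NEW, RUNNING, DONE }

    enum EventType { START, FINISH }

    private State state = State.NEW;
    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

    State getState() {
        lock.readLock().lock();
        try {
            return state;
        } finally {
            lock.readLock().unlock();
        }
    }

    void handle(EventType event) {
        lock.writeLock().lock();
        try {
            // Snapshot before the transition, exactly as handle() does above.
            State oldState = state;
            try {
                state = transition(state, event);
            } catch (IllegalStateException e) {
                // The real code reports this to the job as INTERNAL_ERROR
                // rather than letting the exception escape the dispatcher.
                System.err.println("Can't handle " + event + " in state " + state);
            }
            if (oldState != state) {
                System.out.println("Transitioned from " + oldState + " to "
                        + state + " on " + event);
            }
        } finally {
            lock.writeLock().unlock();
        }
    }

    private static State transition(State s, EventType e) {
        if (s == State.NEW && e == EventType.START) return State.RUNNING;
        if (s == State.RUNNING && e == EventType.FINISH) return State.DONE;
        throw new IllegalStateException(e + " is not valid in state " + s);
    }
}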

Example 3 with TaskAttemptStateInternal

Use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal in project hadoop by apache.

The class MRApp, method waitForInternalState:

public void waitForInternalState(TaskAttemptImpl attempt, TaskAttemptStateInternal finalState) throws Exception {
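    // Poll every 500 ms for up to 20 iterations (~10 seconds); despite its
    // name, timeoutSecs counts polls, not seconds.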
    int timeoutSecs = 0;
    TaskAttemptReport report = attempt.getReport();
    TaskAttemptStateInternal iState = attempt.getInternalState();
    while (!finalState.equals(iState) && timeoutSecs++ < 20) {
        System.out.println("TaskAttempt Internal State is : " + iState + " Waiting for Internal state : " + finalState + "   progress : " + report.getProgress());
        Thread.sleep(500);
        report = attempt.getReport();
        iState = attempt.getInternalState();
    }
    System.out.println("TaskAttempt Internal State is : " + iState);
    Assert.assertEquals("TaskAttempt Internal state is not correct (timed out)", finalState, iState);
}
Also used: TaskAttemptStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal), TaskAttemptReport (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport)
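
The same poll-sleep-assert pattern generalizes to any asynchronously updated value. A small, self-contained variant using a Supplier is sketched below; WaitUtil is an illustrative helper under that assumption, not part of MRApp.

import java.util.function.Supplier;

final class WaitUtil {

    // Polls supplier until it returns expected or roughly maxWaitMillis
    // elapses. Returns true on success, false on timeout; the caller
    // decides how to fail (e.g. with an assertion, as MRApp does).
    static <T> boolean waitFor(Supplier<T> supplier, T expected,
                               long pollMillis, long maxWaitMillis)
            throws InterruptedException {
        long deadline = System.currentTimeMillis() + maxWaitMillis;
        while (System.currentTimeMillis() < deadline) {
            if (expected.equals(supplier.get())) {
                return true;
            }
            Thread.sleep(pollMillis);
        }
        return expected.equals(supplier.get());
    }
}

With this helper, MRApp's loop is equivalent to waitFor(attempt::getInternalState, finalState, 500, 10_000) followed by the assertion.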

Aggregations

TaskAttemptStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal): 3 uses
InvalidStateTransitionException (org.apache.hadoop.yarn.state.InvalidStateTransitionException): 2 uses
IOException (java.io.IOException): 1 use
UnknownHostException (java.net.UnknownHostException): 1 use
TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl): 1 use
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 1 use
JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent): 1 use
TaskAttemptUnsuccessfulCompletionEvent (org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent): 1 use
TaskAttemptReport (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport): 1 use
JobDiagnosticsUpdateEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent): 1 use
JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent): 1 use
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 1 use
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 1 use
YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException): 1 use