Search in sources :

Example 11 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TaskAttemptImpl method recover.

/**
 * Rebuilds this task attempt from a previously recorded attempt and replays
 * the events that correspond to its recorded outcome.
 *
 * <p>The container, launch/finish times, ports and reported status are
 * populated from {@code taInfo}. The attempt is then recovered as SUCCEEDED,
 * FAILED or KILLED depending on {@code taInfo.getTaskStatus()}; any status
 * other than SUCCEEDED or FAILED (including a missing one) is recovered as
 * KILLED.
 *
 * @param taInfo the recorded attempt information to recover from
 * @param committer the committer used to recover the output of a succeeded
 *     attempt and to abort the attempt when cleanup is needed
 * @param recoverOutput whether the output of a succeeded attempt should be
 *     recovered through {@code committer.recoverTask}
 * @return the internal state the attempt was recovered into
 */
@SuppressWarnings("unchecked")
public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo, OutputCommitter committer, boolean recoverOutput) {
    ContainerId containerId = taInfo.getContainerId();
    NodeId containerNodeId = NodeId.fromString(taInfo.getHostname() + ":" + taInfo.getPort());
    String nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":" + taInfo.getHttpPort());
    // Resource/Priority/Tokens are only needed while launching the container on
    // an NM, these are already completed tasks, so setting them to null
    container = Container.newInstance(containerId, containerNodeId, nodeHttpAddress, null, null, null);
    computeRackAndLocality();
    launchTime = taInfo.getStartTime();
    // A recorded finish time of -1 is replaced with the current clock time.
    finishTime = (taInfo.getFinishTime() != -1) ? taInfo.getFinishTime() : clock.getTime();
    shufflePort = taInfo.getShufflePort();
    trackerName = taInfo.getHostname();
    httpPort = taInfo.getHttpPort();
    sendLaunchedEvents();
    reportedStatus.id = attemptId;
    reportedStatus.progress = 1.0f;
    reportedStatus.counters = taInfo.getCounters();
    reportedStatus.stateString = taInfo.getState();
    reportedStatus.phase = Phase.CLEANUP;
    reportedStatus.mapFinishTime = taInfo.getMapFinishTime();
    reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime();
    reportedStatus.sortFinishTime = taInfo.getSortFinishTime();
    addDiagnosticInfo(taInfo.getError());
    boolean needToClean = false;
    String recoveredState = taInfo.getTaskStatus();
    if (recoverOutput && TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
        try {
            committer.recoverTask(tac);
            LOG.info("Recovered output from task attempt " + attemptId);
        } catch (Exception e) {
            // Output recovery is best effort: on failure the attempt is
            // downgraded to KILLED and its output is aborted below.
            LOG.error("Unable to recover task attempt " + attemptId, e);
            LOG.info("Task attempt " + attemptId + " will be recovered as KILLED");
            recoveredState = TaskAttemptState.KILLED.toString();
            needToClean = true;
        }
    }
    TaskAttemptStateInternal attemptState;
    if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        attemptState = TaskAttemptStateInternal.SUCCEEDED;
        reportedStatus.taskState = TaskAttemptState.SUCCEEDED;
        eventHandler.handle(createJobCounterUpdateEventTASucceeded(this));
        logAttemptFinishedEvent(attemptState);
    } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) {
        attemptState = TaskAttemptStateInternal.FAILED;
        reportedStatus.taskState = TaskAttemptState.FAILED;
        eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false));
        TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.FAILED);
        eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
    } else {
        if (!TaskAttemptState.KILLED.toString().equals(recoveredState)) {
            // Check null explicitly: String.valueOf(null) yields the text
            // "null", which is never empty, so a missing status would
            // otherwise be reported as an unexpected state.
            if (recoveredState == null || recoveredState.isEmpty()) {
                LOG.info("TaskAttempt " + attemptId + " had not completed, recovering as KILLED");
            } else {
                LOG.warn("TaskAttempt " + attemptId + " found in unexpected state " + recoveredState + ", recovering as KILLED");
            }
            addDiagnosticInfo("Killed during application recovery");
            needToClean = true;
        }
        attemptState = TaskAttemptStateInternal.KILLED;
        reportedStatus.taskState = TaskAttemptState.KILLED;
        eventHandler.handle(createJobCounterUpdateEventTAKilled(this, false));
        TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.KILLED);
        eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
    }
    if (needToClean) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
        try {
            committer.abortTask(tac);
        } catch (Exception e) {
            // Cleanup failure is logged only; recovery still completes.
            LOG.warn("Task cleanup failed for attempt " + attemptId, e);
        }
    }
    return attemptState;
}
Also used : TaskAttemptStateInternal(org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TaskAttemptContextImpl(org.apache.hadoop.mapred.TaskAttemptContextImpl) NodeId(org.apache.hadoop.yarn.api.records.NodeId) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) InvalidStateTransitionException(org.apache.hadoop.yarn.state.InvalidStateTransitionException) TaskAttemptUnsuccessfulCompletionEvent(org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent)

Example 12 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TaskImpl method addAndScheduleAttempt.

/**
 * Creates a new attempt for the given avataar, records it as in progress and
 * dispatches its scheduling event.
 *
 * <p>Per the original note on this method, it is always called while holding
 * the write lock.
 *
 * @param avataar the avataar passed through to {@code addAttempt}
 * @param reschedule when true, a TA_RESCHEDULE event is sent even if no
 *     attempt has failed so far
 */
private void addAndScheduleAttempt(Avataar avataar, boolean reschedule) {
    TaskAttempt newAttempt = addAttempt(avataar);
    inProgressAttempts.add(newAttempt.getID());
    // Any earlier failure, or an explicit request, turns this into a reschedule.
    final TaskAttemptEventType eventType = (failedAttempts.size() > 0 || reschedule)
        ? TaskAttemptEventType.TA_RESCHEDULE
        : TaskAttemptEventType.TA_SCHEDULE;
    eventHandler.handle(new TaskAttemptEvent(newAttempt.getID(), eventType));
}
Also used : TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)

Example 13 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TaskImpl method getLaunchTime.

/**
 * Returns the smallest non-zero launch time among all attempts, or
 * {@code scheduledTime} when no attempt reports a non-zero launch time.
 *
 * <p>Per the original note on this method, it is always called while holding
 * the read or write lock.
 */
private long getLaunchTime() {
    long earliest = 0;
    boolean found = false;
    for (TaskAttempt attempt : attempts.values()) {
        final long launched = attempt.getLaunchTime();
        if (launched == 0) {
            // A zero launch time does not take part in the minimum.
            continue;
        }
        if (!found || earliest > launched) {
            found = true;
            earliest = launched;
        }
    }
    return found ? earliest : this.scheduledTime;
}
Also used : TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)

Example 14 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class JobImpl method actOnUnusableNode.

/**
 * Kills the MAP task attempts recorded in {@code nodesToSucceededTaskAttempts}
 * for the given node, provided the job is RUNNING and not all reducers have
 * completed.
 *
 * @param nodeId the node that became unusable
 * @param nodeState the reported state of the node; not consulted in this method
 */
private void actOnUnusableNode(NodeId nodeId, NodeState nodeState) {
    // Only act while the job is running and reducers remain incomplete.
    if (getInternalState() != JobStateInternal.RUNNING || allReducersComplete()) {
        return;
    }
    List<TaskAttemptId> attemptsOnNode = nodesToSucceededTaskAttempts.get(nodeId);
    if (attemptsOnNode == null) {
        return;
    }
    String mesg = "TaskAttempt killed because it ran on unusable node " + nodeId;
    for (TaskAttemptId id : attemptsOnNode) {
        // reschedule only map tasks because their outputs maybe unusable
        if (TaskType.MAP != id.getTaskId().getTaskType()) {
            continue;
        }
        LOG.info(mesg + ". AttemptId:" + id);
        // Kill the attempt and indicate that next map attempt should be
        // rescheduled (i.e. considered as a fast fail map).
        eventHandler.handle(new TaskAttemptKillEvent(id, mesg, true));
    }
    // currently running task attempts on unusable nodes are handled in
    // RMContainerAllocator
}
Also used : TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptKillEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent)

Example 15 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

the class TaskAttemptImpl method createJobCounterUpdateEventTAFailed.

/**
 * Builds the job counter update event for a failed task attempt.
 *
 * <p>Increments NUM_FAILED_MAPS for a MAP attempt and NUM_FAILED_REDUCES for
 * any other task type.
 *
 * @param taskAttempt the failed attempt
 * @param taskAlreadyCompleted when false, {@code updateMillisCounters} is
 *     also applied to the event
 * @return the populated counter update event
 */
private static JobCounterUpdateEvent createJobCounterUpdateEventTAFailed(TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
    final boolean isMap = taskAttempt.getID().getTaskId().getTaskType() == TaskType.MAP;
    JobCounterUpdateEvent event = new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());
    JobCounter failureCounter = isMap ? JobCounter.NUM_FAILED_MAPS : JobCounter.NUM_FAILED_REDUCES;
    event.addCounterUpdate(failureCounter, 1);
    if (taskAlreadyCompleted) {
        return event;
    }
    updateMillisCounters(event, taskAttempt);
    return event;
}
Also used : TaskType(org.apache.hadoop.mapreduce.v2.api.records.TaskType) JobCounterUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent)

Aggregations

TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)102 Task (org.apache.hadoop.mapreduce.v2.app.job.Task)86 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)76 Test (org.junit.Test)63 TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId)60 Configuration (org.apache.hadoop.conf.Configuration)45 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)32 TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent)32 TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId)29 ClientResponse (com.sun.jersey.api.client.ClientResponse)18 WebResource (com.sun.jersey.api.client.WebResource)18 JSONObject (org.codehaus.jettison.json.JSONObject)12 TaskAttemptReport (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport)9 IOException (java.io.IOException)8 Path (javax.ws.rs.Path)8 Produces (javax.ws.rs.Produces)8 StringReader (java.io.StringReader)7 HashMap (java.util.HashMap)7 GET (javax.ws.rs.GET)7 DocumentBuilder (javax.xml.parsers.DocumentBuilder)7