Search in sources :

Example 56 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

In class TestTaskImpl, method testFailedTransitionWithHangingSpeculativeMap.

/**
 * Exercises a MAP task (max attempts overridden to 4) whose original attempt
 * fails while a speculative attempt has been added but never launched, and
 * then has that speculative attempt fail as well. The task is expected to
 * stay RUNNING throughout, gaining exactly one rescheduled attempt.
 */
@Test
public void testFailedTransitionWithHangingSpeculativeMap() {
    mockTask = new MockTaskImpl(jobId, partition, new PartialAttemptEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, startCount, metrics, appContext, TaskType.MAP) {

        @Override
        protected int getMaxAttempts() {
            // Allow up to four attempts so two failures do not fail the task.
            return 4;
        }
    };

    // Attempt #1: create the task, then schedule and launch its first attempt.
    TaskId newTaskId = getNewTaskID();
    scheduleTaskAttempt(newTaskId);
    launchTaskAttempt(getLastAttempt().getAttemptId());

    // Attempt #2: request a speculative attempt but leave it un-launched.
    TaskTAttemptEvent addSpeculative = new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT);
    mockTask.handle(addSpeculative);

    // Fail attempt #1; with a limit of 4 the task must keep running.
    MockTaskAttemptImpl firstAttempt = taskAttempts.get(0);
    firstAttempt.setState(TaskAttemptState.FAILED);
    TaskTAttemptEvent firstFailed = new TaskTAttemptEvent(firstAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED);
    mockTask.handle(firstFailed);
    assertEquals(TaskState.RUNNING, mockTask.getState());

    // Because speculative attempt #2 is hanging, a third attempt is expected.
    assertEquals(3, taskAttempts.size());
    // Attempt #2 (speculative) must not be flagged as rescheduled ...
    assertEquals(false, taskAttempts.get(1).getRescheduled());
    // ... whereas attempt #3 must be.
    assertEquals(true, taskAttempts.get(2).getRescheduled());

    // Launch attempt #3 so that it is the running attempt.
    launchTaskAttempt(getLastAttempt().getAttemptId());

    // Fail speculative attempt #2; the limit of 4 attempts has still not been
    // reached, so the task must remain RUNNING.
    MockTaskAttemptImpl speculativeAttempt = taskAttempts.get(1);
    speculativeAttempt.setState(TaskAttemptState.FAILED);
    TaskTAttemptEvent speculativeFailed = new TaskTAttemptEvent(speculativeAttempt.getAttemptId(), TaskEventType.T_ATTEMPT_FAILED);
    mockTask.handle(speculativeFailed);
    assertEquals(TaskState.RUNNING, mockTask.getState());

    // Attempt #3 is running, so no further attempt should have been added.
    assertEquals(3, taskAttempts.size());
}
Also used : TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) Test(org.junit.Test)

Example 57 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

In class TaskImpl, method getCounters.

/**
 * Returns the counters of the attempt chosen by {@code selectBestAttempt()},
 * or {@code TaskAttemptImpl.EMPTY_COUNTERS} when no attempt is selected.
 * The read lock is held for the duration of the lookup.
 */
@Override
public Counters getCounters() {
    readLock.lock();
    try {
        final TaskAttempt best = selectBestAttempt();
        return (best == null) ? TaskAttemptImpl.EMPTY_COUNTERS : best.getCounters();
    } finally {
        readLock.unlock();
    }
}
Also used : Counters(org.apache.hadoop.mapreduce.Counters) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)

Example 58 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

In class TaskAttemptImpl, method notifyTaskAttemptFailed.

/**
 * Publishes the events that accompany a failed task attempt: an optional
 * job-history start event (when the attempt has no launch time), the
 * failed-attempt job counter update, the unsuccessful-completion history
 * event, and finally the {@code T_ATTEMPT_FAILED} task event.
 *
 * @param taskAttempt the attempt that failed
 */
@SuppressWarnings("unchecked")
private static void notifyTaskAttemptFailed(TaskAttemptImpl taskAttempt) {
    final boolean neverLaunched = taskAttempt.getLaunchTime() == 0;
    if (neverLaunched) {
        sendJHStartEventForAssignedFailTask(taskAttempt);
    }

    // Stamp the finish time before any completion event is built.
    taskAttempt.setFinishTime();

    taskAttempt.eventHandler.handle(createJobCounterUpdateEventTAFailed(taskAttempt, false));

    TaskAttemptUnsuccessfulCompletionEvent failureRecord = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.FAILED);
    JobHistoryEvent historyEvent = new JobHistoryEvent(taskAttempt.attemptId.getTaskId().getJobId(), failureRecord);
    taskAttempt.eventHandler.handle(historyEvent);

    TaskTAttemptEvent attemptFailed = new TaskTAttemptEvent(taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED);
    taskAttempt.eventHandler.handle(attemptFailed);
}
Also used : JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) TaskAttemptUnsuccessfulCompletionEvent(org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent)

Example 59 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

In class TaskAttemptImpl, method recover.

/**
 * Restores this attempt from previously recorded attempt information and
 * re-emits the events matching the recovered outcome.
 *
 * <p>The container, launch/finish times, ports and reported status are
 * rebuilt from {@code taInfo}. The recorded task status then decides the
 * outcome: SUCCEEDED and FAILED are recovered as such; every other value
 * (including an empty status) is recovered as KILLED. A SUCCEEDED attempt
 * whose output cannot be recovered by the committer is downgraded to KILLED.
 *
 * @param taInfo        recorded information about the attempt
 * @param committer     used to recover the output of a succeeded attempt and
 *                      to abort the attempt when cleanup is required
 * @param recoverOutput whether {@code committer.recoverTask} should be
 *                      invoked for an attempt recorded as SUCCEEDED
 * @return the internal state the attempt was recovered into (SUCCEEDED,
 *         FAILED or KILLED)
 */
@SuppressWarnings("unchecked")
public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo, OutputCommitter committer, boolean recoverOutput) {
    ContainerId containerId = taInfo.getContainerId();
    NodeId containerNodeId = NodeId.fromString(taInfo.getHostname() + ":" + taInfo.getPort());
    String nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":" + taInfo.getHttpPort());
    // Resource/Priority/Tokens are only needed while launching the container on
    // an NM, these are already completed tasks, so setting them to null
    container = Container.newInstance(containerId, containerNodeId, nodeHttpAddress, null, null, null);
    computeRackAndLocality();
    launchTime = taInfo.getStartTime();
    // A recorded finish time of -1 is replaced by the current clock time.
    finishTime = (taInfo.getFinishTime() != -1) ? taInfo.getFinishTime() : clock.getTime();
    shufflePort = taInfo.getShufflePort();
    trackerName = taInfo.getHostname();
    httpPort = taInfo.getHttpPort();
    sendLaunchedEvents();
    // Rebuild the reported status from the recorded values.
    reportedStatus.id = attemptId;
    reportedStatus.progress = 1.0f;
    reportedStatus.counters = taInfo.getCounters();
    reportedStatus.stateString = taInfo.getState();
    reportedStatus.phase = Phase.CLEANUP;
    reportedStatus.mapFinishTime = taInfo.getMapFinishTime();
    reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime();
    reportedStatus.sortFinishTime = taInfo.getSortFinishTime();
    addDiagnosticInfo(taInfo.getError());
    boolean needToClean = false;
    String recoveredState = taInfo.getTaskStatus();
    if (recoverOutput && TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
        try {
            committer.recoverTask(tac);
            LOG.info("Recovered output from task attempt " + attemptId);
        } catch (Exception e) {
            // Output recovery is best-effort: on failure the attempt is
            // treated as KILLED and its output is aborted further below.
            LOG.error("Unable to recover task attempt " + attemptId, e);
            LOG.info("Task attempt " + attemptId + " will be recovered as KILLED");
            recoveredState = TaskAttemptState.KILLED.toString();
            needToClean = true;
        }
    }
    TaskAttemptStateInternal attemptState;
    if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
        attemptState = TaskAttemptStateInternal.SUCCEEDED;
        reportedStatus.taskState = TaskAttemptState.SUCCEEDED;
        eventHandler.handle(createJobCounterUpdateEventTASucceeded(this));
        logAttemptFinishedEvent(attemptState);
    } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) {
        attemptState = TaskAttemptStateInternal.FAILED;
        reportedStatus.taskState = TaskAttemptState.FAILED;
        eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false));
        TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.FAILED);
        eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
    } else {
        if (!TaskAttemptState.KILLED.toString().equals(recoveredState)) {
            // String.valueOf keeps this null-safe; a null status becomes the
            // text "null" and therefore takes the "unexpected state" branch.
            if (String.valueOf(recoveredState).isEmpty()) {
                LOG.info("TaskAttempt " + attemptId + " had not completed, recovering as KILLED");
            } else {
                LOG.warn("TaskAttempt " + attemptId + " found in unexpected state " + recoveredState + ", recovering as KILLED");
            }
            addDiagnosticInfo("Killed during application recovery");
            needToClean = true;
        }
        attemptState = TaskAttemptStateInternal.KILLED;
        reportedStatus.taskState = TaskAttemptState.KILLED;
        eventHandler.handle(createJobCounterUpdateEventTAKilled(this, false));
        TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.KILLED);
        eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
    }
    if (needToClean) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
        try {
            committer.abortTask(tac);
        } catch (Exception e) {
            // Cleanup failure is logged but does not change the recovered state.
            LOG.warn("Task cleanup failed for attempt " + attemptId, e);
        }
    }
    return attemptState;
}
Also used : TaskAttemptStateInternal(org.apache.hadoop.mapreduce.v2.app.job.TaskAttemptStateInternal) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TaskAttemptContextImpl(org.apache.hadoop.mapred.TaskAttemptContextImpl) NodeId(org.apache.hadoop.yarn.api.records.NodeId) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) InvalidStateTransitionException(org.apache.hadoop.yarn.state.InvalidStateTransitionException) TaskAttemptUnsuccessfulCompletionEvent(org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent)

Example 60 with TaskAttempt

use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.

In class TaskImpl, method addAndScheduleAttempt.

// Callers always hold the write lock when invoking this method.
// Creates a new attempt via addAttempt, records it as in progress, and sends
// it either a TA_RESCHEDULE or a TA_SCHEDULE event.
private void addAndScheduleAttempt(Avataar avataar, boolean reschedule) {
    final TaskAttempt newAttempt = addAttempt(avataar);
    inProgressAttempts.add(newAttempt.getID());

    // Reschedule when an earlier attempt has failed or the caller asks for
    // it; otherwise this is a plain first-time schedule.
    final TaskAttemptEventType schedulingType = (failedAttempts.size() > 0 || reschedule)
        ? TaskAttemptEventType.TA_RESCHEDULE
        : TaskAttemptEventType.TA_SCHEDULE;
    eventHandler.handle(new TaskAttemptEvent(newAttempt.getID(), schedulingType));
}
Also used : TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)

Aggregations

TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 102; Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 86; Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 76; Test (org.junit.Test): 63; TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 60; Configuration (org.apache.hadoop.conf.Configuration): 45; JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 32; TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent): 32; TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 29; ClientResponse (com.sun.jersey.api.client.ClientResponse): 18; WebResource (com.sun.jersey.api.client.WebResource): 18; JSONObject (org.codehaus.jettison.json.JSONObject): 12; TaskAttemptReport (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport): 9; IOException (java.io.IOException): 8; Path (javax.ws.rs.Path): 8; Produces (javax.ws.rs.Produces): 8; StringReader (java.io.StringReader): 7; HashMap (java.util.HashMap): 7; GET (javax.ws.rs.GET): 7; DocumentBuilder (javax.xml.parsers.DocumentBuilder): 7