Example 66 with TaskId

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in the Apache Hadoop project.

From the class TaskImpl, method handle().

@Override
public void handle(TaskEvent event) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing " + event.getTaskID() + " of type " + event.getType());
    }
    try {
        writeLock.lock();
        TaskStateInternal oldState = getInternalState();
        try {
            stateMachine.doTransition(event.getType(), event);
        } catch (InvalidStateTransitionException e) {
            LOG.error("Can't handle this event at current state for " + this.taskId, e);
            internalError(event.getType());
        }
        if (oldState != getInternalState()) {
            LOG.info(taskId + " Task Transitioned from " + oldState + " to " + getInternalState());
        }
    } finally {
        writeLock.unlock();
    }
}
Also used: TaskStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal), InvalidStateTransitionException (org.apache.hadoop.yarn.state.InvalidStateTransitionException)
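
For context, a minimal, hypothetical driver for this handler. Building a TaskId through the YARN records factory (org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider) and dispatching a TaskEvent with TaskEventType.T_KILL are real mapreduce.v2 APIs, but the task instance and jobId here are assumed to exist; this is a sketch, not project code:

// Build a TaskId with the records factory (jobId is an assumed,
// pre-existing JobId for the running job).
TaskId taskId = RecordFactoryProvider.getRecordFactory(null)
    .newRecordInstance(TaskId.class);
taskId.setJobId(jobId);
taskId.setTaskType(TaskType.MAP);
taskId.setId(0);
// handle() takes the write lock, runs the state-machine transition, and
// logs the state change; an invalid transition ends in internalError().
task.handle(new TaskEvent(taskId, TaskEventType.T_KILL));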

Example 67 with TaskId

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in the Apache Hadoop project.

From the class TaskImpl, method recover().

/**
   * Recover a completed task from a previous application attempt
   * @param taskInfo recovered info about the task
   * @param committer the output committer used when recovering task output
   * @param recoverTaskOutput whether to recover task outputs
   * @return state of the task after recovery
   */
private TaskStateInternal recover(TaskInfo taskInfo, OutputCommitter committer, boolean recoverTaskOutput) {
    LOG.info("Recovering task " + taskId + " from prior app attempt, status was " + taskInfo.getTaskStatus());
    scheduledTime = taskInfo.getStartTime();
    sendTaskStartedEvent();
    Collection<TaskAttemptInfo> attemptInfos = taskInfo.getAllTaskAttempts().values();
    if (attemptInfos.size() > 0) {
        metrics.launchedTask(this);
    }
    // recover the attempts for this task in the order they finished
    // so task attempt completion events are ordered properly
    int savedNextAttemptNumber = nextAttemptNumber;
    ArrayList<TaskAttemptInfo> taInfos = new ArrayList<TaskAttemptInfo>(taskInfo.getAllTaskAttempts().values());
    Collections.sort(taInfos, TA_INFO_COMPARATOR);
    for (TaskAttemptInfo taInfo : taInfos) {
        nextAttemptNumber = taInfo.getAttemptId().getId();
        TaskAttemptImpl attempt = addAttempt(Avataar.VIRGIN);
        // handle the recovery inline so attempts complete before task does
        attempt.handle(new TaskAttemptRecoverEvent(attempt.getID(), taInfo, committer, recoverTaskOutput));
        finishedAttempts.add(attempt.getID());
        TaskAttemptCompletionEventStatus taces = null;
        TaskAttemptState attemptState = attempt.getState();
        switch(attemptState) {
            case FAILED:
                taces = TaskAttemptCompletionEventStatus.FAILED;
                break;
            case KILLED:
                taces = TaskAttemptCompletionEventStatus.KILLED;
                break;
            case SUCCEEDED:
                taces = TaskAttemptCompletionEventStatus.SUCCEEDED;
                break;
            default:
                throw new IllegalStateException("Unexpected attempt state during recovery: " + attemptState);
        }
        if (attemptState == TaskAttemptState.FAILED) {
            failedAttempts.add(attempt.getID());
            if (failedAttempts.size() >= maxAttempts) {
                taces = TaskAttemptCompletionEventStatus.TIPFAILED;
            }
        }
        // TODO: this shouldn't be necessary after MAPREDUCE-4330
        if (successfulAttempt == null) {
            handleTaskAttemptCompletion(attempt.getID(), taces);
            if (attemptState == TaskAttemptState.SUCCEEDED) {
                successfulAttempt = attempt.getID();
            }
        }
    }
    nextAttemptNumber = savedNextAttemptNumber;
    TaskStateInternal taskState = TaskStateInternal.valueOf(taskInfo.getTaskStatus());
    switch(taskState) {
        case SUCCEEDED:
            if (successfulAttempt != null) {
                sendTaskSucceededEvents();
            } else {
                LOG.info("Missing successful attempt for task " + taskId + ", recovering as RUNNING");
                // there must have been a fetch failure and the retry wasn't complete
                taskState = TaskStateInternal.RUNNING;
                metrics.runningTask(this);
                addAndScheduleAttempt(Avataar.VIRGIN);
            }
            break;
        case FAILED:
        case KILLED:
            {
                if (taskState == TaskStateInternal.KILLED && attemptInfos.size() == 0) {
                    metrics.endWaitingTask(this);
                }
                TaskFailedEvent tfe = new TaskFailedEvent(taskInfo.getTaskId(), taskInfo.getFinishTime(), taskInfo.getTaskType(), taskInfo.getError(), taskInfo.getTaskStatus(), taskInfo.getFailedDueToAttemptId(), taskInfo.getCounters());
                eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe));
                eventHandler.handle(new JobTaskEvent(taskId, getExternalState(taskState)));
                break;
            }
        default:
            throw new java.lang.AssertionError("Unexpected recovered task state: " + taskState);
    }
    return taskState;
}
Also used: TaskStateInternal (org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal), ArrayList (java.util.ArrayList), TaskAttemptCompletionEventStatus (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus), JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent), TaskAttemptState (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState), JobTaskEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent), TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo), TaskFailedEvent (org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent), TaskAttemptRecoverEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent)
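
The TA_INFO_COMPARATOR used for the sort is defined elsewhere in TaskImpl and is not part of this snippet. The ordering matters because completion events must be replayed in the order the attempts finished; a sketch of a comparator with that behavior (using java.util.Comparator and the TaskAttemptInfo from JobHistoryParser):

// Orders recovered TaskAttemptInfo records by recorded finish time so
// that completion events are replayed in finish order during recovery.
private static final Comparator<TaskAttemptInfo> TA_INFO_COMPARATOR =
    new Comparator<TaskAttemptInfo>() {
        @Override
        public int compare(TaskAttemptInfo a, TaskAttemptInfo b) {
            long diff = a.getFinishTime() - b.getFinishTime();
            return diff == 0 ? 0 : (diff < 0 ? -1 : 1);
        }
    };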

Example 68 with TaskId

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in the Apache Hadoop project.

From the class DefaultSpeculator, method maybeScheduleASpeculation().

private int maybeScheduleASpeculation(TaskType type) {
    int successes = 0;
    long now = clock.getTime();
    ConcurrentMap<JobId, AtomicInteger> containerNeeds = type == TaskType.MAP ? mapContainerNeeds : reduceContainerNeeds;
    for (ConcurrentMap.Entry<JobId, AtomicInteger> jobEntry : containerNeeds.entrySet()) {
        // A stale counter here is benign: if the count has not yet dropped
        // to zero we just skip this job until the next scan, and at worst a
        // speculative container is launched slightly prematurely.
        if (jobEntry.getValue().get() > 0) {
            continue;
        }
        int numberSpeculationsAlready = 0;
        int numberRunningTasks = 0;
        // loop through the tasks of this type
        Job job = context.getJob(jobEntry.getKey());
        Map<TaskId, Task> tasks = job.getTasks(type);
        int numberAllowedSpeculativeTasks = (int) Math.max(minimumAllowedSpeculativeTasks, proportionTotalTasksSpeculatable * tasks.size());
        TaskId bestTaskID = null;
        long bestSpeculationValue = -1L;
        // TODO track the tasks that are potentially worth looking at
        for (Map.Entry<TaskId, Task> taskEntry : tasks.entrySet()) {
            long mySpeculationValue = speculationValue(taskEntry.getKey(), now);
            if (mySpeculationValue == ALREADY_SPECULATING) {
                ++numberSpeculationsAlready;
            }
            if (mySpeculationValue != NOT_RUNNING) {
                ++numberRunningTasks;
            }
            if (mySpeculationValue > bestSpeculationValue) {
                bestTaskID = taskEntry.getKey();
                bestSpeculationValue = mySpeculationValue;
            }
        }
        numberAllowedSpeculativeTasks = (int) Math.max(numberAllowedSpeculativeTasks, proportionRunningTasksSpeculatable * numberRunningTasks);
        // If we found a speculation target, fire it off
        if (bestTaskID != null && numberAllowedSpeculativeTasks > numberSpeculationsAlready) {
            addSpeculativeAttempt(bestTaskID);
            ++successes;
        }
    }
    return successes;
}
Also used: Task (org.apache.hadoop.mapreduce.v2.app.job.Task), TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId), ConcurrentMap (java.util.concurrent.ConcurrentMap), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Job (org.apache.hadoop.mapreduce.v2.app.job.Job), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)
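
As a usage note, one scan of this method tries to issue at most one speculation per job for the given task type. The per-type entry points in the same class are thin wrappers around it; roughly (a sketch under that assumption):

// The background speculation loop invokes these periodically; each run
// attempts at most one map and one reduce speculation per job.
private int maybeScheduleAMapSpeculation() {
    return maybeScheduleASpeculation(TaskType.MAP);
}

private int maybeScheduleAReduceSpeculation() {
    return maybeScheduleASpeculation(TaskType.REDUCE);
}

private int computeSpeculations() {
    return maybeScheduleAMapSpeculation() + maybeScheduleAReduceSpeculation();
}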

Example 69 with TaskId

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in the Apache Hadoop project.

From the class DefaultSpeculator, method containerNeed().

/*   *************************************************************    */
// This section contains the code that gets run for a SpeculatorEvent
private AtomicInteger containerNeed(TaskId taskID) {
    JobId jobID = taskID.getJobId();
    TaskType taskType = taskID.getTaskType();
    ConcurrentMap<JobId, AtomicInteger> relevantMap = taskType == TaskType.MAP ? mapContainerNeeds : reduceContainerNeeds;
    AtomicInteger result = relevantMap.get(jobID);
    if (result == null) {
        relevantMap.putIfAbsent(jobID, new AtomicInteger(0));
        result = relevantMap.get(jobID);
    }
    return result;
}
Also used: AtomicInteger (java.util.concurrent.atomic.AtomicInteger), TaskType (org.apache.hadoop.mapreduce.v2.api.records.TaskType), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)
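
The get / putIfAbsent / get sequence above is the pre-Java-8 idiom for atomically initializing a map entry. On Java 8+ the same method could be written more compactly with ConcurrentMap.computeIfAbsent; a sketch, not the project's code:

// Equivalent behavior using computeIfAbsent: the mapping function is
// evaluated at most once per missing key, atomically.
private AtomicInteger containerNeed(TaskId taskID) {
    ConcurrentMap<JobId, AtomicInteger> relevantMap =
        taskID.getTaskType() == TaskType.MAP ? mapContainerNeeds
                                             : reduceContainerNeeds;
    return relevantMap.computeIfAbsent(taskID.getJobId(),
        id -> new AtomicInteger(0));
}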

Example 70 with TaskId

Use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in the Apache Hadoop project.

From the class DefaultSpeculator, method statusUpdate().

/**
   * Absorbs one TaskAttemptStatus
   *
   * @param reportedStatus the status report that we got from a task attempt
   *        that we want to fold into the speculation data for this job
   * @param timestamp the time this status corresponds to.  This matters
   *        because statuses contain progress.
   */
protected void statusUpdate(TaskAttemptStatus reportedStatus, long timestamp) {
    String stateString = reportedStatus.taskState.toString();
    TaskAttemptId attemptID = reportedStatus.id;
    TaskId taskID = attemptID.getTaskId();
    Job job = context.getJob(taskID.getJobId());
    if (job == null) {
        return;
    }
    Task task = job.getTask(taskID);
    if (task == null) {
        return;
    }
    estimator.updateAttempt(reportedStatus, timestamp);
    if (stateString.equals(TaskAttemptState.RUNNING.name())) {
        runningTasks.putIfAbsent(taskID, Boolean.TRUE);
    } else {
        runningTasks.remove(taskID, Boolean.TRUE);
        if (!stateString.equals(TaskAttemptState.STARTING.name())) {
            runningTaskAttemptStatistics.remove(attemptID);
        }
    }
}
Also used: Task (org.apache.hadoop.mapreduce.v2.app.job.Task), TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId), TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId), Job (org.apache.hadoop.mapreduce.v2.app.job.Job)
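
For illustration, a hypothetical report fed into this method from within the class. TaskAttemptStatus (a nested class of TaskAttemptStatusUpdateEvent) exposes public fields; the attemptId and clock used here are assumed to exist:

// A RUNNING report keeps the task in runningTasks and feeds the
// estimator; a terminal state would evict it from the maps instead.
TaskAttemptStatus status = new TaskAttemptStatus();
status.id = attemptId;                        // assumed TaskAttemptId
status.progress = 0.42f;
status.taskState = TaskAttemptState.RUNNING;
statusUpdate(status, clock.getTime());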

Aggregations

TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 113
Test (org.junit.Test): 75
Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 69
Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 60
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 58
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 56
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 42
Configuration (org.apache.hadoop.conf.Configuration): 29
AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext): 24
TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent): 24
Path (org.apache.hadoop.fs.Path): 23
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 22
HashMap (java.util.HashMap): 20
JobConf (org.apache.hadoop.mapred.JobConf): 17
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 17
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 17
MapTaskAttemptImpl (org.apache.hadoop.mapred.MapTaskAttemptImpl): 16
TaskAttemptListener (org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener): 16
InetSocketAddress (java.net.InetSocketAddress): 15
TaskSplitMetaInfo (org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo): 15