use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TaskAttemptImpl method recover.
@SuppressWarnings("unchecked")
public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo,
    OutputCommitter committer, boolean recoverOutput) {
  ContainerId containerId = taInfo.getContainerId();
  NodeId containerNodeId = NodeId.fromString(
      taInfo.getHostname() + ":" + taInfo.getPort());
  String nodeHttpAddress = StringInterner.weakIntern(
      taInfo.getHostname() + ":" + taInfo.getHttpPort());
  // Resource/Priority/Tokens are only needed while launching the container on
  // an NM; these are already completed tasks, so setting them to null
  container = Container.newInstance(containerId, containerNodeId,
      nodeHttpAddress, null, null, null);
  computeRackAndLocality();
  launchTime = taInfo.getStartTime();
  finishTime = (taInfo.getFinishTime() != -1) ?
      taInfo.getFinishTime() : clock.getTime();
  shufflePort = taInfo.getShufflePort();
  trackerName = taInfo.getHostname();
  httpPort = taInfo.getHttpPort();
  sendLaunchedEvents();

  reportedStatus.id = attemptId;
  reportedStatus.progress = 1.0f;
  reportedStatus.counters = taInfo.getCounters();
  reportedStatus.stateString = taInfo.getState();
  reportedStatus.phase = Phase.CLEANUP;
  reportedStatus.mapFinishTime = taInfo.getMapFinishTime();
  reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime();
  reportedStatus.sortFinishTime = taInfo.getSortFinishTime();
  addDiagnosticInfo(taInfo.getError());

  boolean needToClean = false;
  String recoveredState = taInfo.getTaskStatus();
  if (recoverOutput
      && TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
    TaskAttemptContext tac = new TaskAttemptContextImpl(conf,
        TypeConverter.fromYarn(attemptId));
    try {
      committer.recoverTask(tac);
      LOG.info("Recovered output from task attempt " + attemptId);
    } catch (Exception e) {
      LOG.error("Unable to recover task attempt " + attemptId, e);
      LOG.info("Task attempt " + attemptId + " will be recovered as KILLED");
      recoveredState = TaskAttemptState.KILLED.toString();
      needToClean = true;
    }
  }

  TaskAttemptStateInternal attemptState;
  if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
    attemptState = TaskAttemptStateInternal.SUCCEEDED;
    reportedStatus.taskState = TaskAttemptState.SUCCEEDED;
    eventHandler.handle(createJobCounterUpdateEventTASucceeded(this));
    logAttemptFinishedEvent(attemptState);
  } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) {
    attemptState = TaskAttemptStateInternal.FAILED;
    reportedStatus.taskState = TaskAttemptState.FAILED;
    eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false));
    TaskAttemptUnsuccessfulCompletionEvent tauce =
        createTaskAttemptUnsuccessfulCompletionEvent(this,
            TaskAttemptStateInternal.FAILED);
    eventHandler.handle(
        new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
  } else {
    if (!TaskAttemptState.KILLED.toString().equals(recoveredState)) {
      if (String.valueOf(recoveredState).isEmpty()) {
        LOG.info("TaskAttempt " + attemptId
            + " had not completed, recovering as KILLED");
      } else {
        LOG.warn("TaskAttempt " + attemptId + " found in unexpected state "
            + recoveredState + ", recovering as KILLED");
      }
      addDiagnosticInfo("Killed during application recovery");
      needToClean = true;
    }
    attemptState = TaskAttemptStateInternal.KILLED;
    reportedStatus.taskState = TaskAttemptState.KILLED;
    eventHandler.handle(createJobCounterUpdateEventTAKilled(this, false));
    TaskAttemptUnsuccessfulCompletionEvent tauce =
        createTaskAttemptUnsuccessfulCompletionEvent(this,
            TaskAttemptStateInternal.KILLED);
    eventHandler.handle(
        new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
  }

  if (needToClean) {
    TaskAttemptContext tac = new TaskAttemptContextImpl(conf,
        TypeConverter.fromYarn(attemptId));
    try {
      committer.abortTask(tac);
    } catch (Exception e) {
      LOG.warn("Task cleanup failed for attempt " + attemptId, e);
    }
  }
  return attemptState;
}
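Every recovered state other than SUCCEEDED or FAILED collapses to KILLED in the branching above. A minimal standalone sketch of that mapping, using the real TaskAttemptState and TaskAttemptStateInternal enums but a hypothetical helper name (not part of Hadoop):

// Hypothetical helper distilling the recovery-state mapping above.
static TaskAttemptStateInternal mapRecoveredState(String recoveredState) {
  if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
    return TaskAttemptStateInternal.SUCCEEDED;
  } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) {
    return TaskAttemptStateInternal.FAILED;
  }
  // Anything else (KILLED, empty, or unexpected) recovers as KILLED.
  return TaskAttemptStateInternal.KILLED;
}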
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TaskImpl method addAndScheduleAttempt.
// This is always called in the Write Lock
private void addAndScheduleAttempt(Avataar avataar, boolean reschedule) {
  TaskAttempt attempt = addAttempt(avataar);
  inProgressAttempts.add(attempt.getID());
  // schedule the nextAttemptNumber
  if (failedAttempts.size() > 0 || reschedule) {
    eventHandler.handle(new TaskAttemptEvent(attempt.getID(),
        TaskAttemptEventType.TA_RESCHEDULE));
  } else {
    eventHandler.handle(new TaskAttemptEvent(attempt.getID(),
        TaskAttemptEventType.TA_SCHEDULE));
  }
}
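Reduced to its core decision: once any attempt of the task has failed, every new attempt is dispatched as TA_RESCHEDULE rather than TA_SCHEDULE (downstream, the allocator typically treats a rescheduled map as a fast-fail request). A hypothetical condensation of the dispatch, reusing the snippet's names:

// Hypothetical condensation of the dispatch above; event-type choice only.
TaskAttemptEventType eventType =
    (failedAttempts.size() > 0 || reschedule)
        ? TaskAttemptEventType.TA_RESCHEDULE  // task already has a failure
        : TaskAttemptEventType.TA_SCHEDULE;   // first, clean schedule
eventHandler.handle(new TaskAttemptEvent(attempt.getID(), eventType));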
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TaskImpl method getLaunchTime.
// this is always called in read/write lock
private long getLaunchTime() {
  long taskLaunchTime = 0;
  boolean launchTimeSet = false;
  for (TaskAttempt at : attempts.values()) {
    // select the least launch time of all attempts
    long attemptLaunchTime = at.getLaunchTime();
    if (attemptLaunchTime != 0 && !launchTimeSet) {
      // For the first non-zero launch time
      launchTimeSet = true;
      taskLaunchTime = attemptLaunchTime;
    } else if (attemptLaunchTime != 0 && taskLaunchTime > attemptLaunchTime) {
      taskLaunchTime = attemptLaunchTime;
    }
  }
  if (!launchTimeSet) {
    // no attempt has launched yet; fall back to the scheduled time
    return this.scheduledTime;
  }
  return taskLaunchTime;
}
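The loop is a minimum over the non-zero attempt launch times, with scheduledTime as the fallback when no attempt has launched. An equivalent, hypothetical stream formulation of the same computation (names mirror the snippet; this is not Hadoop's code):

// Hypothetical stream equivalent of getLaunchTime(); behavior should match.
private long getLaunchTimeViaStreams() {
  return attempts.values().stream()
      .mapToLong(TaskAttempt::getLaunchTime)
      .filter(t -> t != 0)          // ignore attempts that never launched
      .min()
      .orElse(this.scheduledTime);  // no attempt launched yet
}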
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class JobImpl method actOnUnusableNode.
private void actOnUnusableNode(NodeId nodeId, NodeState nodeState) {
  // running reducers
  if (getInternalState() == JobStateInternal.RUNNING &&
      !allReducersComplete()) {
    List<TaskAttemptId> taskAttemptIdList =
        nodesToSucceededTaskAttempts.get(nodeId);
    if (taskAttemptIdList != null) {
      String mesg = "TaskAttempt killed because it ran on unusable node "
          + nodeId;
      for (TaskAttemptId id : taskAttemptIdList) {
        if (TaskType.MAP == id.getTaskId().getTaskType()) {
          // reschedule only map tasks because their outputs may be unusable
          LOG.info(mesg + ". AttemptId:" + id);
          // Kill the attempt and indicate that the next map attempt should be
          // rescheduled (i.e. considered as a fast fail map).
          eventHandler.handle(new TaskAttemptKillEvent(id, mesg, true));
        }
      }
    }
  }
  // currently running task attempts on unusable nodes are handled in
  // RMContainerAllocator
}
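Only map attempts are killed here because a succeeded map's output sits on the local disk of the node that ran it and is served to reducers over shuffle, whereas reduce output has already been committed to HDFS. For the lookup above to work, the job must index succeeded attempts by node; a hypothetical sketch of how such bookkeeping could be maintained (the helper method is illustrative, not Hadoop's; assumes java.util imports):

// Hypothetical bookkeeping sketch for the node-to-attempts index.
private final Map<NodeId, List<TaskAttemptId>> nodesToSucceededTaskAttempts =
    new HashMap<>();

// Called (conceptually) when an attempt reports success on a node.
private void recordSucceededAttempt(NodeId nodeId, TaskAttemptId attemptId) {
  nodesToSucceededTaskAttempts
      .computeIfAbsent(nodeId, n -> new ArrayList<>())
      .add(attemptId);
}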
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TaskAttemptImpl method createJobCounterUpdateEventTAFailed.
private static JobCounterUpdateEvent createJobCounterUpdateEventTAFailed(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  TaskType taskType = taskAttempt.getID().getTaskId().getTaskType();
  JobCounterUpdateEvent jce =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());
  if (taskType == TaskType.MAP) {
    jce.addCounterUpdate(JobCounter.NUM_FAILED_MAPS, 1);
  } else {
    jce.addCounterUpdate(JobCounter.NUM_FAILED_REDUCES, 1);
  }
  if (!taskAlreadyCompleted) {
    // don't double-count the elapsed time of a task that already completed
    updateMillisCounters(jce, taskAttempt);
  }
  return jce;
}
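The recover() snippet above also calls createJobCounterUpdateEventTAKilled, which presumably follows the same shape with the NUM_KILLED_* counters. A hedged reconstruction by analogy with the method above (not copied from the source; the real method may differ in detail):

// Reconstructed by analogy with createJobCounterUpdateEventTAFailed.
private static JobCounterUpdateEvent createJobCounterUpdateEventTAKilled(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  TaskType taskType = taskAttempt.getID().getTaskId().getTaskType();
  JobCounterUpdateEvent jce =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());
  if (taskType == TaskType.MAP) {
    jce.addCounterUpdate(JobCounter.NUM_KILLED_MAPS, 1);
  } else {
    jce.addCounterUpdate(JobCounter.NUM_KILLED_REDUCES, 1);
  }
  if (!taskAlreadyCompleted) {
    updateMillisCounters(jce, taskAttempt);
  }
  return jce;
}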