use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class TaskImpl method canCommit.
/**
 * Answers a commit go/no-go request from a task attempt.
 *
 * <p>The whole decision is made while holding {@code writeLock}. At most one attempt is
 * recorded in {@code commitAttempt}; that attempt gets {@code true} on this and on any
 * later call, every other attempt gets {@code false}.
 *
 * @param taskAttemptID the attempt asking for permission to commit
 * @return {@code true} if this attempt is (or has just become) the committer,
 *         {@code false} otherwise
 * @throws TezUncheckedException if {@code getAttempt} does not know the given attempt
 */
@Override
public boolean canCommit(TezTaskAttemptID taskAttemptID) {
  writeLock.lock();
  try {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Commit go/no-go request from " + taskAttemptID);
    }

    final TaskState taskState = getState();

    // The attempt already ran and wants to commit while the task is still SCHEDULED:
    // event processing is lagging behind. Answer "no" so the attempt asks again once
    // the backlog has cleared.
    if (taskState == TaskState.SCHEDULED) {
      LOG.info("Event processing delay. "
          + "Attempt committing before state machine transitioned to running : Task {}", taskId);
      return false;
    }

    // Any state other than RUNNING means this commit request is bogus; ask for the
    // attempt to be killed.
    if (taskState != TaskState.RUNNING) {
      LOG.info("Task not running. Issuing kill to bad commit attempt " + taskAttemptID);
      eventHandler.handle(new TaskAttemptEventKillRequest(taskAttemptID,
          "Task not running. Bad attempt.", TaskAttemptTerminationCause.TERMINATED_ORPHANED));
      return false;
    }

    // A committer has already been chosen: only that same attempt gets a "go".
    if (commitAttempt != null) {
      final boolean isChosenCommitter = commitAttempt.equals(taskAttemptID);
      if (LOG.isDebugEnabled()) {
        if (isChosenCommitter) {
          LOG.debug(taskAttemptID + " already given a go for committing the task output.");
        } else {
          LOG.debug(commitAttempt + " is current committer. Commit waiting for: " + taskAttemptID);
        }
      }
      return isChosenCommitter;
    }

    // No committer yet: the requester becomes the committer if it is still running.
    final TaskAttempt requester = getAttempt(taskAttemptID);
    if (requester == null) {
      throw new TezUncheckedException("Unknown task for commit: " + taskAttemptID);
    }

    // A non-locked state snapshot is acceptable here because state changes are handled
    // in the task attempt; taking the attempt's lock at this point could deadlock.
    final TaskAttemptState requesterState = requester.getStateNoLock();
    if (requesterState != TaskAttemptState.RUNNING) {
      LOG.info(taskAttemptID + " with state: " + requesterState + " given a no-go for commit because its not running.");
      return false;
    }

    commitAttempt = taskAttemptID;
    LOG.info(taskAttemptID + " given a go for committing the task output.");
    return true;
  } finally {
    writeLock.unlock();
  }
}
use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class TaskImpl method getCounters.
/**
 * Builds a fresh {@code TezCounters} holding the task-level counters plus the counters
 * of the attempt returned by {@code selectBestAttempt()}, when there is one.
 *
 * @return a newly created counters object; the task's own {@code counters} field is
 *         only read, not returned
 */
@Override
public TezCounters getCounters() {
  final TezCounters aggregated = new TezCounters();
  // NOTE(review): the task-level counters are folded in before the read lock is taken;
  // confirm that is intentional.
  aggregated.incrAllCounters(this.counters);

  readLock.lock();
  try {
    final TaskAttempt best = selectBestAttempt();
    if (best == null) {
      return aggregated;
    }
    aggregated.incrAllCounters(best.getCounters());
    return aggregated;
  } finally {
    readLock.unlock();
  }
}
use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class TaskSchedulerManager method handleTaLaunchRequest.
/**
 * Asks the task scheduler selected by the event to allocate resources for a task attempt.
 *
 * <p>Three cases are handled, depending on the event's location hint:
 * <ul>
 *   <li>Task-based affinity whose target task has a successful attempt: allocation is
 *       requested against that attempt's assigned container, and the method returns.</li>
 *   <li>Task-based affinity whose target task has no successful attempt: the affinity is
 *       ignored and allocation is requested with {@code null} hosts and racks.</li>
 *   <li>No task-based affinity: allocation is requested with the hosts/racks of the hint
 *       ({@code null} for either when the hint does not provide it, or when there is no
 *       hint at all).</li>
 * </ul>
 *
 * <p>Any exception thrown by the scheduler is logged and reported as a
 * {@code TASK_SCHEDULER_SERVICE_FATAL_ERROR} event; it is not rethrown.
 *
 * @param event the launch request carrying the attempt, location hint, capability,
 *              priority, container context and scheduler id
 */
private void handleTaLaunchRequest(AMSchedulerEventTALaunchRequest event) {
  TaskAttempt taskAttempt = event.getTaskAttempt();
  TaskLocationHint locationHint = event.getLocationHint();
  String[] hosts = null;
  String[] racks = null;
  if (locationHint != null) {
    TaskBasedLocationAffinity taskAffinity = locationHint.getAffinitizedTask();
    if (taskAffinity != null) {
      Vertex vertex = appContext.getCurrentDAG().getVertex(taskAffinity.getVertexName());
      Preconditions.checkNotNull(vertex, "Invalid vertex in task based affinity " + taskAffinity + " for attempt: " + taskAttempt.getID());
      int taskIndex = taskAffinity.getTaskIndex();
      Preconditions.checkState(taskIndex >= 0 && taskIndex < vertex.getTotalTasks(), "Invalid taskIndex in task based affinity " + taskAffinity + " for attempt: " + taskAttempt.getID());
      TaskAttempt affinityAttempt = vertex.getTask(taskIndex).getSuccessfulAttempt();
      if (affinityAttempt != null) {
        Preconditions.checkNotNull(affinityAttempt.getAssignedContainerID(), affinityAttempt.getID());
        try {
          // Request allocation against the container used by the affinitized attempt.
          taskSchedulers[event.getSchedulerId()].allocateTask(taskAttempt, event.getCapability(),
              affinityAttempt.getAssignedContainerID(), Priority.newInstance(event.getPriority()),
              event.getContainerContext(), event);
        } catch (Exception e) {
          reportTaskAllocationFailure(event, taskAttempt, e);
        }
        return;
      }
      LOG.info("No attempt for task affinity to " + taskAffinity + " for attempt " + taskAttempt.getID() + " Ignoring.");
      // fall through with null hosts/racks
    } else {
      hosts = (locationHint.getHosts() != null) ? locationHint.getHosts().toArray(new String[locationHint.getHosts().size()]) : null;
      racks = (locationHint.getRacks() != null) ? locationHint.getRacks().toArray(new String[locationHint.getRacks().size()]) : null;
    }
  }
  try {
    taskSchedulers[event.getSchedulerId()].allocateTask(taskAttempt, event.getCapability(),
        hosts, racks, Priority.newInstance(event.getPriority()), event.getContainerContext(), event);
  } catch (Exception e) {
    reportTaskAllocationFailure(event, taskAttempt, e);
  }
}

/**
 * Logs a task-allocation failure raised by a task scheduler and forwards it as a
 * {@code TASK_SCHEDULER_SERVICE_FATAL_ERROR} event.
 *
 * @param event       the launch request that was being handled
 * @param taskAttempt the attempt for which allocation was requested
 * @param e           the exception thrown by the scheduler
 */
private void reportTaskAllocationFailure(AMSchedulerEventTALaunchRequest event, TaskAttempt taskAttempt, Exception e) {
  String msg = "Error in TaskScheduler for handling Task Allocation" + ", eventType=" + event.getType() + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext) + ", taskAttemptId=" + taskAttempt.getID();
  LOG.error(msg, e);
  sendEvent(new DAGAppMasterEventUserServiceFatalError(DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR, msg, e));
}
use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class TaskSchedulerManager method handleTAUnsuccessfulEnd.
/**
 * Handles the unsuccessful end of a task attempt: de-allocates it in the scheduler and,
 * if the attempt had a container assigned, asks that container to stop and notifies the
 * node that the attempt ended.
 *
 * <p>If the scheduler throws during de-allocation, the error is logged, reported as a
 * {@code TASK_SCHEDULER_SERVICE_FATAL_ERROR} event, and nothing else is done.
 *
 * @param event the end event carrying the attempt, its end reason, diagnostics, final
 *              state and scheduler id
 */
private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) {
  final TaskAttempt endedAttempt = event.getAttempt();

  // Pass the end reason and diagnostics along when telling the scheduler to de-allocate.
  boolean schedulerHadAssignment;
  try {
    schedulerHadAssignment = taskSchedulers[event.getSchedulerId()].deallocateTask(
        endedAttempt, false, event.getTaskAttemptEndReason(), event.getDiagnostics());
  } catch (Exception e) {
    String msg = "Error in TaskScheduler for handling Task De-allocation" + ", eventType=" + event.getType() + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext) + ", taskAttemptId=" + endedAttempt.getID();
    LOG.error(msg, e);
    sendEvent(new DAGAppMasterEventUserServiceFatalError(DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR, msg, e));
    return;
  }

  // Rely on the container id stored on the attempt itself: the scheduler may already
  // have dropped its assignment if the task was de-allocated earlier.
  final ContainerId assignedContainerId = endedAttempt.getAssignedContainerID();
  final boolean attemptHasContainer = assignedContainerId != null;

  if (!schedulerHadAssignment) {
    LOG.info("Task: " + endedAttempt.getID() + " has no container assignment in the scheduler");
    if (attemptHasContainer) {
      // Scheduler and attempt disagree about whether a container was assigned.
      LOG.error("No container allocated to task: " + endedAttempt.getID() + " according to scheduler. Task reported container id: " + assignedContainerId);
    }
  }

  if (!attemptHasContainer) {
    return;
  }

  // TODO either ways send the necessary events
  // Ask the container to stop.
  sendEvent(new AMContainerEventStopRequest(assignedContainerId));

  // Tell the node that the attempt was asked to stop / has already stopped. Only a
  // FAILED final state is flagged as a failure; AMNodeImpl blacklisting logic does not
  // account for KILLED attempts.
  // NOTE(review): the container lookup below is not null-checked - confirm the
  // container is always registered when the attempt reports a container id.
  sendEvent(new AMNodeEventTaskAttemptEnded(
      appContext.getAllContainers().get(assignedContainerId).getContainer().getNodeId(),
      event.getSchedulerId(),
      assignedContainerId,
      endedAttempt.getID(),
      event.getState() == TaskAttemptState.FAILED));
}
use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class TaskSchedulerManager method taskAllocated.
// TaskSchedulerAppCallback methods with schedulerId, where relevant
/**
 * Scheduler callback invoked when a container has been allocated for a task.
 *
 * <p>Registers the container if it has not been seen before (also recording the node and
 * emitting a container-allocated node event), requests a container launch when the
 * container is in the {@code ALLOCATED} state, and finally assigns the task attempt to
 * the container.
 *
 * @param schedulerId id of the scheduler making the callback
 * @param task        the task object the scheduler was given; expected to equal the
 *                    attempt carried by {@code appCookie} (checked with an {@code assert})
 * @param appCookie   the {@code AMSchedulerEventTALaunchRequest} that requested the allocation
 * @param container   the container that was allocated
 */
public synchronized void taskAllocated(int schedulerId, Object task, Object appCookie, Container container) {
  final AMSchedulerEventTALaunchRequest launchRequest = (AMSchedulerEventTALaunchRequest) appCookie;
  final ContainerId containerId = container.getId();

  final boolean newlyRegistered = appContext.getAllContainers().addContainerIfNew(
      container, schedulerId, launchRequest.getLauncherId(), launchRequest.getTaskCommId());
  if (newlyRegistered) {
    appContext.getNodeTracker().nodeSeen(container.getNodeId(), schedulerId);
    sendEvent(new AMNodeEventContainerAllocated(container.getNodeId(), schedulerId, container.getId()));
  }

  final TaskAttempt allocatedAttempt = launchRequest.getTaskAttempt();
  // Sanity check: the task handed back in this upcall should be the attempt from the cookie.
  assert task.equals(allocatedAttempt);

  final boolean awaitingLaunch =
      appContext.getAllContainers().get(containerId).getState() == AMContainerState.ALLOCATED;
  if (awaitingLaunch) {
    sendEvent(new AMContainerEventLaunchRequest(containerId, allocatedAttempt.getVertexID(),
        launchRequest.getContainerContext(), launchRequest.getLauncherId(), launchRequest.getTaskCommId()));
  }

  sendEvent(new AMContainerEventAssignTA(containerId, allocatedAttempt.getID(),
      launchRequest.getRemoteTaskSpec(),
      launchRequest.getContainerContext().getLocalResources(),
      launchRequest.getContainerContext().getCredentials(),
      launchRequest.getPriority()));
}
Aggregations