Search in sources :

Example 1 with AMNodeEventTaskAttemptEnded

Use of org.apache.tez.dag.app.rm.node.AMNodeEventTaskAttemptEnded in the Apache Tez project.

From the class TaskSchedulerManager, method handleTAUnsuccessfulEnd.

/**
 * Processes a task-attempt-ended event on the unsuccessful path.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Asks the scheduler selected by {@code event.getSchedulerId()} to deallocate the
 *       attempt, forwarding the end reason and diagnostics carried by the event. The
 *       second argument is passed as {@code false} (presumably a "succeeded" flag;
 *       confirm against the scheduler API).</li>
 *   <li>If the scheduler call throws, logs the failure, emits a
 *       {@code TASK_SCHEDULER_SERVICE_FATAL_ERROR} event and returns without doing
 *       anything else.</li>
 *   <li>Otherwise, if the attempt has an assigned container id, sends a stop request for
 *       that container followed by a task-attempt-ended event for the container's node.</li>
 * </ol>
 *
 * @param event the scheduler event describing the ended task attempt
 */
private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) {
    final TaskAttempt attempt = event.getAttempt();

    // Hand the end reason and diagnostics (if any) to the scheduler as part of the
    // de-allocation request.
    final boolean wasContainerAllocated;
    try {
        wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(
            attempt,
            false,
            event.getTaskAttemptEndReason(),
            event.getDiagnostics());
    } catch (Exception e) {
        String msg = "Error in TaskScheduler for handling Task De-allocation"
            + ", eventType=" + event.getType()
            + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext)
            + ", taskAttemptId=" + attempt.getID();
        LOG.error(msg, e);
        sendEvent(new DAGAppMasterEventUserServiceFatalError(
            DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR, msg, e));
        return;
    }

    // Rely on the container id recorded on the attempt itself: the scheduler may already
    // have dropped this assignment if the task was deallocated earlier (retroactive case).
    final ContainerId attemptContainerId = attempt.getAssignedContainerID();
    final boolean hasContainerId = attemptContainerId != null;

    if (!wasContainerAllocated) {
        LOG.info("Task: " + attempt.getID() + " has no container assignment in the scheduler");
        if (hasContainerId) {
            // Scheduler and attempt disagree about whether a container was assigned.
            LOG.error("No container allocated to task: " + attempt.getID()
                + " according to scheduler. Task reported container id: " + attemptContainerId);
        }
    }

    if (!hasContainerId) {
        // Nothing to stop and no node to notify.
        return;
    }

    // TODO either ways send the necessary events
    // Request that the container stop.
    sendEvent(new AMContainerEventStopRequest(attemptContainerId));

    // Tell the node that the task asked to be STOPPED / has already stopped. Only FAILED
    // is flagged as a failure here: AMNodeImpl blacklisting logic does not account for
    // KILLED attempts.
    sendEvent(new AMNodeEventTaskAttemptEnded(
        appContext.getAllContainers().get(attemptContainerId).getContainer().getNodeId(),
        event.getSchedulerId(),
        attemptContainerId,
        attempt.getID(),
        event.getState() == TaskAttemptState.FAILED));
}
Also used : DAGAppMasterEventUserServiceFatalError(org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) AMContainerEventStopRequest(org.apache.tez.dag.app.rm.container.AMContainerEventStopRequest) TaskAttempt(org.apache.tez.dag.app.dag.TaskAttempt) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) TezException(org.apache.tez.dag.api.TezException) AMNodeEventTaskAttemptEnded(org.apache.tez.dag.app.rm.node.AMNodeEventTaskAttemptEnded)

Aggregations

ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)1 TezException (org.apache.tez.dag.api.TezException)1 TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException)1 TaskAttempt (org.apache.tez.dag.app.dag.TaskAttempt)1 DAGAppMasterEventUserServiceFatalError (org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError)1 AMContainerEventStopRequest (org.apache.tez.dag.app.rm.container.AMContainerEventStopRequest)1 AMNodeEventTaskAttemptEnded (org.apache.tez.dag.app.rm.node.AMNodeEventTaskAttemptEnded)1