Search in sources :

Example 16 with DAGAppMasterEventUserServiceFatalError

use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.

The class TaskCommunicatorManager, method registerTaskAttempt.

/**
 * Assigns a task attempt to a container that has already been registered and passes the
 * attempt on to the task communicator at index {@code taskCommId}.
 *
 * <p>A failure thrown by the task communicator is not propagated to the caller: it is logged
 * and reported through a {@link DAGAppMasterEventUserServiceFatalError} event instead.
 *
 * @param amContainerTask source of the task spec, additional resources, credentials and priority
 * @param containerId the container the attempt is being assigned to
 * @param taskCommId index into {@code taskCommunicators}
 * @throws TezUncheckedException if the container is not registered, if the container already
 *         has an attempt assigned, or if the attempt is already mapped to a container
 */
@Override
public void registerTaskAttempt(AMContainerTask amContainerTask, ContainerId containerId, int taskCommId) {
    final ContainerInfo currentInfo = registeredContainers.get(containerId);
    if (currentInfo == null) {
        throw new TezUncheckedException(
            "Registering task attempt: " + amContainerTask.getTask().getTaskAttemptID()
                + " to unknown container: " + containerId);
    }
    if (currentInfo.taskAttemptId != null) {
        throw new TezUncheckedException(
            "Registering task attempt: " + amContainerTask.getTask().getTaskAttemptID()
                + " to container: " + containerId
                + " with existing assignment to: " + currentInfo.taskAttemptId);
    }

    // A fresh ContainerInfo replaces the mapped value, so the instance already stored in the
    // map never has to be mutated (and therefore never has to be synchronized on).
    registeredContainers.put(containerId, new ContainerInfo(amContainerTask.getTask().getTaskAttemptID()));

    // Both maps have been written by the time a duplicate attempt mapping is detected below.
    final ContainerId previousContainer =
        registeredAttempts.put(amContainerTask.getTask().getTaskAttemptID(), containerId);
    if (previousContainer != null) {
        throw new TezUncheckedException(
            "Registering task attempt: " + amContainerTask.getTask().getTaskAttemptID()
                + " to container: " + containerId
                + " when already assigned to: " + previousContainer);
    }

    try {
        taskCommunicators[taskCommId].registerRunningTaskAttempt(
            containerId,
            amContainerTask.getTask(),
            amContainerTask.getAdditionalResources(),
            amContainerTask.getCredentials(),
            amContainerTask.haveCredentialsChanged(),
            amContainerTask.getPriority());
    } catch (Exception failure) {
        final String msg = "Error in TaskCommunicator when registering Task Attempt"
            + ", communicator=" + Utils.getTaskCommIdentifierString(taskCommId, context)
            + ", containerId=" + containerId
            + ", taskId=" + amContainerTask.getTask().getTaskAttemptID();
        LOG.error(msg, failure);
        sendEvent(new DAGAppMasterEventUserServiceFatalError(
            DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR, msg, failure));
    }
}
Also used : DAGAppMasterEventUserServiceFatalError(org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) InvocationTargetException(java.lang.reflect.InvocationTargetException) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException)

Example 17 with DAGAppMasterEventUserServiceFatalError

use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.

The class TaskCommunicatorManager, method unregisterTaskAttempt.

/**
 * Removes the attempt-to-container mapping for {@code attemptId}, resets the container's entry
 * to {@code NULL_CONTAINER_INFO}, and tells the task communicator at index {@code taskCommId}
 * that the attempt is no longer running.
 *
 * <p>If the attempt has no mapped container, or the mapped container is not registered, a
 * warning is logged and the method returns without calling the task communicator. A failure
 * thrown by the task communicator is logged and reported through a
 * {@link DAGAppMasterEventUserServiceFatalError} event rather than rethrown.
 *
 * @param attemptId the task attempt to unregister
 * @param taskCommId index into {@code taskCommunicators}
 * @param endReason passed through to the task communicator
 * @param diagnostics passed through to the task communicator
 */
@Override
public void unregisterTaskAttempt(TezTaskAttemptID attemptId, int taskCommId, TaskAttemptEndReason endReason, String diagnostics) {
    // The attempt mapping is removed up front, even if one of the early returns below is taken.
    final ContainerId assignedContainer = registeredAttempts.remove(attemptId);
    if (assignedContainer == null) {
        LOG.warn("Unregister task attempt: " + attemptId + " from unknown container");
        return;
    }
    if (registeredContainers.get(assignedContainer) == null) {
        LOG.warn("Unregister task attempt: " + attemptId + " from non-registered container: " + assignedContainer);
        return;
    }

    // The map entry is replaced rather than mutated, so no synchronization is needed on the
    // ContainerInfo instance that was previously stored.
    registeredContainers.put(assignedContainer, NULL_CONTAINER_INFO);

    try {
        taskCommunicators[taskCommId].unregisterRunningTaskAttempt(attemptId, endReason, diagnostics);
    } catch (Exception failure) {
        final String msg = "Error in TaskCommunicator when unregistering Task Attempt"
            + ", communicator=" + Utils.getTaskCommIdentifierString(taskCommId, context)
            + ", containerId=" + assignedContainer
            + ", taskId=" + attemptId;
        LOG.error(msg, failure);
        sendEvent(new DAGAppMasterEventUserServiceFatalError(
            DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR, msg, failure));
    }
}
Also used : DAGAppMasterEventUserServiceFatalError(org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) InvocationTargetException(java.lang.reflect.InvocationTargetException) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException)

Example 18 with DAGAppMasterEventUserServiceFatalError

use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.

The class TaskCommunicatorManager, method registerRunningContainer.

/**
 * Records a container as registered (with no task attempt assigned) and informs the task
 * communicator at index {@code taskCommId} of the container's host and port.
 *
 * <p>A failure thrown by the task communicator is logged and reported through a
 * {@link DAGAppMasterEventUserServiceFatalError} event rather than rethrown.
 *
 * @param containerId the container to register
 * @param taskCommId index into {@code taskCommunicators}
 * @throws TezUncheckedException if {@code containerId} already had an entry in
 *         {@code registeredContainers}
 */
@Override
public void registerRunningContainer(ContainerId containerId, int taskCommId) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("ContainerId: " + containerId + " registered with TaskAttemptListener");
    }

    // put() returns the previous mapping; anything non-null means a duplicate registration.
    // The new entry has already been written when the exception is thrown.
    if (registeredContainers.put(containerId, NULL_CONTAINER_INFO) != null) {
        throw new TezUncheckedException("Multiple registrations for containerId: " + containerId);
    }

    final NodeId containerNode =
        context.getAllContainers().get(containerId).getContainer().getNodeId();
    try {
        taskCommunicators[taskCommId].registerRunningContainer(
            containerId, containerNode.getHost(), containerNode.getPort());
    } catch (Exception failure) {
        final String msg = "Error in TaskCommunicator when registering running Container"
            + ", communicator=" + Utils.getTaskCommIdentifierString(taskCommId, context)
            + ", containerId=" + containerId
            + ", nodeId=" + containerNode;
        LOG.error(msg, failure);
        sendEvent(new DAGAppMasterEventUserServiceFatalError(
            DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR, msg, failure));
    }
}
Also used : DAGAppMasterEventUserServiceFatalError(org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) NodeId(org.apache.hadoop.yarn.api.records.NodeId) InvocationTargetException(java.lang.reflect.InvocationTargetException) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException)

Example 19 with DAGAppMasterEventUserServiceFatalError

use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.

The class ContainerLauncherManager, method handle.

/**
 * Dispatches a container launcher event to the launcher selected by the event's launcher id.
 *
 * <p>{@code CONTAINER_LAUNCH_REQUEST} events are turned into a {@link ContainerLaunchRequest}
 * and {@code CONTAINER_STOP_REQUEST} events into a {@link ContainerStopRequest}. Any other
 * event type is ignored. A failure thrown by the launcher is logged and reported through a
 * {@link DAGAppMasterEventUserServiceFatalError} event rather than rethrown.
 *
 * @param event the event to process
 */
@Override
public void handle(ContainerLauncherEvent event) {
    final int launcherId = event.getLauncherId();
    // Both names are resolved before the event type is inspected; each request carries them.
    final String schedulerName = appContext.getTaskSchedulerName(event.getSchedulerId());
    final String taskCommName = appContext.getTaskCommunicatorName(event.getTaskCommId());

    switch (event.getType()) {
        case CONTAINER_LAUNCH_REQUEST: {
            final ContainerLauncherLaunchRequestEvent launchEvent = (ContainerLauncherLaunchRequestEvent) event;
            final ContainerLaunchRequest request = new ContainerLaunchRequest(
                launchEvent.getNodeId(),
                launchEvent.getContainerId(),
                launchEvent.getContainerToken(),
                launchEvent.getContainerLaunchContext(),
                launchEvent.getContainer(),
                schedulerName,
                taskCommName);
            try {
                containerLaunchers[launcherId].launchContainer(request);
            } catch (Exception failure) {
                final String msg = "Error when launching container"
                    + ", containerLauncher=" + Utils.getContainerLauncherIdentifierString(launcherId, appContext)
                    + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext)
                    + ", taskCommunicator=" + Utils.getTaskCommIdentifierString(event.getTaskCommId(), appContext);
                LOG.error(msg, failure);
                sendEvent(new DAGAppMasterEventUserServiceFatalError(
                    DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR, msg, failure));
            }
            break;
        }
        case CONTAINER_STOP_REQUEST: {
            final ContainerStopRequest request = new ContainerStopRequest(
                event.getNodeId(),
                event.getContainerId(),
                event.getContainerToken(),
                schedulerName,
                taskCommName);
            try {
                containerLaunchers[launcherId].stopContainer(request);
            } catch (Exception failure) {
                final String msg = "Error when stopping container"
                    + ", containerLauncher=" + Utils.getContainerLauncherIdentifierString(launcherId, appContext)
                    + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext)
                    + ", taskCommunicator=" + Utils.getTaskCommIdentifierString(event.getTaskCommId(), appContext);
                LOG.error(msg, failure);
                sendEvent(new DAGAppMasterEventUserServiceFatalError(
                    DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR, msg, failure));
            }
            break;
        }
    }
}
Also used : DAGAppMasterEventUserServiceFatalError(org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError) ContainerLaunchRequest(org.apache.tez.serviceplugins.api.ContainerLaunchRequest) ContainerStopRequest(org.apache.tez.serviceplugins.api.ContainerStopRequest) ContainerLauncherLaunchRequestEvent(org.apache.tez.dag.app.rm.ContainerLauncherLaunchRequestEvent) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) UnknownHostException(java.net.UnknownHostException) TezException(org.apache.tez.dag.api.TezException)

Example 20 with DAGAppMasterEventUserServiceFatalError

use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.

The class ContainerLauncherManager, method reportError.

/**
 * Handles an error reported by a container launcher plugin.
 *
 * <p>Errors of type {@code PERMANENT} are logged and reported through a
 * {@link DAGAppMasterEventUserServiceFatalError} event with no cause attached. Every other
 * error type is delegated to {@code Utils.processNonFatalServiceErrorReport}.
 *
 * @param containerLauncherIndex index identifying the reporting container launcher
 * @param servicePluginError the reported error; its error type selects the handling path
 * @param diagnostics optional detail text; {@code null} is rendered as an empty string in the
 *        fatal-error message
 * @param dagInfo passed through to the non-fatal handler
 */
public void reportError(int containerLauncherIndex, ServicePluginError servicePluginError, String diagnostics, DagInfo dagInfo) {
    final boolean permanent = servicePluginError.getErrorType() == ServicePluginError.ErrorType.PERMANENT;
    final String launcherIdent = Utils.getContainerLauncherIdentifierString(containerLauncherIndex, appContext);

    if (!permanent) {
        Utils.processNonFatalServiceErrorReport(
            launcherIdent, servicePluginError, diagnostics, dagInfo, appContext, "ContainerLauncher");
        return;
    }

    final String msg = "Fatal Error reported by ContainerLauncher"
        + ", containerLauncher=" + launcherIdent
        + ", servicePluginError=" + servicePluginError
        + ", diagnostics= " + (diagnostics == null ? "" : diagnostics);
    LOG.error(msg);
    sendEvent(new DAGAppMasterEventUserServiceFatalError(
        DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR, msg, null));
}
Also used : DAGAppMasterEventUserServiceFatalError(org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError)

Aggregations

DAGAppMasterEventUserServiceFatalError (org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError): 21; TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException): 12; TezException (org.apache.tez.dag.api.TezException): 11; ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 9; Event (org.apache.hadoop.yarn.event.Event): 6; EventHandler (org.apache.hadoop.yarn.event.EventHandler): 6; DagInfoImplForTest (org.apache.tez.dag.helpers.DagInfoImplForTest): 6; Test (org.junit.Test): 6; Configuration (org.apache.hadoop.conf.Configuration): 5; IOException (java.io.IOException): 4; NodeId (org.apache.hadoop.yarn.api.records.NodeId): 4; TaskLocationHint (org.apache.tez.dag.api.TaskLocationHint): 4; AppContext (org.apache.tez.dag.app.AppContext): 4; DAG (org.apache.tez.dag.app.dag.DAG): 4; TaskAttempt (org.apache.tez.dag.app.dag.TaskAttempt): 4; InvocationTargetException (java.lang.reflect.InvocationTargetException): 3; LinkedList (java.util.LinkedList): 3; NamedEntityDescriptor (org.apache.tez.dag.api.NamedEntityDescriptor): 3; DAGEventTerminateDag (org.apache.tez.dag.app.dag.event.DAGEventTerminateDag): 3; ContainerLauncherLaunchRequestEvent (org.apache.tez.dag.app.rm.ContainerLauncherLaunchRequestEvent): 3