use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.
the class TaskCommunicatorManager method registerTaskAttempt.
/**
 * Assigns a task attempt to an already-registered container and hands it to the task
 * communicator selected by {@code taskCommId}.
 *
 * <p>An exception thrown by the communicator is not propagated: it is logged and reported
 * through a {@code TASK_COMMUNICATOR_SERVICE_FATAL_ERROR} event.
 *
 * @param amContainerTask source of the task spec, additional resources, credentials and priority
 * @param containerId the container the attempt is being assigned to
 * @param taskCommId index into {@code taskCommunicators}
 * @throws TezUncheckedException if the container is not registered, if the container already
 *     has an attempt assigned, or if the attempt is already mapped to a container
 */
@Override
public void registerTaskAttempt(AMContainerTask amContainerTask, ContainerId containerId, int taskCommId) {
  ContainerInfo currentInfo = registeredContainers.get(containerId);
  if (currentInfo == null) {
    String problem = "Registering task attempt: " + amContainerTask.getTask().getTaskAttemptID()
        + " to unknown container: " + containerId;
    throw new TezUncheckedException(problem);
  } else if (currentInfo.taskAttemptId != null) {
    String problem = "Registering task attempt: " + amContainerTask.getTask().getTaskAttemptID()
        + " to container: " + containerId
        + " with existing assignment to: " + currentInfo.taskAttemptId;
    throw new TezUncheckedException(problem);
  }

  // A fresh ContainerInfo replaces the map entry so that the existing element never has to be
  // mutated (and therefore never has to be synchronized on).
  registeredContainers.put(
      containerId, new ContainerInfo(amContainerTask.getTask().getTaskAttemptID()));

  ContainerId previousContainer =
      registeredAttempts.put(amContainerTask.getTask().getTaskAttemptID(), containerId);
  if (previousContainer != null) {
    String problem = "Registering task attempt: " + amContainerTask.getTask().getTaskAttemptID()
        + " to container: " + containerId
        + " when already assigned to: " + previousContainer;
    throw new TezUncheckedException(problem);
  }

  try {
    taskCommunicators[taskCommId].registerRunningTaskAttempt(
        containerId,
        amContainerTask.getTask(),
        amContainerTask.getAdditionalResources(),
        amContainerTask.getCredentials(),
        amContainerTask.haveCredentialsChanged(),
        amContainerTask.getPriority());
  } catch (Exception e) {
    // Plugin failures are converted into a fatal-error event rather than rethrown.
    String msg = "Error in TaskCommunicator when registering Task Attempt"
        + ", communicator=" + Utils.getTaskCommIdentifierString(taskCommId, context)
        + ", containerId=" + containerId
        + ", taskId=" + amContainerTask.getTask().getTaskAttemptID();
    LOG.error(msg, e);
    sendEvent(new DAGAppMasterEventUserServiceFatalError(
        DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR, msg, e));
  }
}
use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.
the class TaskCommunicatorManager method unregisterTaskAttempt.
/**
 * Removes a task attempt's registration, resets its container's entry to
 * {@code NULL_CONTAINER_INFO}, and notifies the task communicator selected by
 * {@code taskCommId}.
 *
 * <p>If the attempt has no registered container, or that container is not present in
 * {@code registeredContainers}, a warning is logged and the communicator is not called.
 * An exception thrown by the communicator is logged and reported through a
 * {@code TASK_COMMUNICATOR_SERVICE_FATAL_ERROR} event instead of being propagated.
 *
 * @param attemptId the task attempt to unregister
 * @param taskCommId index into {@code taskCommunicators}
 * @param endReason passed through to the communicator
 * @param diagnostics passed through to the communicator
 */
@Override
public void unregisterTaskAttempt(TezTaskAttemptID attemptId, int taskCommId, TaskAttemptEndReason endReason, String diagnostics) {
  final ContainerId assignedContainer = registeredAttempts.remove(attemptId);
  if (assignedContainer == null) {
    LOG.warn("Unregister task attempt: " + attemptId + " from unknown container");
    return;
  }

  if (registeredContainers.get(assignedContainer) == null) {
    LOG.warn("Unregister task attempt: " + attemptId
        + " from non-registered container: " + assignedContainer);
    return;
  }

  // The entry is replaced rather than mutated so that no synchronization is needed on the
  // element currently stored in the map.
  registeredContainers.put(assignedContainer, NULL_CONTAINER_INFO);

  try {
    taskCommunicators[taskCommId].unregisterRunningTaskAttempt(attemptId, endReason, diagnostics);
  } catch (Exception e) {
    // Plugin failures are converted into a fatal-error event rather than rethrown.
    String msg = "Error in TaskCommunicator when unregistering Task Attempt"
        + ", communicator=" + Utils.getTaskCommIdentifierString(taskCommId, context)
        + ", containerId=" + assignedContainer
        + ", taskId=" + attemptId;
    LOG.error(msg, e);
    sendEvent(new DAGAppMasterEventUserServiceFatalError(
        DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR, msg, e));
  }
}
use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.
the class TaskCommunicatorManager method registerRunningContainer.
/**
 * Records a newly running container and announces it to the task communicator selected by
 * {@code taskCommId}.
 *
 * <p>The container is stored with {@code NULL_CONTAINER_INFO}, i.e. without a task attempt.
 * An exception thrown by the communicator is logged and reported through a
 * {@code TASK_COMMUNICATOR_SERVICE_FATAL_ERROR} event instead of being propagated.
 *
 * <p>The node lookup through {@code context.getAllContainers()} runs outside the
 * {@code try} block, so a failure there propagates to the caller.
 *
 * @param containerId the container that started running
 * @param taskCommId index into {@code taskCommunicators}
 * @throws TezUncheckedException if {@code containerId} is already registered; the existing
 *     registration is left untouched in that case
 */
@Override
public void registerRunningContainer(ContainerId containerId, int taskCommId) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("ContainerId: " + containerId + " registered with TaskAttemptListener");
  }
  // putIfAbsent rather than put: a duplicate registration must not overwrite the existing
  // entry (and with it any task attempt currently assigned to the container) before the
  // error is raised.
  ContainerInfo oldInfo = registeredContainers.putIfAbsent(containerId, NULL_CONTAINER_INFO);
  if (oldInfo != null) {
    throw new TezUncheckedException("Multiple registrations for containerId: " + containerId);
  }
  NodeId nodeId = context.getAllContainers().get(containerId).getContainer().getNodeId();
  try {
    taskCommunicators[taskCommId].registerRunningContainer(
        containerId, nodeId.getHost(), nodeId.getPort());
  } catch (Exception e) {
    // Plugin failures are converted into a fatal-error event rather than rethrown.
    String msg = "Error in TaskCommunicator when registering running Container"
        + ", communicator=" + Utils.getTaskCommIdentifierString(taskCommId, context)
        + ", containerId=" + containerId
        + ", nodeId=" + nodeId;
    LOG.error(msg, e);
    sendEvent(new DAGAppMasterEventUserServiceFatalError(
        DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR, msg, e));
  }
}
use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.
the class ContainerLauncherManager method handle.
/**
 * Routes a container launcher event to the launcher selected by
 * {@code event.getLauncherId()}.
 *
 * <p>{@code CONTAINER_LAUNCH_REQUEST} events result in a {@code launchContainer} call and
 * {@code CONTAINER_STOP_REQUEST} events in a {@code stopContainer} call; any other event
 * type is ignored. An exception thrown by the launcher is logged and reported through a
 * {@code CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR} event instead of being propagated.
 *
 * @param event the launch or stop request to process
 */
@Override
public void handle(ContainerLauncherEvent event) {
  final int launcherIndex = event.getLauncherId();
  final String schedulerName = appContext.getTaskSchedulerName(event.getSchedulerId());
  final String taskCommName = appContext.getTaskCommunicatorName(event.getTaskCommId());

  switch (event.getType()) {
    case CONTAINER_LAUNCH_REQUEST: {
      ContainerLauncherLaunchRequestEvent launchEvent = (ContainerLauncherLaunchRequestEvent) event;
      ContainerLaunchRequest launchRequest = new ContainerLaunchRequest(
          launchEvent.getNodeId(),
          launchEvent.getContainerId(),
          launchEvent.getContainerToken(),
          launchEvent.getContainerLaunchContext(),
          launchEvent.getContainer(),
          schedulerName,
          taskCommName);
      try {
        containerLaunchers[launcherIndex].launchContainer(launchRequest);
      } catch (Exception e) {
        // Plugin failures are converted into a fatal-error event rather than rethrown.
        String msg = "Error when launching container"
            + ", containerLauncher="
            + Utils.getContainerLauncherIdentifierString(launcherIndex, appContext)
            + ", scheduler="
            + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext)
            + ", taskCommunicator="
            + Utils.getTaskCommIdentifierString(event.getTaskCommId(), appContext);
        LOG.error(msg, e);
        sendEvent(new DAGAppMasterEventUserServiceFatalError(
            DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR, msg, e));
      }
      break;
    }
    case CONTAINER_STOP_REQUEST: {
      ContainerStopRequest stopRequest = new ContainerStopRequest(
          event.getNodeId(),
          event.getContainerId(),
          event.getContainerToken(),
          schedulerName,
          taskCommName);
      try {
        containerLaunchers[launcherIndex].stopContainer(stopRequest);
      } catch (Exception e) {
        // Plugin failures are converted into a fatal-error event rather than rethrown.
        String msg = "Error when stopping container"
            + ", containerLauncher="
            + Utils.getContainerLauncherIdentifierString(launcherIndex, appContext)
            + ", scheduler="
            + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext)
            + ", taskCommunicator="
            + Utils.getTaskCommIdentifierString(event.getTaskCommId(), appContext);
        LOG.error(msg, e);
        sendEvent(new DAGAppMasterEventUserServiceFatalError(
            DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR, msg, e));
      }
      break;
    }
    default:
      // No other event type is acted on here.
      break;
  }
}
use of org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError in project tez by apache.
the class ContainerLauncherManager method reportError.
/**
 * Handles an error reported by a container launcher.
 *
 * <p>An error whose type is {@code PERMANENT} is logged and reported through a
 * {@code CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR} event (with no cause attached). Any other
 * error type is delegated to {@code Utils.processNonFatalServiceErrorReport}.
 *
 * @param containerLauncherIndex index of the launcher that reported the error
 * @param servicePluginError the reported error; its error type selects the handling path
 * @param diagnostics optional detail text; {@code null} is rendered as an empty string in the
 *     fatal-error message
 * @param dagInfo passed through to the non-fatal handler
 */
public void reportError(int containerLauncherIndex, ServicePluginError servicePluginError, String diagnostics, DagInfo dagInfo) {
  if (servicePluginError.getErrorType() != ServicePluginError.ErrorType.PERMANENT) {
    Utils.processNonFatalServiceErrorReport(
        Utils.getContainerLauncherIdentifierString(containerLauncherIndex, appContext),
        servicePluginError,
        diagnostics,
        dagInfo,
        appContext,
        "ContainerLauncher");
    return;
  }

  final String diagnosticsText = (diagnostics == null) ? "" : diagnostics;
  String msg = "Fatal Error reported by ContainerLauncher"
      + ", containerLauncher="
      + Utils.getContainerLauncherIdentifierString(containerLauncherIndex, appContext)
      + ", servicePluginError=" + servicePluginError
      + ", diagnostics= " + diagnosticsText;
  LOG.error(msg);
  sendEvent(new DAGAppMasterEventUserServiceFatalError(
      DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR, msg, null));
}
Aggregations