use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class DefaultContainerExecutor method launchContainer.
@Override
public int launchContainer(ContainerStartContext ctx) throws IOException {
  Container container = ctx.getContainer();
  Path nmPrivateContainerScriptPath = ctx.getNmPrivateContainerScriptPath();
  Path nmPrivateTokensPath = ctx.getNmPrivateTokensPath();
  String user = ctx.getUser();
  Path containerWorkDir = ctx.getContainerWorkDir();
  List<String> localDirs = ctx.getLocalDirs();
  List<String> logDirs = ctx.getLogDirs();
  FsPermission dirPerm = new FsPermission(APPDIR_PERM);
  ContainerId containerId = container.getContainerId();
  // create container dirs on all disks
  String containerIdStr = containerId.toString();
  String appIdStr =
      containerId.getApplicationAttemptId().getApplicationId().toString();
  for (String sLocalDir : localDirs) {
    Path usersdir = new Path(sLocalDir, ContainerLocalizer.USERCACHE);
    Path userdir = new Path(usersdir, user);
    Path appCacheDir = new Path(userdir, ContainerLocalizer.APPCACHE);
    Path appDir = new Path(appCacheDir, appIdStr);
    Path containerDir = new Path(appDir, containerIdStr);
    createDir(containerDir, dirPerm, true, user);
  }
  // Create the container log-dirs on all disks
  createContainerLogDirs(appIdStr, containerIdStr, logDirs, user);
  Path tmpDir = new Path(containerWorkDir,
      YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR);
  createDir(tmpDir, dirPerm, false, user);
  // copy container tokens to work dir
  Path tokenDst =
      new Path(containerWorkDir, ContainerLaunch.FINAL_CONTAINER_TOKENS_FILE);
  copyFile(nmPrivateTokensPath, tokenDst, user);
  // copy launch script to work dir
  Path launchDst =
      new Path(containerWorkDir, ContainerLaunch.CONTAINER_SCRIPT);
  copyFile(nmPrivateContainerScriptPath, launchDst, user);
  // Create new local launch wrapper script
  LocalWrapperScriptBuilder sb =
      getLocalWrapperScriptBuilder(containerIdStr, containerWorkDir);
  // Windows path length limitation.
  if (Shell.WINDOWS &&
      sb.getWrapperScriptPath().toString().length() > WIN_MAX_PATH) {
    throw new IOException(String.format(
        "Cannot launch container using script at path %s, because it exceeds "
            + "the maximum supported path length of %d characters. Consider "
            + "configuring shorter directories in %s.",
        sb.getWrapperScriptPath(), WIN_MAX_PATH,
        YarnConfiguration.NM_LOCAL_DIRS));
  }
  Path pidFile = getPidFilePath(containerId);
  if (pidFile != null) {
    sb.writeLocalWrapperScript(launchDst, pidFile);
  } else {
    LOG.info("Container " + containerIdStr
        + " was marked as inactive. Returning terminated error");
    return ExitCode.TERMINATED.getExitCode();
  }
  // create log dir under app
  // fork script
  Shell.CommandExecutor shExec = null;
  try {
    setScriptExecutable(launchDst, user);
    setScriptExecutable(sb.getWrapperScriptPath(), user);
    shExec = buildCommandExecutor(sb.getWrapperScriptPath().toString(),
        containerIdStr, user, pidFile, container.getResource(),
        new File(containerWorkDir.toUri().getPath()),
        container.getLaunchContext().getEnvironment());
    if (isContainerActive(containerId)) {
      shExec.execute();
    } else {
      LOG.info("Container " + containerIdStr
          + " was marked as inactive. Returning terminated error");
      return ExitCode.TERMINATED.getExitCode();
    }
  } catch (IOException e) {
    if (null == shExec) {
      return -1;
    }
    int exitCode = shExec.getExitCode();
    LOG.warn("Exit code from container " + containerId + " is : " + exitCode);
    // container-executor's output
    if (exitCode != ExitCode.FORCE_KILLED.getExitCode()
        && exitCode != ExitCode.TERMINATED.getExitCode()) {
      LOG.warn("Exception from container-launch with container ID: "
          + containerId + " and exit code: " + exitCode, e);
      StringBuilder builder = new StringBuilder();
      builder.append("Exception from container-launch.\n");
      builder.append("Container id: ").append(containerId).append("\n");
      builder.append("Exit code: ").append(exitCode).append("\n");
      if (!Optional.fromNullable(e.getMessage()).or("").isEmpty()) {
        builder.append("Exception message: ");
        builder.append(e.getMessage()).append("\n");
      }
      builder.append("Stack trace: ");
      builder.append(StringUtils.stringifyException(e)).append("\n");
      if (!shExec.getOutput().isEmpty()) {
        builder.append("Shell output: ");
        builder.append(shExec.getOutput()).append("\n");
      }
      String diagnostics = builder.toString();
      logOutput(diagnostics);
      container.handle(
          new ContainerDiagnosticsUpdateEvent(containerId, diagnostics));
    } else {
      container.handle(new ContainerDiagnosticsUpdateEvent(containerId,
          "Container killed on request. Exit code is " + exitCode));
    }
    return exitCode;
  } finally {
    if (shExec != null) {
      shExec.close();
    }
  }
  return 0;
}
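Three outcomes are possible above: 0 when the wrapper script runs to completion, ExitCode.TERMINATED or FORCE_KILLED when the container was deactivated or killed on request, and -1 when the command executor could not even be created. A minimal caller sketch follows; the helper class and method names are made up for illustration, and it assumes a DefaultContainerExecutor and an already-built ContainerStartContext are available.

import java.io.IOException;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;

// Hypothetical helper (not part of Hadoop): distinguishes "killed on request"
// from real launch failures when interpreting launchContainer's return value.
final class LaunchResultSketch {
  static boolean launchedAndExitedCleanly(DefaultContainerExecutor exec,
      ContainerStartContext startCtx) throws IOException {
    int exitCode = exec.launchContainer(startCtx);
    if (exitCode == ExitCode.FORCE_KILLED.getExitCode()
        || exitCode == ExitCode.TERMINATED.getExitCode()) {
      return false; // killed or deactivated on request, not a launch error
    }
    // 0 means the wrapper script ran and exited cleanly; -1 means the command
    // executor could not be built; anything else is a script failure.
    return exitCode == 0;
  }
}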
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class ContainerManagerImpl method reInitializeContainer.
/**
 * ReInitialize a container using a new Launch Context. If the
 * retryFailureContext is not provided, the container is
 * terminated on failure.
 *
 * NOTE: Auto-Commit is true by default. This also means that the rollback
 * context is purged as soon as the command to start the new process
 * is sent (the container moves to the RUNNING state).
 *
 * @param containerId Container Id.
 * @param reInitLaunchContext Target Launch Context.
 * @param autoCommit Auto-Commit flag.
 * @throws YarnException if the container cannot be re-initialized.
 */
public void reInitializeContainer(ContainerId containerId,
    ContainerLaunchContext reInitLaunchContext, boolean autoCommit)
    throws YarnException {
  Container container = preReInitializeOrLocalizeCheck(containerId,
      ReInitOp.RE_INIT);
  ResourceSet resourceSet = new ResourceSet();
  try {
    if (reInitLaunchContext != null) {
      resourceSet.addResources(reInitLaunchContext.getLocalResources());
    }
    dispatcher.getEventHandler().handle(new ContainerReInitEvent(containerId,
        reInitLaunchContext, resourceSet, autoCommit));
    container.setIsReInitializing(true);
  } catch (URISyntaxException e) {
    LOG.info("Error when parsing local resource URI for upgrade of "
        + "Container [" + containerId + "]", e);
    throw new YarnException(e);
  }
}
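A brief usage sketch for the method above: the only YARN API call it needs from the caller's side is ContainerLaunchContext.newInstance, and the argument order is the container id, then the new launch context, then the auto-commit flag. The helper class, the command string, and the assumption that a ContainerManagerImpl reference is at hand are illustrative, not part of Hadoop.

import java.util.Collections;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;

// Hypothetical helper, for illustration only: upgrade a running container to
// a new command while keeping auto-commit enabled.
final class ReInitSketch {
  static void upgrade(ContainerManagerImpl containerManager,
      ContainerId containerId) throws YarnException {
    ContainerLaunchContext relaunchCtx = ContainerLaunchContext.newInstance(
        Collections.<String, LocalResource>emptyMap(), // new local resources
        Collections.<String, String>emptyMap(),        // environment
        Collections.singletonList("./run_v2.sh"),      // upgraded command (made up)
        null, null, null);
    // autoCommit = true: the rollback context is purged once the new process starts.
    containerManager.reInitializeContainer(containerId, relaunchCtx, true);
  }
}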
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class ContainerManagerImpl method cleanupContainersOnNMResync.
public void cleanupContainersOnNMResync() {
  Map<ContainerId, Container> containers = context.getContainers();
  if (containers.isEmpty()) {
    return;
  }
  LOG.info("Containers still running on "
      + CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC + " : "
      + containers.keySet());
  List<ContainerId> containerIds =
      new ArrayList<ContainerId>(containers.keySet());
  LOG.info("Waiting for containers to be killed");
  this.handle(new CMgrCompletedContainersEvent(containerIds,
      CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC));
  /*
   * We will wait till all the containers change their state to COMPLETE. We
   * will not remove the container statuses from nm context because these
   * are used while re-registering node manager with resource manager.
   */
  boolean allContainersCompleted = false;
  while (!containers.isEmpty() && !allContainersCompleted) {
    allContainersCompleted = true;
    for (Entry<ContainerId, Container> container : containers.entrySet()) {
      if (((ContainerImpl) container.getValue()).getCurrentState()
          != ContainerState.COMPLETE) {
        allContainersCompleted = false;
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ex) {
          LOG.warn("Interrupted while sleeping on container kill on resync", ex);
        }
        break;
      }
    }
  }
  // All containers killed
  if (allContainersCompleted) {
    LOG.info("All containers in DONE state");
  } else {
    LOG.info("Done waiting for containers to be killed. Still alive: "
        + containers.keySet());
  }
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class ContainersLauncher method handle.
@Override
public void handle(ContainersLauncherEvent event) {
  // TODO: ContainersLauncher launches containers one by one!!
  Container container = event.getContainer();
  ContainerId containerId = container.getContainerId();
  switch (event.getType()) {
  case LAUNCH_CONTAINER:
    Application app = context.getApplications().get(
        containerId.getApplicationAttemptId().getApplicationId());
    ContainerLaunch launch = new ContainerLaunch(context, getConfig(),
        dispatcher, exec, app, event.getContainer(), dirsHandler,
        containerManager);
    containerLauncher.submit(launch);
    running.put(containerId, launch);
    break;
  case RELAUNCH_CONTAINER:
    app = context.getApplications().get(
        containerId.getApplicationAttemptId().getApplicationId());
    ContainerRelaunch relaunch = new ContainerRelaunch(context, getConfig(),
        dispatcher, exec, app, event.getContainer(), dirsHandler,
        containerManager);
    containerLauncher.submit(relaunch);
    running.put(containerId, relaunch);
    break;
  case RECOVER_CONTAINER:
    app = context.getApplications().get(
        containerId.getApplicationAttemptId().getApplicationId());
    launch = new RecoveredContainerLaunch(context, getConfig(), dispatcher,
        exec, app, event.getContainer(), dirsHandler, containerManager);
    containerLauncher.submit(launch);
    running.put(containerId, launch);
    break;
  case CLEANUP_CONTAINER:
  case CLEANUP_CONTAINER_FOR_REINIT:
    ContainerLaunch launcher = running.remove(containerId);
    if (launcher == null) {
      // Container not launched. So nothing needs to be done.
      return;
    }
    // no sub-processes are alive.
    try {
      launcher.cleanupContainer();
    } catch (IOException e) {
      LOG.warn("Got exception while cleaning container " + containerId
          + ". Ignoring.");
    }
    break;
  case SIGNAL_CONTAINER:
    SignalContainersLauncherEvent signalEvent =
        (SignalContainersLauncherEvent) event;
    ContainerLaunch runningContainer = running.get(containerId);
    if (runningContainer == null) {
      // Container not launched. So nothing needs to be done.
      LOG.info("Container " + containerId + " not running, nothing to signal.");
      return;
    }
    try {
      runningContainer.signalContainer(signalEvent.getCommand());
    } catch (IOException e) {
      LOG.warn("Got exception while signaling container " + containerId
          + " with command " + signalEvent.getCommand());
    }
    break;
  }
}
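For context, this handler is driven by the NM's AsyncDispatcher rather than called directly. The sketch below shows how a LAUNCH_CONTAINER request would typically be posted; it assumes the two-argument ContainersLauncherEvent constructor implied by the getContainer()/getType() accessors used above, so treat it as a sketch rather than a verified signature.

import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEventType;

// Sketch only: post a LAUNCH_CONTAINER request to the ContainersLauncher
// through the NM dispatcher (the constructor arguments are assumed to match
// the fields read back in handle()).
final class LaunchRequestSketch {
  @SuppressWarnings("unchecked")
  static void requestLaunch(Dispatcher dispatcher, Container container) {
    dispatcher.getEventHandler().handle(
        new ContainersLauncherEvent(container,
            ContainersLauncherEventType.LAUNCH_CONTAINER));
  }
}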
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class LinuxContainerExecutor method isContainerAlive.
@Override
public boolean isContainerAlive(ContainerLivenessContext ctx)
    throws IOException {
  String user = ctx.getUser();
  String pid = ctx.getPid();
  Container container = ctx.getContainer();
  // Send a test signal to the process as the user to see if it's alive
  return signalContainer(new ContainerSignalContext.Builder()
      .setContainer(container)
      .setUser(user)
      .setPid(pid)
      .setSignal(Signal.NULL)
      .build());
}
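The liveness check above is just a NULL signal delivered as the container's user. A caller would assemble the ContainerLivenessContext with a builder analogous to the ContainerSignalContext.Builder shown; the setter names below are assumed to mirror that builder, and the helper class is made up for illustration.

import java.io.IOException;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext;

// Sketch: probe whether a launched container's process is still running.
final class LivenessProbeSketch {
  static boolean probe(ContainerExecutor exec, Container container,
      String user, String pid) throws IOException {
    ContainerLivenessContext livenessCtx = new ContainerLivenessContext.Builder()
        .setContainer(container)
        .setUser(user)
        .setPid(pid)
        .build();
    return exec.isContainerAlive(livenessCtx);
  }
}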