Search in sources :

Example 56 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

The launchContainer method of the DefaultContainerExecutor class.

@Override
public int launchContainer(ContainerStartContext ctx) throws IOException {
    // Launches a container process for the given start context: prepares the
    // per-disk container directories, stages the tokens and launch script into
    // the container work dir, wraps the launch script in a local wrapper
    // script, forks it via a shell command executor, and blocks until the
    // container process exits. Returns the container's exit code (0 on
    // success, TERMINATED if the container was deactivated before launch,
    // -1 if the executor could not even be built).
    Container container = ctx.getContainer();
    Path nmPrivateContainerScriptPath = ctx.getNmPrivateContainerScriptPath();
    Path nmPrivateTokensPath = ctx.getNmPrivateTokensPath();
    String user = ctx.getUser();
    Path containerWorkDir = ctx.getContainerWorkDir();
    List<String> localDirs = ctx.getLocalDirs();
    List<String> logDirs = ctx.getLogDirs();
    FsPermission dirPerm = new FsPermission(APPDIR_PERM);
    ContainerId containerId = container.getContainerId();
    // create container dirs on all disks
    String containerIdStr = containerId.toString();
    String appIdStr = containerId.getApplicationAttemptId().getApplicationId().toString();
    for (String sLocalDir : localDirs) {
        // Layout: <localDir>/usercache/<user>/appcache/<appId>/<containerId>
        Path usersdir = new Path(sLocalDir, ContainerLocalizer.USERCACHE);
        Path userdir = new Path(usersdir, user);
        Path appCacheDir = new Path(userdir, ContainerLocalizer.APPCACHE);
        Path appDir = new Path(appCacheDir, appIdStr);
        Path containerDir = new Path(appDir, containerIdStr);
        createDir(containerDir, dirPerm, true, user);
    }
    // Create the container log-dirs on all disks
    createContainerLogDirs(appIdStr, containerIdStr, logDirs, user);
    Path tmpDir = new Path(containerWorkDir, YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR);
    createDir(tmpDir, dirPerm, false, user);
    // copy container tokens to work dir
    Path tokenDst = new Path(containerWorkDir, ContainerLaunch.FINAL_CONTAINER_TOKENS_FILE);
    copyFile(nmPrivateTokensPath, tokenDst, user);
    // copy launch script to work dir
    Path launchDst = new Path(containerWorkDir, ContainerLaunch.CONTAINER_SCRIPT);
    copyFile(nmPrivateContainerScriptPath, launchDst, user);
    // Create new local launch wrapper script
    LocalWrapperScriptBuilder sb = getLocalWrapperScriptBuilder(containerIdStr, containerWorkDir);
    // Windows path length limitation.
    if (Shell.WINDOWS && sb.getWrapperScriptPath().toString().length() > WIN_MAX_PATH) {
        throw new IOException(String.format("Cannot launch container using script at path %s, because it exceeds " + "the maximum supported path length of %d characters.  Consider " + "configuring shorter directories in %s.", sb.getWrapperScriptPath(), WIN_MAX_PATH, YarnConfiguration.NM_LOCAL_DIRS));
    }
    Path pidFile = getPidFilePath(containerId);
    if (pidFile != null) {
        sb.writeLocalWrapperScript(launchDst, pidFile);
    } else {
        // A null pid-file path means the container was deactivated before the
        // launch could proceed; bail out with the TERMINATED exit code.
        LOG.info("Container " + containerIdStr + " was marked as inactive. Returning terminated error");
        return ExitCode.TERMINATED.getExitCode();
    }
    // create log dir under app
    // fork script
    Shell.CommandExecutor shExec = null;
    try {
        setScriptExecutable(launchDst, user);
        setScriptExecutable(sb.getWrapperScriptPath(), user);
        shExec = buildCommandExecutor(sb.getWrapperScriptPath().toString(), containerIdStr, user, pidFile, container.getResource(), new File(containerWorkDir.toUri().getPath()), container.getLaunchContext().getEnvironment());
        // Re-check activity right before the fork: the container may have been
        // killed between writing the wrapper script and reaching this point.
        if (isContainerActive(containerId)) {
            shExec.execute();
        } else {
            LOG.info("Container " + containerIdStr + " was marked as inactive. Returning terminated error");
            return ExitCode.TERMINATED.getExitCode();
        }
    } catch (IOException e) {
        // execute() throws IOException on a non-zero exit; a null executor
        // means we failed before the fork, so there is no exit code to report.
        if (null == shExec) {
            return -1;
        }
        int exitCode = shExec.getExitCode();
        LOG.warn("Exit code from container " + containerId + " is : " + exitCode);
        // container-executor's output
        // FORCE_KILLED / TERMINATED are expected outcomes of a kill request,
        // so only genuine failures get the full diagnostics treatment below.
        if (exitCode != ExitCode.FORCE_KILLED.getExitCode() && exitCode != ExitCode.TERMINATED.getExitCode()) {
            LOG.warn("Exception from container-launch with container ID: " + containerId + " and exit code: " + exitCode, e);
            StringBuilder builder = new StringBuilder();
            builder.append("Exception from container-launch.\n");
            builder.append("Container id: ").append(containerId).append("\n");
            builder.append("Exit code: ").append(exitCode).append("\n");
            if (!Optional.fromNullable(e.getMessage()).or("").isEmpty()) {
                builder.append("Exception message: ");
                builder.append(e.getMessage()).append("\n");
            }
            builder.append("Stack trace: ");
            builder.append(StringUtils.stringifyException(e)).append("\n");
            if (!shExec.getOutput().isEmpty()) {
                builder.append("Shell output: ");
                builder.append(shExec.getOutput()).append("\n");
            }
            // Surface the assembled diagnostics both in the NM log and on the
            // container itself (visible to the user via container status).
            String diagnostics = builder.toString();
            logOutput(diagnostics);
            container.handle(new ContainerDiagnosticsUpdateEvent(containerId, diagnostics));
        } else {
            container.handle(new ContainerDiagnosticsUpdateEvent(containerId, "Container killed on request. Exit code is " + exitCode));
        }
        return exitCode;
    } finally {
        if (shExec != null)
            shExec.close();
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) ContainerDiagnosticsUpdateEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent) IOException(java.io.IOException) CommandExecutor(org.apache.hadoop.util.Shell.CommandExecutor) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) Shell(org.apache.hadoop.util.Shell) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FsPermission(org.apache.hadoop.fs.permission.FsPermission) File(java.io.File)

Example 57 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

The reInitializeContainer method of the ContainerManagerImpl class.

/**
   * ReInitialize a container using a new Launch Context. If the
   * retryFailureContext is not provided, the container is
   * terminated on failure.
   *
   * NOTE: Auto-Commit is true by default. This also means that the rollback
   *       context is purged as soon as the command to start the new process
   *       is sent. (The Container moves to RUNNING state)
   *
   * @param containerId Container Id.
   * @param autoCommit Auto Commit flag.
   * @param reInitLaunchContext Target Launch Context.
   * @throws YarnException Yarn Exception.
   */
public void reInitializeContainer(ContainerId containerId, ContainerLaunchContext reInitLaunchContext, boolean autoCommit) throws YarnException {
    // Validate that the container exists and is in a state that permits
    // re-initialization before touching any resources.
    Container container = preReInitializeOrLocalizeCheck(containerId, ReInitOp.RE_INIT);
    ResourceSet resourceSet = new ResourceSet();
    try {
        // A null launch context means "re-init with the existing context";
        // only collect new local resources when one is actually supplied.
        if (reInitLaunchContext != null) {
            resourceSet.addResources(reInitLaunchContext.getLocalResources());
        }
        dispatcher.getEventHandler().handle(new ContainerReInitEvent(containerId, reInitLaunchContext, resourceSet, autoCommit));
        container.setIsReInitializing(true);
    } catch (URISyntaxException e) {
        // Bug fix: the two concatenated literals previously rendered as
        // "upgrade ofContainer [...]" — a space was missing between them.
        LOG.info("Error when parsing local resource URI for upgrade of " + "Container [" + containerId + "]", e);
        throw new YarnException(e);
    }
}
Also used : Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerReInitEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerReInitEvent) ResourceSet(org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceSet) URISyntaxException(java.net.URISyntaxException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException)

Example 58 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

The cleanupContainersOnNMResync method of the ContainerManagerImpl class.

/**
 * Requests that all still-running containers be killed during a NodeManager
 * resync with the ResourceManager, then waits (polling once per second) until
 * every container reaches the COMPLETE state. Container statuses are
 * deliberately left in the NM context because they are needed when the node
 * manager re-registers with the resource manager.
 */
public void cleanupContainersOnNMResync() {
    Map<ContainerId, Container> containers = context.getContainers();
    if (containers.isEmpty()) {
        return;
    }
    LOG.info("Containers still running on " + CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC + " : " + containers.keySet());
    List<ContainerId> containerIds = new ArrayList<ContainerId>(containers.keySet());
    LOG.info("Waiting for containers to be killed");
    this.handle(new CMgrCompletedContainersEvent(containerIds, CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC));
    /*
     * We will wait till all the containers change their state to COMPLETE. We
     * will not remove the container statuses from nm context because these
     * are used while re-registering node manager with resource manager.
     */
    boolean allContainersCompleted = false;
    while (!containers.isEmpty() && !allContainersCompleted) {
        allContainersCompleted = true;
        for (Entry<ContainerId, Container> container : containers.entrySet()) {
            if (((ContainerImpl) container.getValue()).getCurrentState() != ContainerState.COMPLETE) {
                allContainersCompleted = false;
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ex) {
                    // Bug fix: the interrupt was previously swallowed, leaving
                    // the thread's interrupt status cleared and the wait loop
                    // running. Restore the flag for callers and stop waiting
                    // (continuing would make every subsequent sleep() throw
                    // immediately, turning this into a busy spin).
                    LOG.warn("Interrupted while sleeping on container kill on resync", ex);
                    Thread.currentThread().interrupt();
                    LOG.info("Interrupted before containers completed. Still alive: " + containers.keySet());
                    return;
                }
                break;
            }
        }
    }
    // All containers killed
    if (allContainersCompleted) {
        LOG.info("All containers in DONE state");
    } else {
        LOG.info("Done waiting for containers to be killed. Still alive: " + containers.keySet());
    }
}
Also used : Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) CMgrCompletedContainersEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent) ArrayList(java.util.ArrayList)

Example 59 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

The handle method of the ContainersLauncher class.

/**
 * Dispatches a launcher event: submits a launch/relaunch/recovery task to the
 * container launcher pool, or routes cleanup/signal requests to the tracked
 * in-flight {@link ContainerLaunch} for the event's container.
 */
@Override
public void handle(ContainersLauncherEvent event) {
    // TODO: ContainersLauncher launches containers one by one!!
    Container container = event.getContainer();
    ContainerId containerId = container.getContainerId();
    switch(event.getType()) {
        case LAUNCH_CONTAINER:
            Application app = context.getApplications().get(containerId.getApplicationAttemptId().getApplicationId());
            ContainerLaunch launch = new ContainerLaunch(context, getConfig(), dispatcher, exec, app, event.getContainer(), dirsHandler, containerManager);
            containerLauncher.submit(launch);
            running.put(containerId, launch);
            break;
        case RELAUNCH_CONTAINER:
            app = context.getApplications().get(containerId.getApplicationAttemptId().getApplicationId());
            ContainerRelaunch relaunch = new ContainerRelaunch(context, getConfig(), dispatcher, exec, app, event.getContainer(), dirsHandler, containerManager);
            containerLauncher.submit(relaunch);
            running.put(containerId, relaunch);
            break;
        case RECOVER_CONTAINER:
            app = context.getApplications().get(containerId.getApplicationAttemptId().getApplicationId());
            launch = new RecoveredContainerLaunch(context, getConfig(), dispatcher, exec, app, event.getContainer(), dirsHandler, containerManager);
            containerLauncher.submit(launch);
            running.put(containerId, launch);
            break;
        case CLEANUP_CONTAINER:
        case CLEANUP_CONTAINER_FOR_REINIT:
            ContainerLaunch launcher = running.remove(containerId);
            if (launcher == null) {
                // Container not launched. So nothing needs to be done.
                return;
            }
            // no sub-processes are alive.
            try {
                launcher.cleanupContainer();
            } catch (IOException e) {
                // Bug fix: the caught exception was previously dropped from the
                // log call, losing the stack trace of the cleanup failure.
                LOG.warn("Got exception while cleaning container " + containerId + ". Ignoring.", e);
            }
            break;
        case SIGNAL_CONTAINER:
            SignalContainersLauncherEvent signalEvent = (SignalContainersLauncherEvent) event;
            ContainerLaunch runningContainer = running.get(containerId);
            if (runningContainer == null) {
                // Container not launched. So nothing needs to be done.
                LOG.info("Container " + containerId + " not running, nothing to signal.");
                return;
            }
            try {
                runningContainer.signalContainer(signalEvent.getCommand());
            } catch (IOException e) {
                // Bug fix: include the exception so the signal failure cause
                // is not silently discarded.
                LOG.warn("Got exception while signaling container " + containerId + " with command " + signalEvent.getCommand(), e);
            }
            break;
        default:
            // Surface unexpected event types instead of dropping them silently.
            LOG.warn("Unknown event type " + event.getType() + " for container " + containerId);
            break;
    }
}
Also used : Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) IOException(java.io.IOException) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application)

Example 60 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

The isContainerAlive method of the LinuxContainerExecutor class.

/**
 * Checks whether the container's process is still alive by delivering the
 * NULL signal (a no-op probe) to its pid as the container's user.
 */
@Override
public boolean isContainerAlive(ContainerLivenessContext ctx) throws IOException {
    Container target = ctx.getContainer();
    // Build the probe explicitly; NULL performs no action but reports
    // whether the process exists and is signalable by this user.
    ContainerSignalContext probe = new ContainerSignalContext.Builder()
        .setContainer(target)
        .setUser(ctx.getUser())
        .setPid(ctx.getPid())
        .setSignal(Signal.NULL)
        .build();
    return signalContainer(probe);
}
Also used : Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container)

Aggregations

Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container)109 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)55 Test (org.junit.Test)43 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)33 Path (org.apache.hadoop.fs.Path)31 ArrayList (java.util.ArrayList)29 Application (org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application)29 HashMap (java.util.HashMap)27 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)27 Configuration (org.apache.hadoop.conf.Configuration)24 IOException (java.io.IOException)20 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)18 ContainerEvent (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent)17 LocalDirsHandlerService (org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService)16 Collection (java.util.Collection)14 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)14 LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility)14 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)14 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)13 ApplicationEvent (org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent)13