Search in sources :

Example 1 with Signal

use of org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal in project hadoop by apache.

In the class ContainerLaunch, the method signalContainer:

/**
 * Send a signal to the container.
 *
 * <p>The command is translated with {@code translateCommandToSignal}; when the result is
 * {@code Signal.NULL} the request is logged and ignored. Nothing is sent when the container
 * has not been launched yet, or when no process id can be read from the pid file. After a
 * signal is handed to the executor, the outcome is logged and published as a
 * {@code ContainerDiagnosticsUpdateEvent}.
 *
 * @param command the signal command requested for this container
 * @throws IOException declared by the signature; failures while reading the pid or
 *     delivering the signal are caught and logged inside this method
 */
// dispatcher not typed
@SuppressWarnings("unchecked")
public void signalContainer(SignalContainerCommand command) throws IOException {
    ContainerId containerId = container.getContainerTokenIdentifier().getContainerID();
    String containerIdStr = containerId.toString();
    String user = container.getUser();
    Signal signal = translateCommandToSignal(command);
    if (signal.equals(Signal.NULL)) {
        LOG.info("ignore signal command " + command);
        return;
    }
    LOG.info("Sending signal " + command + " to container " + containerIdStr);
    // Only READ the launch flag here. cleanupContainer() uses compareAndSet(false, true) on the
    // same flag to cancel a launch that has not started; doing that for a mere signal request
    // would flip the flag as a side effect even though we only intend to skip the signal.
    // NOTE(review): assumes containerAlreadyLaunched is an AtomicBoolean - confirm.
    boolean alreadyLaunched = containerAlreadyLaunched.get();
    if (!alreadyLaunched) {
        LOG.info("Container " + containerIdStr + " not launched." + " Not sending the signal");
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Getting pid for container " + containerIdStr + " to send signal to from pid file " + (pidFilePath != null ? pidFilePath.toString() : "null"));
    }
    try {
        // get process id from pid file if available
        String processId = null;
        if (pidFilePath != null) {
            processId = getContainerPid(pidFilePath);
        }
        // without a pid there is nothing to signal; fall through silently
        if (processId != null) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sending signal to pid " + processId + " as user " + user + " for container " + containerIdStr);
            }
            boolean result = exec.signalContainer(new ContainerSignalContext.Builder().setContainer(container).setUser(user).setPid(processId).setSignal(signal).build());
            String diagnostics = "Sent signal " + command + " (" + signal + ") to pid " + processId + " as user " + user + " for container " + containerIdStr + ", result=" + (result ? "success" : "failed");
            LOG.info(diagnostics);
            // surface the outcome on the container's diagnostics
            dispatcher.getEventHandler().handle(new ContainerDiagnosticsUpdateEvent(containerId, diagnostics));
        }
    } catch (Exception e) {
        // best effort: a failed signal is reported in the log and not propagated
        String message = "Exception when sending signal to container " + containerIdStr + ": " + StringUtils.stringifyException(e);
        LOG.warn(message);
    }
}
Also used : Signal(org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerDiagnosticsUpdateEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException)

Example 2 with Signal

use of org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal in project hadoop by apache.

In the class TestContainerManager, the method testContainerLaunchAndSignal:

/**
 * Verify signal container request can be delivered from
 * NodeStatusUpdaterImpl to ContainerExecutor.
 *
 * <p>Writes a small bash script that records its start in a file and then sleeps, starts it
 * as a container, waits for the start file to appear, sends a signal request for the
 * container to the container manager, and finally checks what reached the executor.
 *
 * @param command the signal command to deliver; when it translates to {@code Signal.NULL}
 *     the executor is expected never to be signalled
 * @throws IOException if the script file cannot be written
 * @throws InterruptedException if interrupted while waiting for the start file
 * @throws YarnException propagated from the container manager calls
 */
private void testContainerLaunchAndSignal(SignalContainerCommand command) throws IOException, InterruptedException, YarnException {
    Signal signal = ContainerLaunch.translateCommandToSignal(command);
    containerManager.start();
    File scriptFile = new File(tmpDir, "scriptFile.sh");
    File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile();
    // try-with-resources so the writer is closed even if one of the writes fails
    try (PrintWriter fileWriter = new PrintWriter(scriptFile)) {
        // So that start file is readable by the test
        fileWriter.write("\numask 0");
        fileWriter.write("\necho Hello World! > " + processStartFile);
        fileWriter.write("\necho $$ >> " + processStartFile);
        fileWriter.write("\nexec sleep 1000s");
    }
    ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    // ////// Construct the Container-id
    ContainerId cId = createContainerId(0);
    URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
    LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
    rsrc_alpha.setResource(resource_alpha);
    rsrc_alpha.setSize(-1);
    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc_alpha.setType(LocalResourceType.FILE);
    rsrc_alpha.setTimestamp(scriptFile.lastModified());
    String destinationFile = "dest_file";
    Map<String, LocalResource> localResources = new HashMap<>();
    localResources.put(destinationFile, rsrc_alpha);
    containerLaunchContext.setLocalResources(localResources);
    List<String> commands = new ArrayList<>();
    commands.add("/bin/bash");
    commands.add(scriptFile.getAbsolutePath());
    containerLaunchContext.setCommands(commands);
    StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
    List<StartContainerRequest> list = new ArrayList<>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);
    // Poll once per second, for at most ~20 seconds, until the script has created its start file
    int timeoutSecs = 0;
    while (!processStartFile.exists() && timeoutSecs++ < 20) {
        Thread.sleep(1000);
        LOG.info("Waiting for process start-file to be created");
    }
    Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());
    // Simulate NodeStatusUpdaterImpl sending CMgrSignalContainersEvent
    SignalContainerRequest signalReq = SignalContainerRequest.newInstance(cId, command);
    List<SignalContainerRequest> reqs = new ArrayList<>();
    reqs.add(signalReq);
    containerManager.handle(new CMgrSignalContainersEvent(reqs));
    final ArgumentCaptor<ContainerSignalContext> signalContextCaptor = ArgumentCaptor.forClass(ContainerSignalContext.class);
    if (signal.equals(Signal.NULL)) {
        // a NULL signal must never reach the executor
        verify(exec, never()).signalContainer(signalContextCaptor.capture());
    } else {
        // allow up to 10 seconds for the signal to reach the executor
        verify(exec, timeout(10000).atLeastOnce()).signalContainer(signalContextCaptor.capture());
        ContainerSignalContext signalContext = signalContextCaptor.getAllValues().get(0);
        Assert.assertEquals(cId, signalContext.getContainer().getContainerId());
        Assert.assertEquals(signal, signalContext.getSignal());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) CMgrSignalContainersEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrSignalContainersEvent) HashMap(java.util.HashMap) SignalContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest) ArrayList(java.util.ArrayList) ContainerSignalContext(org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) URL(org.apache.hadoop.yarn.api.records.URL) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) Signal(org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 3 with Signal

use of org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal in project hadoop by apache.

In the class ContainerLaunch, the method cleanupContainer:

/**
 * Clean up the container.
 *
 * <p>If the launch has not started yet it is cancelled and nothing else is done. Otherwise
 * the executor is told to deactivate the container and, when a process id can be read from
 * the pid file, the process is signalled: SIGTERM followed by a delayed SIGKILL when a
 * positive kill delay is configured, or SIGKILL straight away when it is not. The pid file
 * and its exit-code companion are deleted at the end.
 *
 * @throws IOException declared by the signature; may surface from the pid file deletion
 *     performed in the {@code finally} block
 */
// dispatcher not typed
@SuppressWarnings("unchecked")
public void cleanupContainer() throws IOException {
    final ContainerId id = container.getContainerId();
    final String idText = id.toString();
    LOG.info("Cleaning up container " + idText);
    // Record the kill in the state store; a failure here is logged and cleanup continues.
    try {
        context.getNMStateStore().storeContainerKilled(id);
    } catch (IOException storeFailure) {
        LOG.error("Unable to mark container " + id + " killed in store", storeFailure);
    }
    // The swap succeeds only when the flag was still false, i.e. the process had not been
    // launched; in that case the flag is now set and there is nothing to clean up.
    if (containerAlreadyLaunched.compareAndSet(false, true)) {
        LOG.info("Container " + idText + " not launched. No cleanup needed to be done");
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Marking container " + idText + " as inactive");
    }
    // Deactivating should guarantee that a process which has not started by now never will.
    exec.deactivateContainer(id);
    if (LOG.isDebugEnabled()) {
        final String pidFileText;
        if (pidFilePath == null) {
            pidFileText = "null";
        } else {
            pidFileText = pidFilePath.toString();
        }
        LOG.debug("Getting pid for container " + idText + " to kill from pid file " + pidFileText);
    }
    // The container process may nevertheless already be running.
    try {
        // The pid comes from the pid file, when one is configured.
        final String pid = (pidFilePath == null) ? null : getContainerPid(pidFilePath);
        if (pid != null) {
            final String owner = container.getUser();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sending signal to pid " + pid + " as user " + owner + " for container " + idText);
            }
            // With a kill delay configured, start with TERM; otherwise go straight to KILL.
            final Signal firstSignal;
            if (sleepDelayBeforeSigKill > 0) {
                firstSignal = Signal.TERM;
            } else {
                firstSignal = Signal.KILL;
            }
            final ContainerSignalContext signalContext = new ContainerSignalContext.Builder()
                    .setContainer(container)
                    .setUser(owner)
                    .setPid(pid)
                    .setSignal(firstSignal)
                    .build();
            final boolean delivered = exec.signalContainer(signalContext);
            if (LOG.isDebugEnabled()) {
                final String outcome = delivered ? "success" : "failed";
                LOG.debug("Sent signal " + firstSignal + " to pid " + pid + " as user " + owner + " for container " + idText + ", result=" + outcome);
            }
            // Follow up with a delayed KILL when a delay is configured.
            if (sleepDelayBeforeSigKill > 0) {
                new DelayedProcessKiller(container, owner, pid, sleepDelayBeforeSigKill, Signal.KILL, exec).start();
            }
        }
    } catch (Exception cleanupFailure) {
        final String report = "Exception when trying to cleanup container " + idText + ": " + StringUtils.stringifyException(cleanupFailure);
        LOG.warn(report);
        dispatcher.getEventHandler().handle(new ContainerDiagnosticsUpdateEvent(id, report));
    } finally {
        // Remove the pid file and the matching exit-code file, if a pid file is configured.
        if (pidFilePath != null) {
            final FileContext localFs = FileContext.getLocalFSFileContext();
            localFs.delete(pidFilePath, false);
            localFs.delete(pidFilePath.suffix(EXIT_CODE_FILE_SUFFIX), false);
        }
    }
}
Also used : Signal(org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal) DelayedProcessKiller(org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.DelayedProcessKiller) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerDiagnosticsUpdateEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent) IOException(java.io.IOException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) FileContext(org.apache.hadoop.fs.FileContext)

Aggregations

ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)3 Signal (org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal)3 IOException (java.io.IOException)2 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)2 ContainerDiagnosticsUpdateEvent (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent)2 File (java.io.File)1 PrintWriter (java.io.PrintWriter)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 FileContext (org.apache.hadoop.fs.FileContext)1 Path (org.apache.hadoop.fs.Path)1 SignalContainerRequest (org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest)1 StartContainerRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest)1 StartContainersRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)1 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)1 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)1 URL (org.apache.hadoop.yarn.api.records.URL)1 CMgrSignalContainersEvent (org.apache.hadoop.yarn.server.nodemanager.CMgrSignalContainersEvent)1 DelayedProcessKiller (org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.DelayedProcessKiller)1 ContainerSignalContext (org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext)1