Search in sources :

Example 51 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

the class TestNodeStatusUpdater method testCleanedupApplicationContainerCleanup.

/**
 * Exercises {@code NodeStatusUpdaterImpl#getContainerStatuses()} for a single
 * container whose state is always reported as {@code ContainerState.COMPLETE}.
 * Exactly one status is expected while the owning application is RUNNING,
 * while it is in FINISHING_CONTAINERS_WAIT, and after the application has
 * been removed from the NM context.
 */
@Test
public void testCleanedupApplicationContainerCleanup() throws IOException {
    NodeManager nodeManager = new NodeManager();
    YarnConfiguration yarnConf = new YarnConfiguration();
    yarnConf.set(NodeStatusUpdaterImpl.YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, "1000000");
    nodeManager.init(yarnConf);
    NodeStatusUpdaterImpl updater = (NodeStatusUpdaterImpl) nodeManager.getNodeStatusUpdater();

    // Identifiers and token for the one container used throughout the test.
    ApplicationId applicationId = ApplicationId.newInstance(0, 0);
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId, 0);
    ContainerId containerId = ContainerId.newContainerId(attemptId, 1);
    Token token = BuilderUtils.newContainerToken(
            containerId, 0, "anyHost", 1234, "anyUser",
            BuilderUtils.newResource(1024, 1), 0, 123, "password".getBytes(), 0);

    // A container that reports COMPLETE regardless of its real state machine.
    Container completedContainer = new ContainerImpl(
            yarnConf, null, null, null, null,
            BuilderUtils.newContainerTokenIdentifier(token), nodeManager.getNMContext()) {

        @Override
        public ContainerState getCurrentState() {
            return ContainerState.COMPLETE;
        }
    };

    // Phase 1: the application is RUNNING.
    Application app = mock(Application.class);
    when(app.getApplicationState()).thenReturn(ApplicationState.RUNNING);
    nodeManager.getNMContext().getApplications().putIfAbsent(applicationId, app);
    nodeManager.getNMContext().getContainers().put(containerId, completedContainer);
    final int expectedStatuses = 1;
    Assert.assertEquals(expectedStatuses, updater.getContainerStatuses().size());

    // Phase 2: the application is waiting for its containers to finish.
    when(app.getApplicationState()).thenReturn(ApplicationState.FINISHING_CONTAINERS_WAIT);
    // The completed container will be saved in case of lost heartbeat, so it
    // must be reported on two consecutive queries.
    Assert.assertEquals(expectedStatuses, updater.getContainerStatuses().size());
    Assert.assertEquals(expectedStatuses, updater.getContainerStatuses().size());

    // Phase 3: the container is put back and the application is removed.
    nodeManager.getNMContext().getContainers().put(containerId, completedContainer);
    nodeManager.getNMContext().getApplications().remove(applicationId);
    // The completed container will be saved in case of lost heartbeat, so it
    // must again be reported on two consecutive queries.
    Assert.assertEquals(expectedStatuses, updater.getContainerStatuses().size());
    Assert.assertEquals(expectedStatuses, updater.getContainerStatuses().size());
}
Also used : Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl) Token(org.apache.hadoop.yarn.api.records.Token) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application) Test(org.junit.Test)

Example 52 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

the class TestNodeStatusUpdater method testCompletedContainersIsRecentlyStopped.

/**
 * Registers a container that always reports {@code ContainerState.COMPLETE}
 * under an application whose state is FINISHED, then checks that the node
 * status updater reports one container status and considers the container
 * recently stopped.
 */
@Test(timeout = 10000)
public void testCompletedContainersIsRecentlyStopped() throws Exception {
    NodeManager nodeManager = new NodeManager();
    nodeManager.init(conf);
    NodeStatusUpdaterImpl updater = (NodeStatusUpdaterImpl) nodeManager.getNodeStatusUpdater();

    // Application mock that is already FINISHED.
    ApplicationId applicationId = ApplicationId.newInstance(0, 0);
    Application finishedApp = mock(Application.class);
    when(finishedApp.getApplicationState()).thenReturn(ApplicationState.FINISHED);

    // Identifier and token for the completed container.
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId, 0);
    ContainerId cId = ContainerId.newContainerId(attemptId, 1);
    Token token = BuilderUtils.newContainerToken(
            cId, 0, "host", 1234, "user",
            BuilderUtils.newResource(1024, 1), 0, 123, "password".getBytes(), 0);

    // A container that reports COMPLETE regardless of its real state machine.
    Container finishedContainer = new ContainerImpl(
            conf, null, null, null, null,
            BuilderUtils.newContainerTokenIdentifier(token), nodeManager.getNMContext()) {

        @Override
        public ContainerState getCurrentState() {
            return ContainerState.COMPLETE;
        }
    };

    nodeManager.getNMContext().getApplications().putIfAbsent(applicationId, finishedApp);
    nodeManager.getNMContext().getContainers().put(cId, finishedContainer);

    int reportedStatuses = updater.getContainerStatuses().size();
    Assert.assertEquals(1, reportedStatuses);
    boolean recentlyStopped = updater.isContainerRecentlyStopped(cId);
    Assert.assertTrue(recentlyStopped);
}
Also used : Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl) Token(org.apache.hadoop.yarn.api.records.Token) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application) Test(org.junit.Test)

Example 53 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

the class TestNodeManagerReboot method testClearLocalDirWhenNodeReboot.

/**
 * Starts one container on a freshly started NodeManager, polls until the
 * container reaches {@code ContainerState.DONE}, and then restarts the
 * NodeManager twice. After the first restart it verifies the deletion
 * requests issued to {@code delService} for the nm_private, filecache and
 * usercache locations.
 *
 * @throws IOException if container start-up fails with an I/O error
 * @throws YarnException if the container manager rejects the start request
 * @throws InterruptedException if the test thread is interrupted while
 *         waiting for the container to finish
 */
@Test(timeout = 2000000)
public void testClearLocalDirWhenNodeReboot() throws IOException, YarnException, InterruptedException {
    nm = new MyNodeManager();
    nm.start();
    final ContainerManagementProtocol containerManager = nm.getContainerManager();
    // create files under fileCache
    createFiles(nmLocalDir.getAbsolutePath(), ContainerLocalizer.FILECACHE, 100);
    localResourceDir.mkdirs();
    ContainerLaunchContext containerLaunchContext = Records.newRecord(ContainerLaunchContext.class);
    // Construct the Container-id
    ContainerId cId = createContainerId();
    URL localResourceUri = URL.fromPath(localFS.makeQualified(new Path(localResourceDir.getAbsolutePath())));
    LocalResource localResource = LocalResource.newInstance(localResourceUri, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, -1, localResourceDir.lastModified());
    String destinationFile = "dest_file";
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(destinationFile, localResource);
    containerLaunchContext.setLocalResources(localResources);
    // The container is launched with an empty command list.
    List<String> commands = new ArrayList<String>();
    containerLaunchContext.setCommands(commands);
    NodeId nodeId = nm.getNMContext().getNodeId();
    StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, TestContainerManager.createContainerToken(cId, 0, nodeId, destinationFile, nm.getNMContext().getContainerTokenSecretManager()));
    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(scRequest);
    final StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    // Issue the start request as a remote user carrying an NM token identifier.
    final UserGroupInformation currentUser = UserGroupInformation.createRemoteUser(cId.getApplicationAttemptId().toString());
    NMTokenIdentifier nmIdentifier = new NMTokenIdentifier(cId.getApplicationAttemptId(), nodeId, user, 123);
    currentUser.addTokenIdentifier(nmIdentifier);
    currentUser.doAs(new PrivilegedExceptionAction<Void>() {

        @Override
        public Void run() throws YarnException, IOException {
            // Use the container manager captured above instead of fetching it again.
            containerManager.startContainers(allRequests);
            return null;
        }
    });
    List<ContainerId> containerIds = new ArrayList<ContainerId>();
    containerIds.add(cId);
    GetContainerStatusesRequest request = GetContainerStatusesRequest.newInstance(containerIds);
    Container container = nm.getNMContext().getContainers().get(request.getContainerIds().get(0));
    final int MAX_TRIES = 20;
    int numTries = 0;
    // Poll every 500 ms until the container is DONE or the retry budget is
    // exhausted. An interrupt is allowed to propagate (the method declares
    // InterruptedException) so that a test timeout is not silently ignored.
    while (!container.getContainerState().equals(ContainerState.DONE) && numTries <= MAX_TRIES) {
        Thread.sleep(500);
        numTries++;
    }
    Assert.assertEquals(ContainerState.DONE, container.getContainerState());
    Assert.assertTrue("The container should create a subDir named currentUser: " + user + " under localDir/usercache", numOfLocalDirs(nmLocalDir.getAbsolutePath(), ContainerLocalizer.USERCACHE) > 0);
    Assert.assertTrue("There should be files or Dirs under nm_private when " + "container is launched", numOfLocalDirs(nmLocalDir.getAbsolutePath(), ResourceLocalizationService.NM_PRIVATE_DIR) > 0);
    // restart the NodeManager
    restartNM(MAX_TRIES);
    checkNumOfLocalDirs();
    // Each of the following deletion requests must have been made exactly once.
    verify(delService, times(1)).delete((String) isNull(), argThat(new PathInclude(ResourceLocalizationService.NM_PRIVATE_DIR + "_DEL_")));
    verify(delService, times(1)).delete((String) isNull(), argThat(new PathInclude(ContainerLocalizer.FILECACHE + "_DEL_")));
    verify(delService, times(1)).scheduleFileDeletionTask(argThat(new FileDeletionInclude(user, null, new String[] { destinationFile })));
    verify(delService, times(1)).scheduleFileDeletionTask(argThat(new FileDeletionInclude(null, ContainerLocalizer.USERCACHE + "_DEL_", new String[] {})));
    // restart the NodeManager again
    // this time usercache directory should be empty
    restartNM(MAX_TRIES);
    checkNumOfLocalDirs();
}
Also used : NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) HashMap(java.util.HashMap) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) ArrayList(java.util.ArrayList) URL(org.apache.hadoop.yarn.api.records.URL) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Path(org.apache.hadoop.fs.Path) StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) IOException(java.io.IOException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) ContainerManagementProtocol(org.apache.hadoop.yarn.api.ContainerManagementProtocol) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Test(org.junit.Test)

Example 54 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

the class TestNodeManagerResync method testNMSentContainerStatusOnResync.

/**
 * This is to test when NM gets the resync response from last heart beat, it
 * should be able to send the already-sent-via-last-heart-beat container
 * statuses again when it re-register with RM.
 *
 * <p>The mock resource tracker checks that the first registration carries no
 * container statuses, then places a completed container (and a mock
 * application) into the NM context. Every heartbeat is expected to carry
 * exactly that container and is answered with {@code NodeAction.RESYNC};
 * later registrations are expected to carry the same container again.
 * Assertion failures raised inside the tracker are recorded in
 * {@code assertionFailedInThread} and checked at the end of the test.
 */
@Test
public void testNMSentContainerStatusOnResync() throws Exception {
    final ContainerStatus testCompleteContainer = TestNodeStatusUpdater.createContainerStatus(2, ContainerState.COMPLETE);
    final Container container = TestNodeStatusUpdater.getMockContainer(testCompleteContainer);
    NMContainerStatus report = createNMContainerStatus(2, ContainerState.COMPLETE);
    when(container.getNMContainerStatus()).thenReturn(report);
    NodeManager nm = new NodeManager() {

        // Number of registerNodeManager calls seen so far.
        int registerCount = 0;

        @Override
        protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            return new TestNodeStatusUpdaterResync(context, dispatcher, healthChecker, metrics) {

                @Override
                protected ResourceTracker createResourceTracker() {
                    return new MockResourceTracker() {

                        @Override
                        public RegisterNodeManagerResponse registerNodeManager(RegisterNodeManagerRequest request) throws YarnException, IOException {
                            if (registerCount == 0) {
                                // first register, no containers info.
                                try {
                                    Assert.assertEquals(0, request.getNMContainerStatuses().size());
                                } catch (AssertionError error) {
                                    // Record the failure so the test thread can report it.
                                    error.printStackTrace();
                                    assertionFailedInThread.set(true);
                                }
                                // put the completed container into the context
                                getNMContext().getContainers().put(testCompleteContainer.getContainerId(), container);
                                getNMContext().getApplications().put(testCompleteContainer.getContainerId().getApplicationAttemptId().getApplicationId(), mock(Application.class));
                            } else {
                                // second register contains the completed container info.
                                List<NMContainerStatus> statuses = request.getNMContainerStatuses();
                                try {
                                    Assert.assertEquals(1, statuses.size());
                                    Assert.assertEquals(testCompleteContainer.getContainerId(), statuses.get(0).getContainerId());
                                } catch (AssertionError error) {
                                    // Record the failure so the test thread can report it.
                                    error.printStackTrace();
                                    assertionFailedInThread.set(true);
                                }
                            }
                            registerCount++;
                            return super.registerNodeManager(request);
                        }

                        @Override
                        public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) {
                            // first heartBeat contains the completed container info
                            List<ContainerStatus> statuses = request.getNodeStatus().getContainersStatuses();
                            try {
                                Assert.assertEquals(1, statuses.size());
                                Assert.assertEquals(testCompleteContainer.getContainerId(), statuses.get(0).getContainerId());
                            } catch (AssertionError error) {
                                // Record the failure so the test thread can report it.
                                error.printStackTrace();
                                assertionFailedInThread.set(true);
                            }
                            // notify RESYNC on first heartbeat.
                            return YarnServerBuilderUtils.newNodeHeartbeatResponse(1, NodeAction.RESYNC, null, null, null, null, 1000L);
                        }
                    };
                }
            };
        }
    };
    YarnConfiguration conf = createNMConfig();
    nm.init(conf);
    nm.start();
    try {
        try {
            syncBarrier.await();
        } catch (BrokenBarrierException e) {
            // NOTE(review): a broken barrier is not rethrown here; the flag
            // check below still decides the test outcome - confirm intended.
        }
        Assert.assertFalse(assertionFailedInThread.get());
    } finally {
        // Stop the NodeManager even when the wait or the assertion above
        // fails, so a failing run does not leave a started NM behind.
        nm.stop();
    }
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) RegisterNodeManagerRequest(org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) NodeHeartbeatRequest(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application) BaseContainerManagerTest(org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest) Test(org.junit.Test)

Example 55 with Container

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.

the class ContainerExecutor method reacquireContainer.

/**
 * Recover an already existing container. This is a blocking call and returns
 * only when the container exits.  Note that the container must have been
 * activated prior to this call.
 *
 * <p>If no pid file path is registered for the container, or the container
 * is deactivated while waiting for its exit code file, the
 * {@code ExitCode.TERMINATED} exit code is returned instead.
 *
 * @param ctx encapsulates information necessary to reacquire container
 * @return The exit code of the pre-existing container
 * @throws IOException if there is a failure while reacquiring the container:
 *         the pid cannot be determined, the exit code file does not appear
 *         in time, or its content is not a valid integer
 * @throws InterruptedException if interrupted while waiting to reacquire
 * the container
 */
public int reacquireContainer(ContainerReacquisitionContext ctx) throws IOException, InterruptedException {
    Container container = ctx.getContainer();
    String user = ctx.getUser();
    ContainerId containerId = ctx.getContainerId();
    Path pidPath = getPidFilePath(containerId);
    if (pidPath == null) {
        LOG.warn(containerId + " is not active, returning terminated error");
        return ExitCode.TERMINATED.getExitCode();
    }
    String pid = ProcessIdFileReader.getProcessId(pidPath);
    if (pid == null) {
        throw new IOException("Unable to determine pid for " + containerId);
    }
    LOG.info("Reacquiring " + containerId + " with pid " + pid);
    ContainerLivenessContext livenessContext = new ContainerLivenessContext.Builder().setContainer(container).setUser(user).setPid(pid).build();
    // Block, checking once per second, for as long as the container is alive.
    while (isContainerAlive(livenessContext)) {
        Thread.sleep(1000);
    }
    // wait for exit code file to appear
    final int sleepMsec = 100;
    int msecLeft = 2000;
    String exitCodeFile = ContainerLaunch.getExitCodeFile(pidPath.toString());
    File file = new File(exitCodeFile);
    while (!file.exists() && msecLeft >= 0) {
        if (!isContainerActive(containerId)) {
            LOG.info(containerId + " was deactivated");
            return ExitCode.TERMINATED.getExitCode();
        }
        Thread.sleep(sleepMsec);
        msecLeft -= sleepMsec;
    }
    // Decide on the file's presence rather than on the remaining budget: the
    // file may have appeared during the final sleep, which leaves msecLeft
    // negative even though the exit code is available.
    if (!file.exists()) {
        throw new IOException("Timeout while waiting for exit code from " + containerId);
    }
    try {
        return Integer.parseInt(FileUtils.readFileToString(file).trim());
    } catch (NumberFormatException e) {
        throw new IOException("Error parsing exit code from pid " + pid, e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) IOException(java.io.IOException) File(java.io.File) ContainerLivenessContext(org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext)

Aggregations

Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container): 109; ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 55; Test (org.junit.Test): 43; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 33; Path (org.apache.hadoop.fs.Path): 31; ArrayList (java.util.ArrayList): 29; Application (org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application): 29; HashMap (java.util.HashMap): 27; YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 27; Configuration (org.apache.hadoop.conf.Configuration): 24; IOException (java.io.IOException): 20; ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext): 18; ContainerEvent (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent): 17; LocalDirsHandlerService (org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService): 16; Collection (java.util.Collection): 14; ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 14; LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility): 14; YarnException (org.apache.hadoop.yarn.exceptions.YarnException): 14; LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 13; ApplicationEvent (org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent): 13