Search in sources :

Example 1 with ContainerImpl

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl in project hadoop by apache.

the class ContainerManagerImpl method startContainerInternal.

@SuppressWarnings("unchecked")
protected void startContainerInternal(ContainerTokenIdentifier containerTokenIdentifier, StartContainerRequest request) throws YarnException, IOException {
    ContainerId containerId = containerTokenIdentifier.getContainerID();
    String containerIdStr = containerId.toString();
    String user = containerTokenIdentifier.getApplicationSubmitter();
    LOG.info("Start request for " + containerIdStr + " by user " + user);
    ContainerLaunchContext launchContext = request.getContainerLaunchContext();
    Credentials credentials = YarnServerSecurityUtils.parseCredentials(launchContext);
    Container container = new ContainerImpl(getConfig(), this.dispatcher, launchContext, credentials, metrics, containerTokenIdentifier, context);
    ApplicationId applicationID = containerId.getApplicationAttemptId().getApplicationId();
    if (context.getContainers().putIfAbsent(containerId, container) != null) {
        NMAuditLogger.logFailure(user, AuditConstants.START_CONTAINER, "ContainerManagerImpl", "Container already running on this node!", applicationID, containerId);
        throw RPCUtil.getRemoteException("Container " + containerIdStr + " already is running on this node!!");
    }
    this.readLock.lock();
    try {
        if (!isServiceStopped()) {
            // Create the application
            // populate the flow context from the launch context if the timeline
            // service v.2 is enabled
            FlowContext flowContext = null;
            if (YarnConfiguration.timelineServiceV2Enabled(getConfig())) {
                String flowName = launchContext.getEnvironment().get(TimelineUtils.FLOW_NAME_TAG_PREFIX);
                String flowVersion = launchContext.getEnvironment().get(TimelineUtils.FLOW_VERSION_TAG_PREFIX);
                String flowRunIdStr = launchContext.getEnvironment().get(TimelineUtils.FLOW_RUN_ID_TAG_PREFIX);
                long flowRunId = 0L;
                if (flowRunIdStr != null && !flowRunIdStr.isEmpty()) {
                    flowRunId = Long.parseLong(flowRunIdStr);
                }
                flowContext = new FlowContext(flowName, flowVersion, flowRunId);
            }
            if (!context.getApplications().containsKey(applicationID)) {
                Application application = new ApplicationImpl(dispatcher, user, flowContext, applicationID, credentials, context);
                if (context.getApplications().putIfAbsent(applicationID, application) == null) {
                    LOG.info("Creating a new application reference for app " + applicationID);
                    LogAggregationContext logAggregationContext = containerTokenIdentifier.getLogAggregationContext();
                    Map<ApplicationAccessType, String> appAcls = container.getLaunchContext().getApplicationACLs();
                    context.getNMStateStore().storeApplication(applicationID, buildAppProto(applicationID, user, credentials, appAcls, logAggregationContext));
                    dispatcher.getEventHandler().handle(new ApplicationInitEvent(applicationID, appAcls, logAggregationContext));
                }
            }
            this.context.getNMStateStore().storeContainer(containerId, containerTokenIdentifier.getVersion(), request);
            dispatcher.getEventHandler().handle(new ApplicationContainerInitEvent(container));
            this.context.getContainerTokenSecretManager().startContainerSuccessful(containerTokenIdentifier);
            NMAuditLogger.logSuccess(user, AuditConstants.START_CONTAINER, "ContainerManageImpl", applicationID, containerId);
            // TODO launchedContainer misplaced -> doesn't necessarily mean a container
            // launch. A finished Application will not launch containers.
            metrics.launchedContainer();
            metrics.allocateContainer(containerTokenIdentifier.getResource());
        } else {
            throw new YarnException("Container start failed as the NodeManager is " + "in the process of shutting down");
        }
    } finally {
        this.readLock.unlock();
    }
}
Also used : ApplicationImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl) ApplicationContainerInitEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerInitEvent) ApplicationInitEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationInitEvent) ByteString(com.google.protobuf.ByteString) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) FlowContext(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl.FlowContext) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ContainerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application) Credentials(org.apache.hadoop.security.Credentials) LogAggregationContext(org.apache.hadoop.yarn.api.records.LogAggregationContext)

Example 2 with ContainerImpl

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl in project hadoop by apache.

the class TestResourceLocalizationService method testParallelDownloadAttemptsForPublicResource.

@Test(timeout = 100000)
@SuppressWarnings("unchecked")
public void testParallelDownloadAttemptsForPublicResource() throws Exception {
    DrainDispatcher dispatcher1 = null;
    String user = "testuser";
    try {
        // creating one local directory
        List<Path> localDirs = new ArrayList<Path>();
        String[] sDirs = new String[1];
        for (int i = 0; i < 1; ++i) {
            localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
            sDirs[i] = localDirs.get(i).toString();
        }
        conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
        // Registering event handlers
        EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
        dispatcher1 = new DrainDispatcher();
        dispatcher1.register(ApplicationEventType.class, applicationBus);
        EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
        dispatcher1.register(ContainerEventType.class, containerBus);
        ContainerExecutor exec = mock(ContainerExecutor.class);
        DeletionService delService = mock(DeletionService.class);
        LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
        // initializing directory handler.
        dirsHandler.init(conf);
        dispatcher1.init(conf);
        dispatcher1.start();
        // Creating and initializing ResourceLocalizationService but not starting
        // it as otherwise it will remove requests from pending queue.
        ResourceLocalizationService rawService = new ResourceLocalizationService(dispatcher1, exec, delService, dirsHandler, nmContext);
        ResourceLocalizationService spyService = spy(rawService);
        dispatcher1.register(LocalizationEventType.class, spyService);
        spyService.init(conf);
        // Initially pending map should be empty for public localizer
        Assert.assertEquals(0, spyService.getPublicLocalizer().pending.size());
        LocalResourceRequest req = new LocalResourceRequest(new Path("/tmp"), 123L, LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, "");
        // Initializing application
        ApplicationImpl app = mock(ApplicationImpl.class);
        ApplicationId appId = BuilderUtils.newApplicationId(1, 1);
        when(app.getAppId()).thenReturn(appId);
        when(app.getUser()).thenReturn(user);
        dispatcher1.getEventHandler().handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app));
        // Container - 1
        // container requesting the resource
        ContainerImpl container1 = createMockContainer(user, 1);
        dispatcher1.getEventHandler().handle(createContainerLocalizationEvent(container1, LocalResourceVisibility.PUBLIC, req));
        // Waiting for resource to change into DOWNLOADING state.
        Assert.assertTrue(waitForResourceState(null, spyService, req, LocalResourceVisibility.PUBLIC, user, null, ResourceState.DOWNLOADING, 5000));
        // Waiting for download to start.
        Assert.assertTrue(waitForPublicDownloadToStart(spyService, 1, 5000));
        LocalizedResource lr = getLocalizedResource(spyService, req, LocalResourceVisibility.PUBLIC, user, null);
        // Resource would now have moved into DOWNLOADING state
        Assert.assertEquals(ResourceState.DOWNLOADING, lr.getState());
        // pending should have this resource now.
        Assert.assertEquals(1, spyService.getPublicLocalizer().pending.size());
        // Now resource should have 0 permit.
        Assert.assertEquals(0, lr.sem.availablePermits());
        // Container - 2
        // Container requesting the same resource.
        ContainerImpl container2 = createMockContainer(user, 2);
        dispatcher1.getEventHandler().handle(createContainerLocalizationEvent(container2, LocalResourceVisibility.PUBLIC, req));
        // Waiting for download to start. This should return false as new download
        // will not start
        Assert.assertFalse(waitForPublicDownloadToStart(spyService, 2, 5000));
        // Now Failing the resource download. As a part of it
        // resource state is changed and then lock is released.
        ResourceFailedLocalizationEvent locFailedEvent = new ResourceFailedLocalizationEvent(req, new Exception("test").toString());
        spyService.getLocalResourcesTracker(LocalResourceVisibility.PUBLIC, user, null).handle(locFailedEvent);
        // Waiting for resource to change into FAILED state.
        Assert.assertTrue(waitForResourceState(lr, spyService, req, LocalResourceVisibility.PUBLIC, user, null, ResourceState.FAILED, 5000));
        // releasing lock as a part of download failed process.
        lr.unlock();
        // removing pending download request.
        spyService.getPublicLocalizer().pending.clear();
        LocalizerContext lc = mock(LocalizerContext.class);
        when(lc.getContainerId()).thenReturn(ContainerId.newContainerId(ApplicationAttemptId.newInstance(ApplicationId.newInstance(1L, 1), 1), 1L));
        // Now I need to simulate a race condition wherein Event is added to
        // dispatcher before resource state changes to either FAILED or LOCALIZED
        // Hence sending event directly to dispatcher.
        LocalizerResourceRequestEvent localizerEvent = new LocalizerResourceRequestEvent(lr, null, lc, null);
        dispatcher1.getEventHandler().handle(localizerEvent);
        // Waiting for download to start. This should return false as new download
        // will not start
        Assert.assertFalse(waitForPublicDownloadToStart(spyService, 1, 5000));
        // Checking available permits now.
        Assert.assertEquals(1, lr.sem.availablePermits());
    } finally {
        if (dispatcher1 != null) {
            dispatcher1.stop();
        }
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) ContainerExecutor(org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor) DefaultContainerExecutor(org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor) ResourceFailedLocalizationEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent) ApplicationImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl) ArrayList(java.util.ArrayList) ApplicationLocalizationEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ApplicationLocalizationEvent) Path(org.apache.hadoop.fs.Path) ContainerEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent) ApplicationEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent) DeletionService(org.apache.hadoop.yarn.server.nodemanager.DeletionService) LocalDirsHandlerService(org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) UnsupportedFileSystemException(org.apache.hadoop.fs.UnsupportedFileSystemException) URISyntaxException(java.net.URISyntaxException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) SerializedException(org.apache.hadoop.yarn.api.records.SerializedException) NotSerializableException(java.io.NotSerializableException) ContainerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl) LocalizerResourceRequestEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 3 with ContainerImpl

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl in project hadoop by apache.

the class TestResourceLocalizationService method createMockContainer.

private ContainerImpl createMockContainer(String user, int containerId) {
    ContainerImpl container = mock(ContainerImpl.class);
    when(container.getContainerId()).thenReturn(BuilderUtils.newContainerId(1, 1, 1, containerId));
    when(container.getUser()).thenReturn(user);
    Credentials mockCredentials = mock(Credentials.class);
    when(container.getCredentials()).thenReturn(mockCredentials);
    when(container.getContainerState()).thenReturn(ContainerState.LOCALIZING);
    return container;
}
Also used : ContainerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl) Credentials(org.apache.hadoop.security.Credentials)

Example 4 with ContainerImpl

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl in project hadoop by apache.

the class TestAuxServices method testAuxEventDispatch.

@Test
public void testAuxEventDispatch() {
    Configuration conf = new Configuration();
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, new String[] { "Asrv", "Bsrv" });
    conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Asrv"), ServiceA.class, Service.class);
    conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Bsrv"), ServiceB.class, Service.class);
    conf.setInt("A.expected.init", 1);
    conf.setInt("B.expected.stop", 1);
    final AuxServices aux = new AuxServices();
    aux.init(conf);
    aux.start();
    ApplicationId appId1 = ApplicationId.newInstance(0, 65);
    ByteBuffer buf = ByteBuffer.allocate(6);
    buf.putChar('A');
    buf.putInt(65);
    buf.flip();
    AuxServicesEvent event = new AuxServicesEvent(AuxServicesEventType.APPLICATION_INIT, "user0", appId1, "Asrv", buf);
    aux.handle(event);
    ApplicationId appId2 = ApplicationId.newInstance(0, 66);
    event = new AuxServicesEvent(AuxServicesEventType.APPLICATION_STOP, "user0", appId2, "Bsrv", null);
    // verify all services got the stop event 
    aux.handle(event);
    Collection<AuxiliaryService> servs = aux.getServices();
    for (AuxiliaryService serv : servs) {
        ArrayList<Integer> appIds = ((LightService) serv).getAppIdsStopped();
        assertEquals("app not properly stopped", 1, appIds.size());
        assertTrue("wrong app stopped", appIds.contains((Integer) 66));
    }
    for (AuxiliaryService serv : servs) {
        assertNull(((LightService) serv).containerId);
        assertNull(((LightService) serv).resource);
    }
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId1, 1);
    ContainerTokenIdentifier cti = new ContainerTokenIdentifier(ContainerId.newContainerId(attemptId, 1), "", "", Resource.newInstance(1, 1), 0, 0, 0, Priority.newInstance(0), 0);
    Context context = mock(Context.class);
    Container container = new ContainerImpl(new YarnConfiguration(), null, null, null, null, cti, context);
    ContainerId containerId = container.getContainerId();
    Resource resource = container.getResource();
    event = new AuxServicesEvent(AuxServicesEventType.CONTAINER_INIT, container);
    aux.handle(event);
    for (AuxiliaryService serv : servs) {
        assertEquals(containerId, ((LightService) serv).containerId);
        assertEquals(resource, ((LightService) serv).resource);
        ((LightService) serv).containerId = null;
        ((LightService) serv).resource = null;
    }
    event = new AuxServicesEvent(AuxServicesEventType.CONTAINER_STOP, container);
    aux.handle(event);
    for (AuxiliaryService serv : servs) {
        assertEquals(containerId, ((LightService) serv).containerId);
        assertEquals(resource, ((LightService) serv).resource);
    }
}
Also used : ApplicationTerminationContext(org.apache.hadoop.yarn.server.api.ApplicationTerminationContext) ApplicationInitializationContext(org.apache.hadoop.yarn.server.api.ApplicationInitializationContext) ContainerInitializationContext(org.apache.hadoop.yarn.server.api.ContainerInitializationContext) ContainerTerminationContext(org.apache.hadoop.yarn.server.api.ContainerTerminationContext) Context(org.apache.hadoop.yarn.server.nodemanager.Context) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ByteBuffer(java.nio.ByteBuffer) ContainerTokenIdentifier(org.apache.hadoop.yarn.security.ContainerTokenIdentifier) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) AuxiliaryService(org.apache.hadoop.yarn.server.api.AuxiliaryService) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 5 with ContainerImpl

use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl in project hadoop by apache.

the class TestNodeStatusUpdater method testCleanedupApplicationContainerCleanup.

@Test
public void testCleanedupApplicationContainerCleanup() throws IOException {
    NodeManager nm = new NodeManager();
    YarnConfiguration conf = new YarnConfiguration();
    conf.set(NodeStatusUpdaterImpl.YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, "1000000");
    nm.init(conf);
    NodeStatusUpdaterImpl nodeStatusUpdater = (NodeStatusUpdaterImpl) nm.getNodeStatusUpdater();
    ApplicationId appId = ApplicationId.newInstance(0, 0);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
    ContainerId cId = ContainerId.newContainerId(appAttemptId, 1);
    Token containerToken = BuilderUtils.newContainerToken(cId, 0, "anyHost", 1234, "anyUser", BuilderUtils.newResource(1024, 1), 0, 123, "password".getBytes(), 0);
    Container anyCompletedContainer = new ContainerImpl(conf, null, null, null, null, BuilderUtils.newContainerTokenIdentifier(containerToken), nm.getNMContext()) {

        @Override
        public ContainerState getCurrentState() {
            return ContainerState.COMPLETE;
        }
    };
    Application application = mock(Application.class);
    when(application.getApplicationState()).thenReturn(ApplicationState.RUNNING);
    nm.getNMContext().getApplications().putIfAbsent(appId, application);
    nm.getNMContext().getContainers().put(cId, anyCompletedContainer);
    Assert.assertEquals(1, nodeStatusUpdater.getContainerStatuses().size());
    when(application.getApplicationState()).thenReturn(ApplicationState.FINISHING_CONTAINERS_WAIT);
    // The completed container will be saved in case of lost heartbeat.
    Assert.assertEquals(1, nodeStatusUpdater.getContainerStatuses().size());
    Assert.assertEquals(1, nodeStatusUpdater.getContainerStatuses().size());
    nm.getNMContext().getContainers().put(cId, anyCompletedContainer);
    nm.getNMContext().getApplications().remove(appId);
    // The completed container will be saved in case of lost heartbeat.
    Assert.assertEquals(1, nodeStatusUpdater.getContainerStatuses().size());
    Assert.assertEquals(1, nodeStatusUpdater.getContainerStatuses().size());
}
Also used : Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl) Token(org.apache.hadoop.yarn.api.records.Token) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application) Test(org.junit.Test)

Aggregations

ContainerImpl (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl)13 Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container)10 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)9 Test (org.junit.Test)9 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)8 Application (org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application)6 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)5 ArrayList (java.util.ArrayList)4 Token (org.apache.hadoop.yarn.api.records.Token)4 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)4 LocalDirsHandlerService (org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService)4 Path (org.apache.hadoop.fs.Path)3 Credentials (org.apache.hadoop.security.Credentials)3 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)3 Configuration (org.apache.hadoop.conf.Configuration)2 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)2 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)2 ContainerTokenIdentifier (org.apache.hadoop.yarn.security.ContainerTokenIdentifier)2 ContainerExecutor (org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor)2 Context (org.apache.hadoop.yarn.server.nodemanager.Context)2