Search in sources :

Example 1 with CMgrCompletedAppsEvent

use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent in project hadoop by apache.

the class ContainerManagerImpl method handle.

/**
 * Reacts to a {@link ContainerManagerEvent} according to its type.
 *
 * <ul>
 *   <li>{@code FINISH_APPS}: for every application to clean up, dispatches an
 *       {@code ApplicationFinishEvent}; applications that are unknown, or that
 *       own a container which is still recovering, are skipped.</li>
 *   <li>{@code FINISH_CONTAINERS}: for every container to clean up, dispatches a
 *       {@code ContainerKillEvent}; unknown applications, unknown containers and
 *       recovering containers are skipped.</li>
 *   <li>{@code DECREASE_CONTAINERS_RESOURCE}: applies each requested resource
 *       change, logging (not propagating) {@code YarnException} and
 *       {@code IOException}.</li>
 *   <li>{@code SIGNAL_CONTAINERS}: forwards each signal request on behalf of
 *       "ResourceManager".</li>
 * </ul>
 *
 * @param event the event to process
 * @throws YarnRuntimeException if the event type is none of the above
 */
@SuppressWarnings("unchecked")
@Override
public void handle(ContainerManagerEvent event) {
    switch (event.getType()) {
        case FINISH_APPS: {
            CMgrCompletedAppsEvent finishApps = (CMgrCompletedAppsEvent) event;
            nextApp:
            for (ApplicationId finishedAppId : finishApps.getAppsToCleanup()) {
                Application application = this.context.getApplications().get(finishedAppId);
                if (application == null) {
                    LOG.warn("couldn't find application " + finishedAppId + " while processing" + " FINISH_APPS event");
                    continue;
                }
                // A single recovering container is enough to drop the event for this app.
                for (Container candidate : application.getContainers().values()) {
                    if (candidate.isRecovering()) {
                        LOG.info("drop FINISH_APPS event to " + finishedAppId + " because " + "container " + candidate.getContainerId() + " is recovering");
                        continue nextApp;
                    }
                }
                CMgrCompletedAppsEvent.Reason reason = finishApps.getReason();
                String diagnostic;
                if (reason == CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN) {
                    diagnostic = "Application killed on shutdown";
                } else if (reason == CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER) {
                    diagnostic = "Application killed by ResourceManager";
                } else {
                    // Any other reason carries no diagnostic text.
                    diagnostic = "";
                }
                this.dispatcher.getEventHandler().handle(new ApplicationFinishEvent(finishedAppId, diagnostic));
            }
            break;
        }
        case FINISH_CONTAINERS: {
            CMgrCompletedContainersEvent finishContainers = (CMgrCompletedContainersEvent) event;
            for (ContainerId targetId : finishContainers.getContainersToCleanup()) {
                ApplicationId owningAppId = targetId.getApplicationAttemptId().getApplicationId();
                Application owningApp = this.context.getApplications().get(owningAppId);
                if (owningApp == null) {
                    LOG.warn("couldn't find app " + owningAppId + " while processing" + " FINISH_CONTAINERS event");
                    continue;
                }
                Container target = owningApp.getContainers().get(targetId);
                if (target == null) {
                    LOG.warn("couldn't find container " + targetId + " while processing FINISH_CONTAINERS event");
                    continue;
                }
                if (target.isRecovering()) {
                    LOG.info("drop FINISH_CONTAINERS event to " + targetId + " because container is recovering");
                    continue;
                }
                this.dispatcher.getEventHandler().handle(new ContainerKillEvent(targetId, ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, "Container Killed by ResourceManager"));
            }
            break;
        }
        case DECREASE_CONTAINERS_RESOURCE: {
            CMgrDecreaseContainersResourceEvent decreaseEvent = (CMgrDecreaseContainersResourceEvent) event;
            for (org.apache.hadoop.yarn.api.records.Container toDecrease : decreaseEvent.getContainersToDecrease()) {
                try {
                    changeContainerResourceInternal(toDecrease.getId(), toDecrease.getVersion(), toDecrease.getResource(), false);
                } catch (YarnException e) {
                    LOG.error("Unable to decrease container resource", e);
                } catch (IOException e) {
                    LOG.error("Unable to update container resource in store", e);
                }
            }
            break;
        }
        case SIGNAL_CONTAINERS: {
            CMgrSignalContainersEvent signalEvent = (CMgrSignalContainersEvent) event;
            for (SignalContainerRequest signalRequest : signalEvent.getContainersToSignal()) {
                internalSignalToContainer(signalRequest, "ResourceManager");
            }
            break;
        }
        default:
            throw new YarnRuntimeException("Got an unknown ContainerManagerEvent type: " + event.getType());
    }
}
Also used : ApplicationFinishEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationFinishEvent) CMgrDecreaseContainersResourceEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrDecreaseContainersResourceEvent) CMgrSignalContainersEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrSignalContainersEvent) CMgrCompletedContainersEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent) SignalContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest) CMgrCompletedAppsEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent) ContainerKillEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application)

Example 2 with CMgrCompletedAppsEvent

use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent in project hadoop by apache.

the class TestContainerManager method testLocalFilesCleanup.

/**
 * Starts a container with one application-visibility local resource, waits for
 * the container to complete, and checks that the container directories are gone
 * while the application directories remain. It then sends a FINISH_APPS event
 * and checks that the application directories are deleted as well.
 *
 * @throws InterruptedException if interrupted while waiting for deletion
 * @throws IOException if the local resource file cannot be created
 * @throws YarnException if starting the container fails
 */
@Test
public void testLocalFilesCleanup() throws InterruptedException, IOException, YarnException {
    // Real del service
    delSrvc = new DeletionService(exec);
    delSrvc.init(conf);
    containerManager = createContainerManager(delSrvc);
    containerManager.init(conf);
    containerManager.start();
    // ////// Create the resources for the container
    File dir = new File(tmpDir, "dir");
    dir.mkdirs();
    File file = new File(dir, "file");
    // try-with-resources closes the writer even if a write fails.
    try (PrintWriter fileWriter = new PrintWriter(file)) {
        fileWriter.write("Hello World!");
    }
    // ////// Construct the Container-id
    ContainerId cId = createContainerId(0);
    ApplicationId appId = cId.getApplicationAttemptId().getApplicationId();
    // ////// Construct the container-spec.
    ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    URL resource_alpha = URL.fromPath(FileContext.getLocalFSFileContext().makeQualified(new Path(file.getAbsolutePath())));
    LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
    rsrc_alpha.setResource(resource_alpha);
    rsrc_alpha.setSize(-1);
    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc_alpha.setType(LocalResourceType.FILE);
    rsrc_alpha.setTimestamp(file.lastModified());
    String destinationFile = "dest_file";
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(destinationFile, rsrc_alpha);
    containerLaunchContext.setLocalResources(localResources);
    StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
    List<StartContainerRequest> list = new ArrayList<>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);
    BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE);
    BaseContainerManagerTest.waitForApplicationState(containerManager, appId, ApplicationState.RUNNING);
    // The container has completed: its directories must be gone while the
    // application-level directories are still present.
    String appIDStr = appId.toString();
    String containerIDStr = cId.toString();
    File userCacheDir = new File(localDir, ContainerLocalizer.USERCACHE);
    File userDir = new File(userCacheDir, user);
    File appCache = new File(userDir, ContainerLocalizer.APPCACHE);
    File appDir = new File(appCache, appIDStr);
    File containerDir = new File(appDir, containerIDStr);
    File targetFile = new File(containerDir, destinationFile);
    File sysDir = new File(localDir, ResourceLocalizationService.NM_PRIVATE_DIR);
    File appSysDir = new File(sysDir, appIDStr);
    File containerSysDir = new File(appSysDir, containerIDStr);
    // AppDir should still exist
    Assert.assertTrue("AppDir " + appDir.getAbsolutePath() + " doesn't exist!!", appDir.exists());
    Assert.assertTrue("AppSysDir " + appSysDir.getAbsolutePath() + " doesn't exist!!", appSysDir.exists());
    for (File f : new File[] { containerDir, containerSysDir }) {
        Assert.assertFalse(f.getAbsolutePath() + " exists!!", f.exists());
    }
    Assert.assertFalse(targetFile.getAbsolutePath() + " exists!!", targetFile.exists());
    // Simulate RM sending an AppFinish event.
    containerManager.handle(new CMgrCompletedAppsEvent(Arrays.asList(appId), CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
    BaseContainerManagerTest.waitForApplicationState(containerManager, appId, ApplicationState.FINISHED);
    // Now ascertain that the application's resources are cleaned up.
    for (File f : new File[] { appDir, containerDir, appSysDir, containerSysDir }) {
        // Wait for deletion. Deletion can happen long after AppFinish because of
        // the async DeletionService
        int timeout = 0;
        while (f.exists() && timeout++ < 15) {
            Thread.sleep(1000);
        }
        Assert.assertFalse(f.getAbsolutePath() + " exists!!", f.exists());
    }
    // Wait for deletion
    int timeout = 0;
    while (targetFile.exists() && timeout++ < 15) {
        Thread.sleep(1000);
    }
    Assert.assertFalse(targetFile.getAbsolutePath() + " exists!!", targetFile.exists());
}
Also used : Path(org.apache.hadoop.fs.Path) StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) HashMap(java.util.HashMap) DeletionService(org.apache.hadoop.yarn.server.nodemanager.DeletionService) ArrayList(java.util.ArrayList) CMgrCompletedAppsEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) URL(org.apache.hadoop.yarn.api.records.URL) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 3 with CMgrCompletedAppsEvent

use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent in project hadoop by apache.

the class ContainerManagerImpl method cleanUpApplicationsOnNMShutDown.

/**
 * Asks every application still known to the context to finish, then waits for
 * the application map to drain.
 *
 * <p>Nothing is done when no applications remain, or when the state store can
 * recover, the node is not decommissioned and supervised recovery is enabled
 * (the applications can then be recovered on restart).
 *
 * <p>The wait polls once per second for up to
 * {@code waitForContainersOnShutdownMillis} milliseconds, measured with the
 * monotonic {@link System#nanoTime()} so wall-clock adjustments cannot stretch
 * or cut short the timeout. If the waiting thread is interrupted, the interrupt
 * status is restored and the wait ends immediately.
 */
public void cleanUpApplicationsOnNMShutDown() {
    Map<ApplicationId, Application> applications = this.context.getApplications();
    if (applications.isEmpty()) {
        return;
    }
    LOG.info("Applications still running : " + applications.keySet());
    if (this.context.getNMStateStore().canRecover() && !this.context.getDecommissioned()) {
        if (getConfig().getBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, YarnConfiguration.DEFAULT_NM_RECOVERY_SUPERVISED)) {
            // do not cleanup apps as they can be recovered on restart
            return;
        }
    }
    List<ApplicationId> appIds = new ArrayList<ApplicationId>(applications.keySet());
    this.handle(new CMgrCompletedAppsEvent(appIds, CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
    LOG.info("Waiting for Applications to be Finished");
    long waitStartNanos = System.nanoTime();
    while (!applications.isEmpty() && (System.nanoTime() - waitStartNanos) / 1000000L < waitForContainersOnShutdownMillis) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ex) {
            LOG.warn("Interrupted while sleeping on applications finish on shutdown", ex);
            // Restore the flag for callers and stop waiting: with the flag set,
            // further sleeps would throw immediately and spin until the timeout.
            Thread.currentThread().interrupt();
            break;
        }
    }
    // All applications Finished
    if (applications.isEmpty()) {
        LOG.info("All applications in FINISHED state");
    } else {
        LOG.info("Done waiting for Applications to be Finished. Still alive: " + applications.keySet());
    }
}
Also used : ArrayList(java.util.ArrayList) CMgrCompletedAppsEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application)

Example 4 with CMgrCompletedAppsEvent

use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent in project hadoop by apache.

the class TestLogAggregationService method testLogAggregationForRealContainerLaunch.

/**
 * Launches a real container whose script echoes one line each into the
 * stdout, stderr and syslog files under {@code localLogDir}, waits for the
 * container to complete, sends a FINISH_APPS event for its application and
 * stops the container manager.
 *
 * @throws IOException if the script file cannot be created
 * @throws InterruptedException if interrupted while waiting for the container
 * @throws YarnException if starting the container fails
 */
@Test
public void testLogAggregationForRealContainerLaunch() throws IOException, InterruptedException, YarnException {
    this.containerManager.start();
    File scriptFile = new File(tmpDir, "scriptFile.sh");
    // try-with-resources closes the writer even if a write fails.
    try (PrintWriter fileWriter = new PrintWriter(scriptFile)) {
        fileWriter.write("\necho Hello World! Stdout! > " + new File(localLogDir, "stdout"));
        fileWriter.write("\necho Hello World! Stderr! > " + new File(localLogDir, "stderr"));
        fileWriter.write("\necho Hello World! Syslog! > " + new File(localLogDir, "syslog"));
    }
    ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    // ////// Construct the Container-id
    ApplicationId appId = ApplicationId.newInstance(0, 0);
    ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 1);
    ContainerId cId = BuilderUtils.newContainerId(appAttemptId, 0);
    URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
    LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
    rsrc_alpha.setResource(resource_alpha);
    rsrc_alpha.setSize(-1);
    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc_alpha.setType(LocalResourceType.FILE);
    rsrc_alpha.setTimestamp(scriptFile.lastModified());
    String destinationFile = "dest_file";
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(destinationFile, rsrc_alpha);
    containerLaunchContext.setLocalResources(localResources);
    List<String> commands = new ArrayList<String>();
    commands.add("/bin/bash");
    commands.add(scriptFile.getAbsolutePath());
    containerLaunchContext.setCommands(commands);
    StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, TestContainerManager.createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    this.containerManager.startContainers(allRequests);
    BaseContainerManagerTest.waitForContainerState(this.containerManager, cId, ContainerState.COMPLETE);
    this.containerManager.handle(new CMgrCompletedAppsEvent(Arrays.asList(appId), CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
    this.containerManager.stop();
}
Also used : Path(org.apache.hadoop.fs.Path) StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CMgrCompletedAppsEvent(org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) URL(org.apache.hadoop.yarn.api.records.URL) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) PrintWriter(java.io.PrintWriter) BaseContainerManagerTest(org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest) Test(org.junit.Test)

Aggregations

ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)4 CMgrCompletedAppsEvent (org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent)4 ArrayList (java.util.ArrayList)3 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)3 File (java.io.File)2 PrintWriter (java.io.PrintWriter)2 HashMap (java.util.HashMap)2 Path (org.apache.hadoop.fs.Path)2 StartContainerRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest)2 StartContainersRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)2 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)2 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)2 URL (org.apache.hadoop.yarn.api.records.URL)2 Application (org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application)2 Test (org.junit.Test)2 ByteString (com.google.protobuf.ByteString)1 IOException (java.io.IOException)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 SignalContainerRequest (org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest)1 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)1