use of org.apache.hadoop.yarn.api.records.ContainerLaunchContext in project hadoop by apache.
the class TestContainerManagerRecovery method testApplicationRecovery.
@Test
public void testApplicationRecovery() throws Exception {
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
conf.set(YarnConfiguration.YARN_ADMIN_ACL, "yarn_admin_user");
NMStateStoreService stateStore = new NMMemoryStateStoreService();
stateStore.init(conf);
stateStore.start();
Context context = createContext(conf, stateStore);
ContainerManagerImpl cm = createContainerManager(context);
cm.init(conf);
cm.start();
// add an application by starting a container
String appUser = "app_user1";
String modUser = "modify_user1";
String viewUser = "view_user1";
String enemyUser = "enemy_user";
ApplicationId appId = ApplicationId.newInstance(0, 1);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
ContainerId cid = ContainerId.newContainerId(attemptId, 1);
Map<String, LocalResource> localResources = Collections.emptyMap();
Map<String, String> containerEnv = Collections.emptyMap();
List<String> containerCmds = Collections.emptyList();
Map<String, ByteBuffer> serviceData = Collections.emptyMap();
Credentials containerCreds = new Credentials();
DataOutputBuffer dob = new DataOutputBuffer();
containerCreds.writeTokenStorageToStream(dob);
ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
Map<ApplicationAccessType, String> acls = new HashMap<ApplicationAccessType, String>();
acls.put(ApplicationAccessType.MODIFY_APP, modUser);
acls.put(ApplicationAccessType.VIEW_APP, viewUser);
ContainerLaunchContext clc = ContainerLaunchContext.newInstance(localResources, containerEnv, containerCmds, serviceData, containerTokens, acls);
// create the logAggregationContext
LogAggregationContext logAggregationContext = LogAggregationContext.newInstance("includePattern", "excludePattern", "includePatternInRollingAggregation", "excludePatternInRollingAggregation");
StartContainersResponse startResponse = startContainer(context, cm, cid, clc, logAggregationContext);
assertTrue(startResponse.getFailedRequests().isEmpty());
assertEquals(1, context.getApplications().size());
Application app = context.getApplications().get(appId);
assertNotNull(app);
waitForAppState(app, ApplicationState.INITING);
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));
// reset container manager and verify app recovered with proper acls
cm.stop();
context = createContext(conf, stateStore);
cm = createContainerManager(context);
cm.init(conf);
cm.start();
assertEquals(1, context.getApplications().size());
app = context.getApplications().get(appId);
assertNotNull(app);
// check whether LogAggregationContext is recovered correctly
LogAggregationContext recovered = ((ApplicationImpl) app).getLogAggregationContext();
assertNotNull(recovered);
assertEquals(logAggregationContext.getIncludePattern(), recovered.getIncludePattern());
assertEquals(logAggregationContext.getExcludePattern(), recovered.getExcludePattern());
assertEquals(logAggregationContext.getRolledLogsIncludePattern(), recovered.getRolledLogsIncludePattern());
assertEquals(logAggregationContext.getRolledLogsExcludePattern(), recovered.getRolledLogsExcludePattern());
waitForAppState(app, ApplicationState.INITING);
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));
// simulate application completion
List<ApplicationId> finishedApps = new ArrayList<ApplicationId>();
finishedApps.add(appId);
app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
// restart and verify app is marked for finishing
cm.stop();
context = createContext(conf, stateStore);
cm = createContainerManager(context);
cm.init(conf);
cm.start();
assertEquals(1, context.getApplications().size());
app = context.getApplications().get(appId);
assertNotNull(app);
// no longer saving FINISH_APP event in NM stateStore,
// simulate by resending FINISH_APP event
app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));
// simulate log aggregation completion
app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
assertEquals(app.getApplicationState(), ApplicationState.FINISHED);
app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED));
// restart and verify app is no longer present after recovery
cm.stop();
context = createContext(conf, stateStore);
cm = createContainerManager(context);
cm.init(conf);
cm.start();
assertTrue(context.getApplications().isEmpty());
cm.stop();
}
use of org.apache.hadoop.yarn.api.records.ContainerLaunchContext in project hadoop by apache.
the class TestContainerManager method testContainerLaunchAndSignal.
// Verify signal container request can be delivered from
// NodeStatusUpdaterImpl to ContainerExecutor.
private void testContainerLaunchAndSignal(SignalContainerCommand command) throws IOException, InterruptedException, YarnException {
Signal signal = ContainerLaunch.translateCommandToSignal(command);
containerManager.start();
File scriptFile = new File(tmpDir, "scriptFile.sh");
PrintWriter fileWriter = new PrintWriter(scriptFile);
File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile();
// So that start file is readable by the test
fileWriter.write("\numask 0");
fileWriter.write("\necho Hello World! > " + processStartFile);
fileWriter.write("\necho $$ >> " + processStartFile);
fileWriter.write("\nexec sleep 1000s");
fileWriter.close();
ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
// ////// Construct the Container-id
ContainerId cId = createContainerId(0);
URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
rsrc_alpha.setResource(resource_alpha);
rsrc_alpha.setSize(-1);
rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
rsrc_alpha.setType(LocalResourceType.FILE);
rsrc_alpha.setTimestamp(scriptFile.lastModified());
String destinationFile = "dest_file";
Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
localResources.put(destinationFile, rsrc_alpha);
containerLaunchContext.setLocalResources(localResources);
List<String> commands = new ArrayList<>();
commands.add("/bin/bash");
commands.add(scriptFile.getAbsolutePath());
containerLaunchContext.setCommands(commands);
StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
List<StartContainerRequest> list = new ArrayList<>();
list.add(scRequest);
StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
containerManager.startContainers(allRequests);
int timeoutSecs = 0;
while (!processStartFile.exists() && timeoutSecs++ < 20) {
Thread.sleep(1000);
LOG.info("Waiting for process start-file to be created");
}
Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());
// Simulate NodeStatusUpdaterImpl sending CMgrSignalContainersEvent
SignalContainerRequest signalReq = SignalContainerRequest.newInstance(cId, command);
List<SignalContainerRequest> reqs = new ArrayList<>();
reqs.add(signalReq);
containerManager.handle(new CMgrSignalContainersEvent(reqs));
final ArgumentCaptor<ContainerSignalContext> signalContextCaptor = ArgumentCaptor.forClass(ContainerSignalContext.class);
if (signal.equals(Signal.NULL)) {
verify(exec, never()).signalContainer(signalContextCaptor.capture());
} else {
verify(exec, timeout(10000).atLeastOnce()).signalContainer(signalContextCaptor.capture());
ContainerSignalContext signalContext = signalContextCaptor.getAllValues().get(0);
Assert.assertEquals(cId, signalContext.getContainer().getContainerId());
Assert.assertEquals(signal, signalContext.getSignal());
}
}
use of org.apache.hadoop.yarn.api.records.ContainerLaunchContext in project hadoop by apache.
the class TestContainerManager method testLocalFilesCleanup.
@Test
public void testLocalFilesCleanup() throws InterruptedException, IOException, YarnException {
// Real del service
delSrvc = new DeletionService(exec);
delSrvc.init(conf);
containerManager = createContainerManager(delSrvc);
containerManager.init(conf);
containerManager.start();
// ////// Create the resources for the container
File dir = new File(tmpDir, "dir");
dir.mkdirs();
File file = new File(dir, "file");
PrintWriter fileWriter = new PrintWriter(file);
fileWriter.write("Hello World!");
fileWriter.close();
// ////// Construct the Container-id
ContainerId cId = createContainerId(0);
ApplicationId appId = cId.getApplicationAttemptId().getApplicationId();
// ////// Construct the container-spec.
ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
// containerLaunchContext.resources =
// new HashMap<CharSequence, LocalResource>();
URL resource_alpha = URL.fromPath(FileContext.getLocalFSFileContext().makeQualified(new Path(file.getAbsolutePath())));
LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
rsrc_alpha.setResource(resource_alpha);
rsrc_alpha.setSize(-1);
rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
rsrc_alpha.setType(LocalResourceType.FILE);
rsrc_alpha.setTimestamp(file.lastModified());
String destinationFile = "dest_file";
Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
localResources.put(destinationFile, rsrc_alpha);
containerLaunchContext.setLocalResources(localResources);
StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
List<StartContainerRequest> list = new ArrayList<>();
list.add(scRequest);
StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
containerManager.startContainers(allRequests);
BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE);
BaseContainerManagerTest.waitForApplicationState(containerManager, cId.getApplicationAttemptId().getApplicationId(), ApplicationState.RUNNING);
// Now ascertain that the resources are localised correctly.
String appIDStr = appId.toString();
String containerIDStr = cId.toString();
File userCacheDir = new File(localDir, ContainerLocalizer.USERCACHE);
File userDir = new File(userCacheDir, user);
File appCache = new File(userDir, ContainerLocalizer.APPCACHE);
File appDir = new File(appCache, appIDStr);
File containerDir = new File(appDir, containerIDStr);
File targetFile = new File(containerDir, destinationFile);
File sysDir = new File(localDir, ResourceLocalizationService.NM_PRIVATE_DIR);
File appSysDir = new File(sysDir, appIDStr);
File containerSysDir = new File(appSysDir, containerIDStr);
// AppDir should still exist
Assert.assertTrue("AppDir " + appDir.getAbsolutePath() + " doesn't exist!!", appDir.exists());
Assert.assertTrue("AppSysDir " + appSysDir.getAbsolutePath() + " doesn't exist!!", appSysDir.exists());
for (File f : new File[] { containerDir, containerSysDir }) {
Assert.assertFalse(f.getAbsolutePath() + " exists!!", f.exists());
}
Assert.assertFalse(targetFile.getAbsolutePath() + " exists!!", targetFile.exists());
// Simulate RM sending an AppFinish event.
containerManager.handle(new CMgrCompletedAppsEvent(Arrays.asList(new ApplicationId[] { appId }), CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
BaseContainerManagerTest.waitForApplicationState(containerManager, cId.getApplicationAttemptId().getApplicationId(), ApplicationState.FINISHED);
// Now ascertain that the resources are localised correctly.
for (File f : new File[] { appDir, containerDir, appSysDir, containerSysDir }) {
// Wait for deletion. Deletion can happen long after AppFinish because of
// the async DeletionService
int timeout = 0;
while (f.exists() && timeout++ < 15) {
Thread.sleep(1000);
}
Assert.assertFalse(f.getAbsolutePath() + " exists!!", f.exists());
}
// Wait for deletion
int timeout = 0;
while (targetFile.exists() && timeout++ < 15) {
Thread.sleep(1000);
}
Assert.assertFalse(targetFile.getAbsolutePath() + " exists!!", targetFile.exists());
}
use of org.apache.hadoop.yarn.api.records.ContainerLaunchContext in project hadoop by apache.
the class TestContainerManager method testIncreaseContainerResourceWithInvalidRequests.
@Test
public void testIncreaseContainerResourceWithInvalidRequests() throws Exception {
containerManager.start();
// Start 4 containers 0..4 with default resource (1024, 1)
List<StartContainerRequest> list = new ArrayList<>();
ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
for (int i = 0; i < 4; i++) {
ContainerId cId = createContainerId(i);
long identifier = DUMMY_RM_IDENTIFIER;
Token containerToken = createContainerToken(cId, identifier, context.getNodeId(), user, context.getContainerTokenSecretManager());
StartContainerRequest request = StartContainerRequest.newInstance(containerLaunchContext, containerToken);
list.add(request);
}
StartContainersRequest requestList = StartContainersRequest.newInstance(list);
StartContainersResponse response = containerManager.startContainers(requestList);
Assert.assertEquals(4, response.getSuccessfullyStartedContainers().size());
int i = 0;
for (ContainerId id : response.getSuccessfullyStartedContainers()) {
Assert.assertEquals(i, id.getContainerId());
i++;
}
Thread.sleep(2000);
// Construct container resource increase request,
List<Token> increaseTokens = new ArrayList<>();
// Add increase request for container-0, the request will fail as the
// container will have exited, and won't be in RUNNING state
ContainerId cId0 = createContainerId(0);
Token containerToken = createContainerToken(cId0, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, Resource.newInstance(1234, 3), context.getContainerTokenSecretManager(), null);
increaseTokens.add(containerToken);
// Add increase request for container-7, the request will fail as the
// container does not exist
ContainerId cId7 = createContainerId(7);
containerToken = createContainerToken(cId7, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, Resource.newInstance(1234, 3), context.getContainerTokenSecretManager(), null);
increaseTokens.add(containerToken);
IncreaseContainersResourceRequest increaseRequest = IncreaseContainersResourceRequest.newInstance(increaseTokens);
IncreaseContainersResourceResponse increaseResponse = containerManager.increaseContainersResource(increaseRequest);
// Check response
Assert.assertEquals(0, increaseResponse.getSuccessfullyIncreasedContainers().size());
Assert.assertEquals(2, increaseResponse.getFailedRequests().size());
for (Map.Entry<ContainerId, SerializedException> entry : increaseResponse.getFailedRequests().entrySet()) {
Assert.assertNotNull("Failed message", entry.getValue().getMessage());
if (cId0.equals(entry.getKey())) {
Assert.assertTrue(entry.getValue().getMessage().contains("Resource can only be changed when a " + "container is in RUNNING state"));
} else if (cId7.equals(entry.getKey())) {
Assert.assertTrue(entry.getValue().getMessage().contains("Container " + cId7.toString() + " is not handled by this NodeManager"));
} else {
throw new YarnException("Received failed request from wrong" + " container: " + entry.getKey().toString());
}
}
}
use of org.apache.hadoop.yarn.api.records.ContainerLaunchContext in project hadoop by apache.
the class TestContainerManager method testStartContainerFailureWithUnknownAuxService.
@Test
public void testStartContainerFailureWithUnknownAuxService() throws Exception {
conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, new String[] { "existService" });
conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "existService"), ServiceA.class, Service.class);
containerManager.start();
List<StartContainerRequest> startRequest = new ArrayList<>();
ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
Map<String, ByteBuffer> serviceData = new HashMap<String, ByteBuffer>();
String serviceName = "non_exist_auxService";
serviceData.put(serviceName, ByteBuffer.wrap(serviceName.getBytes()));
containerLaunchContext.setServiceData(serviceData);
ContainerId cId = createContainerId(0);
String user = "start_container_fail";
Token containerToken = createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager());
StartContainerRequest request = StartContainerRequest.newInstance(containerLaunchContext, containerToken);
// start containers
startRequest.add(request);
StartContainersRequest requestList = StartContainersRequest.newInstance(startRequest);
StartContainersResponse response = containerManager.startContainers(requestList);
Assert.assertEquals(1, response.getFailedRequests().size());
Assert.assertEquals(0, response.getSuccessfullyStartedContainers().size());
Assert.assertTrue(response.getFailedRequests().containsKey(cId));
Assert.assertTrue(response.getFailedRequests().get(cId).getMessage().contains("The auxService:" + serviceName + " does not exist"));
}
Aggregations