Example use of org.apache.hadoop.yarn.api.records.LocalResource in the Apache Hadoop project.
From the class TestContainerManagerRecovery, method testContainerCleanupOnShutdown.
/**
 * Checks whether stopping the container manager dispatches a
 * {@link CMgrCompletedAppsEvent} under three configurations: recovery
 * disabled, recovery enabled but unsupervised, and recovery enabled and
 * supervised. Only the supervised case expects the event to be absent.
 */
@Test
public void testContainerCleanupOnShutdown() throws Exception {
  ContainerId containerId = ContainerId.newContainerId(
      ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 1), 1), 1);

  // An empty launch context: no resources, environment, commands, service data or ACLs.
  Map<String, LocalResource> noResources = Collections.emptyMap();
  Map<String, String> noEnv = Collections.emptyMap();
  List<String> noCommands = Collections.emptyList();
  Map<String, ByteBuffer> noServiceData = Collections.emptyMap();
  Map<ApplicationAccessType, String> noAcls = Collections.emptyMap();

  // Serialize an empty credential set to serve as the container tokens.
  DataOutputBuffer tokenBuffer = new DataOutputBuffer();
  new Credentials().writeTokenStorageToStream(tokenBuffer);
  ByteBuffer tokens =
      ByteBuffer.wrap(tokenBuffer.getData(), 0, tokenBuffer.getLength());

  ContainerLaunchContext launchContext = ContainerLaunchContext.newInstance(
      noResources, noEnv, noCommands, noServiceData, tokens, noAcls);
  LogAggregationContext logContext =
      LogAggregationContext.newInstance("includePattern", "excludePattern");

  // Case 1: recovery disabled -> containers are stopped on shutdown.
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, false);
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, false);
  ContainerManagerImpl stoppedManager = launchOneContainerThenStop(
      new NMNullStateStoreService(), containerId, launchContext, logContext);
  verify(stoppedManager).handle(isA(CMgrCompletedAppsEvent.class));

  // Case 2: unsupervised recovery -> containers are still stopped on shutdown.
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, false);
  NMMemoryStateStoreService memoryStore = new NMMemoryStateStoreService();
  memoryStore.init(conf);
  memoryStore.start();
  stoppedManager = launchOneContainerThenStop(
      memoryStore, containerId, launchContext, logContext);
  memoryStore.close();
  verify(stoppedManager).handle(isA(CMgrCompletedAppsEvent.class));

  // Case 3: supervised recovery -> containers are NOT stopped on shutdown.
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
  memoryStore = new NMMemoryStateStoreService();
  memoryStore.init(conf);
  memoryStore.start();
  stoppedManager = launchOneContainerThenStop(
      memoryStore, containerId, launchContext, logContext);
  memoryStore.close();
  verify(stoppedManager, never()).handle(isA(CMgrCompletedAppsEvent.class));
}

/**
 * Builds a spied container manager on top of the given state store, starts
 * it, launches a single container (asserting the launch succeeded) and then
 * stops the manager.
 *
 * @return the stopped, spied container manager so callers can verify the
 *         events it handled
 */
private ContainerManagerImpl launchOneContainerThenStop(
    NMStateStoreService stateStore, ContainerId containerId,
    ContainerLaunchContext launchContext,
    LogAggregationContext logContext) throws Exception {
  Context nmContext = createContext(conf, stateStore);
  ContainerManagerImpl manager = spy(createContainerManager(nmContext));
  manager.init(conf);
  manager.start();
  StartContainersResponse response =
      startContainer(nmContext, manager, containerId, launchContext, logContext);
  assertEquals(1, response.getSuccessfullyStartedContainers().size());
  manager.stop();
  return manager;
}
Example use of org.apache.hadoop.yarn.api.records.LocalResource in the Apache Hadoop project.
From the class TestContainerManagerRecovery, method testApplicationRecovery.
/**
 * Starts a container with MODIFY_APP/VIEW_APP ACLs and a log aggregation
 * context, then repeatedly restarts the container manager against the same
 * state store and checks that the application, its ACLs and its log
 * aggregation context are present after each restart, and that the
 * application is gone once it has finished and log handling has completed.
 */
@Test
public void testApplicationRecovery() throws Exception {
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
  conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
  conf.set(YarnConfiguration.YARN_ADMIN_ACL, "yarn_admin_user");
  NMStateStoreService stateStore = new NMMemoryStateStoreService();
  stateStore.init(conf);
  stateStore.start();
  Context context = createContext(conf, stateStore);
  ContainerManagerImpl cm = createContainerManager(context);
  cm.init(conf);
  cm.start();

  // add an application by starting a container
  String appUser = "app_user1";
  String modUser = "modify_user1";
  String viewUser = "view_user1";
  String enemyUser = "enemy_user";
  ApplicationId appId = ApplicationId.newInstance(0, 1);
  ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
  ContainerId cid = ContainerId.newContainerId(attemptId, 1);
  Map<String, LocalResource> localResources = Collections.emptyMap();
  Map<String, String> containerEnv = Collections.emptyMap();
  List<String> containerCmds = Collections.emptyList();
  Map<String, ByteBuffer> serviceData = Collections.emptyMap();
  Credentials containerCreds = new Credentials();
  DataOutputBuffer dob = new DataOutputBuffer();
  containerCreds.writeTokenStorageToStream(dob);
  ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
  Map<ApplicationAccessType, String> acls = new HashMap<ApplicationAccessType, String>();
  acls.put(ApplicationAccessType.MODIFY_APP, modUser);
  acls.put(ApplicationAccessType.VIEW_APP, viewUser);
  ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
      localResources, containerEnv, containerCmds, serviceData, containerTokens, acls);
  // create the logAggregationContext
  LogAggregationContext logAggregationContext = LogAggregationContext.newInstance(
      "includePattern", "excludePattern",
      "includePatternInRollingAggregation", "excludePatternInRollingAggregation");
  StartContainersResponse startResponse =
      startContainer(context, cm, cid, clc, logAggregationContext);
  assertTrue(startResponse.getFailedRequests().isEmpty());
  assertEquals(1, context.getApplications().size());
  Application app = context.getApplications().get(appId);
  assertNotNull(app);
  waitForAppState(app, ApplicationState.INITING);
  assertAppAclChecks(context, appId, appUser, modUser, viewUser, enemyUser);

  // reset container manager and verify app recovered with proper acls
  cm.stop();
  context = createContext(conf, stateStore);
  cm = createContainerManager(context);
  cm.init(conf);
  cm.start();
  assertEquals(1, context.getApplications().size());
  app = context.getApplications().get(appId);
  assertNotNull(app);
  // check whether LogAggregationContext is recovered correctly
  LogAggregationContext recovered = ((ApplicationImpl) app).getLogAggregationContext();
  assertNotNull(recovered);
  assertEquals(logAggregationContext.getIncludePattern(), recovered.getIncludePattern());
  assertEquals(logAggregationContext.getExcludePattern(), recovered.getExcludePattern());
  assertEquals(logAggregationContext.getRolledLogsIncludePattern(),
      recovered.getRolledLogsIncludePattern());
  assertEquals(logAggregationContext.getRolledLogsExcludePattern(),
      recovered.getRolledLogsExcludePattern());
  waitForAppState(app, ApplicationState.INITING);
  assertAppAclChecks(context, appId, appUser, modUser, viewUser, enemyUser);

  // simulate application completion
  app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
  waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);

  // restart and verify app is marked for finishing
  cm.stop();
  context = createContext(conf, stateStore);
  cm = createContainerManager(context);
  cm.init(conf);
  cm.start();
  assertEquals(1, context.getApplications().size());
  app = context.getApplications().get(appId);
  assertNotNull(app);
  // no longer saving FINISH_APP event in NM stateStore,
  // simulate by resending FINISH_APP event
  app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
  waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
  assertAppAclChecks(context, appId, appUser, modUser, viewUser, enemyUser);

  // simulate log aggregation completion
  app.handle(new ApplicationEvent(app.getAppId(),
      ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
  // JUnit convention is (expected, actual) so a failure reports the right values.
  assertEquals(ApplicationState.FINISHED, app.getApplicationState());
  app.handle(new ApplicationEvent(app.getAppId(),
      ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED));

  // restart and verify app is no longer present after recovery
  cm.stop();
  context = createContext(conf, stateStore);
  cm = createContainerManager(context);
  cm.init(conf);
  cm.start();
  assertTrue(context.getApplications().isEmpty());
  cm.stop();
}

/**
 * Asserts the access decisions expected from the application ACLs set up by
 * {@code testApplicationRecovery}: {@code modUser} may modify, {@code viewUser}
 * may view but not modify, and {@code enemyUser} may not view.
 */
private static void assertAppAclChecks(Context context, ApplicationId appId,
    String appUser, String modUser, String viewUser, String enemyUser) {
  assertTrue(context.getApplicationACLsManager().checkAccess(
      UserGroupInformation.createRemoteUser(modUser),
      ApplicationAccessType.MODIFY_APP, appUser, appId));
  assertFalse(context.getApplicationACLsManager().checkAccess(
      UserGroupInformation.createRemoteUser(viewUser),
      ApplicationAccessType.MODIFY_APP, appUser, appId));
  assertTrue(context.getApplicationACLsManager().checkAccess(
      UserGroupInformation.createRemoteUser(viewUser),
      ApplicationAccessType.VIEW_APP, appUser, appId));
  assertFalse(context.getApplicationACLsManager().checkAccess(
      UserGroupInformation.createRemoteUser(enemyUser),
      ApplicationAccessType.VIEW_APP, appUser, appId));
}
Example use of org.apache.hadoop.yarn.api.records.LocalResource in the Apache Hadoop project.
From the class TestContainerManager, method testContainerLaunchAndSignal.
/**
 * Verify signal container request can be delivered from
 * NodeStatusUpdaterImpl to ContainerExecutor.
 *
 * <p>Launches a container running a script that writes a start file and then
 * sleeps, sends a {@link CMgrSignalContainersEvent} for it, and verifies the
 * call made (or not made) to {@code exec.signalContainer}.
 *
 * @param command the signal command to deliver to the container
 */
private void testContainerLaunchAndSignal(SignalContainerCommand command) throws IOException, InterruptedException, YarnException {
  Signal signal = ContainerLaunch.translateCommandToSignal(command);
  containerManager.start();

  File scriptFile = new File(tmpDir, "scriptFile.sh");
  File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile();
  // try-with-resources so the script file handle is released even if a write fails.
  try (PrintWriter fileWriter = new PrintWriter(scriptFile)) {
    // So that start file is readable by the test
    fileWriter.write("\numask 0");
    fileWriter.write("\necho Hello World! > " + processStartFile);
    fileWriter.write("\necho $$ >> " + processStartFile);
    fileWriter.write("\nexec sleep 1000s");
  }

  ContainerLaunchContext containerLaunchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  // ////// Construct the Container-id
  ContainerId cId = createContainerId(0);
  URL resource_alpha =
      URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
  LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
  rsrc_alpha.setResource(resource_alpha);
  rsrc_alpha.setSize(-1);
  rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
  rsrc_alpha.setType(LocalResourceType.FILE);
  rsrc_alpha.setTimestamp(scriptFile.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, rsrc_alpha);
  containerLaunchContext.setLocalResources(localResources);
  List<String> commands = new ArrayList<>();
  commands.add("/bin/bash");
  commands.add(scriptFile.getAbsolutePath());
  containerLaunchContext.setCommands(commands);

  StartContainerRequest scRequest = StartContainerRequest.newInstance(
      containerLaunchContext,
      createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user,
          context.getContainerTokenSecretManager()));
  List<StartContainerRequest> list = new ArrayList<>();
  list.add(scRequest);
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  containerManager.startContainers(allRequests);

  // Poll (up to ~20 one-second sleeps) for the script to create its start file.
  int timeoutSecs = 0;
  while (!processStartFile.exists() && timeoutSecs++ < 20) {
    Thread.sleep(1000);
    LOG.info("Waiting for process start-file to be created");
  }
  Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());

  // Simulate NodeStatusUpdaterImpl sending CMgrSignalContainersEvent
  SignalContainerRequest signalReq = SignalContainerRequest.newInstance(cId, command);
  List<SignalContainerRequest> reqs = new ArrayList<>();
  reqs.add(signalReq);
  containerManager.handle(new CMgrSignalContainersEvent(reqs));

  final ArgumentCaptor<ContainerSignalContext> signalContextCaptor =
      ArgumentCaptor.forClass(ContainerSignalContext.class);
  if (signal.equals(Signal.NULL)) {
    // A command that translates to Signal.NULL must not reach the executor.
    verify(exec, never()).signalContainer(signalContextCaptor.capture());
  } else {
    verify(exec, timeout(10000).atLeastOnce()).signalContainer(signalContextCaptor.capture());
    ContainerSignalContext signalContext = signalContextCaptor.getAllValues().get(0);
    Assert.assertEquals(cId, signalContext.getContainer().getContainerId());
    Assert.assertEquals(signal, signalContext.getSignal());
  }
}
Example use of org.apache.hadoop.yarn.api.records.LocalResource in the Apache Hadoop project.
From the class TestContainerManager, method testLocalFilesCleanup.
/**
 * Starts a container with one local file resource using a real
 * {@link DeletionService}, waits for the container to complete, and checks
 * that the container directories are gone while the application directories
 * remain. It then sends a {@link CMgrCompletedAppsEvent} for the application
 * and checks that the application and container directories and the
 * localized target file are eventually deleted.
 */
@Test
public void testLocalFilesCleanup() throws InterruptedException, IOException, YarnException {
  // Real del service
  delSrvc = new DeletionService(exec);
  delSrvc.init(conf);
  containerManager = createContainerManager(delSrvc);
  containerManager.init(conf);
  containerManager.start();

  // ////// Create the resources for the container
  File dir = new File(tmpDir, "dir");
  dir.mkdirs();
  File file = new File(dir, "file");
  // try-with-resources so the file handle is released even if the write fails.
  try (PrintWriter fileWriter = new PrintWriter(file)) {
    fileWriter.write("Hello World!");
  }

  // ////// Construct the Container-id
  ContainerId cId = createContainerId(0);
  ApplicationId appId = cId.getApplicationAttemptId().getApplicationId();

  // ////// Construct the container-spec.
  ContainerLaunchContext containerLaunchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  URL resource_alpha = URL.fromPath(
      FileContext.getLocalFSFileContext().makeQualified(new Path(file.getAbsolutePath())));
  LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
  rsrc_alpha.setResource(resource_alpha);
  rsrc_alpha.setSize(-1);
  rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
  rsrc_alpha.setType(LocalResourceType.FILE);
  rsrc_alpha.setTimestamp(file.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, rsrc_alpha);
  containerLaunchContext.setLocalResources(localResources);

  StartContainerRequest scRequest = StartContainerRequest.newInstance(
      containerLaunchContext,
      createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user,
          context.getContainerTokenSecretManager()));
  List<StartContainerRequest> list = new ArrayList<>();
  list.add(scRequest);
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  containerManager.startContainers(allRequests);
  BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE);
  BaseContainerManagerTest.waitForApplicationState(containerManager, appId,
      ApplicationState.RUNNING);

  // Build the paths where the container's resources would have been localised.
  String appIDStr = appId.toString();
  String containerIDStr = cId.toString();
  File userCacheDir = new File(localDir, ContainerLocalizer.USERCACHE);
  File userDir = new File(userCacheDir, user);
  File appCache = new File(userDir, ContainerLocalizer.APPCACHE);
  File appDir = new File(appCache, appIDStr);
  File containerDir = new File(appDir, containerIDStr);
  File targetFile = new File(containerDir, destinationFile);
  File sysDir = new File(localDir, ResourceLocalizationService.NM_PRIVATE_DIR);
  File appSysDir = new File(sysDir, appIDStr);
  File containerSysDir = new File(appSysDir, containerIDStr);

  // AppDir should still exist
  Assert.assertTrue("AppDir " + appDir.getAbsolutePath() + " doesn't exist!!", appDir.exists());
  Assert.assertTrue("AppSysDir " + appSysDir.getAbsolutePath() + " doesn't exist!!",
      appSysDir.exists());
  // The completed container's own directories and file should already be gone.
  for (File f : new File[] { containerDir, containerSysDir }) {
    Assert.assertFalse(f.getAbsolutePath() + " exists!!", f.exists());
  }
  Assert.assertFalse(targetFile.getAbsolutePath() + " exists!!", targetFile.exists());

  // Simulate RM sending an AppFinish event.
  containerManager.handle(new CMgrCompletedAppsEvent(Arrays.asList(appId),
      CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
  BaseContainerManagerTest.waitForApplicationState(containerManager, appId,
      ApplicationState.FINISHED);

  // Now ascertain that the application's resources have been cleaned up.
  for (File f : new File[] {
      appDir, containerDir, appSysDir, containerSysDir, targetFile }) {
    // Wait for deletion. Deletion can happen long after AppFinish because of
    // the async DeletionService
    int timeout = 0;
    while (f.exists() && timeout++ < 15) {
      Thread.sleep(1000);
    }
    Assert.assertFalse(f.getAbsolutePath() + " exists!!", f.exists());
  }
}
Example use of org.apache.hadoop.yarn.api.records.LocalResource in the Apache Hadoop project.
From the class TestNodeManagerShutdown, method startContainer.
/**
 * Starts a container on the given NodeManager over RPC and asserts that its
 * reported state is RUNNING or SCHEDULED.
 *
 * <p>The container runs the script produced by
 * {@code createUnhaltingScriptFile}, registered as an APPLICATION-visibility
 * FILE local resource. The RPC proxy is created as a remote user named after
 * the container id and carrying an NM token bound to 127.0.0.1:{@code port}.
 *
 * @param nm the NodeManager supplying the NM-token and container-token secret managers
 * @param cId id of the container to start
 * @param localFS file context used to qualify the script path
 * @param scriptFileDir directory passed to {@code createUnhaltingScriptFile}
 * @param processStartFile file passed to {@code createUnhaltingScriptFile}
 * @param port port of the container manager RPC endpoint on 127.0.0.1
 */
public static void startContainer(NodeManager nm, ContainerId cId, FileContext localFS, File scriptFileDir, File processStartFile, final int port) throws IOException, YarnException {
  File scriptFile = createUnhaltingScriptFile(cId, scriptFileDir, processStartFile);
  ContainerLaunchContext containerLaunchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  NodeId nodeId = BuilderUtils.newNodeId(
      InetAddress.getByName("localhost").getCanonicalHostName(), port);

  URL localResourceUri =
      URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
  LocalResource localResource = recordFactory.newRecordInstance(LocalResource.class);
  localResource.setResource(localResourceUri);
  localResource.setSize(-1);
  localResource.setVisibility(LocalResourceVisibility.APPLICATION);
  localResource.setType(LocalResourceType.FILE);
  localResource.setTimestamp(scriptFile.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, localResource);
  containerLaunchContext.setLocalResources(localResources);
  List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
  containerLaunchContext.setCommands(commands);

  final InetSocketAddress containerManagerBindAddress =
      NetUtils.createSocketAddrForHost("127.0.0.1", port);
  UserGroupInformation currentUser = UserGroupInformation.createRemoteUser(cId.toString());
  org.apache.hadoop.security.token.Token<NMTokenIdentifier> nmToken =
      ConverterUtils.convertFromYarn(
          nm.getNMContext().getNMTokenSecretManager()
              .createNMToken(cId.getApplicationAttemptId(), nodeId, user),
          containerManagerBindAddress);
  currentUser.addToken(nmToken);

  // Create the proxy as the token-carrying user. The final bind address above
  // is captured directly rather than being re-created under a shadowing name.
  ContainerManagementProtocol containerManager =
      currentUser.doAs(new PrivilegedAction<ContainerManagementProtocol>() {
        @Override
        public ContainerManagementProtocol run() {
          Configuration conf = new Configuration();
          YarnRPC rpc = YarnRPC.create(conf);
          return (ContainerManagementProtocol) rpc.getProxy(
              ContainerManagementProtocol.class, containerManagerBindAddress, conf);
        }
      });

  StartContainerRequest scRequest = StartContainerRequest.newInstance(
      containerLaunchContext,
      TestContainerManager.createContainerToken(cId, 0, nodeId, user,
          nm.getNMContext().getContainerTokenSecretManager()));
  List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
  list.add(scRequest);
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  containerManager.startContainers(allRequests);

  List<ContainerId> containerIds = new ArrayList<ContainerId>();
  containerIds.add(cId);
  GetContainerStatusesRequest request = GetContainerStatusesRequest.newInstance(containerIds);
  ContainerStatus containerStatus =
      containerManager.getContainerStatuses(request).getContainerStatuses().get(0);
  Assert.assertTrue(
      "Unexpected state for container " + cId + ": " + containerStatus.getState(),
      EnumSet.of(ContainerState.RUNNING, ContainerState.SCHEDULED)
          .contains(containerStatus.getState()));
}
Aggregations