use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService in project hadoop by apache.
the class TestDeletionService method testRecovery.
@Test
public void testRecovery() throws Exception {
Random r = new Random();
long seed = r.nextLong();
r.setSeed(seed);
System.out.println("SEED: " + seed);
List<Path> baseDirs = buildDirs(r, base, 4);
createDirs(new Path("."), baseDirs);
List<Path> content = buildDirs(r, new Path("."), 10);
for (Path b : baseDirs) {
createDirs(b, content);
}
Configuration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
conf.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 1);
NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
stateStore.init(conf);
stateStore.start();
DeletionService del = new DeletionService(new FakeDefaultContainerExecutor(), stateStore);
try {
del.init(conf);
del.start();
for (Path p : content) {
assertTrue(lfs.util().exists(new Path(baseDirs.get(0), p)));
del.delete((Long.parseLong(p.getName()) % 2) == 0 ? null : "dingo", p, baseDirs.toArray(new Path[4]));
}
// restart the deletion service
del.stop();
del = new DeletionService(new FakeDefaultContainerExecutor(), stateStore);
del.init(conf);
del.start();
// verify paths are still eventually deleted
int msecToWait = 10 * 1000;
for (Path p : baseDirs) {
for (Path q : content) {
Path fp = new Path(p, q);
while (msecToWait > 0 && lfs.util().exists(fp)) {
Thread.sleep(100);
msecToWait -= 100;
}
assertFalse(lfs.util().exists(fp));
}
}
} finally {
del.close();
stateStore.close();
}
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService in project hadoop by apache.
the class TestNMContainerTokenSecretManager method testRecovery.
@Test
public void testRecovery() throws IOException {
YarnConfiguration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
final NodeId nodeId = NodeId.newInstance("somehost", 1234);
final ContainerId cid1 = BuilderUtils.newContainerId(1, 1, 1, 1);
final ContainerId cid2 = BuilderUtils.newContainerId(2, 2, 2, 2);
ContainerTokenKeyGeneratorForTest keygen = new ContainerTokenKeyGeneratorForTest(conf);
NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
stateStore.init(conf);
stateStore.start();
NMContainerTokenSecretManager secretMgr = new NMContainerTokenSecretManager(conf, stateStore);
secretMgr.setNodeId(nodeId);
MasterKey currentKey = keygen.generateKey();
secretMgr.setMasterKey(currentKey);
ContainerTokenIdentifier tokenId1 = createContainerTokenId(cid1, nodeId, "user1", secretMgr);
ContainerTokenIdentifier tokenId2 = createContainerTokenId(cid2, nodeId, "user2", secretMgr);
assertNotNull(secretMgr.retrievePassword(tokenId1));
assertNotNull(secretMgr.retrievePassword(tokenId2));
// restart and verify tokens still valid
secretMgr = new NMContainerTokenSecretManager(conf, stateStore);
secretMgr.setNodeId(nodeId);
secretMgr.recover();
assertEquals(currentKey, secretMgr.getCurrentKey());
assertTrue(secretMgr.isValidStartContainerRequest(tokenId1));
assertTrue(secretMgr.isValidStartContainerRequest(tokenId2));
assertNotNull(secretMgr.retrievePassword(tokenId1));
assertNotNull(secretMgr.retrievePassword(tokenId2));
// roll master key and start a container
secretMgr.startContainerSuccessful(tokenId2);
currentKey = keygen.generateKey();
secretMgr.setMasterKey(currentKey);
// restart and verify tokens still valid due to prev key persist
secretMgr = new NMContainerTokenSecretManager(conf, stateStore);
secretMgr.setNodeId(nodeId);
secretMgr.recover();
assertEquals(currentKey, secretMgr.getCurrentKey());
assertTrue(secretMgr.isValidStartContainerRequest(tokenId1));
assertFalse(secretMgr.isValidStartContainerRequest(tokenId2));
assertNotNull(secretMgr.retrievePassword(tokenId1));
assertNotNull(secretMgr.retrievePassword(tokenId2));
// roll master key again, restart, and verify keys no longer valid
currentKey = keygen.generateKey();
secretMgr.setMasterKey(currentKey);
secretMgr = new NMContainerTokenSecretManager(conf, stateStore);
secretMgr.setNodeId(nodeId);
secretMgr.recover();
assertEquals(currentKey, secretMgr.getCurrentKey());
assertTrue(secretMgr.isValidStartContainerRequest(tokenId1));
assertFalse(secretMgr.isValidStartContainerRequest(tokenId2));
try {
secretMgr.retrievePassword(tokenId1);
fail("token should not be valid");
} catch (InvalidToken e) {
// expected
}
try {
secretMgr.retrievePassword(tokenId2);
fail("token should not be valid");
} catch (InvalidToken e) {
// expected
}
stateStore.close();
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService in project hadoop by apache.
the class TestNMTokenSecretManagerInNM method testRecovery.
@Test
public void testRecovery() throws IOException {
YarnConfiguration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
final NodeId nodeId = NodeId.newInstance("somehost", 1234);
final ApplicationAttemptId attempt1 = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1);
final ApplicationAttemptId attempt2 = ApplicationAttemptId.newInstance(ApplicationId.newInstance(2, 2), 2);
NMTokenKeyGeneratorForTest keygen = new NMTokenKeyGeneratorForTest();
NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
stateStore.init(conf);
stateStore.start();
NMTokenSecretManagerInNM secretMgr = new NMTokenSecretManagerInNM(stateStore);
secretMgr.setNodeId(nodeId);
MasterKey currentKey = keygen.generateKey();
secretMgr.setMasterKey(currentKey);
NMTokenIdentifier attemptToken1 = getNMTokenId(secretMgr.createNMToken(attempt1, nodeId, "user1"));
NMTokenIdentifier attemptToken2 = getNMTokenId(secretMgr.createNMToken(attempt2, nodeId, "user2"));
secretMgr.appAttemptStartContainer(attemptToken1);
secretMgr.appAttemptStartContainer(attemptToken2);
assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1));
assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2));
assertNotNull(secretMgr.retrievePassword(attemptToken1));
assertNotNull(secretMgr.retrievePassword(attemptToken2));
// restart and verify key is still there and token still valid
secretMgr = new NMTokenSecretManagerInNM(stateStore);
secretMgr.recover();
secretMgr.setNodeId(nodeId);
assertEquals(currentKey, secretMgr.getCurrentKey());
assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1));
assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2));
assertNotNull(secretMgr.retrievePassword(attemptToken1));
assertNotNull(secretMgr.retrievePassword(attemptToken2));
// roll master key and remove an app
currentKey = keygen.generateKey();
secretMgr.setMasterKey(currentKey);
secretMgr.appFinished(attempt1.getApplicationId());
// restart and verify attempt1 key is still valid due to prev key persist
secretMgr = new NMTokenSecretManagerInNM(stateStore);
secretMgr.recover();
secretMgr.setNodeId(nodeId);
assertEquals(currentKey, secretMgr.getCurrentKey());
assertFalse(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1));
assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2));
assertNotNull(secretMgr.retrievePassword(attemptToken1));
assertNotNull(secretMgr.retrievePassword(attemptToken2));
// roll master key again, restart, and verify attempt1 key is bad but
// attempt2 is still good due to app key persist
currentKey = keygen.generateKey();
secretMgr.setMasterKey(currentKey);
secretMgr = new NMTokenSecretManagerInNM(stateStore);
secretMgr.recover();
secretMgr.setNodeId(nodeId);
assertEquals(currentKey, secretMgr.getCurrentKey());
assertFalse(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1));
assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2));
try {
secretMgr.retrievePassword(attemptToken1);
fail("attempt token should not still be valid");
} catch (InvalidToken e) {
// expected
}
assertNotNull(secretMgr.retrievePassword(attemptToken2));
// remove last attempt, restart, verify both tokens are now bad
secretMgr.appFinished(attempt2.getApplicationId());
secretMgr = new NMTokenSecretManagerInNM(stateStore);
secretMgr.recover();
secretMgr.setNodeId(nodeId);
assertEquals(currentKey, secretMgr.getCurrentKey());
assertFalse(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1));
assertFalse(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2));
try {
secretMgr.retrievePassword(attemptToken1);
fail("attempt token should not still be valid");
} catch (InvalidToken e) {
// expected
}
try {
secretMgr.retrievePassword(attemptToken2);
fail("attempt token should not still be valid");
} catch (InvalidToken e) {
// expected
}
stateStore.close();
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService in project hadoop by apache.
the class TestResourceLocalizationService method testRecovery.
@Test
// mocked generics
@SuppressWarnings("unchecked")
public void testRecovery() throws Exception {
final String user1 = "user1";
final String user2 = "user2";
final ApplicationId appId1 = ApplicationId.newInstance(1, 1);
final ApplicationId appId2 = ApplicationId.newInstance(1, 2);
List<Path> localDirs = new ArrayList<Path>();
String[] sDirs = new String[4];
for (int i = 0; i < 4; ++i) {
localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
sDirs[i] = localDirs.get(i).toString();
}
conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
stateStore.init(conf);
stateStore.start();
DrainDispatcher dispatcher = new DrainDispatcher();
dispatcher.init(conf);
dispatcher.start();
EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
dispatcher.register(ApplicationEventType.class, applicationBus);
EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
dispatcher.register(ContainerEventType.class, containerBus);
//Ignore actual localization
EventHandler<LocalizerEvent> localizerBus = mock(EventHandler.class);
dispatcher.register(LocalizerEventType.class, localizerBus);
LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
dirsHandler.init(conf);
ResourceLocalizationService spyService = createSpyService(dispatcher, dirsHandler, stateStore);
try {
spyService.init(conf);
spyService.start();
final Application app1 = mock(Application.class);
when(app1.getUser()).thenReturn(user1);
when(app1.getAppId()).thenReturn(appId1);
final Application app2 = mock(Application.class);
when(app2.getUser()).thenReturn(user2);
when(app2.getAppId()).thenReturn(appId2);
spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app1));
spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app2));
dispatcher.await();
//Get a handle on the trackers after they're setup with INIT_APP_RESOURCES
LocalResourcesTracker appTracker1 = spyService.getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user1, appId1);
LocalResourcesTracker privTracker1 = spyService.getLocalResourcesTracker(LocalResourceVisibility.PRIVATE, user1, null);
LocalResourcesTracker appTracker2 = spyService.getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user2, appId2);
LocalResourcesTracker pubTracker = spyService.getLocalResourcesTracker(LocalResourceVisibility.PUBLIC, null, null);
// init containers
final Container c1 = getMockContainer(appId1, 1, user1);
final Container c2 = getMockContainer(appId2, 2, user2);
// init resources
Random r = new Random();
long seed = r.nextLong();
System.out.println("SEED: " + seed);
r.setSeed(seed);
// Send localization requests of each type.
final LocalResource privResource1 = getPrivateMockedResource(r);
final LocalResourceRequest privReq1 = new LocalResourceRequest(privResource1);
final LocalResource privResource2 = getPrivateMockedResource(r);
final LocalResourceRequest privReq2 = new LocalResourceRequest(privResource2);
final LocalResource pubResource1 = getPublicMockedResource(r);
final LocalResourceRequest pubReq1 = new LocalResourceRequest(pubResource1);
final LocalResource pubResource2 = getPublicMockedResource(r);
final LocalResourceRequest pubReq2 = new LocalResourceRequest(pubResource2);
final LocalResource appResource1 = getAppMockedResource(r);
final LocalResourceRequest appReq1 = new LocalResourceRequest(appResource1);
final LocalResource appResource2 = getAppMockedResource(r);
final LocalResourceRequest appReq2 = new LocalResourceRequest(appResource2);
final LocalResource appResource3 = getAppMockedResource(r);
final LocalResourceRequest appReq3 = new LocalResourceRequest(appResource3);
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req1 = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
req1.put(LocalResourceVisibility.PRIVATE, Arrays.asList(new LocalResourceRequest[] { privReq1, privReq2 }));
req1.put(LocalResourceVisibility.PUBLIC, Collections.singletonList(pubReq1));
req1.put(LocalResourceVisibility.APPLICATION, Collections.singletonList(appReq1));
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req2 = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
req2.put(LocalResourceVisibility.APPLICATION, Arrays.asList(new LocalResourceRequest[] { appReq2, appReq3 }));
req2.put(LocalResourceVisibility.PUBLIC, Collections.singletonList(pubReq2));
// Send Request event
spyService.handle(new ContainerLocalizationRequestEvent(c1, req1));
spyService.handle(new ContainerLocalizationRequestEvent(c2, req2));
dispatcher.await();
// Simulate start of localization for all resources
privTracker1.getPathForLocalization(privReq1, dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE + user1), null);
privTracker1.getPathForLocalization(privReq2, dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE + user1), null);
LocalizedResource privLr1 = privTracker1.getLocalizedResource(privReq1);
LocalizedResource privLr2 = privTracker1.getLocalizedResource(privReq2);
appTracker1.getPathForLocalization(appReq1, dirsHandler.getLocalPathForWrite(ContainerLocalizer.APPCACHE + appId1), null);
LocalizedResource appLr1 = appTracker1.getLocalizedResource(appReq1);
appTracker2.getPathForLocalization(appReq2, dirsHandler.getLocalPathForWrite(ContainerLocalizer.APPCACHE + appId2), null);
LocalizedResource appLr2 = appTracker2.getLocalizedResource(appReq2);
appTracker2.getPathForLocalization(appReq3, dirsHandler.getLocalPathForWrite(ContainerLocalizer.APPCACHE + appId2), null);
LocalizedResource appLr3 = appTracker2.getLocalizedResource(appReq3);
pubTracker.getPathForLocalization(pubReq1, dirsHandler.getLocalPathForWrite(ContainerLocalizer.FILECACHE), null);
LocalizedResource pubLr1 = pubTracker.getLocalizedResource(pubReq1);
pubTracker.getPathForLocalization(pubReq2, dirsHandler.getLocalPathForWrite(ContainerLocalizer.FILECACHE), null);
LocalizedResource pubLr2 = pubTracker.getLocalizedResource(pubReq2);
// Simulate completion of localization for most resources with
// possibly different sizes than in the request
assertNotNull("Localization not started", privLr1.getLocalPath());
privTracker1.handle(new ResourceLocalizedEvent(privReq1, privLr1.getLocalPath(), privLr1.getSize() + 5));
assertNotNull("Localization not started", privLr2.getLocalPath());
privTracker1.handle(new ResourceLocalizedEvent(privReq2, privLr2.getLocalPath(), privLr2.getSize() + 10));
assertNotNull("Localization not started", appLr1.getLocalPath());
appTracker1.handle(new ResourceLocalizedEvent(appReq1, appLr1.getLocalPath(), appLr1.getSize()));
assertNotNull("Localization not started", appLr3.getLocalPath());
appTracker2.handle(new ResourceLocalizedEvent(appReq3, appLr3.getLocalPath(), appLr3.getSize() + 7));
assertNotNull("Localization not started", pubLr1.getLocalPath());
pubTracker.handle(new ResourceLocalizedEvent(pubReq1, pubLr1.getLocalPath(), pubLr1.getSize() + 1000));
assertNotNull("Localization not started", pubLr2.getLocalPath());
pubTracker.handle(new ResourceLocalizedEvent(pubReq2, pubLr2.getLocalPath(), pubLr2.getSize() + 99999));
dispatcher.await();
assertEquals(ResourceState.LOCALIZED, privLr1.getState());
assertEquals(ResourceState.LOCALIZED, privLr2.getState());
assertEquals(ResourceState.LOCALIZED, appLr1.getState());
assertEquals(ResourceState.DOWNLOADING, appLr2.getState());
assertEquals(ResourceState.LOCALIZED, appLr3.getState());
assertEquals(ResourceState.LOCALIZED, pubLr1.getState());
assertEquals(ResourceState.LOCALIZED, pubLr2.getState());
// restart and recover
spyService = createSpyService(dispatcher, dirsHandler, stateStore);
spyService.init(conf);
spyService.recoverLocalizedResources(stateStore.loadLocalizationState());
dispatcher.await();
appTracker1 = spyService.getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user1, appId1);
privTracker1 = spyService.getLocalResourcesTracker(LocalResourceVisibility.PRIVATE, user1, null);
appTracker2 = spyService.getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user2, appId2);
pubTracker = spyService.getLocalResourcesTracker(LocalResourceVisibility.PUBLIC, null, null);
LocalizedResource recoveredRsrc = privTracker1.getLocalizedResource(privReq1);
assertEquals(privReq1, recoveredRsrc.getRequest());
assertEquals(privLr1.getLocalPath(), recoveredRsrc.getLocalPath());
assertEquals(privLr1.getSize(), recoveredRsrc.getSize());
assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
recoveredRsrc = privTracker1.getLocalizedResource(privReq2);
assertEquals(privReq2, recoveredRsrc.getRequest());
assertEquals(privLr2.getLocalPath(), recoveredRsrc.getLocalPath());
assertEquals(privLr2.getSize(), recoveredRsrc.getSize());
assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
recoveredRsrc = appTracker1.getLocalizedResource(appReq1);
assertEquals(appReq1, recoveredRsrc.getRequest());
assertEquals(appLr1.getLocalPath(), recoveredRsrc.getLocalPath());
assertEquals(appLr1.getSize(), recoveredRsrc.getSize());
assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
recoveredRsrc = appTracker2.getLocalizedResource(appReq2);
assertNull("in-progress resource should not be present", recoveredRsrc);
recoveredRsrc = appTracker2.getLocalizedResource(appReq3);
assertEquals(appReq3, recoveredRsrc.getRequest());
assertEquals(appLr3.getLocalPath(), recoveredRsrc.getLocalPath());
assertEquals(appLr3.getSize(), recoveredRsrc.getSize());
assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
} finally {
dispatcher.stop();
stateStore.close();
}
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService in project hadoop by apache.
the class TestContainerManagerRecovery method testContainerCleanupOnShutdown.
@Test
public void testContainerCleanupOnShutdown() throws Exception {
ApplicationId appId = ApplicationId.newInstance(0, 1);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
ContainerId cid = ContainerId.newContainerId(attemptId, 1);
Map<String, LocalResource> localResources = Collections.emptyMap();
Map<String, String> containerEnv = Collections.emptyMap();
List<String> containerCmds = Collections.emptyList();
Map<String, ByteBuffer> serviceData = Collections.emptyMap();
Credentials containerCreds = new Credentials();
DataOutputBuffer dob = new DataOutputBuffer();
containerCreds.writeTokenStorageToStream(dob);
ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
Map<ApplicationAccessType, String> acls = Collections.emptyMap();
ContainerLaunchContext clc = ContainerLaunchContext.newInstance(localResources, containerEnv, containerCmds, serviceData, containerTokens, acls);
// create the logAggregationContext
LogAggregationContext logAggregationContext = LogAggregationContext.newInstance("includePattern", "excludePattern");
// verify containers are stopped on shutdown without recovery
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, false);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, false);
Context context = createContext(conf, new NMNullStateStoreService());
ContainerManagerImpl cm = spy(createContainerManager(context));
cm.init(conf);
cm.start();
StartContainersResponse startResponse = startContainer(context, cm, cid, clc, logAggregationContext);
assertEquals(1, startResponse.getSuccessfullyStartedContainers().size());
cm.stop();
verify(cm).handle(isA(CMgrCompletedAppsEvent.class));
// verify containers are stopped on shutdown with unsupervised recovery
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, false);
NMMemoryStateStoreService memStore = new NMMemoryStateStoreService();
memStore.init(conf);
memStore.start();
context = createContext(conf, memStore);
cm = spy(createContainerManager(context));
cm.init(conf);
cm.start();
startResponse = startContainer(context, cm, cid, clc, logAggregationContext);
assertEquals(1, startResponse.getSuccessfullyStartedContainers().size());
cm.stop();
memStore.close();
verify(cm).handle(isA(CMgrCompletedAppsEvent.class));
// verify containers are not stopped on shutdown with supervised recovery
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
memStore = new NMMemoryStateStoreService();
memStore.init(conf);
memStore.start();
context = createContext(conf, memStore);
cm = spy(createContainerManager(context));
cm.init(conf);
cm.start();
startResponse = startContainer(context, cm, cid, clc, logAggregationContext);
assertEquals(1, startResponse.getSuccessfullyStartedContainers().size());
cm.stop();
memStore.close();
verify(cm, never()).handle(isA(CMgrCompletedAppsEvent.class));
}
Aggregations