use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredContainerState in project hadoop by apache.
the class TestNMLeveldbStateStoreService method testContainerStorage.
@Test
public void testContainerStorage() throws IOException {
// test empty when no state
List<RecoveredContainerState> recoveredContainers = stateStore.loadContainersState();
assertTrue(recoveredContainers.isEmpty());
// create a container request
ApplicationId appId = ApplicationId.newInstance(1234, 3);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 4);
ContainerId containerId = ContainerId.newContainerId(appAttemptId, 5);
StartContainerRequest containerReq = createContainerRequest(containerId);
// store a container and verify recovered
stateStore.storeContainer(containerId, 0, containerReq);
// verify the container version key is not stored for new containers
DB db = stateStore.getDB();
assertNull("version key present for new container", db.get(bytes(stateStore.getContainerVersionKey(containerId.toString()))));
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertEquals(1, recoveredContainers.size());
RecoveredContainerState rcs = recoveredContainers.get(0);
assertEquals(0, rcs.getVersion());
assertEquals(RecoveredContainerStatus.REQUESTED, rcs.getStatus());
assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
assertEquals(false, rcs.getKilled());
assertEquals(containerReq, rcs.getStartRequest());
assertTrue(rcs.getDiagnostics().isEmpty());
// store a new container record without StartContainerRequest
ContainerId containerId1 = ContainerId.newContainerId(appAttemptId, 6);
stateStore.storeContainerLaunched(containerId1);
recoveredContainers = stateStore.loadContainersState();
// check whether the new container record is discarded
assertEquals(1, recoveredContainers.size());
// queue the container, and verify recovered
stateStore.storeContainerQueued(containerId);
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.QUEUED, rcs.getStatus());
assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
assertEquals(false, rcs.getKilled());
assertEquals(containerReq, rcs.getStartRequest());
assertTrue(rcs.getDiagnostics().isEmpty());
// launch the container, add some diagnostics, and verify recovered
StringBuilder diags = new StringBuilder();
stateStore.storeContainerLaunched(containerId);
diags.append("some diags for container");
stateStore.storeContainerDiagnostics(containerId, diags);
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus());
assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
assertEquals(false, rcs.getKilled());
assertEquals(containerReq, rcs.getStartRequest());
assertEquals(diags.toString(), rcs.getDiagnostics());
// increase the container size, and verify recovered
stateStore.storeContainerResourceChanged(containerId, 2, Resource.newInstance(2468, 4));
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0);
assertEquals(2, rcs.getVersion());
assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus());
assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
assertEquals(false, rcs.getKilled());
assertEquals(Resource.newInstance(2468, 4), rcs.getCapability());
// mark the container killed, add some more diags, and verify recovered
diags.append("some more diags for container");
stateStore.storeContainerDiagnostics(containerId, diags);
stateStore.storeContainerKilled(containerId);
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus());
assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
assertTrue(rcs.getKilled());
assertEquals(containerReq, rcs.getStartRequest());
assertEquals(diags.toString(), rcs.getDiagnostics());
// add yet more diags, mark container completed, and verify recovered
diags.append("some final diags");
stateStore.storeContainerDiagnostics(containerId, diags);
stateStore.storeContainerCompleted(containerId, 21);
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.COMPLETED, rcs.getStatus());
assertEquals(21, rcs.getExitCode());
assertTrue(rcs.getKilled());
assertEquals(containerReq, rcs.getStartRequest());
assertEquals(diags.toString(), rcs.getDiagnostics());
// store remainingRetryAttempts, workDir and logDir
stateStore.storeContainerRemainingRetryAttempts(containerId, 6);
stateStore.storeContainerWorkDir(containerId, "/test/workdir");
stateStore.storeContainerLogDir(containerId, "/test/logdir");
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0);
assertEquals(6, rcs.getRemainingRetryAttempts());
assertEquals("/test/workdir", rcs.getWorkDir());
assertEquals("/test/logdir", rcs.getLogDir());
// remove the container and verify not recovered
stateStore.removeContainer(containerId);
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertTrue(recoveredContainers.isEmpty());
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredContainerState in project hadoop by apache.
the class ContainerManagerImpl method recover.
@SuppressWarnings("unchecked")
private void recover() throws IOException, URISyntaxException {
NMStateStoreService stateStore = context.getNMStateStore();
if (stateStore.canRecover()) {
rsrcLocalizationSrvc.recoverLocalizedResources(stateStore.loadLocalizationState());
RecoveredApplicationsState appsState = stateStore.loadApplicationsState();
for (ContainerManagerApplicationProto proto : appsState.getApplications()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Recovering application with state: " + proto.toString());
}
recoverApplication(proto);
}
for (RecoveredContainerState rcs : stateStore.loadContainersState()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Recovering container with state: " + rcs);
}
recoverContainer(rcs);
}
} else {
LOG.info("Not a recoverable state store. Nothing to recover.");
}
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredContainerState in project hadoop by apache.
the class TestNMLeveldbStateStoreService method testUnexpectedKeyDoesntThrowException.
@Test
public void testUnexpectedKeyDoesntThrowException() throws IOException {
// test empty when no state
List<RecoveredContainerState> recoveredContainers = stateStore.loadContainersState();
assertTrue(recoveredContainers.isEmpty());
// create a container request
ApplicationId appId = ApplicationId.newInstance(1234, 3);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 4);
ContainerId containerId = ContainerId.newContainerId(appAttemptId, 5);
LocalResource lrsrc = LocalResource.newInstance(URL.newInstance("hdfs", "somehost", 12345, "/some/path/to/rsrc"), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, 123L, 1234567890L);
Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
localResources.put("rsrc", lrsrc);
Map<String, String> env = new HashMap<String, String>();
env.put("somevar", "someval");
List<String> containerCmds = new ArrayList<String>();
containerCmds.add("somecmd");
containerCmds.add("somearg");
Map<String, ByteBuffer> serviceData = new HashMap<String, ByteBuffer>();
serviceData.put("someservice", ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3 }));
ByteBuffer containerTokens = ByteBuffer.wrap(new byte[] { 0x7, 0x8, 0x9, 0xa });
Map<ApplicationAccessType, String> acls = new HashMap<ApplicationAccessType, String>();
acls.put(ApplicationAccessType.VIEW_APP, "viewuser");
acls.put(ApplicationAccessType.MODIFY_APP, "moduser");
ContainerLaunchContext clc = ContainerLaunchContext.newInstance(localResources, env, containerCmds, serviceData, containerTokens, acls);
Resource containerRsrc = Resource.newInstance(1357, 3);
ContainerTokenIdentifier containerTokenId = new ContainerTokenIdentifier(containerId, "host", "user", containerRsrc, 9876543210L, 42, 2468, Priority.newInstance(7), 13579);
Token containerToken = Token.newInstance(containerTokenId.getBytes(), ContainerTokenIdentifier.KIND.toString(), "password".getBytes(), "tokenservice");
StartContainerRequest containerReq = StartContainerRequest.newInstance(clc, containerToken);
stateStore.storeContainer(containerId, 0, containerReq);
// add a invalid key
byte[] invalidKey = ("ContainerManager/containers/" + containerId.toString() + "/invalidKey1234").getBytes();
stateStore.getDB().put(invalidKey, new byte[1]);
restartStateStore();
recoveredContainers = stateStore.loadContainersState();
assertEquals(1, recoveredContainers.size());
RecoveredContainerState rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.REQUESTED, rcs.getStatus());
assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
assertEquals(false, rcs.getKilled());
assertEquals(containerReq, rcs.getStartRequest());
assertTrue(rcs.getDiagnostics().isEmpty());
assertEquals(RecoveredContainerType.KILL, rcs.getRecoveryType());
// assert unknown keys are cleaned up finally
assertNotNull(stateStore.getDB().get(invalidKey));
stateStore.removeContainer(containerId);
assertNull(stateStore.getDB().get(invalidKey));
}
Aggregations