use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.
the class TestAMRMClient method allocateAndStartContainers.
/**
 * Requests {@code num} containers through {@code amClient}, asserts that
 * exactly that many were allocated, launches a {@code sleep 100} process in
 * each one through {@code nmClient} and polls until every container reports
 * {@link ContainerState#RUNNING}.
 *
 * @param amClient client used to submit the container requests and fetch
 *                 the allocations
 * @param nmClient client used to start the containers and query their status
 * @param num      number of containers to request and start
 * @return the containers returned by the second allocate call
 * @throws YarnException if an allocate call fails
 * @throws IOException   if an allocate call or token serialization fails
 */
private List<Container> allocateAndStartContainers(final AMRMClient<ContainerRequest> amClient, final NMClient nmClient, int num) throws YarnException, IOException {
  // set up allocation requests
  for (int i = 0; i < num; ++i) {
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
  }
  // send allocation requests
  amClient.allocate(0.1f);
  // let NM heartbeat to RM and trigger allocations
  triggerSchedulingWithNMHeartBeat();
  // get allocations
  AllocateResponse allocResponse = amClient.allocate(0.1f);
  List<Container> containers = allocResponse.getAllocatedContainers();
  Assert.assertEquals(num, containers.size());
  // build container launch context
  Credentials ts = new Credentials();
  DataOutputBuffer dob = new DataOutputBuffer();
  ts.writeTokenStorageToStream(dob);
  ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
  // start a process long enough for increase/decrease action to take effect
  ContainerLaunchContext clc = BuilderUtils.newContainerLaunchContext(Collections.<String, LocalResource>emptyMap(), new HashMap<String, String>(), Arrays.asList("sleep", "100"), new HashMap<String, ByteBuffer>(), securityTokens, new HashMap<ApplicationAccessType, String>());
  // start the containers and make sure they are in RUNNING state
  try {
    // containers.size() == num was asserted above, so this visits the same
    // elements as an index loop over [0, num)
    for (Container container : containers) {
      nmClient.startContainer(container, clc);
      // poll the container status until it is RUNNING; there is no local
      // bound on this loop
      while (true) {
        ContainerStatus status = nmClient.getContainerStatus(container.getId(), container.getNodeId());
        if (status.getState() == ContainerState.RUNNING) {
          break;
        }
        sleep(10);
      }
    }
  } catch (YarnException e) {
    // keep the original exception as the cause so its stack trace is not lost
    AssertionError failure = new AssertionError("Exception is not expected: " + e);
    failure.initCause(e);
    throw failure;
  }
  // let NM's heartbeat to RM to confirm container launch
  triggerSchedulingWithNMHeartBeat();
  return containers;
}
use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.
the class TestAMRMClient method testAMRMClientOnAMRMTokenRollOver.
/**
 * Checks AMRMToken handling across a master-key roll-over: the token issued
 * at registration keeps working during the rolling interval, the client then
 * picks up a token signed with the new master key, a token built from a
 * newer-version identifier is accepted, and once the key of the second token
 * has been retired that token is rejected with {@code InvalidToken}.
 */
@Test(timeout = 60000)
public void testAMRMClientOnAMRMTokenRollOver() throws YarnException, IOException {
  AMRMClient<ContainerRequest> amClient = null;
  try {
    final AMRMTokenSecretManager secretManager = yarnCluster.getResourceManager().getRMContext().getAMRMTokenSecretManager();

    // Bring up the AM-RM client; the clock is started just before registering.
    amClient = AMRMClient.<ContainerRequest>createAMRMClient();
    amClient.init(conf);
    amClient.start();
    final long clockStart = System.currentTimeMillis();
    amClient.registerApplicationMaster("Host", 10000, "");

    final org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> tokenBeforeRoll = getAMRMToken();
    Assert.assertNotNull(tokenBeforeRoll);
    Assert.assertEquals(tokenBeforeRoll.decodeIdentifier().getKeyId(), secretManager.getMasterKey().getMasterKey().getKeyId());

    // Meanwhile the old AMRMToken must keep working for the whole interval.
    while (System.currentTimeMillis() - clockStart < rolling_interval_sec * 1000) {
      amClient.allocate(0.1f);
      sleep(1000);
    }
    amClient.allocate(0.1f);

    final org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> tokenAfterRoll = getAMRMToken();
    Assert.assertNotNull(tokenAfterRoll);
    Assert.assertEquals(tokenAfterRoll.decodeIdentifier().getKeyId(), secretManager.getMasterKey().getMasterKey().getKeyId());
    Assert.assertNotEquals(tokenBeforeRoll, tokenAfterRoll);

    // The client can allocate with the latest AMRMToken.
    final AllocateResponse latestResponse = amClient.allocate(0.1f);

    // Opens a fresh RM proxy; executed inside doAs() so the proxy is created
    // under the calling test user.
    final PrivilegedAction<ApplicationMasterProtocol> openRmProxy = new PrivilegedAction<ApplicationMasterProtocol>() {
      @Override
      public ApplicationMasterProtocol run() {
        return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress(), conf);
      }
    };

    // A token wrapping a newer-version identifier must also be accepted.
    final UserGroupInformation newVersionUser = UserGroupInformation.createRemoteUser("testUser1");
    final AMRMTokenIdentifierForTest newVersionId = new AMRMTokenIdentifierForTest(tokenAfterRoll.decodeIdentifier(), "message");
    Assert.assertEquals("Message is changed after set to newVersionTokenIdentifier", "message", newVersionId.getMessage());
    final org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> newVersionToken = new org.apache.hadoop.security.token.Token<AMRMTokenIdentifier>(newVersionId.getBytes(), secretManager.retrievePassword(newVersionId), newVersionId.getKind(), new Text());
    SecurityUtil.setTokenService(newVersionToken, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress());
    newVersionUser.addToken(newVersionToken);
    final AllocateRequest followUp = Records.newRecord(AllocateRequest.class);
    followUp.setResponseId(latestResponse.getResponseId());
    newVersionUser.doAs(openRmProxy).allocate(followUp);

    // Heartbeat until the key behind the second token is neither the current
    // master key nor the pending next one.
    while (true) {
      final boolean keyRetired =
          tokenAfterRoll.decodeIdentifier().getKeyId() != secretManager.getCurrnetMasterKeyData().getMasterKey().getKeyId()
              && (secretManager.getNextMasterKeyData() == null
                  || tokenAfterRoll.decodeIdentifier().getKeyId() != secretManager.getNextMasterKeyData().getMasterKey().getKeyId());
      if (keyRetired) {
        break;
      }
      amClient.allocate(0.1f);
      sleep(1000);
    }

    // The rolled-over token must no longer be usable for an allocate call.
    try {
      final UserGroupInformation staleTokenUser = UserGroupInformation.createRemoteUser("testUser2");
      SecurityUtil.setTokenService(tokenAfterRoll, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress());
      staleTokenUser.addToken(tokenAfterRoll);
      staleTokenUser.doAs(openRmProxy).allocate(Records.newRecord(AllocateRequest.class));
      Assert.fail("The old Token should not work");
    } catch (Exception ex) {
      Assert.assertTrue(ex instanceof InvalidToken);
      Assert.assertTrue(ex.getMessage().contains("Invalid AMRMToken from " + tokenAfterRoll.decodeIdentifier().getApplicationAttemptId()));
    }

    amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
  } finally {
    if (amClient != null && amClient.getServiceState() == STATE.STARTED) {
      amClient.stop();
    }
  }
}
use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.
the class TestAMRMClient method testAllocationWithBlacklist.
/**
 * Checks that a blacklisted node receives no allocations, that removing it
 * from the blacklist allows allocation again, and that pending blacklist
 * additions are retained when an allocate call fails.
 */
@Test(timeout = 60000)
public void testAllocationWithBlacklist() throws YarnException, IOException {
  AMRMClientImpl<ContainerRequest> amClient = null;
  try {
    // Bring up and register the AM-RM client.
    amClient = (AMRMClientImpl<ContainerRequest>) AMRMClient.<ContainerRequest>createAMRMClient();
    amClient.init(conf);
    amClient.start();
    amClient.registerApplicationMaster("Host", 10000, "");
    assertEquals(0, amClient.ask.size());
    assertEquals(0, amClient.release.size());

    // First request: three asks are recorded and nothing is released.
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    assertEquals(3, amClient.ask.size());
    assertEquals(0, amClient.release.size());

    // Blacklist the only node: nothing can be assigned.
    final List<String> blockedNodes = new ArrayList<String>();
    blockedNodes.add(node);
    amClient.updateBlacklist(blockedNodes, null);
    int granted = getAllocatedContainersNumber(amClient, DEFAULT_ITERATION);
    assertEquals(0, granted);

    // Lift the blacklist and add a second request: both get assigned.
    amClient.updateBlacklist(null, blockedNodes);
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    granted = getAllocatedContainersNumber(amClient, DEFAULT_ITERATION);
    assertEquals(2, granted);

    // Pending blacklist changes must survive an exception in allocate().
    assertTrue(amClient.blacklistAdditions.isEmpty());
    assertTrue(amClient.blacklistRemovals.isEmpty());

    // A request with negative memory is invalid.
    final Resource negativeMemory = Resource.newInstance(-1024, 1);
    amClient.addContainerRequest(new ContainerRequest(negativeMemory, nodes, racks, priority));
    amClient.updateBlacklist(blockedNodes, null);
    try {
      // allocate() is expected to reject the invalid request.
      amClient.allocate(0.1f);
      fail("there should be an exception here.");
    } catch (Exception e) {
      assertEquals(1, amClient.blacklistAdditions.size());
    }
  } finally {
    if (amClient != null && amClient.getServiceState() == STATE.STARTED) {
      amClient.stop();
    }
  }
}
use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.
the class TestAMRMClient method testAllocation.
/**
 * Exercises the request / allocate / release bookkeeping of the given client.
 *
 * <p>Adds four identical container requests and removes two of them, then
 * calls allocate() (at most three times) until two containers have been
 * granted. Every granted container is released again, and the NM tokens in
 * the responses are checked so that no node delivers a token twice.
 * Afterwards the two outstanding requests are removed, and a mocked RM whose
 * allocate() throws is installed temporarily to verify that request removals
 * made during a failing allocate call are still reflected in the client's
 * {@code ask} set. Finally waits for the released containers to complete.
 *
 * @param amClient client under test; its {@code ask} and {@code release}
 *                 collections must be empty on entry
 * @throws YarnException if an allocate call fails
 * @throws IOException   if an allocate call fails
 */
private void testAllocation(final AMRMClientImpl<ContainerRequest> amClient) throws YarnException, IOException {
// setup container request
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
// add four identical requests and take two back, leaving two outstanding
amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
assertNumContainers(amClient, 0, 2, 2, 2, 3, 0);
int containersRequestedAny = 2;
// give the RM up to 3 calls to allocate() to hand out the containers
int allocatedContainerCount = 0;
int iterationsLeft = 3;
Set<ContainerId> releases = new TreeSet<ContainerId>();
// start from an empty NM token cache
amClient.getNMTokenCache().clearCache();
Assert.assertEquals(0, amClient.getNMTokenCache().numberOfTokensInCache());
// NM tokens seen so far, keyed by node id string
HashMap<String, Token> receivedNMTokens = new HashMap<String, Token>();
while (allocatedContainerCount < containersRequestedAny && iterationsLeft-- > 0) {
AllocateResponse allocResponse = amClient.allocate(0.1f);
// after allocate() returns, the pending ask and release sets are empty
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
assertEquals(nodeCount, amClient.getClusterNodeCount());
allocatedContainerCount += allocResponse.getAllocatedContainers().size();
// immediately release every container that was granted, remembering its id
for (Container container : allocResponse.getAllocatedContainers()) {
ContainerId rejectContainerId = container.getId();
releases.add(rejectContainerId);
amClient.releaseAssignedContainer(rejectContainerId);
}
// a token for a given node must not be delivered more than once
for (NMToken token : allocResponse.getNMTokens()) {
String nodeID = token.getNodeId().toString();
if (receivedNMTokens.containsKey(nodeID)) {
Assert.fail("Received token again for : " + nodeID);
}
receivedNMTokens.put(nodeID, token.getToken());
}
if (allocatedContainerCount < containersRequestedAny) {
// let NM heartbeat to RM and trigger allocations
triggerSchedulingWithNMHeartBeat();
}
}
// Should receive at least 1 token, and no more than one per node
Assert.assertTrue(receivedNMTokens.size() > 0 && receivedNMTokens.size() <= nodeCount);
assertEquals(allocatedContainerCount, containersRequestedAny);
assertEquals(2, amClient.release.size());
assertEquals(0, amClient.ask.size());
// need to tell the AMRMClient that we dont need these resources anymore
amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
assertEquals(3, amClient.ask.size());
// send 0 container count request for resources that are no longer needed
ResourceRequest snoopRequest = amClient.ask.iterator().next();
assertEquals(0, snoopRequest.getNumContainers());
// test RPC exception handling
amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
snoopRequest = amClient.ask.iterator().next();
assertEquals(2, snoopRequest.getNumContainers());
// remember the real RM proxy so it can be restored after the mocked call
ApplicationMasterProtocol realRM = amClient.rmClient;
try {
ApplicationMasterProtocol mockRM = mock(ApplicationMasterProtocol.class);
// the mocked allocate() removes both requests again and then throws
when(mockRM.allocate(any(AllocateRequest.class))).thenAnswer(new Answer<AllocateResponse>() {
public AllocateResponse answer(InvocationOnMock invocation) throws Exception {
amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
throw new Exception();
}
});
amClient.rmClient = mockRM;
amClient.allocate(0.1f);
} catch (Exception ioe) {
// deliberately ignored: the mocked allocate() throws, and the assertions
// below check the client state left behind by the failed call
} finally {
amClient.rmClient = realRM;
}
assertEquals(2, amClient.release.size());
assertEquals(3, amClient.ask.size());
snoopRequest = amClient.ask.iterator().next();
// verify that the remove request made in between makeRequest and allocate
// has not been lost
assertEquals(0, snoopRequest.getNumContainers());
waitForContainerCompletion(3, amClient, releases);
}
use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.
the class TestAMRMClientAsync method runCallBackThrowOutException.
/**
 * Runs an async AMRM client against a mocked {@code AMRMClient} with the
 * given handler configured to throw from its callbacks, and verifies that the
 * handler's {@code onError} is invoked exactly once and {@code getProgress}
 * is never invoked, both right after the error notification and again after
 * a short pause.
 *
 * @param callbackHandler handler whose {@code throwOutException} flag is set
 *                        here; its {@code notifier} / {@code notify} fields
 *                        are used to wait for the error callback. It is
 *                        passed to Mockito {@code verify}, so it is
 *                        presumably a spy or mock (confirm against callers)
 * @throws InterruptedException if the calling thread is interrupted while
 *                              waiting for the callback or while sleeping
 * @throws YarnException        if registering the application master fails
 * @throws IOException          if registering the application master fails
 */
void runCallBackThrowOutException(TestCallbackHandler2 callbackHandler) throws InterruptedException, YarnException, IOException {
  Configuration conf = new Configuration();
  @SuppressWarnings("unchecked") AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
  // every allocate() on the mocked client reports one completed container
  List<ContainerStatus> completed = Arrays.asList(ContainerStatus.newInstance(newContainerId(0, 0, 0, 0), ContainerState.COMPLETE, "", 0));
  final AllocateResponse response = createAllocateResponse(completed, new ArrayList<Container>(), null);
  when(client.allocate(anyFloat())).thenReturn(response);
  AMRMClientAsync<ContainerRequest> asyncClient = AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
  callbackHandler.asynClient = asyncClient;
  callbackHandler.throwOutException = true;
  asyncClient.init(conf);
  asyncClient.start();
  // call register and wait for error callback and stop
  synchronized (callbackHandler.notifier) {
    asyncClient.registerApplicationMaster("localhost", 1234, null);
    // loop guards against spurious wake-ups; an interrupt is propagated to
    // the caller instead of being printed and ignored, so an interrupted
    // test thread cannot stay stuck in this wait
    while (!callbackHandler.notify) {
      callbackHandler.notifier.wait();
    }
  }
  // verify error invoked
  verify(callbackHandler, times(0)).getProgress();
  verify(callbackHandler, times(1)).onError(any(Exception.class));
  // sleep to wait for a few heartbeat calls that can trigger callbacks
  Thread.sleep(50);
  // verify no more invocations after the first one.
  // ie. callback thread has stopped
  verify(callbackHandler, times(0)).getProgress();
  verify(callbackHandler, times(1)).onError(any(Exception.class));
}
Aggregations