Search in sources :

Example 51 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class TestAMRMClient method allocateAndStartContainers.

/**
 * Requests {@code num} containers through {@code amClient}, asserts that exactly
 * that many were allocated, launches each one through {@code nmClient} and blocks
 * until every container reports {@link ContainerState#RUNNING}.
 *
 * @param amClient client used to add the container requests and call allocate
 * @param nmClient client used to start the containers and poll their status
 * @param num number of containers to request and start
 * @return the containers returned by the second allocate call
 * @throws YarnException if an allocate call fails
 * @throws IOException if an allocate call or writing the credentials fails
 */
private List<Container> allocateAndStartContainers(final AMRMClient<ContainerRequest> amClient, final NMClient nmClient, int num) throws YarnException, IOException {
    // set up allocation requests
    for (int i = 0; i < num; ++i) {
        amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    }
    // send allocation requests
    amClient.allocate(0.1f);
    // let NM heartbeat to RM and trigger allocations
    triggerSchedulingWithNMHeartBeat();
    // get allocations
    AllocateResponse allocResponse = amClient.allocate(0.1f);
    List<Container> containers = allocResponse.getAllocatedContainers();
    Assert.assertEquals(num, containers.size());
    // build container launch context
    Credentials ts = new Credentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    ts.writeTokenStorageToStream(dob);
    // wrap only the bytes actually written; the backing array may be larger
    ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    // start a process long enough for increase/decrease action to take effect
    ContainerLaunchContext clc = BuilderUtils.newContainerLaunchContext(Collections.<String, LocalResource>emptyMap(), new HashMap<String, String>(), Arrays.asList("sleep", "100"), new HashMap<String, ByteBuffer>(), securityTokens, new HashMap<ApplicationAccessType, String>());
    // start the containers and make sure they are in RUNNING state
    try {
        for (int i = 0; i < num; i++) {
            Container container = containers.get(i);
            nmClient.startContainer(container, clc);
            // poll the container status until it is RUNNING; there is no local
            // timeout here, so callers rely on their own test timeout
            while (true) {
                ContainerStatus status = nmClient.getContainerStatus(container.getId(), container.getNodeId());
                if (status.getState() == ContainerState.RUNNING) {
                    break;
                }
                sleep(10);
            }
        }
    } catch (YarnException e) {
        // keep the original exception as the cause so its stack trace is not lost
        AssertionError failure = new AssertionError("Exception is not expected: " + e);
        failure.initCause(e);
        throw failure;
    }
    // let NM's heartbeat to RM to confirm container launch
    triggerSchedulingWithNMHeartBeat();
    return containers;
}
Also used : ByteBuffer(java.nio.ByteBuffer) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) Credentials(org.apache.hadoop.security.Credentials)

Example 52 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class TestAMRMClient method testAMRMClientOnAMRMTokenRollOver.

/**
 * Exercises the AMRM client across an AMRMToken master-key roll-over.
 *
 * <p>Steps, as performed below:
 * <ol>
 *   <li>register, and check the first token's key id matches the secret
 *       manager's master key;</li>
 *   <li>keep allocating for {@code rolling_interval_sec} seconds, then check a
 *       second, different token is in use whose key id matches the master key;</li>
 *   <li>build a token from an {@code AMRMTokenIdentifierForTest} wrapping the
 *       second token's identifier and allocate with it as another user;</li>
 *   <li>keep allocating until the second token's key id matches neither the
 *       current nor the next master key, then expect an allocate made with that
 *       token to fail with {@code InvalidToken}.</li>
 * </ol>
 */
@Test(timeout = 60000)
public void testAMRMClientOnAMRMTokenRollOver() throws YarnException, IOException {
    AMRMClient<ContainerRequest> amClient = null;
    try {
        AMRMTokenSecretManager amrmTokenSecretManager = yarnCluster.getResourceManager().getRMContext().getAMRMTokenSecretManager();
        // start am rm client
        amClient = AMRMClient.<ContainerRequest>createAMRMClient();
        amClient.init(conf);
        amClient.start();
        // primitive long: avoids boxing here and unboxing on every loop check below
        long startTime = System.currentTimeMillis();
        amClient.registerApplicationMaster("Host", 10000, "");
        org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken_1 = getAMRMToken();
        Assert.assertNotNull(amrmToken_1);
        Assert.assertEquals(amrmToken_1.decodeIdentifier().getKeyId(), amrmTokenSecretManager.getMasterKey().getMasterKey().getKeyId());
        // In the meantime, the old AMRMToken should continue to work
        while (System.currentTimeMillis() - startTime < rolling_interval_sec * 1000) {
            amClient.allocate(0.1f);
            sleep(1000);
        }
        amClient.allocate(0.1f);
        org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken_2 = getAMRMToken();
        Assert.assertNotNull(amrmToken_2);
        Assert.assertEquals(amrmToken_2.decodeIdentifier().getKeyId(), amrmTokenSecretManager.getMasterKey().getMasterKey().getKeyId());
        Assert.assertNotEquals(amrmToken_1, amrmToken_2);
        // can do the allocate call with latest AMRMToken
        AllocateResponse response = amClient.allocate(0.1f);
        // Verify latest AMRMToken can be used to send allocation request.
        UserGroupInformation testUser1 = UserGroupInformation.createRemoteUser("testUser1");
        AMRMTokenIdentifierForTest newVersionTokenIdentifier = new AMRMTokenIdentifierForTest(amrmToken_2.decodeIdentifier(), "message");
        Assert.assertEquals("Message is changed after set to newVersionTokenIdentifier", "message", newVersionTokenIdentifier.getMessage());
        org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> newVersionToken = new org.apache.hadoop.security.token.Token<AMRMTokenIdentifier>(newVersionTokenIdentifier.getBytes(), amrmTokenSecretManager.retrievePassword(newVersionTokenIdentifier), newVersionTokenIdentifier.getKind(), new Text());
        SecurityUtil.setTokenService(newVersionToken, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress());
        testUser1.addToken(newVersionToken);
        AllocateRequest request = Records.newRecord(AllocateRequest.class);
        // continue the response-id sequence from the client's last allocate call
        request.setResponseId(response.getResponseId());
        testUser1.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {

            @Override
            public ApplicationMasterProtocol run() {
                return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress(), conf);
            }
        }).allocate(request);
        // Keep allocating until amrmToken_2's key is neither the current nor the
        // next master key; after that this rolled-over token can not be used to
        // make an allocate call.
        while (true) {
            if (amrmToken_2.decodeIdentifier().getKeyId() != amrmTokenSecretManager.getCurrnetMasterKeyData().getMasterKey().getKeyId()) {
                if (amrmTokenSecretManager.getNextMasterKeyData() == null) {
                    break;
                } else if (amrmToken_2.decodeIdentifier().getKeyId() != amrmTokenSecretManager.getNextMasterKeyData().getMasterKey().getKeyId()) {
                    break;
                }
            }
            amClient.allocate(0.1f);
            sleep(1000);
        }
        try {
            UserGroupInformation testUser2 = UserGroupInformation.createRemoteUser("testUser2");
            SecurityUtil.setTokenService(amrmToken_2, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress());
            testUser2.addToken(amrmToken_2);
            testUser2.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {

                @Override
                public ApplicationMasterProtocol run() {
                    return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress(), conf);
                }
            }).allocate(Records.newRecord(AllocateRequest.class));
            // Assert.fail throws an Error, so the catch (Exception) below does not mask it
            Assert.fail("The old Token should not work");
        } catch (Exception ex) {
            Assert.assertTrue(ex instanceof InvalidToken);
            Assert.assertTrue(ex.getMessage().contains("Invalid AMRMToken from " + amrmToken_2.decodeIdentifier().getApplicationAttemptId()));
        }
        amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    } finally {
        // only stop a client that actually reached the STARTED state
        if (amClient != null && amClient.getServiceState() == STATE.STARTED) {
            amClient.stop();
        }
    }
}
Also used : AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) InvalidToken(org.apache.hadoop.security.token.SecretManager.InvalidToken) Text(org.apache.hadoop.io.Text) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) AMRMTokenSecretManager(org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) InvalidContainerRequestException(org.apache.hadoop.yarn.client.api.InvalidContainerRequestException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) PrivilegedAction(java.security.PrivilegedAction) InvalidToken(org.apache.hadoop.security.token.SecretManager.InvalidToken) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 53 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class TestAMRMClient method testAllocationWithBlacklist.

/**
 * Checks how blacklist updates interact with allocation:
 * <ul>
 *   <li>with {@code node} blacklisted, no container is allocated;</li>
 *   <li>after the node is removed from the blacklist and a second request is
 *       added, two containers are allocated;</li>
 *   <li>when allocate() throws for an invalid (negative memory) request, the
 *       pending blacklist addition is still held by the client.</li>
 * </ul>
 */
@Test(timeout = 60000)
public void testAllocationWithBlacklist() throws YarnException, IOException {
    AMRMClientImpl<ContainerRequest> client = null;
    try {
        // bring up and register the AM-RM client
        client = (AMRMClientImpl<ContainerRequest>) AMRMClient.<ContainerRequest>createAMRMClient();
        client.init(conf);
        client.start();
        client.registerApplicationMaster("Host", 10000, "");
        assertEquals(0, client.ask.size());
        assertEquals(0, client.release.size());

        // first request: the ask table is expected to hold 3 entries afterwards
        client.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
        assertEquals(3, client.ask.size());
        assertEquals(0, client.release.size());

        // blacklist the only node, so nothing can be assigned
        List<String> blacklistedNodes = new ArrayList<String>();
        blacklistedNodes.add(node);
        client.updateBlacklist(blacklistedNodes, null);
        int allocatedWhileBlacklisted = getAllocatedContainersNumber(client, DEFAULT_ITERATION);
        assertEquals(0, allocatedWhileBlacklisted);

        // take the node off the blacklist and add a second request: expect 2 containers
        client.updateBlacklist(null, blacklistedNodes);
        client.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
        int allocatedAfterRemoval = getAllocatedContainersNumber(client, DEFAULT_ITERATION);
        assertEquals(2, allocatedAfterRemoval);

        // both pending blacklist sets must be drained before the failure scenario
        assertTrue(client.blacklistAdditions.isEmpty());
        assertTrue(client.blacklistRemovals.isEmpty());

        // an invalid request (negative memory) plus a fresh blacklist addition
        client.addContainerRequest(new ContainerRequest(Resource.newInstance(-1024, 1), nodes, racks, priority));
        client.updateBlacklist(blacklistedNodes, null);
        try {
            // allocate() is expected to reject the invalid ContainerRequest
            client.allocate(0.1f);
            fail("there should be an exception here.");
        } catch (Exception expected) {
            // the blacklist addition must survive the failed allocate()
            assertEquals(1, client.blacklistAdditions.size());
        }
    } finally {
        if (client != null) {
            if (client.getServiceState() == STATE.STARTED) {
                client.stop();
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) InvalidContainerRequestException(org.apache.hadoop.yarn.client.api.InvalidContainerRequestException) Test(org.junit.Test)

Example 54 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class TestAMRMClient method testAllocation.

/**
 * Drives an allocation round-trip against {@code amClient} and checks its
 * internal {@code ask} / {@code release} bookkeeping at each step.
 *
 * <p>The method adds four requests and removes two, allocates until two
 * containers arrive (releasing each immediately and collecting NM tokens),
 * then removes the remaining requests. Finally it swaps in a mocked RM proxy
 * whose allocate() removes requests and then throws, to verify that a removal
 * made during a failing allocate() is not lost.
 *
 * @param amClient the client under test; its {@code rmClient} field is
 *        temporarily replaced by a mock and restored before returning
 * @throws YarnException if an allocate call against the real RM fails
 * @throws IOException if an allocate call against the real RM fails
 */
private void testAllocation(final AMRMClientImpl<ContainerRequest> amClient) throws YarnException, IOException {
    // setup container request
    assertEquals(0, amClient.ask.size());
    assertEquals(0, amClient.release.size());
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    assertNumContainers(amClient, 0, 2, 2, 2, 3, 0);
    int containersRequestedAny = 2;
    // RM should allocate container within 2 calls to allocate()
    int allocatedContainerCount = 0;
    int iterationsLeft = 3;
    Set<ContainerId> releases = new TreeSet<ContainerId>();
    amClient.getNMTokenCache().clearCache();
    Assert.assertEquals(0, amClient.getNMTokenCache().numberOfTokensInCache());
    HashMap<String, Token> receivedNMTokens = new HashMap<String, Token>();
    while (allocatedContainerCount < containersRequestedAny && iterationsLeft-- > 0) {
        AllocateResponse allocResponse = amClient.allocate(0.1f);
        assertEquals(0, amClient.ask.size());
        assertEquals(0, amClient.release.size());
        assertEquals(nodeCount, amClient.getClusterNodeCount());
        allocatedContainerCount += allocResponse.getAllocatedContainers().size();
        // hand every allocated container straight back and remember its id
        for (Container container : allocResponse.getAllocatedContainers()) {
            ContainerId rejectContainerId = container.getId();
            releases.add(rejectContainerId);
            amClient.releaseAssignedContainer(rejectContainerId);
        }
        // a token for a given node must be delivered at most once
        for (NMToken token : allocResponse.getNMTokens()) {
            String nodeID = token.getNodeId().toString();
            if (receivedNMTokens.containsKey(nodeID)) {
                Assert.fail("Received token again for : " + nodeID);
            }
            receivedNMTokens.put(nodeID, token.getToken());
        }
        if (allocatedContainerCount < containersRequestedAny) {
            // let NM heartbeat to RM and trigger allocations
            triggerSchedulingWithNMHeartBeat();
        }
    }
    // Should receive at least 1 token
    Assert.assertTrue(receivedNMTokens.size() > 0 && receivedNMTokens.size() <= nodeCount);
    // expected value first, so a failure message reports the right way round
    assertEquals(containersRequestedAny, allocatedContainerCount);
    assertEquals(2, amClient.release.size());
    assertEquals(0, amClient.ask.size());
    // need to tell the AMRMClient that we dont need these resources anymore
    amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    assertEquals(3, amClient.ask.size());
    // send 0 container count request for resources that are no longer needed
    ResourceRequest snoopRequest = amClient.ask.iterator().next();
    assertEquals(0, snoopRequest.getNumContainers());
    // test RPC exception handling
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    snoopRequest = amClient.ask.iterator().next();
    assertEquals(2, snoopRequest.getNumContainers());
    ApplicationMasterProtocol realRM = amClient.rmClient;
    try {
        ApplicationMasterProtocol mockRM = mock(ApplicationMasterProtocol.class);
        when(mockRM.allocate(any(AllocateRequest.class))).thenAnswer(new Answer<AllocateResponse>() {

            @Override
            public AllocateResponse answer(InvocationOnMock invocation) throws Exception {
                // remove the requests while allocate() is in flight, then fail the call
                amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
                amClient.removeContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
                throw new Exception();
            }
        });
        amClient.rmClient = mockRM;
        amClient.allocate(0.1f);
    } catch (Exception expected) {
        // Deliberately ignored: the mocked RM always throws, and the assertions
        // below check the client's state after that failure.
    } finally {
        // always restore the real RM proxy, even if the block above fails
        amClient.rmClient = realRM;
    }
    assertEquals(2, amClient.release.size());
    assertEquals(3, amClient.ask.size());
    snoopRequest = amClient.ask.iterator().next();
    // verify that the remove request made in between makeRequest and allocate 
    // has not been lost
    assertEquals(0, snoopRequest.getNumContainers());
    waitForContainerCompletion(3, amClient, releases);
}
Also used : HashMap(java.util.HashMap) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) InvalidToken(org.apache.hadoop.security.token.SecretManager.InvalidToken) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) InvalidContainerRequestException(org.apache.hadoop.yarn.client.api.InvalidContainerRequestException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) TreeSet(java.util.TreeSet) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest)

Example 55 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class TestAMRMClientAsync method runCallBackThrowOutException.

/**
 * Starts an async AMRM client around a mocked {@code AMRMClient} with
 * {@code callbackHandler.throwOutException} set to {@code true}, waits until the
 * handler sets its {@code notify} flag, and then verifies that {@code onError}
 * was invoked exactly once and {@code getProgress} never, both immediately and
 * again after a short pause.
 *
 * @param callbackHandler handler passed to the async client; it is used with
 *        Mockito {@code verify}, so it is presumably a mock or spy supplied by
 *        the caller (confirm against the callers)
 * @throws InterruptedException if the thread is interrupted while waiting for
 *         the handler's notification or during the final sleep
 * @throws YarnException declared by the mocked allocate / register calls
 * @throws IOException declared by the mocked allocate / register calls
 */
void runCallBackThrowOutException(TestCallbackHandler2 callbackHandler) throws InterruptedException, YarnException, IOException {
    Configuration conf = new Configuration();
    @SuppressWarnings("unchecked") AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
    // every allocate() on the mock reports a single completed container
    List<ContainerStatus> completed = Arrays.asList(ContainerStatus.newInstance(newContainerId(0, 0, 0, 0), ContainerState.COMPLETE, "", 0));
    final AllocateResponse response = createAllocateResponse(completed, new ArrayList<Container>(), null);
    when(client.allocate(anyFloat())).thenReturn(response);
    AMRMClientAsync<ContainerRequest> asyncClient = AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
    callbackHandler.asynClient = asyncClient;
    callbackHandler.throwOutException = true;
    asyncClient.init(conf);
    asyncClient.start();
    // call register and wait for error callback and stop
    synchronized (callbackHandler.notifier) {
        asyncClient.registerApplicationMaster("localhost", 1234, null);
        // loop guards against spurious wake-ups; an interrupt is propagated to
        // the caller instead of being printed and ignored
        while (!callbackHandler.notify) {
            callbackHandler.notifier.wait();
        }
    }
    // verify error invoked
    verify(callbackHandler, times(0)).getProgress();
    verify(callbackHandler, times(1)).onError(any(Exception.class));
    // sleep to wait for a few heartbeat calls that can trigger callbacks
    Thread.sleep(50);
    // verify no more invocations after the first one.
    // ie. callback thread has stopped
    verify(callbackHandler, times(0)).getProgress();
    verify(callbackHandler, times(1)).onError(any(Exception.class));
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) Container(org.apache.hadoop.yarn.api.records.Container) Configuration(org.apache.hadoop.conf.Configuration) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException)

Aggregations

YarnException (org.apache.hadoop.yarn.exceptions.YarnException)287 IOException (java.io.IOException)149 Test (org.junit.Test)107 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)61 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)44 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)31 Configuration (org.apache.hadoop.conf.Configuration)26 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)26 ArrayList (java.util.ArrayList)25 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)25 ApplicationNotFoundException (org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException)25 AccessControlException (org.apache.hadoop.security.AccessControlException)22 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)21 UndeclaredThrowableException (java.lang.reflect.UndeclaredThrowableException)17 Path (org.apache.hadoop.fs.Path)17 ReservationSubmissionRequest (org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest)15 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)15 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)14 HashMap (java.util.HashMap)13 ApplicationSubmissionContext (org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext)13