Search in sources :

Example 1 with NMToken

use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.

the class RMContainerAllocator method getResources.

@SuppressWarnings("unchecked")
private List<Container> getResources() throws Exception {
    applyConcurrentTaskLimits();
    // will be null the first time
    Resource headRoom = Resources.clone(getAvailableResources());
    AllocateResponse response;
    /*
     * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
     * milliseconds before aborting. During this interval, AM will still try
     * to contact the RM.
     */
    try {
        response = makeRemoteRequest();
        // Reset retry count if no exception occurred.
        retrystartTime = System.currentTimeMillis();
    } catch (ApplicationAttemptNotFoundException e) {
        // This can happen if the RM has been restarted. If it is in that state,
        // this application must clean itself up.
        eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
        throw new RMContainerAllocationException("Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationAttemptId(), e);
    } catch (ApplicationMasterNotRegisteredException e) {
        LOG.info("ApplicationMaster is out of sync with ResourceManager," + " hence resync and send outstanding requests.");
        // RM may have restarted, re-register with RM.
        lastResponseID = 0;
        register();
        addOutstandingRequestOnResync();
        return null;
    } catch (InvalidLabelResourceRequestException e) {
        // If Invalid label exception is received means the requested label doesnt
        // have access so killing job in this case.
        String diagMsg = "Requested node-label-expression is invalid: " + StringUtils.stringifyException(e);
        LOG.info(diagMsg);
        JobId jobId = this.getJob().getID();
        eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
        eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
        throw e;
    } catch (Exception e) {
        // re-trying until the retryInterval has expired.
        if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
            LOG.error("Could not contact RM after " + retryInterval + " milliseconds.");
            eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
            throw new RMContainerAllocationException("Could not contact RM after " + retryInterval + " milliseconds.");
        }
        // continue to attempt to contact the RM.
        throw e;
    }
    Resource newHeadRoom = getAvailableResources();
    List<Container> newContainers = response.getAllocatedContainers();
    // Setting NMTokens
    if (response.getNMTokens() != null) {
        for (NMToken nmToken : response.getNMTokens()) {
            NMTokenCache.setNMToken(nmToken.getNodeId().toString(), nmToken.getToken());
        }
    }
    // Setting AMRMToken
    if (response.getAMRMToken() != null) {
        updateAMRMToken(response.getAMRMToken());
    }
    List<ContainerStatus> finishedContainers = response.getCompletedContainersStatuses();
    // propagate preemption requests
    final PreemptionMessage preemptReq = response.getPreemptionMessage();
    if (preemptReq != null) {
        preemptionPolicy.preempt(new PreemptionContext(assignedRequests), preemptReq);
    }
    if (newContainers.size() + finishedContainers.size() > 0 || !headRoom.equals(newHeadRoom)) {
        //something changed
        recalculateReduceSchedule = true;
        if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) {
            LOG.debug("headroom=" + newHeadRoom);
        }
    }
    if (LOG.isDebugEnabled()) {
        for (Container cont : newContainers) {
            LOG.debug("Received new Container :" + cont);
        }
    }
    //Called on each allocation. Will know about newly blacklisted/added hosts.
    computeIgnoreBlacklisting();
    handleUpdatedNodes(response);
    handleJobPriorityChange(response);
    // handle receiving the timeline collector address for this app
    String collectorAddr = response.getCollectorAddr();
    MRAppMaster.RunningAppContext appContext = (MRAppMaster.RunningAppContext) this.getContext();
    if (collectorAddr != null && !collectorAddr.isEmpty() && appContext.getTimelineV2Client() != null) {
        appContext.getTimelineV2Client().setTimelineServiceAddress(response.getCollectorAddr());
    }
    for (ContainerStatus cont : finishedContainers) {
        processFinishedContainer(cont);
    }
    return newContainers;
}
Also used : PreemptionMessage(org.apache.hadoop.yarn.api.records.PreemptionMessage) MRAppMaster(org.apache.hadoop.mapreduce.v2.app.MRAppMaster) NMToken(org.apache.hadoop.yarn.api.records.NMToken) Resource(org.apache.hadoop.yarn.api.records.Resource) JobDiagnosticsUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) InvalidLabelResourceRequestException(org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) InvalidLabelResourceRequestException(org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId)

Example 2 with NMToken

use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.

the class TestOpportunisticContainerAllocation method testMixedAllocationAndRelease.

@Test(timeout = 60000)
public void testMixedAllocationAndRelease() throws YarnException, IOException {
    // setup container request
    assertEquals(0, amClient.ask.size());
    assertEquals(0, amClient.release.size());
    amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
    amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
    amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
    amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
    amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
    amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
    int containersRequestedNode = amClient.getTable(0).get(priority, node, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
    int containersRequestedRack = amClient.getTable(0).get(priority, rack, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
    int containersRequestedAny = amClient.getTable(0).get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
    int oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, capability).remoteRequest.getNumContainers();
    assertEquals(4, containersRequestedNode);
    assertEquals(4, containersRequestedRack);
    assertEquals(4, containersRequestedAny);
    assertEquals(2, oppContainersRequestedAny);
    assertEquals(4, amClient.ask.size());
    assertEquals(0, amClient.release.size());
    amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
    amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
    amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
    containersRequestedNode = amClient.getTable(0).get(priority, node, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
    containersRequestedRack = amClient.getTable(0).get(priority, rack, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
    containersRequestedAny = amClient.getTable(0).get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
    oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, capability).remoteRequest.getNumContainers();
    assertEquals(2, containersRequestedNode);
    assertEquals(2, containersRequestedRack);
    assertEquals(2, containersRequestedAny);
    assertEquals(1, oppContainersRequestedAny);
    assertEquals(4, amClient.ask.size());
    assertEquals(0, amClient.release.size());
    // RM should allocate container within 2 calls to allocate()
    int allocatedContainerCount = 0;
    int allocatedOpportContainerCount = 0;
    int iterationsLeft = 50;
    Set<ContainerId> releases = new TreeSet<>();
    amClient.getNMTokenCache().clearCache();
    Assert.assertEquals(0, amClient.getNMTokenCache().numberOfTokensInCache());
    HashMap<String, Token> receivedNMTokens = new HashMap<>();
    while (allocatedContainerCount < containersRequestedAny + oppContainersRequestedAny && iterationsLeft-- > 0) {
        AllocateResponse allocResponse = amClient.allocate(0.1f);
        assertEquals(0, amClient.ask.size());
        assertEquals(0, amClient.release.size());
        allocatedContainerCount += allocResponse.getAllocatedContainers().size();
        for (Container container : allocResponse.getAllocatedContainers()) {
            if (container.getExecutionType() == ExecutionType.OPPORTUNISTIC) {
                allocatedOpportContainerCount++;
            }
            ContainerId rejectContainerId = container.getId();
            releases.add(rejectContainerId);
        }
        for (NMToken token : allocResponse.getNMTokens()) {
            String nodeID = token.getNodeId().toString();
            receivedNMTokens.put(nodeID, token.getToken());
        }
        if (allocatedContainerCount < containersRequestedAny) {
            // sleep to let NM's heartbeat to RM and trigger allocations
            sleep(100);
        }
    }
    assertEquals(containersRequestedAny + oppContainersRequestedAny, allocatedContainerCount);
    assertEquals(oppContainersRequestedAny, allocatedOpportContainerCount);
    for (ContainerId rejectContainerId : releases) {
        amClient.releaseAssignedContainer(rejectContainerId);
    }
    assertEquals(3, amClient.release.size());
    assertEquals(0, amClient.ask.size());
    // need to tell the AMRMClient that we don't need these resources anymore
    amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
    amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
    amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
    assertEquals(4, amClient.ask.size());
    iterationsLeft = 3;
    // do a few iterations to ensure RM is not going to send new containers
    while (iterationsLeft-- > 0) {
        // inform RM of rejection
        AllocateResponse allocResponse = amClient.allocate(0.1f);
        // RM did not send new containers because AM does not need any
        assertEquals(0, allocResponse.getAllocatedContainers().size());
        if (allocResponse.getCompletedContainersStatuses().size() > 0) {
            for (ContainerStatus cStatus : allocResponse.getCompletedContainersStatuses()) {
                if (releases.contains(cStatus.getContainerId())) {
                    assertEquals(cStatus.getState(), ContainerState.COMPLETE);
                    assertEquals(-100, cStatus.getExitStatus());
                    releases.remove(cStatus.getContainerId());
                }
            }
        }
        if (iterationsLeft > 0) {
            // sleep to make sure NM's heartbeat
            sleep(100);
        }
    }
    assertEquals(0, amClient.ask.size());
    assertEquals(0, amClient.release.size());
}
Also used : AMRMClient(org.apache.hadoop.yarn.client.api.AMRMClient) NMToken(org.apache.hadoop.yarn.api.records.NMToken) HashMap(java.util.HashMap) NMToken(org.apache.hadoop.yarn.api.records.NMToken) Token(org.apache.hadoop.yarn.api.records.Token) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TreeSet(java.util.TreeSet) Test(org.junit.Test)

Example 3 with NMToken

use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.

the class TestOpportunisticContainerAllocation method testPromotionFromAcquired.

@Test(timeout = 60000)
public void testPromotionFromAcquired() throws YarnException, IOException {
    // setup container request
    assertEquals(0, amClient.ask.size());
    assertEquals(0, amClient.release.size());
    amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
    int oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, capability).remoteRequest.getNumContainers();
    assertEquals(1, oppContainersRequestedAny);
    assertEquals(1, amClient.ask.size());
    assertEquals(0, amClient.release.size());
    // RM should allocate container within 2 calls to allocate()
    int allocatedContainerCount = 0;
    Map<ContainerId, Container> allocatedOpportContainers = new HashMap<>();
    int iterationsLeft = 50;
    amClient.getNMTokenCache().clearCache();
    Assert.assertEquals(0, amClient.getNMTokenCache().numberOfTokensInCache());
    HashMap<String, Token> receivedNMTokens = new HashMap<>();
    updateMetrics("Before Opp Allocation");
    while (allocatedContainerCount < oppContainersRequestedAny && iterationsLeft-- > 0) {
        AllocateResponse allocResponse = amClient.allocate(0.1f);
        assertEquals(0, amClient.ask.size());
        assertEquals(0, amClient.release.size());
        allocatedContainerCount += allocResponse.getAllocatedContainers().size();
        for (Container container : allocResponse.getAllocatedContainers()) {
            if (container.getExecutionType() == ExecutionType.OPPORTUNISTIC) {
                allocatedOpportContainers.put(container.getId(), container);
                removeCR(container);
            }
        }
        for (NMToken token : allocResponse.getNMTokens()) {
            String nodeID = token.getNodeId().toString();
            receivedNMTokens.put(nodeID, token.getToken());
        }
        if (allocatedContainerCount < oppContainersRequestedAny) {
            // sleep to let NM's heartbeat to RM and trigger allocations
            sleep(100);
        }
    }
    assertEquals(oppContainersRequestedAny, allocatedContainerCount);
    assertEquals(oppContainersRequestedAny, allocatedOpportContainers.size());
    updateMetrics("After Opp Allocation / Before Promotion");
    try {
        Container c = allocatedOpportContainers.values().iterator().next();
        amClient.requestContainerUpdate(c, UpdateContainerRequest.newInstance(c.getVersion(), c.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC));
        Assert.fail("Should throw Exception..");
    } catch (IllegalArgumentException e) {
        System.out.println("## " + e.getMessage());
        Assert.assertTrue(e.getMessage().contains("target should be GUARANTEED and original should be OPPORTUNISTIC"));
    }
    Container c = allocatedOpportContainers.values().iterator().next();
    amClient.requestContainerUpdate(c, UpdateContainerRequest.newInstance(c.getVersion(), c.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED));
    iterationsLeft = 120;
    Map<ContainerId, UpdatedContainer> updatedContainers = new HashMap<>();
    // do a few iterations to ensure RM is not going to send new containers
    while (iterationsLeft-- > 0 && updatedContainers.isEmpty()) {
        // inform RM of rejection
        AllocateResponse allocResponse = amClient.allocate(0.1f);
        // RM did not send new containers because AM does not need any
        if (allocResponse.getUpdatedContainers() != null) {
            for (UpdatedContainer updatedContainer : allocResponse.getUpdatedContainers()) {
                System.out.println("Got update..");
                updatedContainers.put(updatedContainer.getContainer().getId(), updatedContainer);
            }
        }
        if (iterationsLeft > 0) {
            // sleep to make sure NM's heartbeat
            sleep(100);
        }
    }
    updateMetrics("After Promotion");
    assertEquals(1, updatedContainers.size());
    for (ContainerId cId : allocatedOpportContainers.keySet()) {
        Container orig = allocatedOpportContainers.get(cId);
        UpdatedContainer updatedContainer = updatedContainers.get(cId);
        assertNotNull(updatedContainer);
        assertEquals(ExecutionType.GUARANTEED, updatedContainer.getContainer().getExecutionType());
        assertEquals(orig.getResource(), updatedContainer.getContainer().getResource());
        assertEquals(orig.getNodeId(), updatedContainer.getContainer().getNodeId());
        assertEquals(orig.getVersion() + 1, updatedContainer.getContainer().getVersion());
    }
    assertEquals(0, amClient.ask.size());
    assertEquals(0, amClient.release.size());
    amClient.ask.clear();
}
Also used : AMRMClient(org.apache.hadoop.yarn.client.api.AMRMClient) NMToken(org.apache.hadoop.yarn.api.records.NMToken) HashMap(java.util.HashMap) NMToken(org.apache.hadoop.yarn.api.records.NMToken) Token(org.apache.hadoop.yarn.api.records.Token) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) Container(org.apache.hadoop.yarn.api.records.Container) UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) Test(org.junit.Test)

Example 4 with NMToken

use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.

the class TestDistributedScheduling method testAMRMClient.

/**
   * Validates if AMRMClient can be used with Distributed Scheduling turned on.
   *
   * @throws Exception
   */
@Test(timeout = 120000)
@SuppressWarnings("unchecked")
public void testAMRMClient() throws Exception {
    AMRMClientImpl<AMRMClient.ContainerRequest> amClient = null;
    try {
        Priority priority = Priority.newInstance(1);
        Priority priority2 = Priority.newInstance(2);
        Resource capability = Resource.newInstance(1024, 1);
        List<NodeReport> nodeReports = rmClient.getNodeReports(NodeState.RUNNING);
        String node = nodeReports.get(0).getNodeId().getHost();
        String rack = nodeReports.get(0).getRackName();
        String[] nodes = new String[] { node };
        String[] racks = new String[] { rack };
        // start am rm client
        amClient = new AMRMClientImpl(client);
        amClient.init(yarnConf);
        amClient.start();
        amClient.registerApplicationMaster(NetUtils.getHostname(), 1024, "");
        assertEquals(0, amClient.ask.size());
        assertEquals(0, amClient.release.size());
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
        amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
        RemoteRequestsTable<ContainerRequest> remoteRequestsTable = amClient.getTable(0);
        int containersRequestedNode = remoteRequestsTable.get(priority, node, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
        int containersRequestedRack = remoteRequestsTable.get(priority, rack, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
        int containersRequestedAny = remoteRequestsTable.get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
        int oppContainersRequestedAny = remoteRequestsTable.get(priority2, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, capability).remoteRequest.getNumContainers();
        assertEquals(2, containersRequestedNode);
        assertEquals(2, containersRequestedRack);
        assertEquals(2, containersRequestedAny);
        assertEquals(1, oppContainersRequestedAny);
        assertEquals(4, amClient.ask.size());
        assertEquals(0, amClient.release.size());
        // RM should allocate container within 2 calls to allocate()
        int allocatedContainerCount = 0;
        int iterationsLeft = 10;
        Set<ContainerId> releases = new TreeSet<>();
        amClient.getNMTokenCache().clearCache();
        Assert.assertEquals(0, amClient.getNMTokenCache().numberOfTokensInCache());
        HashMap<String, Token> receivedNMTokens = new HashMap<>();
        while (allocatedContainerCount < (containersRequestedAny + oppContainersRequestedAny) && iterationsLeft-- > 0) {
            AllocateResponse allocResponse = amClient.allocate(0.1f);
            assertEquals(0, amClient.ask.size());
            assertEquals(0, amClient.release.size());
            allocatedContainerCount += allocResponse.getAllocatedContainers().size();
            for (Container container : allocResponse.getAllocatedContainers()) {
                ContainerId rejectContainerId = container.getId();
                releases.add(rejectContainerId);
            }
            for (NMToken token : allocResponse.getNMTokens()) {
                String nodeID = token.getNodeId().toString();
                receivedNMTokens.put(nodeID, token.getToken());
            }
            if (allocatedContainerCount < containersRequestedAny) {
                // sleep to let NM's heartbeat to RM and trigger allocations
                sleep(100);
            }
        }
        assertEquals(allocatedContainerCount, containersRequestedAny + oppContainersRequestedAny);
        for (ContainerId rejectContainerId : releases) {
            amClient.releaseAssignedContainer(rejectContainerId);
        }
        assertEquals(3, amClient.release.size());
        assertEquals(0, amClient.ask.size());
        // need to tell the AMRMClient that we dont need these resources anymore
        amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
        assertEquals(4, amClient.ask.size());
        // test RPC exception handling
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
        amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
        final AMRMClient amc = amClient;
        ApplicationMasterProtocol realRM = amClient.rmClient;
        try {
            ApplicationMasterProtocol mockRM = mock(ApplicationMasterProtocol.class);
            when(mockRM.allocate(any(AllocateRequest.class))).thenAnswer(new Answer<AllocateResponse>() {

                public AllocateResponse answer(InvocationOnMock invocation) throws Exception {
                    amc.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
                    amc.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
                    amc.removeContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
                    throw new Exception();
                }
            });
            amClient.rmClient = mockRM;
            amClient.allocate(0.1f);
        } catch (Exception ioe) {
        } finally {
            amClient.rmClient = realRM;
        }
        assertEquals(3, amClient.release.size());
        assertEquals(6, amClient.ask.size());
        iterationsLeft = 3;
        // do a few iterations to ensure RM is not going send new containers
        while (iterationsLeft-- > 0) {
            // inform RM of rejection
            AllocateResponse allocResponse = amClient.allocate(0.1f);
            // RM did not send new containers because AM does not need any
            assertEquals(0, allocResponse.getAllocatedContainers().size());
            if (allocResponse.getCompletedContainersStatuses().size() > 0) {
                for (ContainerStatus cStatus : allocResponse.getCompletedContainersStatuses()) {
                    if (releases.contains(cStatus.getContainerId())) {
                        assertEquals(cStatus.getState(), ContainerState.COMPLETE);
                        assertEquals(-100, cStatus.getExitStatus());
                        releases.remove(cStatus.getContainerId());
                    }
                }
            }
            if (iterationsLeft > 0) {
                // sleep to make sure NM's heartbeat
                sleep(100);
            }
        }
        assertEquals(0, amClient.ask.size());
        assertEquals(0, amClient.release.size());
        amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    } finally {
        if (amClient != null && amClient.getServiceState() == Service.STATE.STARTED) {
            amClient.stop();
        }
    }
}
Also used : AMRMClient(org.apache.hadoop.yarn.client.api.AMRMClient) HashMap(java.util.HashMap) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) NMToken(org.apache.hadoop.yarn.api.records.NMToken) Token(org.apache.hadoop.yarn.api.records.Token) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TreeSet(java.util.TreeSet) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) NMToken(org.apache.hadoop.yarn.api.records.NMToken) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) InvocationOnMock(org.mockito.invocation.InvocationOnMock) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) Test(org.junit.Test)

Example 5 with NMToken

use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.

the class ApplicationMasterService method registerApplicationMaster.

@Override
public RegisterApplicationMasterResponse registerApplicationMaster(RegisterApplicationMasterRequest request) throws YarnException, IOException {
    AMRMTokenIdentifier amrmTokenIdentifier = YarnServerSecurityUtils.authorizeRequest();
    ApplicationAttemptId applicationAttemptId = amrmTokenIdentifier.getApplicationAttemptId();
    ApplicationId appID = applicationAttemptId.getApplicationId();
    AllocateResponseLock lock = responseMap.get(applicationAttemptId);
    if (lock == null) {
        RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(), AuditConstants.REGISTER_AM, "Application doesn't exist in cache " + applicationAttemptId, "ApplicationMasterService", "Error in registering application master", appID, applicationAttemptId);
        throwApplicationDoesNotExistInCacheException(applicationAttemptId);
    }
    // Allow only one thread in AM to do registerApp at a time.
    synchronized (lock) {
        AllocateResponse lastResponse = lock.getAllocateResponse();
        if (hasApplicationMasterRegistered(applicationAttemptId)) {
            String message = "Application Master is already registered : " + appID;
            LOG.warn(message);
            RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(), AuditConstants.REGISTER_AM, "", "ApplicationMasterService", message, appID, applicationAttemptId);
            throw new InvalidApplicationMasterRequestException(message);
        }
        this.amLivelinessMonitor.receivedPing(applicationAttemptId);
        RMApp app = this.rmContext.getRMApps().get(appID);
        // Setting the response id to 0 to identify if the
        // application master is register for the respective attemptid
        lastResponse.setResponseId(0);
        lock.setAllocateResponse(lastResponse);
        LOG.info("AM registration " + applicationAttemptId);
        this.rmContext.getDispatcher().getEventHandler().handle(new RMAppAttemptRegistrationEvent(applicationAttemptId, request.getHost(), request.getRpcPort(), request.getTrackingUrl()));
        RMAuditLogger.logSuccess(app.getUser(), AuditConstants.REGISTER_AM, "ApplicationMasterService", appID, applicationAttemptId);
        // Pick up min/max resource from scheduler...
        RegisterApplicationMasterResponse response = recordFactory.newRecordInstance(RegisterApplicationMasterResponse.class);
        response.setMaximumResourceCapability(rScheduler.getMaximumResourceCapability(app.getQueue()));
        response.setApplicationACLs(app.getRMAppAttempt(applicationAttemptId).getSubmissionContext().getAMContainerSpec().getApplicationACLs());
        response.setQueue(app.getQueue());
        if (UserGroupInformation.isSecurityEnabled()) {
            LOG.info("Setting client token master key");
            response.setClientToAMTokenMasterKey(java.nio.ByteBuffer.wrap(rmContext.getClientToAMTokenSecretManager().getMasterKey(applicationAttemptId).getEncoded()));
        }
        // and corresponding NM tokens.
        if (app.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
            List<Container> transferredContainers = rScheduler.getTransferredContainers(applicationAttemptId);
            if (!transferredContainers.isEmpty()) {
                response.setContainersFromPreviousAttempts(transferredContainers);
                List<NMToken> nmTokens = new ArrayList<NMToken>();
                for (Container container : transferredContainers) {
                    try {
                        NMToken token = rmContext.getNMTokenSecretManager().createAndGetNMToken(app.getUser(), applicationAttemptId, container);
                        if (null != token) {
                            nmTokens.add(token);
                        }
                    } catch (IllegalArgumentException e) {
                        // will be automatically retried by RMProxy in RPC layer.
                        if (e.getCause() instanceof UnknownHostException) {
                            throw (UnknownHostException) e.getCause();
                        }
                    }
                }
                response.setNMTokensFromPreviousAttempts(nmTokens);
                LOG.info("Application " + appID + " retrieved " + transferredContainers.size() + " containers from previous" + " attempts and " + nmTokens.size() + " NM tokens.");
            }
        }
        response.setSchedulerResourceTypes(rScheduler.getSchedulingResourceTypes());
        return response;
    }
}
Also used : InvalidApplicationMasterRequestException(org.apache.hadoop.yarn.exceptions.InvalidApplicationMasterRequestException) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NMToken(org.apache.hadoop.yarn.api.records.NMToken) UnknownHostException(java.net.UnknownHostException) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) PreemptionContainer(org.apache.hadoop.yarn.api.records.PreemptionContainer) Container(org.apache.hadoop.yarn.api.records.Container) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) RegisterApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMAppAttemptRegistrationEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent)

Aggregations

NMToken (org.apache.hadoop.yarn.api.records.NMToken)20 Container (org.apache.hadoop.yarn.api.records.Container)15 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)12 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)11 Token (org.apache.hadoop.yarn.api.records.Token)8 Test (org.junit.Test)8 ArrayList (java.util.ArrayList)7 HashMap (java.util.HashMap)6 UpdatedContainer (org.apache.hadoop.yarn.api.records.UpdatedContainer)6 AMRMClient (org.apache.hadoop.yarn.client.api.AMRMClient)5 TreeSet (java.util.TreeSet)4 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)4 NodeId (org.apache.hadoop.yarn.api.records.NodeId)4 Resource (org.apache.hadoop.yarn.api.records.Resource)4 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)4 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)3 RegisterApplicationMasterResponse (org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse)2 NodeReport (org.apache.hadoop.yarn.api.records.NodeReport)2 Priority (org.apache.hadoop.yarn.api.records.Priority)2 ContainerRequest (org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest)2