Search in sources :

Example 61 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestWorkPreservingRMRestart method testReleasedContainerNotRecovered.

// Scenario: after an RM restart, the AM's release request for a container
// reaches the new RM before the NM has reported that container's status for
// recovery. In that case the released container must not be recovered.
@Test(timeout = 50000)
public void testReleasedContainerNotRecovered() throws Exception {
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);
    rm1 = new MockRM(conf, stateStore);
    MockNM node = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    node.registerNode();
    rm1.start();
    RMApp app = rm1.submitApp(1024);
    final MockAM am = MockRM.launchAndRegisterAM(app, rm1, node);

    // Bring up a second RM on the same state store and point the NM and AM at it.
    conf.setInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS, 8000);
    rm2 = new MockRM(conf, stateStore);
    rm2.start();
    node.setResourceTrackerService(rm2.getResourceTrackerService());
    rm2.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
    am.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
    am.registerAppAttempt(true);

    // Ask for container #2 to be released before the NM has reported it.
    final ContainerId releasedId = ContainerId.newContainerId(am.getApplicationAttemptId(), 2);
    am.allocate(null, Arrays.asList(releasedId));

    // Now let the NM report its container statuses so that recovery runs.
    List<NMContainerStatus> reportedStatuses = createNMContainerStatusForApp(am);
    node.registerNode(reportedStatuses, null);

    // The AM container is the only one expected to be recovered.
    waitForNumContainersToRecover(1, rm2, am.getApplicationAttemptId());
    final AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm2.getResourceScheduler();
    // The cached release request is expected to be cleaned up at this point.
    // (The direct check is disabled:
    // assertFalse(scheduler.getPendingRelease().contains(runningContainer));)
    AllocateResponse response = am.allocate(null, null);

    // The AM must be told that the released container has completed.
    boolean completionReported = false;
    for (ContainerStatus completed : response.getCompletedContainersStatuses()) {
        completionReported |= completed.getContainerId().equals(releasedId);
    }
    assertTrue(completionReported);

    // Poll until the attempt has no pending releases left and the scheduler
    // no longer knows the released container.
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            boolean nothingPending = scheduler.getApplicationAttempt(am.getApplicationAttemptId()).getPendingRelease().isEmpty();
            return nothingPending && scheduler.getRMContainer(releasedId) == null;
        }
    }, 1000, 20000);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) Test(org.junit.Test)

Example 62 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestSchedulingWithAllocationRequestId method testMultipleAllocationRequestDiffPriority.

/**
 * Submits two allocation requests tagged with ids 10 and 20 against two
 * different nodes and checks that the id-20 request (two containers on nm2)
 * is satisfied before the id-10 request (one container on nm1).
 *
 * <p>The test polls the RM in open-ended loops, so it carries an explicit
 * timeout: if the expected containers never arrive the test fails instead
 * of hanging the whole run.
 *
 * @throws Exception if the mock RM, NM or AM interaction fails
 */
@Test(timeout = 60000)
public void testMultipleAllocationRequestDiffPriority() throws Exception {
    configureScheduler();
    YarnConfiguration conf = getConf();
    MockRM rm = new MockRM(conf);
    try {
        rm.start();
        MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
        MockNM nm2 = rm.registerNode("127.0.0.2:5678", 4 * GB);
        RMApp app1 = rm.submitApp(2048);
        // kick the scheduling
        nm1.nodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
        MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
        am1.registerAppAttempt();
        // queue the request with allocation request id 10 (targets nm1's host)
        am1.addRequests(new String[] { "127.0.0.1" }, 2 * GB, 2, 1, 10L);
        // send the request
        AllocateResponse allocResponse = am1.schedule();
        // queue the request with allocation request id 20 (targets nm2's host)
        am1.addRequests(new String[] { "127.0.0.2" }, 2 * GB, 1, 2, 20L);
        // send the request
        allocResponse = am1.schedule();
        // check if request id 20 is satisfied first
        nm2.nodeHeartbeat(true);
        // Bounded only by the @Test timeout above.
        while (allocResponse.getAllocatedContainers().size() < 2) {
            LOG.info("Waiting for containers to be created for app 1...");
            Thread.sleep(100);
            allocResponse = am1.schedule();
        }
        List<Container> allocated = allocResponse.getAllocatedContainers();
        Assert.assertEquals(2, allocated.size());
        for (Container container : allocated) {
            checkAllocatedContainer(container, 2 * GB, nm2.getNodeId(), 20);
        }
        // check now if request id 10 is satisfied
        nm1.nodeHeartbeat(true);
        // poll the RM for the newly allocated container
        allocResponse = am1.schedule();
        // Bounded only by the @Test timeout above.
        while (allocResponse.getAllocatedContainers().size() < 1) {
            LOG.info("Waiting for containers to be created for app 1...");
            Thread.sleep(100);
            allocResponse = am1.schedule();
        }
        allocated = allocResponse.getAllocatedContainers();
        Assert.assertEquals(1, allocated.size());
        checkAllocatedContainer(allocated.get(0), 2 * GB, nm1.getNodeId(), 10);
    } finally {
        // rm is assigned before the try block, so it is never null here.
        rm.stop();
    }
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Container(org.apache.hadoop.yarn.api.records.Container) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)

Example 63 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerStart.

/**
 * Allocates two OPPORTUNISTIC containers, reports one of them as RUNNING
 * from its NM, then requests promotion of that container to GUARANTEED.
 *
 * <p>Checks, in order: queue metrics are unchanged by the opportunistic
 * allocation and by the promotion request itself; a second promotion request
 * for the same container yields an UPDATE_OUTSTANDING_ERROR; after another
 * NM heartbeat the AM receives the updated container with execution type
 * GUARANTEED, the same id and a version incremented by one; the container
 * is still RUNNING from the RM's point of view; and the metrics now reflect
 * the promoted container.
 *
 * @throws Exception if the mock RM, NM or AM interaction fails
 */
@Test(timeout = 60000)
public void testContainerPromoteAfterContainerStart() throws Exception {
    HashMap<NodeId, MockNM> nodes = new HashMap<>();
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm1.getNodeId(), nm1);
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm2.getNodeId(), nm2);
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // All nodes 1 to 2 will be applicable for scheduling.
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    Thread.sleep(1000);
    QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
    // Verify Metrics
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
    List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
    Assert.assertEquals(2, allocatedContainers.size());
    Container container = allocatedContainers.get(0);
    MockNM allocNode = nodes.get(container.getNodeId());
    // Start Container in NM
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
    Thread.sleep(200);
    // Verify that container is actually running wrt the RM..
    RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Verify Metrics After OPP allocation (Nothing should change)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    // Verify Metrics after the promotion request (Nothing should change again)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    // Send Promotion req again... this should result in update error
    allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
    Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
    // Heartbeat from the NM again, still reporting the container as
    // OPPORTUNISTIC and RUNNING.
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
    Thread.sleep(200);
    allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
    Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
    Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
    Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
    // Expected value goes first so that failure messages read correctly.
    Assert.assertEquals(container.getId(), uc.getId());
    Assert.assertEquals(container.getVersion() + 1, uc.getVersion());
    // Verify that the Container is still in RUNNING state wrt RM..
    rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Verify Metrics after the promotion :
    // Allocated cores+mem should have increased, available should decrease
    verifyMetrics(metrics, 6144, 6, 2048, 2, 2);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) DistributedSchedulingAllocateResponse(org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) Container(org.apache.hadoop.yarn.api.records.Container) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Example 64 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestAMRMProxyService method testAllocateRequestWithNullValues.

/**
 * Runs a register / allocate / finish cycle for a single application, using
 * the allocate overload that takes only the application id, and checks that
 * every step returns a non-null response.
 */
@Test
public void testAllocateRequestWithNullValues() throws Exception {
    final int appId = 1;

    // Register: the returned queue is expected to equal the app id as a string.
    RegisterApplicationMasterResponse registered = registerApplicationMaster(appId);
    Assert.assertNotNull(registered);
    Assert.assertEquals(Integer.toString(appId), registered.getQueue());

    // Allocate without supplying an explicit request.
    AllocateResponse allocated = allocate(appId);
    Assert.assertNotNull(allocated);

    // Finish: the application must be reported as unregistered.
    FinishApplicationMasterResponse finished = finishApplicationMaster(appId, FinalApplicationStatus.SUCCEEDED);
    Assert.assertNotNull(finished);
    Assert.assertTrue(finished.getIsUnregistered());
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RegisterApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse) FinishApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse) Test(org.junit.Test)

Example 65 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestAMRMProxyService method releaseContainersAndAssert.

/**
 * Sends a release request for the given containers and asserts that as many
 * containers as were released come back in the allocate responses, polling
 * with up to ten additional heartbeats.
 *
 * @param appId id of the application on whose behalf the release is sent
 * @param containers containers to release; must not be empty
 * @throws Exception if an allocate call fails or the wait is interrupted
 */
private void releaseContainersAndAssert(int appId, List<Container> containers) throws Exception {
    Assert.assertTrue(!containers.isEmpty());

    // Build the release request from the ids of the supplied containers.
    List<ContainerId> relList = new ArrayList<ContainerId>(containers.size());
    for (Container toRelease : containers) {
        relList.add(toRelease.getId());
    }
    AllocateRequest releaseRequest = Records.newRecord(AllocateRequest.class);
    releaseRequest.setResponseId(1);
    releaseRequest.setReleaseList(relList);

    AllocateResponse response = allocate(appId, releaseRequest);
    Assert.assertNotNull(response);
    Assert.assertNull("new AMRMToken from RM should have been nulled by AMRMProxyService", response.getAMRMToken());

    // The way the mock resource manager is set up, it returns the containers
    // that were released in the response. This is done because the UAMs run
    // asynchronously and we need to check whether all the resource managers
    // received the release. The containers sent by the mock resource managers
    // are aggregated and returned to us, so we can assert that every release
    // list reached the sub-clusters.
    List<Container> echoed = new ArrayList<Container>(response.getAllocatedContainers());

    // Send at most 10 heartbeats to collect all the containers; if they have
    // not all arrived by then, the final assertion fails the test.
    for (int heartbeat = 0; echoed.size() < relList.size() && heartbeat < 10; heartbeat++) {
        response = allocate(appId, Records.newRecord(AllocateRequest.class));
        Assert.assertNotNull(response);
        Assert.assertNull("new AMRMToken from RM should have been nulled by AMRMProxyService", response.getAMRMToken());
        List<Container> batch = response.getAllocatedContainers();
        echoed.addAll(batch);
        LOG.info("Number of containers received in this request: " + response.getAllocatedContainers().size());
        LOG.info("Total number of containers received: " + echoed.size());
        Thread.sleep(10);
    }
    Assert.assertEquals(relList.size(), echoed.size());
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) ArrayList(java.util.ArrayList)

Aggregations

AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)85 Test (org.junit.Test)54 Container (org.apache.hadoop.yarn.api.records.Container)44 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)38 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)31 ArrayList (java.util.ArrayList)24 AllocateRequest (org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)24 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)19 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)19 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)18 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)17 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)17 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)16 ContainerRequest (org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest)15 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)15 HashMap (java.util.HashMap)14 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)13 NMToken (org.apache.hadoop.yarn.api.records.NMToken)12 UpdatedContainer (org.apache.hadoop.yarn.api.records.UpdatedContainer)12 Configuration (org.apache.hadoop.conf.Configuration)11