Search in sources :

Example 1 with ResourceOption

use of org.apache.hadoop.yarn.api.records.ResourceOption in project hadoop by apache.

the class RMAdminCLI method updateNodeResource.

private int updateNodeResource(String nodeIdStr, int memSize, int cores, int overCommitTimeout) throws IOException, YarnException {
    // check resource value first
    if (invalidResourceValue(memSize, cores)) {
        throw new IllegalArgumentException("Invalid resource value: " + "(" + memSize + "," + cores + ") for updateNodeResource.");
    }
    // Refresh the nodes
    ResourceManagerAdministrationProtocol adminProtocol = createAdminProtocol();
    UpdateNodeResourceRequest request = recordFactory.newRecordInstance(UpdateNodeResourceRequest.class);
    NodeId nodeId = NodeId.fromString(nodeIdStr);
    Resource resource = Resources.createResource(memSize, cores);
    Map<NodeId, ResourceOption> resourceMap = new HashMap<NodeId, ResourceOption>();
    resourceMap.put(nodeId, ResourceOption.newInstance(resource, overCommitTimeout));
    request.setNodeResourceMap(resourceMap);
    adminProtocol.updateNodeResource(request);
    return 0;
}
Also used : ResourceManagerAdministrationProtocol(org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol) HashMap(java.util.HashMap) ResourceOption(org.apache.hadoop.yarn.api.records.ResourceOption) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) UpdateNodeResourceRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest)

Example 2 with ResourceOption

use of org.apache.hadoop.yarn.api.records.ResourceOption in project hadoop by apache.

the class TestFifoScheduler method testResourceOverCommit.

@Test(timeout = 60000)
public void testResourceOverCommit() throws Exception {
    int waitCount;
    MockRM rm = new MockRM(conf);
    rm.start();
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
    RMApp app1 = rm.submitApp(2048);
    // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 2 GB used and 2 GB available
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // add request for containers
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
    // send the request
    AllocateResponse alloc1Response = am1.schedule();
    // kick the scheduler, 2 GB given to AM1, resource remaining 0
    nm1.nodeHeartbeat(true);
    while (alloc1Response.getAllocatedContainers().size() < 1) {
        LOG.info("Waiting for containers to be created for app 1...");
        Thread.sleep(1000);
        alloc1Response = am1.schedule();
    }
    List<Container> allocated1 = alloc1Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated1.size());
    Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
    Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 4 GB used and 0 GB available
    Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    // check container is assigned with 2 GB.
    Container c1 = allocated1.get(0);
    Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());
    // update node resource to 2 GB, so resource is over-consumed.
    Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<NodeId, ResourceOption>();
    nodeResourceMap.put(nm1.getNodeId(), ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
    UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    rm.getAdminService().updateNodeResource(request);
    waitCount = 0;
    while (waitCount++ != 20) {
        report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
        if (null != report_nm1 && report_nm1.getAvailableResource().getMemorySize() != 0) {
            break;
        }
        LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried " + waitCount + " times already..");
        Thread.sleep(1000);
    }
    // Now, the used resource is still 4 GB, and available resource is minus
    // value.
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // Check container can complete successfully in case of resource
    // over-commitment.
    ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
    nm1.containerStatus(containerStatus);
    waitCount = 0;
    while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
        LOG.info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
        Thread.sleep(100);
    }
    Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
    Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    // As container return 2 GB back, the available resource becomes 0 again.
    Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
    rm.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) HashMap(java.util.HashMap) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) UpdateNodeResourceRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ResourceOption(org.apache.hadoop.yarn.api.records.ResourceOption) SchedulerNodeReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport) NodeId(org.apache.hadoop.yarn.api.records.NodeId) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Test(org.junit.Test)

Example 3 with ResourceOption

use of org.apache.hadoop.yarn.api.records.ResourceOption in project hadoop by apache.

the class TestCapacityScheduler method testResourceOverCommit.

@Test
public void testResourceOverCommit() throws Exception {
    int waitCount;
    Configuration conf = new Configuration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
    MockRM rm = new MockRM(conf);
    rm.start();
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
    RMApp app1 = rm.submitApp(2048);
    // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 2 GB used and 2 GB available
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // add request for containers
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
    // send the request
    AllocateResponse alloc1Response = am1.schedule();
    // kick the scheduler, 2 GB given to AM1, resource remaining 0
    nm1.nodeHeartbeat(true);
    while (alloc1Response.getAllocatedContainers().size() < 1) {
        LOG.info("Waiting for containers to be created for app 1...");
        Thread.sleep(100);
        alloc1Response = am1.schedule();
    }
    List<Container> allocated1 = alloc1Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated1.size());
    Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
    Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 4 GB used and 0 GB available
    Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    // check container is assigned with 2 GB.
    Container c1 = allocated1.get(0);
    Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());
    // update node resource to 2 GB, so resource is over-consumed.
    Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<NodeId, ResourceOption>();
    nodeResourceMap.put(nm1.getNodeId(), ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
    UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    AdminService as = ((MockRM) rm).getAdminService();
    as.updateNodeResource(request);
    waitCount = 0;
    while (waitCount++ != 20) {
        report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
        if (report_nm1.getAvailableResource().getMemorySize() != 0) {
            break;
        }
        LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried " + waitCount + " times already..");
        Thread.sleep(1000);
    }
    // Now, the used resource is still 4 GB, and available resource is minus value.
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // Check container can complete successfully in case of resource over-commitment.
    ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
    nm1.containerStatus(containerStatus);
    waitCount = 0;
    while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
        LOG.info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
        Thread.sleep(100);
    }
    Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
    Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    // As container return 2 GB back, the available resource becomes 0 again.
    Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
    // Verify no NPE is trigger in schedule after resource is updated.
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 3 * GB, 1, 1);
    alloc1Response = am1.schedule();
    Assert.assertEquals("Shouldn't have enough resource to allocate containers", 0, alloc1Response.getAllocatedContainers().size());
    int times = 0;
    // try 10 times as scheduling is async process.
    while (alloc1Response.getAllocatedContainers().size() < 1 && times++ < 10) {
        LOG.info("Waiting for containers to be allocated for app 1... Tried " + times + " times already..");
        Thread.sleep(100);
    }
    Assert.assertEquals("Shouldn't have enough resource to allocate containers", 0, alloc1Response.getAllocatedContainers().size());
    rm.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) AdminService(org.apache.hadoop.yarn.server.resourcemanager.AdminService) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) HashMap(java.util.HashMap) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) UpdateNodeResourceRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ResourceOption(org.apache.hadoop.yarn.api.records.ResourceOption) SchedulerNodeReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport) NodeId(org.apache.hadoop.yarn.api.records.NodeId) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Test(org.junit.Test)

Example 4 with ResourceOption

use of org.apache.hadoop.yarn.api.records.ResourceOption in project hadoop by apache.

the class AdminService method refreshNodesResources.

@Override
public RefreshNodesResourcesResponse refreshNodesResources(RefreshNodesResourcesRequest request) throws YarnException, StandbyException {
    final String operation = "refreshNodesResources";
    UserGroupInformation user = checkAcls(operation);
    final String msg = "refresh nodes.";
    checkRMStatus(user.getShortUserName(), operation, msg);
    RefreshNodesResourcesResponse response = recordFactory.newRecordInstance(RefreshNodesResourcesResponse.class);
    try {
        Configuration conf = getConfig();
        Configuration configuration = new Configuration(conf);
        DynamicResourceConfiguration newConf;
        InputStream drInputStream = this.rmContext.getConfigurationProvider().getConfigurationInputStream(configuration, YarnConfiguration.DR_CONFIGURATION_FILE);
        if (drInputStream != null) {
            newConf = new DynamicResourceConfiguration(configuration, drInputStream);
        } else {
            newConf = new DynamicResourceConfiguration(configuration);
        }
        if (newConf.getNodes() != null && newConf.getNodes().length != 0) {
            Map<NodeId, ResourceOption> nodeResourceMap = newConf.getNodeResourceMap();
            UpdateNodeResourceRequest updateRequest = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
            updateNodeResource(updateRequest);
        }
        // refresh dynamic resource in ResourceTrackerService
        this.rmContext.getResourceTrackerService().updateDynamicResourceConfiguration(newConf);
        RMAuditLogger.logSuccess(user.getShortUserName(), operation, "AdminService");
        return response;
    } catch (IOException ioe) {
        throw logAndWrapException(ioe, user.getShortUserName(), operation, msg);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DynamicResourceConfiguration(org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ResourceOption(org.apache.hadoop.yarn.api.records.ResourceOption) InputStream(java.io.InputStream) NodeId(org.apache.hadoop.yarn.api.records.NodeId) IOException(java.io.IOException) RefreshNodesResourcesResponse(org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesResponse) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) DynamicResourceConfiguration(org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration) UpdateNodeResourceRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest)

Example 5 with ResourceOption

use of org.apache.hadoop.yarn.api.records.ResourceOption in project hadoop by apache.

the class TestRMAdminCLI method testUpdateNodeResource.

@Test(timeout = 500)
public void testUpdateNodeResource() throws Exception {
    String nodeIdStr = "0.0.0.0:0";
    int memSize = 2048;
    int cores = 2;
    String[] args = { "-updateNodeResource", nodeIdStr, Integer.toString(memSize), Integer.toString(cores) };
    assertEquals(0, rmAdminCLI.run(args));
    ArgumentCaptor<UpdateNodeResourceRequest> argument = ArgumentCaptor.forClass(UpdateNodeResourceRequest.class);
    verify(admin).updateNodeResource(argument.capture());
    UpdateNodeResourceRequest request = argument.getValue();
    Map<NodeId, ResourceOption> resourceMap = request.getNodeResourceMap();
    NodeId nodeId = NodeId.fromString(nodeIdStr);
    Resource expectedResource = Resources.createResource(memSize, cores);
    ResourceOption resource = resourceMap.get(nodeId);
    assertNotNull("resource for " + nodeIdStr + " shouldn't be null.", resource);
    assertEquals("resource value for " + nodeIdStr + " is not as expected.", ResourceOption.newInstance(expectedResource, ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT), resource);
}
Also used : ResourceOption(org.apache.hadoop.yarn.api.records.ResourceOption) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) UpdateNodeResourceRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest) Test(org.junit.Test)

Aggregations

ResourceOption (org.apache.hadoop.yarn.api.records.ResourceOption)9 NodeId (org.apache.hadoop.yarn.api.records.NodeId)8 UpdateNodeResourceRequest (org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest)5 HashMap (java.util.HashMap)4 Resource (org.apache.hadoop.yarn.api.records.Resource)3 Test (org.junit.Test)3 Configuration (org.apache.hadoop.conf.Configuration)2 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)2 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)2 Container (org.apache.hadoop.yarn.api.records.Container)2 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)2 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)2 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)2 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)2 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)2 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)2 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)2 SchedulerNodeReport (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport)2 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1