Search in sources :

Example 1 with ResourceBlacklistRequest

use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.

the class TestCapacitySchedulerNodeLabelUpdate method testBlacklistAMDisableLabel.

/**
 * Checks the AM blacklist updates reported for two applications: one whose AM
 * is launched on the default partition (node h1) and one whose AM is launched
 * on partition "x" (node h2). AM node blacklisting is enabled and the disable
 * threshold is set to 0.5.
 *
 * For each application the test expects one blacklist addition and no removals
 * after the first node is added, and no additions and two removals after a
 * second node of the same partition is added.
 *
 * @throws Exception if the mock ResourceManager or any of its calls fail
 */
@Test(timeout = 30000)
public void testBlacklistAMDisableLabel() throws Exception {
    conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, true);
    conf.setFloat(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD, 0.5f);
    mgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x", "y"));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h2", 0), toSet("x"), NodeId.newInstance("h3", 0), toSet("x"), NodeId.newInstance("h6", 0), toSet("x")));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h4", 0), toSet("y"), NodeId.newInstance("h5", 0), toSet("y"), NodeId.newInstance("h7", 0), toSet("y")));
    MockRM rm = new MockRM(getConfigurationWithQueueLabels(conf)) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    rm.getRMContext().setNodeLabelManager(mgr);
    rm.start();
    try {
        // Nodes in label default h1,h8,h9
        // Nodes in label x h2,h3,h6
        // Nodes in label y h4,h5,h7
        MockNM nm1 = rm.registerNode("h1:1234", 2048);
        MockNM nm2 = rm.registerNode("h2:1234", 2048);
        rm.registerNode("h3:1234", 2048);
        rm.registerNode("h4:1234", 2048);
        rm.registerNode("h5:1234", 2048);
        rm.registerNode("h6:1234", 2048);
        rm.registerNode("h7:1234", 2048);
        rm.registerNode("h8:1234", 2048);
        rm.registerNode("h9:1234", 2048);
        // Submit app with AM container launched on default partition i.e. h1.
        RMApp app = rm.submitApp(GB, "app", "user", null, "a");
        MockRM.launchAndRegisterAM(app, rm, nm1);
        RMAppAttempt appAttempt = app.getCurrentAppAttempt();
        // Add default node blacklist from default
        appAttempt.getAMBlacklistManager().addNode("h1");
        ResourceBlacklistRequest blacklistUpdates = appAttempt.getAMBlacklistManager().getBlacklistUpdates();
        Assert.assertEquals(1, blacklistUpdates.getBlacklistAdditions().size());
        Assert.assertEquals(0, blacklistUpdates.getBlacklistRemovals().size());
        // Adding second node from default partition
        appAttempt.getAMBlacklistManager().addNode("h8");
        blacklistUpdates = appAttempt.getAMBlacklistManager().getBlacklistUpdates();
        Assert.assertEquals(0, blacklistUpdates.getBlacklistAdditions().size());
        Assert.assertEquals(2, blacklistUpdates.getBlacklistRemovals().size());
        // Submission in label x
        RMApp applabel = rm.submitApp(GB, "app", "user", null, "a", "x");
        MockRM.launchAndRegisterAM(applabel, rm, nm2);
        RMAppAttempt appAttemptlabelx = applabel.getCurrentAppAttempt();
        appAttemptlabelx.getAMBlacklistManager().addNode("h2");
        ResourceBlacklistRequest blacklistUpdatesOnx = appAttemptlabelx.getAMBlacklistManager().getBlacklistUpdates();
        Assert.assertEquals(1, blacklistUpdatesOnx.getBlacklistAdditions().size());
        Assert.assertEquals(0, blacklistUpdatesOnx.getBlacklistRemovals().size());
        // Adding second node from label x partition
        appAttemptlabelx.getAMBlacklistManager().addNode("h3");
        // Query the label-x attempt's own manager; reading from appAttempt here
        // would only re-check the default-partition application.
        blacklistUpdatesOnx = appAttemptlabelx.getAMBlacklistManager().getBlacklistUpdates();
        Assert.assertEquals(0, blacklistUpdatesOnx.getBlacklistAdditions().size());
        Assert.assertEquals(2, blacklistUpdatesOnx.getBlacklistRemovals().size());
    } finally {
        // Shut the RM down even when an assertion above fails.
        rm.close();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ResourceBlacklistRequest(org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)

Example 2 with ResourceBlacklistRequest

use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.

the class TestBlacklistManager method testSimpleBlacklistBelowFailureThreshold.

/**
 * Adds two hosts to a SimpleBlacklistManager created with three node manager
 * hosts and a disable threshold of 0.8, then expects both hosts to be reported
 * as blacklist additions and the removals list to be empty.
 */
@Test
public void testSimpleBlacklistBelowFailureThreshold() {
    final int hostCount = 3;
    final double disableThreshold = 0.8;
    BlacklistManager blacklistManager = new SimpleBlacklistManager(hostCount, disableThreshold);

    String firstHost = "foo";
    String secondHost = "bar";
    blacklistManager.addNode(firstHost);
    blacklistManager.addNode(secondHost);

    ResourceBlacklistRequest updates = blacklistManager.getBlacklistUpdates();

    // Sort the reported additions so the comparison does not depend on the
    // order in which the manager returns them.
    List<String> additions = updates.getBlacklistAdditions();
    Collections.sort(additions);
    List<String> removals = updates.getBlacklistRemovals();

    String[] expectedAdditions = { secondHost, firstHost };
    Assert.assertArrayEquals("Blacklist additions was not as expected", expectedAdditions, additions.toArray());
    Assert.assertTrue("Blacklist removals should be empty but was " + removals, removals.isEmpty());
}
Also used : ResourceBlacklistRequest(org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest) Test(org.junit.Test)

Example 3 with ResourceBlacklistRequest

use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.

the class TestSchedulerUtils method testValidateResourceBlacklistRequest.

/**
 * Registers an application master against a mock ResourceManager and sends an
 * allocate request whose blacklist additions contain
 * {@code ResourceRequest.ANY}. The test passes only if the allocate call
 * throws InvalidResourceBlacklistRequestException.
 *
 * @throws Exception if setting up the RM, the client proxy or the
 *         registration fails
 */
@Test
public void testValidateResourceBlacklistRequest() throws Exception {
    MyContainerManager containerManager = new MyContainerManager();
    final MockRMWithAMS rm = new MockRMWithAMS(new YarnConfiguration(), containerManager);
    rm.start();
    boolean error = false;
    try {
        MockNM nm1 = rm.registerNode("localhost:1234", 5120);
        Map<ApplicationAccessType, String> acls = new HashMap<>(2);
        acls.put(ApplicationAccessType.VIEW_APP, "*");
        RMApp app = rm.submitApp(1024, "appname", "appuser", acls);
        nm1.nodeHeartbeat(true);
        RMAppAttempt attempt = app.getCurrentAppAttempt();
        ApplicationAttemptId applicationAttemptId = attempt.getAppAttemptId();
        waitForLaunchedState(attempt);
        // Create a client to the RM.
        final Configuration conf = rm.getConfig();
        final YarnRPC rpc = YarnRPC.create(conf);
        UserGroupInformation currentUser = UserGroupInformation.createRemoteUser(applicationAttemptId.toString());
        Credentials credentials = containerManager.getContainerCredentials();
        final InetSocketAddress rmBindAddress = rm.getApplicationMasterService().getBindAddress();
        Token<? extends TokenIdentifier> amRMToken = MockRMWithAMS.setupAndReturnAMRMToken(rmBindAddress, credentials.getAllTokens());
        currentUser.addToken(amRMToken);
        ApplicationMasterProtocol client = currentUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {

            @Override
            public ApplicationMasterProtocol run() {
                return (ApplicationMasterProtocol) rpc.getProxy(ApplicationMasterProtocol.class, rmBindAddress, conf);
            }
        });
        RegisterApplicationMasterRequest request = Records.newRecord(RegisterApplicationMasterRequest.class);
        client.registerApplicationMaster(request);
        // Blacklisting ResourceRequest.ANY is the invalid input under test.
        ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.newInstance(Collections.singletonList(ResourceRequest.ANY), null);
        AllocateRequest allocateRequest = AllocateRequest.newInstance(0, 0.0f, null, null, blacklistRequest);
        try {
            client.allocate(allocateRequest);
        } catch (InvalidResourceBlacklistRequestException e) {
            error = true;
        }
    } finally {
        // Stop the RM even when setup or the allocate call fails unexpectedly.
        rm.stop();
    }
    Assert.assertTrue("Did not catch InvalidResourceBlacklistRequestException", error);
}
Also used : MyContainerManager(org.apache.hadoop.yarn.server.resourcemanager.TestAMAuthorization.MyContainerManager) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) HashMap(java.util.HashMap) ResourceBlacklistRequest(org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) InetSocketAddress(java.net.InetSocketAddress) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) MockRMWithAMS(org.apache.hadoop.yarn.server.resourcemanager.TestAMAuthorization.MockRMWithAMS) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) RegisterApplicationMasterRequest(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) YarnRPC(org.apache.hadoop.yarn.ipc.YarnRPC) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) InvalidResourceBlacklistRequestException(org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 4 with ResourceBlacklistRequest

use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.

the class BaseAMRMProxyE2ETest method createAllocateRequest.

/**
 * Builds an AllocateRequest with response id 1 and progress 0 whose asks come
 * from an AMRMClientImpl that was given the same container request twice. The
 * container request targets the host of the first node in {@code listNode}
 * with a 1024 / 2 resource capability and priority 1. The blacklist request
 * and the release list are both empty.
 *
 * @param listNode node reports; the host of the first entry is used as the
 *        requested node
 * @return the assembled allocate request
 */
protected AllocateRequest createAllocateRequest(List<NodeReport> listNode) {
    // The test needs AMRMClient to create a real allocate request
    AMRMClientImpl<AMRMClient.ContainerRequest> client = new AMRMClientImpl<>();

    Resource capability = Resource.newInstance(1024, 2);
    Priority priority = Priority.newInstance(1);
    String firstHost = listNode.get(0).getNodeId().getHost();
    AMRMClient.ContainerRequest containerRequest =
        new AMRMClient.ContainerRequest(capability, new String[] { firstHost }, null, priority);

    // The same request is registered twice on the client.
    client.addContainerRequest(containerRequest);
    client.addContainerRequest(containerRequest);

    // Copy the client's pending asks in their iteration order.
    List<ResourceRequest> asks = new ArrayList<>();
    asks.addAll(client.ask);

    ResourceBlacklistRequest emptyBlacklist =
        ResourceBlacklistRequest.newInstance(new ArrayList<>(), new ArrayList<>());
    return AllocateRequest.newInstance(1, 0, asks, new ArrayList<>(), emptyBlacklist);
}
Also used : AMRMClient(org.apache.hadoop.yarn.client.api.AMRMClient) ResourceBlacklistRequest(org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) ArrayList(java.util.ArrayList) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport)

Example 5 with ResourceBlacklistRequest

use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.

the class AMRMClientImpl method allocate.

/**
 * Sends one allocate call to the ResourceManager through {@code rmClient}.
 *
 * Under the object lock, the method snapshots the pending asks, releases,
 * container update requests and blacklist changes into an AllocateRequest and
 * clears the pending collections. The RPC itself is made outside the
 * synchronized block. On a successful response the cached cluster state
 * (node count, response id, available resources, tokens) and the pending
 * release / change bookkeeping are updated.
 *
 * If the RM answers with ApplicationMasterNotRegisteredException, the pending
 * state is re-queued, {@code registerApplicationMaster()} is called and this
 * method is invoked again.
 *
 * If the method exits without a response, the finally block puts the
 * snapshotted asks, releases, change requests and blacklist changes back so
 * they are still pending on the next call.
 *
 * @param progressIndicator progress value placed in the request; checked with
 *        {@code Preconditions.checkArgument} to be non-negative
 * @return the response returned by the ResourceManager
 * @throws YarnException declared by this method; presumably propagated from
 *         the RM call - confirm against rmClient
 * @throws IOException declared by this method; presumably propagated from the
 *         RM call - confirm against rmClient
 */
@Override
public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException {
    Preconditions.checkArgument(progressIndicator >= 0, "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    List<ResourceRequest> askList = null;
    List<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    // Local copies of the blacklist changes sent with this request; used by the
    // finally block to restore them if no response is received.
    List<String> blacklistToAdd = new ArrayList<String>();
    List<String> blacklistToRemove = new ArrayList<String>();
    Map<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> oldChange = new HashMap<>();
    try {
        synchronized (this) {
            askList = cloneAsks();
            // Save the current change for recovery
            oldChange.putAll(change);
            List<UpdateContainerRequest> updateList = createUpdateList();
            releaseList = new ArrayList<ContainerId>(release);
            // optimistically clear this collection assuming no RPC failure
            ask.clear();
            release.clear();
            change.clear();
            blacklistToAdd.addAll(blacklistAdditions);
            blacklistToRemove.addAll(blacklistRemovals);
            ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove);
            allocateRequest = AllocateRequest.newBuilder().responseId(lastResponseId).progress(progressIndicator).askList(askList).resourceBlacklistRequest(blacklistRequest).releaseList(releaseList).updateRequests(updateList).build();
            // clear blacklistAdditions and blacklistRemovals before
            // unsynchronized part
            blacklistAdditions.clear();
            blacklistRemovals.clear();
        }
        try {
            allocateResponse = rmClient.allocate(allocateRequest);
        } catch (ApplicationMasterNotRegisteredException e) {
            LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
            synchronized (this) {
                // Re-queue the pending releases, blacklisted nodes, remote
                // requests and pending changes so the retry below sends them again.
                release.addAll(this.pendingRelease);
                blacklistAdditions.addAll(this.blacklistedNodes);
                for (RemoteRequestsTable remoteRequestsTable : remoteRequests.values()) {
                    @SuppressWarnings("unchecked") Iterator<ResourceRequestInfo<T>> reqIter = remoteRequestsTable.iterator();
                    while (reqIter.hasNext()) {
                        addResourceRequestToAsk(reqIter.next().remoteRequest);
                    }
                }
                change.putAll(this.pendingChange);
            }
            // re register with RM
            registerApplicationMaster();
            // Retry the allocate call; its result is returned as-is.
            allocateResponse = allocate(progressIndicator);
            return allocateResponse;
        }
        synchronized (this) {
            // update these on successful RPC
            clusterNodeCount = allocateResponse.getNumClusterNodes();
            lastResponseId = allocateResponse.getResponseId();
            clusterAvailableResources = allocateResponse.getAvailableResources();
            if (!allocateResponse.getNMTokens().isEmpty()) {
                populateNMTokens(allocateResponse.getNMTokens());
            }
            if (allocateResponse.getAMRMToken() != null) {
                updateAMRMToken(allocateResponse.getAMRMToken());
            }
            if (!pendingRelease.isEmpty() && !allocateResponse.getCompletedContainersStatuses().isEmpty()) {
                removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses());
            }
            if (!pendingChange.isEmpty()) {
                List<ContainerStatus> completed = allocateResponse.getCompletedContainersStatuses();
                List<UpdatedContainer> changed = new ArrayList<>();
                changed.addAll(allocateResponse.getUpdatedContainers());
                // remove all pending change requests that belong to completed
                // containers
                for (ContainerStatus status : completed) {
                    ContainerId containerId = status.getContainerId();
                    pendingChange.remove(containerId);
                }
                // remove all pending change requests that have been satisfied
                if (!changed.isEmpty()) {
                    removePendingChangeRequests(changed);
                }
            }
        }
    } finally {
        // TODO how to differentiate remote yarn exception vs error in rpc
        if (allocateResponse == null) {
            // preserve ask and release for next call to allocate()
            synchronized (this) {
                release.addAll(releaseList);
                // Put back only the snapshotted asks that ask does not already
                // contain; entries present in ask now are left untouched.
                for (ResourceRequest oldAsk : askList) {
                    if (!ask.contains(oldAsk)) {
                        ask.add(oldAsk);
                    }
                }
                // Likewise, put back only the saved change requests
                // that do not exist in the current change map:
                for (Map.Entry<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> entry : oldChange.entrySet()) {
                    ContainerId oldContainerId = entry.getKey();
                    Container oldContainer = entry.getValue().getKey();
                    UpdateContainerRequest oldupdate = entry.getValue().getValue();
                    if (change.get(oldContainerId) == null) {
                        change.put(oldContainerId, new SimpleEntry<>(oldContainer, oldupdate));
                    }
                }
                // Restore the blacklist changes that were sent with the failed request.
                blacklistAdditions.addAll(blacklistToAdd);
                blacklistRemovals.addAll(blacklistToRemove);
            }
        }
    }
    return allocateResponse;
}
Also used : HashMap(java.util.HashMap) ResourceBlacklistRequest(org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) ArrayList(java.util.ArrayList) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) Iterator(java.util.Iterator) SimpleEntry(java.util.AbstractMap.SimpleEntry) UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

ResourceBlacklistRequest (org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest)10 Test (org.junit.Test)5 ArrayList (java.util.ArrayList)4 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)4 AllocateRequest (org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)3 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)3 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)3 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)3 HashMap (java.util.HashMap)2 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)2 NodeReport (org.apache.hadoop.yarn.api.records.NodeReport)2 Resource (org.apache.hadoop.yarn.api.records.Resource)2 InvalidResourceBlacklistRequestException (org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException)2 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)2 InetSocketAddress (java.net.InetSocketAddress)1 SimpleEntry (java.util.AbstractMap.SimpleEntry)1 Iterator (java.util.Iterator)1 Map (java.util.Map)1 Configuration (org.apache.hadoop.conf.Configuration)1 Credentials (org.apache.hadoop.security.Credentials)1