use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
the class TestCapacitySchedulerNodeLabelUpdate method testBlacklistAMDisableLabel.
/**
 * Checks AM node blacklisting for an application in the default partition and
 * for an application submitted with node label "x".
 *
 * <p>The disable threshold is configured to 0.5 and each partition gets three
 * nodes. For each application the test blacklists one node (expecting one
 * addition and no removals) and then a second node of the same partition
 * (expecting no additions and two removals, i.e. the blacklist is withdrawn).
 *
 * @throws Exception if the mock ResourceManager cannot be started or driven
 */
@Test(timeout = 30000)
public void testBlacklistAMDisableLabel() throws Exception {
  conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, true);
  conf.setFloat(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD, 0.5f);
  mgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x", "y"));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h2", 0), toSet("x"), NodeId.newInstance("h3", 0), toSet("x"), NodeId.newInstance("h6", 0), toSet("x")));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h4", 0), toSet("y"), NodeId.newInstance("h5", 0), toSet("y"), NodeId.newInstance("h7", 0), toSet("y")));
  MockRM rm = new MockRM(getConfigurationWithQueueLabels(conf)) {
    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return mgr;
    }
  };
  rm.getRMContext().setNodeLabelManager(mgr);
  rm.start();
  try {
    // Nodes in label default h1,h8,h9
    // Nodes in label x h2,h3,h6
    // Nodes in label y h4,h5,h7
    MockNM nm1 = rm.registerNode("h1:1234", 2048);
    MockNM nm2 = rm.registerNode("h2:1234", 2048);
    rm.registerNode("h3:1234", 2048);
    rm.registerNode("h4:1234", 2048);
    rm.registerNode("h5:1234", 2048);
    rm.registerNode("h6:1234", 2048);
    rm.registerNode("h7:1234", 2048);
    rm.registerNode("h8:1234", 2048);
    rm.registerNode("h9:1234", 2048);

    // Submit app with AM container launched on default partition i.e. h1.
    RMApp app = rm.submitApp(GB, "app", "user", null, "a");
    MockRM.launchAndRegisterAM(app, rm, nm1);
    RMAppAttempt appAttempt = app.getCurrentAppAttempt();

    // Blacklist the first node of the default partition.
    appAttempt.getAMBlacklistManager().addNode("h1");
    ResourceBlacklistRequest blacklistUpdates = appAttempt.getAMBlacklistManager().getBlacklistUpdates();
    Assert.assertEquals(1, blacklistUpdates.getBlacklistAdditions().size());
    Assert.assertEquals(0, blacklistUpdates.getBlacklistRemovals().size());

    // Blacklist a second node of the default partition.
    appAttempt.getAMBlacklistManager().addNode("h8");
    blacklistUpdates = appAttempt.getAMBlacklistManager().getBlacklistUpdates();
    Assert.assertEquals(0, blacklistUpdates.getBlacklistAdditions().size());
    Assert.assertEquals(2, blacklistUpdates.getBlacklistRemovals().size());

    // Submission in label x
    RMApp applabel = rm.submitApp(GB, "app", "user", null, "a", "x");
    MockRM.launchAndRegisterAM(applabel, rm, nm2);
    RMAppAttempt appAttemptlabelx = applabel.getCurrentAppAttempt();
    appAttemptlabelx.getAMBlacklistManager().addNode("h2");
    ResourceBlacklistRequest blacklistUpdatesOnx = appAttemptlabelx.getAMBlacklistManager().getBlacklistUpdates();
    Assert.assertEquals(1, blacklistUpdatesOnx.getBlacklistAdditions().size());
    Assert.assertEquals(0, blacklistUpdatesOnx.getBlacklistRemovals().size());

    // Blacklist a second node of partition x. The updates must be read from
    // the label-x attempt; reading them from the default-partition attempt
    // would only re-check the state already asserted above.
    appAttemptlabelx.getAMBlacklistManager().addNode("h3");
    blacklistUpdatesOnx = appAttemptlabelx.getAMBlacklistManager().getBlacklistUpdates();
    Assert.assertEquals(0, blacklistUpdatesOnx.getBlacklistAdditions().size());
    Assert.assertEquals(2, blacklistUpdatesOnx.getBlacklistRemovals().size());
  } finally {
    // Always shut the mock RM down, even when an assertion above fails.
    rm.close();
  }
}
use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
the class TestBlacklistManager method testSimpleBlacklistBelowFailureThreshold.
/**
 * Blacklists two hosts on a {@link SimpleBlacklistManager} built for three
 * hosts with a 0.8 disable threshold, then expects both hosts to be reported
 * as additions and nothing to be reported as a removal.
 */
@Test
public void testSimpleBlacklistBelowFailureThreshold() {
  final String firstHost = "foo";
  final String secondHost = "bar";
  final int hostCount = 3;
  final double disableThreshold = 0.8;

  BlacklistManager blacklistManager = new SimpleBlacklistManager(hostCount, disableThreshold);
  blacklistManager.addNode(firstHost);
  blacklistManager.addNode(secondHost);

  ResourceBlacklistRequest updates = blacklistManager.getBlacklistUpdates();

  // Sort so the comparison does not depend on the order the manager reports.
  List<String> added = updates.getBlacklistAdditions();
  Collections.sort(added);
  List<String> removed = updates.getBlacklistRemovals();

  // "bar" sorts before "foo".
  String[] expectedAdded = new String[] { secondHost, firstHost };
  Assert.assertArrayEquals("Blacklist additions was not as expected", expectedAdded, added.toArray());
  Assert.assertTrue("Blacklist removals should be empty but was " + removed, removed.isEmpty());
}
use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
the class TestSchedulerUtils method testValidateResourceBlacklistRequest.
/**
 * Sends an allocate call whose blacklist additions contain
 * {@link ResourceRequest#ANY} and expects the ResourceManager to reject it
 * with {@link InvalidResourceBlacklistRequestException}.
 *
 * @throws Exception if the mock ResourceManager or the RPC client cannot be
 *     set up
 */
@Test
public void testValidateResourceBlacklistRequest() throws Exception {
  MyContainerManager containerManager = new MyContainerManager();
  final MockRMWithAMS rm = new MockRMWithAMS(new YarnConfiguration(), containerManager);
  rm.start();
  boolean error = false;
  try {
    MockNM nm1 = rm.registerNode("localhost:1234", 5120);
    Map<ApplicationAccessType, String> acls = new HashMap<ApplicationAccessType, String>(2);
    acls.put(ApplicationAccessType.VIEW_APP, "*");
    RMApp app = rm.submitApp(1024, "appname", "appuser", acls);
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    ApplicationAttemptId applicationAttemptId = attempt.getAppAttemptId();
    waitForLaunchedState(attempt);

    // Create a client to the RM.
    final Configuration conf = rm.getConfig();
    final YarnRPC rpc = YarnRPC.create(conf);
    UserGroupInformation currentUser = UserGroupInformation.createRemoteUser(applicationAttemptId.toString());
    Credentials credentials = containerManager.getContainerCredentials();
    final InetSocketAddress rmBindAddress = rm.getApplicationMasterService().getBindAddress();
    Token<? extends TokenIdentifier> amRMToken = MockRMWithAMS.setupAndReturnAMRMToken(rmBindAddress, credentials.getAllTokens());
    currentUser.addToken(amRMToken);
    ApplicationMasterProtocol client = currentUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {
      @Override
      public ApplicationMasterProtocol run() {
        return (ApplicationMasterProtocol) rpc.getProxy(ApplicationMasterProtocol.class, rmBindAddress, conf);
      }
    });

    RegisterApplicationMasterRequest request = Records.newRecord(RegisterApplicationMasterRequest.class);
    client.registerApplicationMaster(request);

    // Blacklisting the wildcard resource name is the invalid input under test.
    ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.newInstance(Collections.singletonList(ResourceRequest.ANY), null);
    AllocateRequest allocateRequest = AllocateRequest.newInstance(0, 0.0f, null, null, blacklistRequest);
    try {
      client.allocate(allocateRequest);
    } catch (InvalidResourceBlacklistRequestException e) {
      error = true;
    }
  } finally {
    // Stop the RM even when setup or the allocate call fails unexpectedly.
    rm.stop();
  }
  Assert.assertTrue("Didn't catch InvalidResourceBlacklistRequestException", error);
}
use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
the class BaseAMRMProxyE2ETest method createAllocateRequest.
/**
 * Builds an {@link AllocateRequest} that asks for containers on the host of
 * the first node in {@code listNode}.
 *
 * @param listNode node reports; the first entry supplies the target host
 * @return an allocate request with response id 1, progress 0, the generated
 *     resource asks, an empty release list and an empty blacklist request
 */
protected AllocateRequest createAllocateRequest(List<NodeReport> listNode) {
  // A real AMRMClientImpl is used only to turn container requests into the
  // ResourceRequest entries it accumulates in its "ask" collection.
  AMRMClientImpl<AMRMClient.ContainerRequest> client = new AMRMClientImpl<>();

  String firstHost = listNode.get(0).getNodeId().getHost();
  AMRMClient.ContainerRequest containerRequest = new AMRMClient.ContainerRequest(
      Resource.newInstance(1024, 2), new String[] { firstHost }, null, Priority.newInstance(1));

  // The same container request is registered twice.
  for (int i = 0; i < 2; i++) {
    client.addContainerRequest(containerRequest);
  }

  List<ResourceRequest> asks = new ArrayList<>(client.ask);
  ResourceBlacklistRequest emptyBlacklist =
      ResourceBlacklistRequest.newInstance(new ArrayList<>(), new ArrayList<>());
  return AllocateRequest.newInstance(1, 0, asks, new ArrayList<>(), emptyBlacklist);
}
use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
the class AMRMClientImpl method allocate.
/**
 * Sends one allocate call to the ResourceManager carrying the currently
 * queued asks, releases, container update requests and blacklist changes.
 *
 * <p>The queued state is snapshotted and cleared under the client lock before
 * the RPC. If the RM answers with
 * {@link ApplicationMasterNotRegisteredException}, the outgoing collections are
 * refilled from the pending/remote bookkeeping, the AM is re-registered and
 * the call is retried. If no response is obtained, the snapshot is merged back
 * into the outgoing collections so the next call resends it.
 *
 * @param progressIndicator application progress to report; must not be negative
 * @return the response from the ResourceManager
 * @throws IllegalArgumentException if {@code progressIndicator} is negative
 * @throws YarnException as declared by the underlying allocate/register calls
 * @throws IOException as declared by the underlying allocate/register calls
 */
@Override
public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException {
  Preconditions.checkArgument(progressIndicator >= 0, "Progress indicator should not be negative");
  AllocateResponse allocateResponse = null;
  List<ResourceRequest> askList = null;
  List<ContainerId> releaseList = null;
  AllocateRequest allocateRequest = null;
  List<String> blacklistToAdd = new ArrayList<String>();
  List<String> blacklistToRemove = new ArrayList<String>();
  Map<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> oldChange = new HashMap<>();
  try {
    synchronized (this) {
      askList = cloneAsks();
      // Save the current change for recovery
      oldChange.putAll(change);
      List<UpdateContainerRequest> updateList = createUpdateList();
      releaseList = new ArrayList<ContainerId>(release);
      // optimistically clear this collection assuming no RPC failure
      ask.clear();
      release.clear();
      change.clear();
      blacklistToAdd.addAll(blacklistAdditions);
      blacklistToRemove.addAll(blacklistRemovals);
      ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove);
      allocateRequest = AllocateRequest.newBuilder().responseId(lastResponseId).progress(progressIndicator).askList(askList).resourceBlacklistRequest(blacklistRequest).releaseList(releaseList).updateRequests(updateList).build();
      // clear blacklistAdditions and blacklistRemovals before
      // unsynchronized part
      blacklistAdditions.clear();
      blacklistRemovals.clear();
    }
    try {
      allocateResponse = rmClient.allocate(allocateRequest);
    } catch (ApplicationMasterNotRegisteredException e) {
      LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
      synchronized (this) {
        // Refill the outgoing collections from the pending/remote bookkeeping
        // so the retried call resends them.
        release.addAll(this.pendingRelease);
        blacklistAdditions.addAll(this.blacklistedNodes);
        for (RemoteRequestsTable remoteRequestsTable : remoteRequests.values()) {
          @SuppressWarnings("unchecked") Iterator<ResourceRequestInfo<T>> reqIter = remoteRequestsTable.iterator();
          while (reqIter.hasNext()) {
            addResourceRequestToAsk(reqIter.next().remoteRequest);
          }
        }
        change.putAll(this.pendingChange);
      }
      // re register with RM
      registerApplicationMaster();
      allocateResponse = allocate(progressIndicator);
      return allocateResponse;
    }
    synchronized (this) {
      // update these on successful RPC
      clusterNodeCount = allocateResponse.getNumClusterNodes();
      lastResponseId = allocateResponse.getResponseId();
      clusterAvailableResources = allocateResponse.getAvailableResources();
      if (!allocateResponse.getNMTokens().isEmpty()) {
        populateNMTokens(allocateResponse.getNMTokens());
      }
      if (allocateResponse.getAMRMToken() != null) {
        updateAMRMToken(allocateResponse.getAMRMToken());
      }
      if (!pendingRelease.isEmpty() && !allocateResponse.getCompletedContainersStatuses().isEmpty()) {
        removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses());
      }
      if (!pendingChange.isEmpty()) {
        List<ContainerStatus> completed = allocateResponse.getCompletedContainersStatuses();
        List<UpdatedContainer> changed = new ArrayList<>();
        changed.addAll(allocateResponse.getUpdatedContainers());
        // drop pending change requests that belong to completed containers
        for (ContainerStatus status : completed) {
          ContainerId containerId = status.getContainerId();
          pendingChange.remove(containerId);
        }
        // remove all pending change requests that have been satisfied
        if (!changed.isEmpty()) {
          removePendingChangeRequests(changed);
        }
      }
    }
  } finally {
    // TODO how to differentiate remote yarn exception vs error in rpc
    if (allocateResponse == null) {
      // preserve ask and release for next call to allocate()
      synchronized (this) {
        // releaseList and askList are still null when the snapshot block above
        // threw before assigning them; guard so that an NPE raised here does
        // not replace the original exception.
        if (releaseList != null) {
          release.addAll(releaseList);
        }
        if (askList != null) {
          // Re-add only the asks that are not present in the ask collection
          // any more; entries already there are left untouched.
          for (ResourceRequest oldAsk : askList) {
            if (!ask.contains(oldAsk)) {
              ask.add(oldAsk);
            }
          }
        }
        // Only restore entries from the saved change map
        // that do not exist in the current change map:
        for (Map.Entry<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> entry : oldChange.entrySet()) {
          ContainerId oldContainerId = entry.getKey();
          Container oldContainer = entry.getValue().getKey();
          UpdateContainerRequest oldupdate = entry.getValue().getValue();
          if (change.get(oldContainerId) == null) {
            change.put(oldContainerId, new SimpleEntry<>(oldContainer, oldupdate));
          }
        }
        blacklistAdditions.addAll(blacklistToAdd);
        blacklistRemovals.addAll(blacklistToRemove);
      }
    }
  }
  return allocateResponse;
}
Aggregations