Use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
The class TestSchedulerUtils, method testValidateResourceBlacklistRequest.
/**
 * Checks that an allocate call whose blacklist additions contain
 * {@link ResourceRequest#ANY} fails with
 * {@link InvalidResourceBlacklistRequestException}.
 *
 * <p>The test starts a mock RM, registers one node, submits an application,
 * waits for the attempt to launch, registers the AM over RPC and then sends
 * the offending allocate request. The RM is stopped in a {@code finally}
 * block so it is shut down even when a step in between throws.
 *
 * @throws Exception if any setup step or the RPC calls fail unexpectedly
 */
@Test
public void testValidateResourceBlacklistRequest() throws Exception {
  MyContainerManager containerManager = new MyContainerManager();
  final MockRMWithAMS rm = new MockRMWithAMS(new YarnConfiguration(), containerManager);
  rm.start();
  boolean error = false;
  try {
    MockNM nm1 = rm.registerNode("localhost:1234", 5120);
    Map<ApplicationAccessType, String> acls = new HashMap<ApplicationAccessType, String>(2);
    acls.put(ApplicationAccessType.VIEW_APP, "*");
    RMApp app = rm.submitApp(1024, "appname", "appuser", acls);
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    ApplicationAttemptId applicationAttemptId = attempt.getAppAttemptId();
    waitForLaunchedState(attempt);
    // Create a client to the RM.
    final Configuration conf = rm.getConfig();
    final YarnRPC rpc = YarnRPC.create(conf);
    UserGroupInformation currentUser = UserGroupInformation.createRemoteUser(applicationAttemptId.toString());
    Credentials credentials = containerManager.getContainerCredentials();
    final InetSocketAddress rmBindAddress = rm.getApplicationMasterService().getBindAddress();
    Token<? extends TokenIdentifier> amRMToken = MockRMWithAMS.setupAndReturnAMRMToken(rmBindAddress, credentials.getAllTokens());
    currentUser.addToken(amRMToken);
    // The proxy is created inside doAs so that it is built as the AM user
    // holding the AMRM token added above.
    ApplicationMasterProtocol client = currentUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {
      @Override
      public ApplicationMasterProtocol run() {
        return (ApplicationMasterProtocol) rpc.getProxy(ApplicationMasterProtocol.class, rmBindAddress, conf);
      }
    });
    RegisterApplicationMasterRequest request = Records.newRecord(RegisterApplicationMasterRequest.class);
    client.registerApplicationMaster(request);
    // Blacklisting the wildcard resource name is the invalid input under test.
    ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.newInstance(Collections.singletonList(ResourceRequest.ANY), null);
    AllocateRequest allocateRequest = AllocateRequest.newInstance(0, 0.0f, null, null, blacklistRequest);
    try {
      client.allocate(allocateRequest);
    } catch (InvalidResourceBlacklistRequestException e) {
      error = true;
    }
  } finally {
    // Always shut the mock RM down, even if a step above failed.
    rm.stop();
  }
  Assert.assertTrue("Did not catch InvalidResourceBlacklistRequestException", error);
}
Use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
The class TestBlacklistManager, method testSimpleBlacklistAboveFailureThreshold.
/**
 * Exercises {@code SimpleBlacklistManager} on both sides of its failure
 * threshold: with one failed node the node is reported as a blacklist
 * addition; after a second failed node both nodes are reported as removals
 * and the additions are empty.
 */
@Test
public void testSimpleBlacklistAboveFailureThreshold() {
  // Create a threshold of 0.5 * 3 i.e at 1.5 node failures.
  final String firstHost = "foo";
  final String secondHost = "bar";
  BlacklistManager blacklistManager = new SimpleBlacklistManager(3, 0.5);

  // One failed node: still below the threshold.
  blacklistManager.addNode(firstHost);
  ResourceBlacklistRequest updates = blacklistManager.getBlacklistUpdates();
  List<String> additions = updates.getBlacklistAdditions();
  Collections.sort(additions);
  List<String> removals = updates.getBlacklistRemovals();
  Assert.assertArrayEquals("Blacklist additions was not as expected", new String[] { firstHost }, additions.toArray());
  Assert.assertTrue("Blacklist removals should be empty but was " + removals, removals.isEmpty());

  // Two failed nodes: the threshold is crossed.
  blacklistManager.addNode(secondHost);
  updates = blacklistManager.getBlacklistUpdates();
  additions = updates.getBlacklistAdditions();
  Collections.sort(additions);
  removals = updates.getBlacklistRemovals();
  // Sorted so the comparison does not depend on the order the manager returns.
  Collections.sort(removals);
  Assert.assertTrue("Blacklist additions should be empty but was " + additions, additions.isEmpty());
  Assert.assertArrayEquals("Blacklist removals was not as expected", new String[] { secondHost, firstHost }, removals.toArray());
}
Use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
The class TestBlacklistManager, method testDisabledBlacklist.
/**
 * Checks that {@code DisabledBlacklistManager} reports neither additions nor
 * removals even after a node has been added to it.
 */
@Test
public void testDisabledBlacklist() {
  final String failedHost = "foo";
  BlacklistManager blacklistManager = new DisabledBlacklistManager();
  blacklistManager.addNode(failedHost);

  ResourceBlacklistRequest updates = blacklistManager.getBlacklistUpdates();
  List<String> additions = updates.getBlacklistAdditions();
  List<String> removals = updates.getBlacklistRemovals();

  Assert.assertTrue("Blacklist additions should be empty but was " + additions, additions.isEmpty());
  Assert.assertTrue("Blacklist removals should be empty but was " + removals, removals.isEmpty());
}
Use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
The class ApplicationMasterService, method allocateInternal.
/**
 * Processes one allocate call from an application attempt and fills in the
 * supplied response.
 *
 * <p>In order, the method: clamps the reported progress into [0, 1] and
 * forwards it to the attempt; applies the application's node-label expression
 * to ANY-requests that carry none; validates the asks, the blacklist request
 * and (when containers are not kept across attempts) the release list; splits
 * container update requests; calls the scheduler unless the attempt is already
 * finishing; and finally copies the allocation, node updates, completed
 * containers, preemption message and priority into {@code allocateResponse}.
 *
 * @param appAttemptId the attempt issuing the allocate call
 * @param request the allocate request; its progress value may be rewritten
 *     and its resource requests may receive a node-label expression
 * @param allocateResponse the response object populated by this method
 * @throws YarnException if the resource asks, the blacklist request or the
 *     container release request fail validation (the validation exception is
 *     logged and rethrown)
 */
protected void allocateInternal(ApplicationAttemptId appAttemptId, AllocateRequest request, AllocateResponse allocateResponse) throws YarnException {
  // Filter illegal progress values: NaN and negatives become 0, values above
  // 1 become 1.
  float filteredProgress = request.getProgress();
  if (Float.isNaN(filteredProgress) || filteredProgress == Float.NEGATIVE_INFINITY || filteredProgress < 0) {
    request.setProgress(0);
  } else if (filteredProgress > 1 || filteredProgress == Float.POSITIVE_INFINITY) {
    request.setProgress(1);
  }
  // Send the status update to the appAttempt.
  this.rmContext.getDispatcher().getEventHandler().handle(new RMAppAttemptStatusupdateEvent(appAttemptId, request.getProgress()));
  List<ResourceRequest> ask = request.getAskList();
  List<ContainerId> release = request.getReleaseList();
  ResourceBlacklistRequest blacklistRequest = request.getResourceBlacklistRequest();
  // Use the typed empty list instead of the raw Collections.EMPTY_LIST so the
  // assignments are checked by the compiler.
  List<String> blacklistAdditions = (blacklistRequest != null) ? blacklistRequest.getBlacklistAdditions() : Collections.<String>emptyList();
  List<String> blacklistRemovals = (blacklistRequest != null) ? blacklistRequest.getBlacklistRemovals() : Collections.<String>emptyList();
  RMApp app = this.rmContext.getRMApps().get(appAttemptId.getApplicationId());
  // set label expression for Resource Requests if resourceName=ANY
  ApplicationSubmissionContext asc = app.getApplicationSubmissionContext();
  for (ResourceRequest req : ask) {
    if (null == req.getNodeLabelExpression() && ResourceRequest.ANY.equals(req.getResourceName())) {
      req.setNodeLabelExpression(asc.getNodeLabelExpression());
    }
  }
  Resource maximumCapacity = rScheduler.getMaximumResourceCapability();
  // sanity check
  try {
    RMServerUtils.normalizeAndValidateRequests(ask, maximumCapacity, app.getQueue(), rScheduler, rmContext);
  } catch (InvalidResourceRequestException e) {
    LOG.warn("Invalid resource ask by application " + appAttemptId, e);
    throw e;
  }
  try {
    RMServerUtils.validateBlacklistRequest(blacklistRequest);
  } catch (InvalidResourceBlacklistRequestException e) {
    LOG.warn("Invalid blacklist request by application " + appAttemptId, e);
    throw e;
  }
  // The release list is validated against this attempt only when containers
  // are not kept across application attempts; otherwise the AM may release
  // containers from the earlier attempt.
  if (!app.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
    try {
      RMServerUtils.validateContainerReleaseRequest(release, appAttemptId);
    } catch (InvalidContainerReleaseException e) {
      LOG.warn("Invalid container release by application " + appAttemptId, e);
      throw e;
    }
  }
  // Split Update Resource Requests into increase and decrease.
  // No Exceptions are thrown here. All update errors are aggregated
  // and returned to the AM.
  List<UpdateContainerError> updateErrors = new ArrayList<>();
  ContainerUpdates containerUpdateRequests = RMServerUtils.validateAndSplitUpdateResourceRequests(rmContext, request, maximumCapacity, updateErrors);
  // Send new requests to appAttempt.
  Allocation allocation;
  RMAppAttemptState state = app.getRMAppAttempt(appAttemptId).getAppAttemptState();
  if (state.equals(RMAppAttemptState.FINAL_SAVING) || state.equals(RMAppAttemptState.FINISHING) || app.isAppFinalStateStored()) {
    // The attempt is on its way out: skip the scheduler and answer with an
    // empty allocation.
    LOG.warn(appAttemptId + " is in " + state + " state, ignore container allocate request.");
    allocation = EMPTY_ALLOCATION;
  } else {
    allocation = this.rScheduler.allocate(appAttemptId, ask, release, blacklistAdditions, blacklistRemovals, containerUpdateRequests);
  }
  if (!blacklistAdditions.isEmpty() || !blacklistRemovals.isEmpty()) {
    LOG.info("blacklist are updated in Scheduler." + "blacklistAdditions: " + blacklistAdditions + ", " + "blacklistRemovals: " + blacklistRemovals);
  }
  RMAppAttempt appAttempt = app.getRMAppAttempt(appAttemptId);
  if (allocation.getNMTokens() != null && !allocation.getNMTokens().isEmpty()) {
    allocateResponse.setNMTokens(allocation.getNMTokens());
  }
  // Notify the AM of container update errors
  addToUpdateContainerErrors(allocateResponse, updateErrors);
  // update the response with the deltas of node status changes
  List<RMNode> updatedNodes = new ArrayList<RMNode>();
  if (app.pullRMNodeUpdates(updatedNodes) > 0) {
    List<NodeReport> updatedNodeReports = new ArrayList<NodeReport>();
    for (RMNode rmNode : updatedNodes) {
      SchedulerNodeReport schedulerNodeReport = rScheduler.getNodeReport(rmNode.getNodeID());
      // Defaults used when the scheduler has no report for this node.
      Resource used = BuilderUtils.newResource(0, 0);
      int numContainers = 0;
      if (schedulerNodeReport != null) {
        used = schedulerNodeReport.getUsedResource();
        numContainers = schedulerNodeReport.getNumContainers();
      }
      NodeId nodeId = rmNode.getNodeID();
      NodeReport report = BuilderUtils.newNodeReport(nodeId, rmNode.getState(), rmNode.getHttpAddress(), rmNode.getRackName(), used, rmNode.getTotalCapability(), numContainers, rmNode.getHealthReport(), rmNode.getLastHealthReportTime(), rmNode.getNodeLabels());
      updatedNodeReports.add(report);
    }
    allocateResponse.setUpdatedNodes(updatedNodeReports);
  }
  addToAllocatedContainers(allocateResponse, allocation.getContainers());
  allocateResponse.setCompletedContainersStatuses(appAttempt.pullJustFinishedContainers());
  allocateResponse.setAvailableResources(allocation.getResourceLimit());
  addToContainerUpdates(appAttemptId, allocateResponse, allocation);
  allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes());
  // add collector address for this application
  if (YarnConfiguration.timelineServiceV2Enabled(getConfig())) {
    allocateResponse.setCollectorAddr(this.rmContext.getRMApps().get(appAttemptId.getApplicationId()).getCollectorAddr());
  }
  // add preemption to the allocateResponse message (if any)
  allocateResponse.setPreemptionMessage(generatePreemptionMessage(allocation));
  // Set application priority
  allocateResponse.setApplicationPriority(app.getApplicationPriority());
}
Use of org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest in project hadoop by apache.
The class SimpleBlacklistManager, method getBlacklistUpdates.
/**
 * Builds the blacklist update from a snapshot of the currently blacklisted
 * nodes.
 *
 * <p>While the snapshot size is below
 * {@code blacklistDisableFailureThreshold * numberOfNodeManagerHosts} the
 * nodes are returned as blacklist additions. Otherwise blacklisting is
 * ignored and the same nodes are returned as blacklist removals.
 *
 * @return a request carrying the snapshot either as additions or as removals
 */
@Override
public ResourceBlacklistRequest getBlacklistUpdates() {
  final List<String> snapshot = new ArrayList<>(blacklistNodes);
  final int snapshotSize = snapshot.size();
  final double threshold = this.blacklistDisableFailureThreshold * numberOfNodeManagerHosts;

  if (snapshotSize < threshold) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("blacklist size " + snapshotSize + " is less than " + "failure threshold ratio " + blacklistDisableFailureThreshold + " out of total usable nodes " + numberOfNodeManagerHosts);
    }
    return ResourceBlacklistRequest.newInstance(snapshot, EMPTY_LIST);
  }

  LOG.warn("Ignoring Blacklists, blacklist size " + snapshotSize + " is more than failure threshold ratio " + blacklistDisableFailureThreshold + " out of total usable nodes " + numberOfNodeManagerHosts);
  // TODO: After the threshold hits, we will keep sending a long list
  // every time a new AM is to be scheduled.
  return ResourceBlacklistRequest.newInstance(EMPTY_LIST, snapshot);
}
Aggregations