Use of org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException in the Apache Hadoop project.
Class RMContainerAllocator, method getResources.
/**
 * Sends one allocate request to the ResourceManager and processes the response.
 *
 * <p>On success this records the NM tokens and any refreshed AMRM token, forwards a preemption
 * message to the preemption policy, flags the reduce schedule for recalculation when containers
 * or headroom changed, handles node updates and job-priority changes, passes the timeline
 * collector address to the timeline v2 client, and processes every completed container status.
 *
 * @return the containers allocated in this response, or {@code null} when the AM had to
 *         re-register with the RM and resend its outstanding requests
 * @throws RMContainerAllocationException if the RM does not recognize this application attempt,
 *         or if the RM could not be contacted for at least {@code retryInterval} milliseconds
 * @throws InvalidLabelResourceRequestException if the RM rejected the request because of its
 *         node-label expression; a JOB_KILL event is raised before rethrowing
 * @throws Exception any other failure of the remote request while still inside the retry window
 */
@SuppressWarnings("unchecked")
private List<Container> getResources() throws Exception {
  applyConcurrentTaskLimits();

  // NOTE(review): the original comment states the available resources "will be null the first
  // time" -- confirm that Resources.clone tolerates that.
  Resource headRoom = Resources.clone(getAvailableResources());
  AllocateResponse response;
  /*
   * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
   * milliseconds before aborting. During this interval, AM will still try
   * to contact the RM.
   */
  try {
    response = makeRemoteRequest();
    // Successful contact: restart the retry window from now.
    retrystartTime = System.currentTimeMillis();
  } catch (ApplicationAttemptNotFoundException e) {
    // This can happen if the RM has been restarted. If it is in that state,
    // this application must clean itself up.
    eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
    throw new RMContainerAllocationException(
        "Resource Manager doesn't recognize AttemptId: "
            + this.getContext().getApplicationAttemptId(), e);
  } catch (ApplicationMasterNotRegisteredException e) {
    LOG.info("ApplicationMaster is out of sync with ResourceManager,"
        + " hence resync and send outstanding requests.");
    // RM may have restarted, re-register with RM.
    lastResponseID = 0;
    register();
    addOutstandingRequestOnResync();
    return null;
  } catch (InvalidLabelResourceRequestException e) {
    // An invalid-label failure means the requested label is not accessible,
    // so the job is killed instead of being retried.
    String diagMsg = "Requested node-label-expression is invalid: "
        + StringUtils.stringifyException(e);
    LOG.info(diagMsg);
    JobId jobId = this.getJob().getID();
    eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
    eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    throw e;
  } catch (Exception e) {
    // Keep retrying until the retry interval has expired.
    if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
      String timeoutMsg = "Could not contact RM after " + retryInterval + " milliseconds.";
      LOG.error(timeoutMsg, e);
      eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
      // Chain the triggering failure so the root cause is not lost to callers.
      throw new RMContainerAllocationException(timeoutMsg, e);
    }
    // continue to attempt to contact the RM.
    throw e;
  }

  Resource newHeadRoom = getAvailableResources();
  List<Container> newContainers = response.getAllocatedContainers();

  // Setting NMTokens
  if (response.getNMTokens() != null) {
    for (NMToken nmToken : response.getNMTokens()) {
      NMTokenCache.setNMToken(nmToken.getNodeId().toString(), nmToken.getToken());
    }
  }

  // Setting AMRMToken
  if (response.getAMRMToken() != null) {
    updateAMRMToken(response.getAMRMToken());
  }

  List<ContainerStatus> finishedContainers = response.getCompletedContainersStatuses();

  // propagate preemption requests
  final PreemptionMessage preemptReq = response.getPreemptionMessage();
  if (preemptReq != null) {
    preemptionPolicy.preempt(new PreemptionContext(assignedRequests), preemptReq);
  }

  if (newContainers.size() + finishedContainers.size() > 0 || !headRoom.equals(newHeadRoom)) {
    // something changed
    recalculateReduceSchedule = true;
    if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) {
      LOG.debug("headroom=" + newHeadRoom);
    }
  }

  if (LOG.isDebugEnabled()) {
    for (Container cont : newContainers) {
      LOG.debug("Received new Container :" + cont);
    }
  }

  // Called on each allocation. Will know about newly blacklisted/added hosts.
  computeIgnoreBlacklisting();

  handleUpdatedNodes(response);
  handleJobPriorityChange(response);

  // handle receiving the timeline collector address for this app
  String collectorAddr = response.getCollectorAddr();
  MRAppMaster.RunningAppContext appContext =
      (MRAppMaster.RunningAppContext) this.getContext();
  if (collectorAddr != null && !collectorAddr.isEmpty()
      && appContext.getTimelineV2Client() != null) {
    // Reuse the address validated above rather than reading it from the response again.
    appContext.getTimelineV2Client().setTimelineServiceAddress(collectorAddr);
  }

  for (ContainerStatus cont : finishedContainers) {
    processFinishedContainer(cont);
  }
  return newContainers;
}
Use of org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException in the Apache Hadoop project.
Class RMAppManager, method createAndPopulateNewRMApp.
/**
 * Builds the {@link RMAppImpl} for a submission and registers it with the RM context.
 *
 * <p>For a non-recovery submission the queue placement manager (when one is configured) is
 * consulted and the application timeouts are validated first. The AM resource request is then
 * validated, the effective priority is obtained from the scheduler and written back to the
 * submission context, and -- for non-recovery submissions with ACLs enabled on a
 * CapacityScheduler -- the user's permission on the target queue is checked.
 *
 * @param submissionContext the submission context; its priority is overwritten with the value
 *        returned by the scheduler
 * @param submitTime submit time handed to the new application
 * @param user name of the submitting user
 * @param isRecovery whether the application is being recovered rather than newly submitted
 * @param startTime start time handed to the new application
 * @return the newly created and registered application
 * @throws YarnException if validation fails, the user lacks permission on the queue, or an
 *         application with the same id is already registered
 */
private RMAppImpl createAndPopulateNewRMApp(ApplicationSubmissionContext submissionContext, long submitTime, String user, boolean isRecovery, long startTime) throws YarnException {
  final boolean freshSubmission = !isRecovery;
  if (freshSubmission) {
    // Queue mapping is applied only to new applications.
    if (rmContext.getQueuePlacementManager() != null) {
      rmContext.getQueuePlacementManager().placeApplication(submissionContext, user);
    }
    // Reject the submission when a configured application timeout value is invalid.
    RMServerUtils.validateApplicationTimeouts(submissionContext.getApplicationTimeouts());
  }

  ApplicationId applicationId = submissionContext.getApplicationId();

  ResourceRequest amReq = null;
  try {
    amReq = validateAndCreateResourceRequest(submissionContext, isRecovery);
  } catch (InvalidLabelResourceRequestException labelFailure) {
    // The failure is tolerated only for a recovering application while node labels are
    // disabled in the configuration; amReq then stays null. Anything else is propagated.
    final boolean tolerated = isRecovery && !YarnConfiguration.areNodeLabelsEnabled(this.conf);
    if (!tolerated) {
      throw labelFailure;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("AMResourceRequest is not created for " + applicationId + ". NodeLabel is not enabled in cluster, but AM resource " + "request contains a label expression.");
    }
  }

  // Ask the scheduler for the effective priority and store it back in the submission context.
  UserGroupInformation submitterUgi = UserGroupInformation.createRemoteUser(user);
  Priority effectivePriority = scheduler.checkAndGetApplicationPriority(
      submissionContext.getPriority(), submitterUgi, submissionContext.getQueue(), applicationId);
  submissionContext.setPriority(effectivePriority);

  // For now, exclude FS for the acl check.
  if (!isRecovery && YarnConfiguration.isAclEnabled(conf) && scheduler instanceof CapacityScheduler) {
    String targetQueue = submissionContext.getQueue();
    String applicationName = submissionContext.getApplicationName();
    CSQueue capacityQueue = ((CapacityScheduler) scheduler).getQueue(targetQueue);
    if (capacityQueue != null) {
      // The ADMINISTER_QUEUE check runs only after SUBMIT_APPLICATIONS has been denied.
      boolean permitted = authorizer.checkPermission(
          new AccessRequest(capacityQueue.getPrivilegedEntity(), submitterUgi,
              SchedulerUtils.toAccessType(QueueACL.SUBMIT_APPLICATIONS),
              applicationId.toString(), applicationName, Server.getRemoteAddress(), null));
      if (!permitted) {
        permitted = authorizer.checkPermission(
            new AccessRequest(capacityQueue.getPrivilegedEntity(), submitterUgi,
                SchedulerUtils.toAccessType(QueueACL.ADMINISTER_QUEUE),
                applicationId.toString(), applicationName, Server.getRemoteAddress(), null));
      }
      if (!permitted) {
        throw RPCUtil.getRemoteException(new AccessControlException("User " + user + " does not have permission to submit " + applicationId + " to queue " + submissionContext.getQueue()));
      }
    }
  }

  // Create RMApp
  RMAppImpl application = new RMAppImpl(
      applicationId,
      rmContext,
      this.conf,
      submissionContext.getApplicationName(),
      user,
      submissionContext.getQueue(),
      submissionContext,
      this.scheduler,
      this.masterService,
      submitTime,
      submissionContext.getApplicationType(),
      submissionContext.getApplicationTags(),
      amReq,
      startTime);

  // Register the application; a non-null result means this id is already present.
  final boolean alreadyRegistered =
      rmContext.getRMApps().putIfAbsent(applicationId, application) != null;
  if (alreadyRegistered) {
    String message = "Application with id " + applicationId + " is already present! Cannot add a duplicate!";
    LOG.warn(message);
    throw new YarnException(message);
  }

  if (YarnConfiguration.timelineServiceV2Enabled(conf)) {
    // Start timeline collector for the submitted app
    application.startTimelineCollector();
  }

  // Inform the ACLs Manager
  this.applicationACLsManager.addApplication(
      applicationId, submissionContext.getAMContainerSpec().getApplicationACLs());
  String appViewACLs = submissionContext.getAMContainerSpec().getApplicationACLs()
      .get(ApplicationAccessType.VIEW_APP);
  rmContext.getSystemMetricsPublisher().appACLsUpdated(
      application, appViewACLs, System.currentTimeMillis());
  return application;
}
Use of org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException in the Apache Hadoop project.
Class TestSchedulerUtils, method testValidateResourceRequestWithErrorLabelsPermission.
/**
 * Exercises SchedulerUtils.normalizeAndvalidateRequest with different combinations of queue
 * accessible node labels, cluster node labels, resource names and label expressions, checking
 * for each combination whether the request is accepted or rejected.
 *
 * The scenarios run in sequence against the same mocked queue info and the same rmContext, so
 * each one re-initializes the accessible-label set and removes the cluster labels it added.
 */
@Test(timeout = 30000)
public void testValidateResourceRequestWithErrorLabelsPermission() throws IOException {
// mock queue and scheduler
YarnScheduler scheduler = mock(YarnScheduler.class);
Set<String> queueAccessibleNodeLabels = Sets.newHashSet();
QueueInfo queueInfo = mock(QueueInfo.class);
when(queueInfo.getQueueName()).thenReturn("queue");
when(queueInfo.getAccessibleNodeLabels()).thenReturn(queueAccessibleNodeLabels);
when(scheduler.getQueueInfo(any(String.class), anyBoolean(), anyBoolean())).thenReturn(queueInfo);
Resource maxResource = Resources.createResource(YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES);
// queue has labels, success cases
try {
// set queue accessible node labels to [x, y]
queueAccessibleNodeLabels.clear();
queueAccessibleNodeLabels.addAll(Arrays.asList("x", "y"));
rmContext.getNodeLabelManager().addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y")));
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1);
resReq.setNodeLabelExpression("x");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
resReq.setNodeLabelExpression("y");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
resReq.setNodeLabelExpression("");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
resReq.setNodeLabelExpression(" ");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
} catch (InvalidResourceRequestException e) {
e.printStackTrace();
fail("Should be valid when request labels is a subset of queue labels");
} finally {
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(Arrays.asList("x", "y"));
}
// queue has labels [x, y], but they are not added to the cluster node labels in this
// scenario; requesting "x" should fail
try {
// set queue accessible node labels to [x, y]
queueAccessibleNodeLabels.clear();
queueAccessibleNodeLabels.addAll(Arrays.asList("x", "y"));
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1);
resReq.setNodeLabelExpression("x");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
fail("Should fail");
} catch (InvalidResourceRequestException e) {
// expected: the request must be rejected
}
// queue has labels, failed cases (when ask a label not included by queue)
try {
// set queue accessible node labels to [x, y]
queueAccessibleNodeLabels.clear();
queueAccessibleNodeLabels.addAll(Arrays.asList("x", "y"));
rmContext.getNodeLabelManager().addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y")));
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1);
resReq.setNodeLabelExpression("z");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
fail("Should fail");
} catch (InvalidResourceRequestException e) {
// expected: the request must be rejected
} finally {
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(Arrays.asList("x", "y"));
}
// an expression combining two labels ("x && y") should fail
// NOTE(review): presumably because only one label per expression is supported -- confirm
try {
// set queue accessible node labels to [x, y]
queueAccessibleNodeLabels.clear();
queueAccessibleNodeLabels.addAll(Arrays.asList("x", "y"));
rmContext.getNodeLabelManager().addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y")));
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1);
resReq.setNodeLabelExpression("x && y");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
fail("Should fail");
} catch (InvalidResourceRequestException e) {
// expected: the request must be rejected
} finally {
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(Arrays.asList("x", "y"));
}
// queue doesn't have label, succeed (when request no label)
queueAccessibleNodeLabels.clear();
try {
// set queue accessible node labels to empty
queueAccessibleNodeLabels.clear();
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1);
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
resReq.setNodeLabelExpression("");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
resReq.setNodeLabelExpression(" ");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
} catch (InvalidResourceRequestException e) {
e.printStackTrace();
fail("Should be valid when request labels is empty");
}
// set to true only when the more specific InvalidLabelResourceRequestException is caught below
boolean invalidlabelexception = false;
// queue doesn't have label, failed (when request any label)
try {
// set queue accessible node labels to empty
queueAccessibleNodeLabels.clear();
rmContext.getNodeLabelManager().addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x")));
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1);
resReq.setNodeLabelExpression("x");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
fail("Should fail");
} catch (InvalidLabelResourceRequestException e) {
invalidlabelexception = true;
} catch (InvalidResourceRequestException e) {
// any other rejection leaves the flag false, so the assertion below fails
} finally {
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(Arrays.asList("x"));
}
Assert.assertTrue("InvalidLabelResourceRequestException excpeted", invalidlabelexception);
// queue is "*", always succeeded
try {
// set queue accessible node labels to RMNodeLabelsManager.ANY
queueAccessibleNodeLabels.clear();
queueAccessibleNodeLabels.add(RMNodeLabelsManager.ANY);
rmContext.getNodeLabelManager().addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y"), NodeLabel.newInstance("z")));
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1);
resReq.setNodeLabelExpression("x");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
resReq.setNodeLabelExpression("y");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
resReq.setNodeLabelExpression("z");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
} catch (InvalidResourceRequestException e) {
e.printStackTrace();
fail("Should be valid when queue can access any labels");
} finally {
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(Arrays.asList("x", "y", "z"));
}
// same as above, but the cluster node labels don't contain the label, should fail
try {
// set queue accessible node labels to RMNodeLabelsManager.ANY
queueAccessibleNodeLabels.clear();
queueAccessibleNodeLabels.add(RMNodeLabelsManager.ANY);
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1);
resReq.setNodeLabelExpression("x");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
fail("Should fail");
} catch (InvalidResourceRequestException e) {
// expected: the request must be rejected
}
// a label expression on a request whose resource name is not ANY ("rack") should fail
try {
// set queue accessible node labels to [x, y]
queueAccessibleNodeLabels.clear();
queueAccessibleNodeLabels.addAll(Arrays.asList("x", "y"));
rmContext.getNodeLabelManager().addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y")));
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), "rack", resource, 1);
resReq.setNodeLabelExpression("x");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
fail("Should fail");
} catch (InvalidResourceRequestException e) {
// expected: the request must be rejected
} finally {
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(Arrays.asList("x", "y"));
}
// queue has accessible label = *; the "rack" request with a label should still fail
try {
// set queue accessible node labels to *
queueAccessibleNodeLabels.clear();
queueAccessibleNodeLabels.addAll(Arrays.asList(CommonNodeLabelsManager.ANY));
rmContext.getNodeLabelManager().addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x")));
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), "rack", resource, 1);
resReq.setNodeLabelExpression("x");
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext);
fail("Should fail");
} catch (InvalidResourceRequestException e) {
// expected: the request must be rejected
} finally {
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(Arrays.asList("x"));
}
// label "x" was removed from the cluster node labels above; requesting it should fail with
// the "cluster do not contain" message
try {
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq1 = BuilderUtils.newResourceRequest(mock(Priority.class), "*", resource, 1, "x");
SchedulerUtils.normalizeAndvalidateRequest(resReq1, maxResource, "queue", scheduler, rmContext);
fail("Should fail");
} catch (InvalidResourceRequestException e) {
assertEquals("Invalid label resource request, cluster do not contain , " + "label= x", e.getMessage());
}
// with node labels disabled in the configuration, either the expression ends up as NO_LABEL
// or the request is rejected with the "node label not enabled" message
// NOTE(review): NODE_LABELS_ENABLED is not restored on rmContext's configuration afterwards
try {
rmContext.getYarnConfiguration().set(YarnConfiguration.NODE_LABELS_ENABLED, "false");
Resource resource = Resources.createResource(0, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
ResourceRequest resReq1 = BuilderUtils.newResourceRequest(mock(Priority.class), "*", resource, 1, "x");
SchedulerUtils.normalizeAndvalidateRequest(resReq1, maxResource, "queue", scheduler, rmContext);
Assert.assertEquals(RMNodeLabelsManager.NO_LABEL, resReq1.getNodeLabelExpression());
} catch (InvalidResourceRequestException e) {
assertEquals("Invalid resource request, node label not enabled but " + "request contains label expression", e.getMessage());
}
}
Use of org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException in the Apache Hadoop project.
Class SchedulerUtils, method normalizeAndValidateRequest.
/**
 * Normalizes the node-label expression of a resource request and, unless the application is
 * being recovered, validates the request.
 *
 * @param resReq the request to normalize and validate
 * @param maximumResource maximum resource passed on to validation
 * @param queueName name of the queue, used to look up the queue info when none is supplied
 * @param scheduler scheduler queried for the queue info when {@code queueInfo} is null
 * @param isRecovery when true, the validation step is skipped
 * @param rmContext RM context supplying the YARN configuration
 * @param queueInfo queue info to use, or {@code null} to look it up from the scheduler
 * @throws InvalidResourceRequestException if node labels are disabled in the configuration but
 *         the request carries a label expression, or if validation rejects the request
 */
public static void normalizeAndValidateRequest(ResourceRequest resReq, Resource maximumResource, String queueName, YarnScheduler scheduler, boolean isRecovery, RMContext rmContext, QueueInfo queueInfo) throws InvalidResourceRequestException {
  final Configuration yarnConf = rmContext.getYarnConfiguration();

  // With node labels disabled, only a null or NO_LABEL expression is acceptable.
  final boolean labelsDisabled =
      yarnConf != null && !YarnConfiguration.areNodeLabelsEnabled(yarnConf);
  if (labelsDisabled) {
    final String expression = resReq.getNodeLabelExpression();
    final boolean carriesLabel =
        expression != null && !RMNodeLabelsManager.NO_LABEL.equals(expression);
    if (carriesLabel) {
      throw new InvalidLabelResourceRequestException("Invalid resource request, node label not enabled " + "but request contains label expression");
    }
  }

  QueueInfo resolvedQueueInfo = queueInfo;
  if (resolvedQueueInfo == null) {
    try {
      resolvedQueueInfo = scheduler.getQueueInfo(queueName, false, false);
    } catch (IOException ignored) {
      // The queue may not be obtainable when queue mapping is set; carry on with a null
      // queue info in that case.
    }
  }

  SchedulerUtils.normalizeNodeLabelExpressionInRequest(resReq, resolvedQueueInfo);

  if (isRecovery) {
    return;
  }
  validateResourceRequest(resReq, maximumResource, resolvedQueueInfo, rmContext);
}
Aggregations