use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
the class RMAppManager method createAndPopulateNewRMApp.
private RMAppImpl createAndPopulateNewRMApp(ApplicationSubmissionContext submissionContext, long submitTime, String user, boolean isRecovery, long startTime) throws YarnException {
if (!isRecovery) {
// Do queue mapping
if (rmContext.getQueuePlacementManager() != null) {
// We only do queue mapping when it's a new application
rmContext.getQueuePlacementManager().placeApplication(submissionContext, user);
}
// fail the submission if configured application timeout value is invalid
RMServerUtils.validateApplicationTimeouts(submissionContext.getApplicationTimeouts());
}
ApplicationId applicationId = submissionContext.getApplicationId();
ResourceRequest amReq = null;
try {
amReq = validateAndCreateResourceRequest(submissionContext, isRecovery);
} catch (InvalidLabelResourceRequestException e) {
// If node labels have been disabled since the app was submitted, skip the AM resource request
// so the application can still be recovered and the user can see what's going on and react accordingly.
if (isRecovery && !YarnConfiguration.areNodeLabelsEnabled(this.conf)) {
if (LOG.isDebugEnabled()) {
LOG.debug("AMResourceRequest is not created for " + applicationId + ". NodeLabel is not enabled in cluster, but AM resource " + "request contains a label expression.");
}
} else {
throw e;
}
}
// Verify and get the updated application priority, then set it back into the submissionContext.
UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(user);
Priority appPriority = scheduler.checkAndGetApplicationPriority(submissionContext.getPriority(), userUgi, submissionContext.getQueue(), applicationId);
submissionContext.setPriority(appPriority);
// For now, exclude the FairScheduler from the ACL check.
if (!isRecovery && YarnConfiguration.isAclEnabled(conf) && scheduler instanceof CapacityScheduler) {
String queueName = submissionContext.getQueue();
String appName = submissionContext.getApplicationName();
CSQueue csqueue = ((CapacityScheduler) scheduler).getQueue(queueName);
if (null != csqueue && !authorizer.checkPermission(new AccessRequest(csqueue.getPrivilegedEntity(), userUgi, SchedulerUtils.toAccessType(QueueACL.SUBMIT_APPLICATIONS), applicationId.toString(), appName, Server.getRemoteAddress(), null)) && !authorizer.checkPermission(new AccessRequest(csqueue.getPrivilegedEntity(), userUgi, SchedulerUtils.toAccessType(QueueACL.ADMINISTER_QUEUE), applicationId.toString(), appName, Server.getRemoteAddress(), null))) {
throw RPCUtil.getRemoteException(new AccessControlException("User " + user + " does not have permission to submit " + applicationId + " to queue " + submissionContext.getQueue()));
}
}
// Create RMApp
RMAppImpl application = new RMAppImpl(applicationId, rmContext, this.conf, submissionContext.getApplicationName(), user, submissionContext.getQueue(), submissionContext, this.scheduler, this.masterService, submitTime, submissionContext.getApplicationType(), submissionContext.getApplicationTags(), amReq, startTime);
// Concurrent submissions with the same applicationId fail here; submissions with different applicationIds do not influence each other.
if (rmContext.getRMApps().putIfAbsent(applicationId, application) != null) {
String message = "Application with id " + applicationId + " is already present! Cannot add a duplicate!";
LOG.warn(message);
throw new YarnException(message);
}
if (YarnConfiguration.timelineServiceV2Enabled(conf)) {
// Start timeline collector for the submitted app
application.startTimelineCollector();
}
// Inform the ACLs Manager
this.applicationACLsManager.addApplication(applicationId, submissionContext.getAMContainerSpec().getApplicationACLs());
String appViewACLs = submissionContext.getAMContainerSpec().getApplicationACLs().get(ApplicationAccessType.VIEW_APP);
rmContext.getSystemMetricsPublisher().appACLsUpdated(application, appViewACLs, System.currentTimeMillis());
return application;
}
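The queue placement, priority check, and ACL verification above all operate on the ApplicationSubmissionContext a client builds before submission. Below is a minimal client-side sketch of a submission that would flow through this method; the application name, queue, resource sizes, and launch command are placeholder assumptions, not values from the source.

import java.util.Collections;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class SubmitToQueueExample {
  public static void main(String[] args) throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    try {
      YarnClientApplication app = yarnClient.createApplication();
      ApplicationSubmissionContext ctx = app.getApplicationSubmissionContext();
      ctx.setApplicationName("demo-app");             // hypothetical name
      ctx.setQueue("default");                        // queue checked against the SUBMIT_APPLICATIONS ACL
      ctx.setPriority(Priority.newInstance(0));       // validated by checkAndGetApplicationPriority
      ctx.setResource(Resource.newInstance(1024, 1)); // AM resource, validated server-side
      // Launch command is a placeholder; a real AM ships its own classpath and command.
      ContainerLaunchContext amSpec = ContainerLaunchContext.newInstance(
          Collections.emptyMap(), Collections.emptyMap(),
          Collections.singletonList("sleep 60"), null, null, null);
      ctx.setAMContainerSpec(amSpec);
      // On the ResourceManager side this triggers RMAppManager.createAndPopulateNewRMApp.
      yarnClient.submitApplication(ctx);
    } finally {
      yarnClient.stop();
    }
  }
}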
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
the class RMWebServices method getSchedulerInfo.
@GET
@Path("/scheduler")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public SchedulerTypeInfo getSchedulerInfo() {
init();
ResourceScheduler rs = rm.getResourceScheduler();
SchedulerInfo sinfo;
if (rs instanceof CapacityScheduler) {
CapacityScheduler cs = (CapacityScheduler) rs;
CSQueue root = cs.getRootQueue();
sinfo = new CapacitySchedulerInfo(root, cs);
} else if (rs instanceof FairScheduler) {
FairScheduler fs = (FairScheduler) rs;
sinfo = new FairSchedulerInfo(fs);
} else if (rs instanceof FifoScheduler) {
sinfo = new FifoSchedulerInfo(this.rm);
} else {
throw new NotFoundException("Unknown scheduler configured");
}
return new SchedulerTypeInfo(sinfo);
}
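RMWebServices is served under the ResourceManager web app at /ws/v1/cluster, so the method above backs GET /ws/v1/cluster/scheduler. A minimal sketch of fetching it over HTTP follows; the host and port are assumptions (8088 is the common default RM web port).

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class SchedulerInfoClient {
  public static void main(String[] args) throws Exception {
    // Host and port are assumptions; adjust to your ResourceManager address.
    URL url = new URL("http://rm-host:8088/ws/v1/cluster/scheduler");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("GET");
    conn.setRequestProperty("Accept", "application/json"); // XML is also produced
    try (BufferedReader in = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
      StringBuilder body = new StringBuilder();
      String line;
      while ((line = in.readLine()) != null) {
        body.append(line);
      }
      // For a CapacityScheduler the payload is built from CapacitySchedulerInfo
      // and describes the root queue hierarchy.
      System.out.println(body);
    } finally {
      conn.disconnect();
    }
  }
}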
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
the class RMWebServices method getApps.
@GET
@Path("/apps")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public AppsInfo getApps(@Context HttpServletRequest hsr, @QueryParam("state") String stateQuery, @QueryParam("states") Set<String> statesQuery, @QueryParam("finalStatus") String finalStatusQuery, @QueryParam("user") String userQuery, @QueryParam("queue") String queueQuery, @QueryParam("limit") String count, @QueryParam("startedTimeBegin") String startedBegin, @QueryParam("startedTimeEnd") String startedEnd, @QueryParam("finishedTimeBegin") String finishBegin, @QueryParam("finishedTimeEnd") String finishEnd, @QueryParam("applicationTypes") Set<String> applicationTypes, @QueryParam("applicationTags") Set<String> applicationTags) {
boolean checkCount = false;
boolean checkStart = false;
boolean checkEnd = false;
boolean checkAppTypes = false;
boolean checkAppStates = false;
boolean checkAppTags = false;
long countNum = 0;
// Default values used when the begin/end parameters are not specified
long sBegin = 0;
long sEnd = Long.MAX_VALUE;
long fBegin = 0;
long fEnd = Long.MAX_VALUE;
init();
if (count != null && !count.isEmpty()) {
checkCount = true;
countNum = Long.parseLong(count);
if (countNum <= 0) {
throw new BadRequestException("limit value must be greater then 0");
}
}
if (startedBegin != null && !startedBegin.isEmpty()) {
checkStart = true;
sBegin = Long.parseLong(startedBegin);
if (sBegin < 0) {
throw new BadRequestException("startedTimeBegin must be greater than 0");
}
}
if (startedEnd != null && !startedEnd.isEmpty()) {
checkStart = true;
sEnd = Long.parseLong(startedEnd);
if (sEnd < 0) {
throw new BadRequestException("startedTimeEnd must be greater than 0");
}
}
if (sBegin > sEnd) {
throw new BadRequestException("startedTimeEnd must be greater than startTimeBegin");
}
if (finishBegin != null && !finishBegin.isEmpty()) {
checkEnd = true;
fBegin = Long.parseLong(finishBegin);
if (fBegin < 0) {
throw new BadRequestException("finishTimeBegin must be greater than 0");
}
}
if (finishEnd != null && !finishEnd.isEmpty()) {
checkEnd = true;
fEnd = Long.parseLong(finishEnd);
if (fEnd < 0) {
throw new BadRequestException("finishTimeEnd must be greater than 0");
}
}
if (fBegin > fEnd) {
throw new BadRequestException("finishTimeEnd must be greater than finishTimeBegin");
}
Set<String> appTypes = parseQueries(applicationTypes, false);
if (!appTypes.isEmpty()) {
checkAppTypes = true;
}
Set<String> appTags = parseQueries(applicationTags, false);
if (!appTags.isEmpty()) {
checkAppTags = true;
}
// stateQuery is deprecated.
if (stateQuery != null && !stateQuery.isEmpty()) {
statesQuery.add(stateQuery);
}
Set<String> appStates = parseQueries(statesQuery, true);
if (!appStates.isEmpty()) {
checkAppStates = true;
}
GetApplicationsRequest request = GetApplicationsRequest.newInstance();
if (checkStart) {
request.setStartRange(sBegin, sEnd);
}
if (checkEnd) {
request.setFinishRange(fBegin, fEnd);
}
if (checkCount) {
request.setLimit(countNum);
}
if (checkAppTypes) {
request.setApplicationTypes(appTypes);
}
if (checkAppTags) {
request.setApplicationTags(appTags);
}
if (checkAppStates) {
request.setApplicationStates(appStates);
}
if (queueQuery != null && !queueQuery.isEmpty()) {
ResourceScheduler rs = rm.getResourceScheduler();
if (rs instanceof CapacityScheduler) {
CapacityScheduler cs = (CapacityScheduler) rs;
// validate queue exists
try {
cs.getQueueInfo(queueQuery, false, false);
} catch (IOException e) {
throw new BadRequestException(e.getMessage());
}
}
Set<String> queues = new HashSet<String>(1);
queues.add(queueQuery);
request.setQueues(queues);
}
if (userQuery != null && !userQuery.isEmpty()) {
Set<String> users = new HashSet<String>(1);
users.add(userQuery);
request.setUsers(users);
}
List<ApplicationReport> appReports = null;
try {
appReports = rm.getClientRMService().getApplications(request, false).getApplicationList();
} catch (YarnException e) {
LOG.error("Unable to retrieve apps from ClientRMService", e);
throw new YarnRuntimeException("Unable to retrieve apps from ClientRMService", e);
}
final ConcurrentMap<ApplicationId, RMApp> apps = rm.getRMContext().getRMApps();
AppsInfo allApps = new AppsInfo();
for (ApplicationReport report : appReports) {
RMApp rmapp = apps.get(report.getApplicationId());
if (rmapp == null) {
continue;
}
if (finalStatusQuery != null && !finalStatusQuery.isEmpty()) {
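// valueOf is called only for validation: an unknown final status makes it throw IllegalArgumentException.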
FinalApplicationStatus.valueOf(finalStatusQuery);
if (!rmapp.getFinalApplicationStatus().toString().equalsIgnoreCase(finalStatusQuery)) {
continue;
}
}
AppInfo app = new AppInfo(rm, rmapp, hasAccess(rmapp, hsr), WebAppUtils.getHttpSchemePrefix(conf));
allApps.add(app);
}
return allApps;
}
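The filters above map directly onto GetApplicationsRequest, and the resource is exposed at GET /ws/v1/cluster/apps. A minimal sketch of a filtered query follows; the host, port, queue, and limit values are assumptions.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class RunningAppsClient {
  public static void main(String[] args) throws Exception {
    // Host, port, queue, and limit are assumptions; adjust to your cluster.
    URL url = new URL(
        "http://rm-host:8088/ws/v1/cluster/apps?states=RUNNING&queue=default&limit=10");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestProperty("Accept", "application/json");
    try (BufferedReader in = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
      String line;
      while ((line = in.readLine()) != null) {
        System.out.println(line); // JSON body listing the matching applications (AppInfo entries)
      }
    } finally {
      conn.disconnect();
    }
  }
}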
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
the class RMWebServices method dumpSchedulerLogs.
@POST
@Path("/scheduler/logs")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public String dumpSchedulerLogs(@FormParam("time") String time, @Context HttpServletRequest hsr) throws IOException {
init();
UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true);
ApplicationACLsManager aclsManager = rm.getApplicationACLsManager();
if (aclsManager.areACLsEnabled()) {
if (callerUGI == null || !aclsManager.isAdmin(callerUGI)) {
String msg = "Only admins can carry out this operation.";
throw new ForbiddenException(msg);
}
}
ResourceScheduler rs = rm.getResourceScheduler();
int period = Integer.parseInt(time);
if (period <= 0) {
throw new BadRequestException("Period must be greater than 0");
}
final String logHierarchy = "org.apache.hadoop.yarn.server.resourcemanager.scheduler";
String logfile = "yarn-scheduler-debug.log";
if (rs instanceof CapacityScheduler) {
logfile = "yarn-capacity-scheduler-debug.log";
} else if (rs instanceof FairScheduler) {
logfile = "yarn-fair-scheduler-debug.log";
}
AdHocLogDumper dumper = new AdHocLogDumper(logHierarchy, logfile);
// time period is sent to us in seconds
dumper.dumpLogs("DEBUG", period * 1000);
return "Capacity scheduler logs are being created.";
}
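This endpoint answers POST /ws/v1/cluster/scheduler/logs and expects a time form parameter in seconds; when ACLs are enabled the caller must be an RM admin. A minimal sketch follows; the host, port, and 60-second period are assumptions.

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class DumpSchedulerLogsClient {
  public static void main(String[] args) throws Exception {
    // Host and port are assumptions; adjust to your ResourceManager address.
    URL url = new URL("http://rm-host:8088/ws/v1/cluster/scheduler/logs");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("POST");
    conn.setDoOutput(true);
    conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
    byte[] form = "time=60".getBytes(StandardCharsets.UTF_8); // dump DEBUG scheduler logs for 60 seconds
    try (OutputStream out = conn.getOutputStream()) {
      out.write(form);
    }
    System.out.println("HTTP " + conn.getResponseCode());
    conn.disconnect();
  }
}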
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
the class TestNodeBlacklistingOnAMFailures method testNodeBlacklistingOnAMFailure.
@Test(timeout = 100000)
public void testNodeBlacklistingOnAMFailure() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, true);
DrainDispatcher dispatcher = new DrainDispatcher();
MockRM rm = startRM(conf, dispatcher);
CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler();
// Register 5 nodes, so that we can blacklist at least one if the AM container
// fails. By default the blacklisting threshold is 0.2, so 5 nodes * 0.2 = 1.
// First register 2 nodes, and after the AM is launched register 3 more.
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm.getResourceTrackerService());
nm1.registerNode();
MockNM nm2 = new MockNM("127.0.0.2:2345", 8000, rm.getResourceTrackerService());
nm2.registerNode();
RMApp app = rm.submitApp(200);
MockAM am1 = MockRM.launchAndRegisterAM(app, rm, nm1);
ContainerId amContainerId = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
RMContainer rmContainer = scheduler.getRMContainer(amContainerId);
NodeId nodeWhereAMRan = rmContainer.getAllocatedNode();
MockNM currentNode, otherNode;
if (nodeWhereAMRan.equals(nm1.getNodeId())) {
currentNode = nm1;
otherNode = nm2;
} else {
currentNode = nm2;
otherNode = nm1;
}
// register 3 nodes now
MockNM nm3 = new MockNM("127.0.0.3:2345", 8000, rm.getResourceTrackerService());
nm3.registerNode();
MockNM nm4 = new MockNM("127.0.0.4:2345", 8000, rm.getResourceTrackerService());
nm4.registerNode();
MockNM nm5 = new MockNM("127.0.0.5:2345", 8000, rm.getResourceTrackerService());
nm5.registerNode();
// Set the exit status to INVALID so that we can verify that the system
// automatically blacklists the node
makeAMContainerExit(rm, amContainerId, currentNode, ContainerExitStatus.INVALID);
// restart the am
RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, rm);
System.out.println("New AppAttempt launched " + attempt.getAppAttemptId());
// Try the current node a few times
for (int i = 0; i <= 2; i++) {
currentNode.nodeHeartbeat(true);
dispatcher.await();
Assert.assertEquals("AppAttemptState should still be SCHEDULED if currentNode is " + "blacklisted correctly", RMAppAttemptState.SCHEDULED, attempt.getAppAttemptState());
}
// Now try the other node
otherNode.nodeHeartbeat(true);
dispatcher.await();
// Now the AM container should be allocated
MockRM.waitForState(attempt, RMAppAttemptState.ALLOCATED, 20000);
MockAM am2 = rm.sendAMLaunched(attempt.getAppAttemptId());
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED);
amContainerId = ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
rmContainer = scheduler.getRMContainer(amContainerId);
nodeWhereAMRan = rmContainer.getAllocatedNode();
// The other node should now receive the assignment
Assert.assertEquals("After blacklisting, AM should have run on the other node", otherNode.getNodeId(), nodeWhereAMRan);
am2.registerAppAttempt();
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
List<Container> allocatedContainers = TestAMRestart.allocateContainers(currentNode, am2, 1);
Assert.assertEquals("Even though AM is blacklisted from the node, application can " + "still allocate non-AM containers there", currentNode.getNodeId(), allocatedContainers.get(0).getNodeId());
}
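The behaviour exercised by this test is driven by two ResourceManager settings: the enable flag used above and a disable threshold, the fraction of cluster nodes at which blacklisting is switched off again (0.2 by default, which is where the 5 nodes * 0.2 = 1 calculation in the comment comes from). A minimal configuration sketch follows; the enable flag constant is the one used in the test, while the threshold constant name is an assumption on my part.

import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;

public class AmBlacklistingConfigSketch {
  public static YarnConfiguration build() {
    YarnConfiguration conf = new YarnConfiguration();
    // Use the CapacityScheduler, as in the test above.
    conf.setClass(YarnConfiguration.RM_SCHEDULER,
        CapacityScheduler.class, ResourceScheduler.class);
    // Enable RM-side blacklisting of nodes where AM containers failed.
    conf.setBoolean(
        YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, true);
    // Fraction of cluster nodes at which blacklisting is disabled again.
    // Constant name assumed; the default fraction is 0.2 per the test comment.
    conf.setFloat(
        YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD, 0.2f);
    return conf;
  }
}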