Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in project hadoop by apache.
The class FifoScheduler, method addApplication:
@VisibleForTesting
public synchronized void addApplication(ApplicationId applicationId,
    String queue, String user, boolean isAppRecovering) {
  SchedulerApplication<FifoAppAttempt> application =
      new SchedulerApplication<>(DEFAULT_QUEUE, user);
  applications.put(applicationId, application);
  metrics.submitApp(user);
  LOG.info("Accepted application " + applicationId + " from user: " + user
      + ", currently num of applications: " + applications.size());
  if (isAppRecovering) {
    if (LOG.isDebugEnabled()) {
      LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED");
    }
  } else {
    rmContext.getDispatcher().getEventHandler().handle(
        new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
  }
}
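A minimal test-style sketch of how this path is typically exercised end to end, assuming the FIFO scheduler is selected via YarnConfiguration.RM_SCHEDULER; the MockRM calls mirror the test utilities used later on this page, and the exact overloads should be verified against the Hadoop version in use:

// Hedged sketch: submitting through MockRM eventually reaches
// FifoScheduler.addApplication via an APP_ADDED scheduler event,
// after which the app moves to ACCEPTED.
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
    ResourceScheduler.class);
MockRM rm = new MockRM(conf);
rm.start();
RMApp app = rm.submitApp(1024);  // default queue, current user
rm.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
rm.stop();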
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in project hadoop by apache.
The class FairScheduler, method addApplication:
/**
 * Add a new application to the scheduler, with a given id, queue name, and
 * user. This will accept a new app even if the user or queue is above
 * configured limits, but the app will not be marked as runnable.
 */
protected void addApplication(ApplicationId applicationId,
    String queueName, String user, boolean isAppRecovering) {
  if (queueName == null || queueName.isEmpty()) {
    String message = "Reject application " + applicationId
        + " submitted by user " + user + " with an empty queue name.";
    LOG.info(message);
    rmContext.getDispatcher().getEventHandler().handle(
        new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
    return;
  }
  if (queueName.startsWith(".") || queueName.endsWith(".")) {
    String message = "Reject application " + applicationId
        + " submitted by user " + user + " with an illegal queue name "
        + queueName + ". The queue name cannot start/end with period.";
    LOG.info(message);
    rmContext.getDispatcher().getEventHandler().handle(
        new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
    return;
  }
  try {
    writeLock.lock();
    RMApp rmApp = rmContext.getRMApps().get(applicationId);
    FSLeafQueue queue = assignToQueue(rmApp, queueName, user);
    if (queue == null) {
      return;
    }
    // Enforce ACLs
    UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(user);
    if (!queue.hasAccess(QueueACL.SUBMIT_APPLICATIONS, userUgi)
        && !queue.hasAccess(QueueACL.ADMINISTER_QUEUE, userUgi)) {
      String msg = "User " + userUgi.getUserName()
          + " cannot submit applications to queue " + queue.getName()
          + "(requested queuename is " + queueName + ")";
      LOG.info(msg);
      rmContext.getDispatcher().getEventHandler().handle(
          new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, msg));
      return;
    }
    SchedulerApplication<FSAppAttempt> application =
        new SchedulerApplication<FSAppAttempt>(queue, user);
    applications.put(applicationId, application);
    queue.getMetrics().submitApp(user);
    LOG.info("Accepted application " + applicationId + " from user: " + user
        + ", in queue: " + queue.getName()
        + ", currently num of applications: " + applications.size());
    if (isAppRecovering) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED");
      }
    } else {
      rmContext.getDispatcher().getEventHandler().handle(
          new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
    }
  } finally {
    writeLock.unlock();
  }
}
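Both validation branches above reject the submission with an APP_REJECTED event rather than throwing. A hedged sketch of how this surfaces in a test, assuming the fair scheduler is configured and reusing the MockRM submitApp overload shown in the recovery test below; the final FAILED state is an assumption about how RMApp reacts to APP_REJECTED:

// Hypothetical: queue names may not start or end with a period,
// so this submission should be rejected by addApplication.
RMApp rejected = rm.submitApp(1024, "bad-queue-app", "user1", null, ".root.invalid.");
rm.waitForState(rejected.getApplicationId(), RMAppState.FAILED);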
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in project hadoop by apache.
The class FifoScheduler, method assignContainers:
/**
 * Heart of the scheduler...
 *
 * @param node node on which resources are available to be allocated
 */
private void assignContainers(FiCaSchedulerNode node) {
  LOG.debug("assignContainers:" + " node=" + node.getRMNode().getNodeAddress()
      + " #applications=" + applications.size());
  // Try to assign containers to applications in fifo order
  for (Map.Entry<ApplicationId, SchedulerApplication<FifoAppAttempt>> e :
      applications.entrySet()) {
    FifoAppAttempt application = e.getValue().getCurrentAppAttempt();
    if (application == null) {
      continue;
    }
    LOG.debug("pre-assignContainers");
    application.showRequests();
    synchronized (application) {
      // Check if this resource is on the blacklist
      if (SchedulerAppUtils.isPlaceBlacklisted(application, node, LOG)) {
        continue;
      }
      for (SchedulerRequestKey schedulerKey : application.getSchedulerKeys()) {
        int maxContainers = getMaxAllocatableContainers(application,
            schedulerKey, node, NodeType.OFF_SWITCH);
        // Ensure the application needs containers of this priority
        if (maxContainers > 0) {
          int assignedContainers =
              assignContainersOnNode(node, application, schedulerKey);
          // Do not assign out of order w.r.t priorities
          if (assignedContainers == 0) {
            break;
          }
        }
      }
    }
    LOG.debug("post-assignContainers");
    application.showRequests();
    // Done
    if (Resources.lessThan(resourceCalculator, getClusterResource(),
        node.getUnallocatedResource(), minimumAllocation)) {
      break;
    }
  }
  // Update the applications' headroom to correctly take into
  // account the containers assigned in this update.
  for (SchedulerApplication<FifoAppAttempt> application : applications.values()) {
    FifoAppAttempt attempt = (FifoAppAttempt) application.getCurrentAppAttempt();
    if (attempt == null) {
      continue;
    }
    updateAppHeadRoom(attempt);
  }
}
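The FIFO order of the outer loop comes from iterating applications in ApplicationId order; within a single RM instance that matches submission order because ApplicationId compares by cluster timestamp and then by its monotonically increasing id. A small illustration (the assumption here is that the scheduler keeps its applications map sorted by ApplicationId, e.g. in a ConcurrentSkipListMap):

// Earlier submissions get smaller ids and therefore sort (and are scheduled) first.
ApplicationId first = ApplicationId.newInstance(1234L, 1);
ApplicationId second = ApplicationId.newInstance(1234L, 2);
assert first.compareTo(second) < 0;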
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in project hadoop by apache.
The class CapacityScheduler, method addApplication:
private void addApplication(ApplicationId applicationId, String queueName,
    String user, Priority priority) {
  try {
    writeLock.lock();
    if (isSystemAppsLimitReached()) {
      String message = "Maximum system application limit reached, "
          + "cannot accept submission of application: " + applicationId;
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
      return;
    }
    // Sanity checks.
    CSQueue queue = getQueue(queueName);
    if (queue == null) {
      String message = "Application " + applicationId + " submitted by user "
          + user + " to unknown queue: " + queueName;
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
      return;
    }
    if (!(queue instanceof LeafQueue)) {
      String message = "Application " + applicationId + " submitted by user "
          + user + " to non-leaf queue: " + queueName;
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
      return;
    }
    // Submit to the queue
    try {
      queue.submitApplication(applicationId, user, queueName);
    } catch (AccessControlException ace) {
      LOG.info("Failed to submit application " + applicationId + " to queue "
          + queueName + " from user " + user, ace);
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
              ace.toString()));
      return;
    }
    // Update the metrics
    queue.getMetrics().submitApp(user);
    SchedulerApplication<FiCaSchedulerApp> application =
        new SchedulerApplication<FiCaSchedulerApp>(queue, user, priority);
    applications.put(applicationId, application);
    LOG.info("Accepted application " + applicationId + " from user: " + user
        + ", in queue: " + queueName);
    rmContext.getDispatcher().getEventHandler().handle(
        new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
  } finally {
    writeLock.unlock();
  }
}
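Unlike the FIFO and fair scheduler variants above, this constructor also records the application's priority. A hedged sketch of reading the stored SchedulerApplication back out of the scheduler, mirroring what the recovery test below does via getSchedulerApplications(); here rm and submittedApp are assumed to come from an earlier MockRM-based submission, and getPriority() is an assumed accessor on SchedulerApplication:

AbstractYarnScheduler scheduler =
    (AbstractYarnScheduler) rm.getResourceScheduler();
Map<ApplicationId, SchedulerApplication> apps =
    scheduler.getSchedulerApplications();
SchedulerApplication app = apps.get(submittedApp.getApplicationId());
Queue queue = app.getQueue();          // queue resolved at submission time
Priority priority = app.getPriority(); // carries the priority passed above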
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in project hadoop by apache.
The class TestWorkPreservingRMRestart, method testCapacitySchedulerRecovery:
// Test CS recovery with multi-level queues and multiple users:
// 1. Set up 2 NMs, each with 8GB memory;
// 2. Set up 2-level queues: Default -> (QueueA, QueueB)
// 3. User1 submits 2 apps on QueueA
// 4. User2 submits 1 app on QueueB
// 5. The AM and each container have 1GB memory
// 6. Restart the RM.
// 7. nm1 re-syncs back the containers belonging to user1
// 8. nm2 re-syncs back the containers belonging to user2.
// 9. Assert the parent queue and the 2 leaf queues' state and metrics.
// 10. Assert each user's consumption inside the queue.
@Test(timeout = 30000)
public void testCapacitySchedulerRecovery() throws Exception {
  if (getSchedulerType() != SchedulerType.CAPACITY) {
    return;
  }
  conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
  conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS,
      DominantResourceCalculator.class.getName());
  CapacitySchedulerConfiguration csConf =
      new CapacitySchedulerConfiguration(conf);
  setupQueueConfiguration(csConf);
  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(csConf);
  rm1 = new MockRM(csConf, memStore);
  rm1.start();
  MockNM nm1 =
      new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
  MockNM nm2 =
      new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService());
  nm1.registerNode();
  nm2.registerNode();
  RMApp app1_1 = rm1.submitApp(1024, "app1_1", USER_1, null, A);
  MockAM am1_1 = MockRM.launchAndRegisterAM(app1_1, rm1, nm1);
  RMApp app1_2 = rm1.submitApp(1024, "app1_2", USER_1, null, A);
  MockAM am1_2 = MockRM.launchAndRegisterAM(app1_2, rm1, nm2);
  RMApp app2 = rm1.submitApp(1024, "app2", USER_2, null, B);
  MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
  // Clear queue metrics
  rm1.clearQueueMetrics(app1_1);
  rm1.clearQueueMetrics(app1_2);
  rm1.clearQueueMetrics(app2);
  csConf.set(PREFIX + "root.Default.QueueB.state", "STOPPED");
  // Re-start RM
  rm2 = new MockRM(csConf, memStore);
  rm2.start();
  nm1.setResourceTrackerService(rm2.getResourceTrackerService());
  nm2.setResourceTrackerService(rm2.getResourceTrackerService());
  List<NMContainerStatus> am1_1Containers =
      createNMContainerStatusForApp(am1_1);
  List<NMContainerStatus> am1_2Containers =
      createNMContainerStatusForApp(am1_2);
  am1_1Containers.addAll(am1_2Containers);
  nm1.registerNode(am1_1Containers, null);
  List<NMContainerStatus> am2Containers = createNMContainerStatusForApp(am2);
  nm2.registerNode(am2Containers, null);
  // Wait for the RM to settle down on recovering containers.
  waitForNumContainersToRecover(2, rm2, am1_1.getApplicationAttemptId());
  waitForNumContainersToRecover(2, rm2, am1_2.getApplicationAttemptId());
  waitForNumContainersToRecover(2, rm2, am2.getApplicationAttemptId());
  // Calculate each queue's resource usage.
  Resource containerResource = Resource.newInstance(1024, 1);
  Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
  Resource clusterResource = Resources.multiply(nmResource, 2);
  Resource q1Resource = Resources.multiply(clusterResource, 0.5);
  Resource q2Resource = Resources.multiply(clusterResource, 0.5);
  Resource q1UsedResource = Resources.multiply(containerResource, 4);
  Resource q2UsedResource = Resources.multiply(containerResource, 2);
  Resource totalUsedResource = Resources.add(q1UsedResource, q2UsedResource);
  Resource q1availableResources = Resources.subtract(q1Resource, q1UsedResource);
  Resource q2availableResources = Resources.subtract(q2Resource, q2UsedResource);
  Resource totalAvailableResource =
      Resources.add(q1availableResources, q2availableResources);
  Map<ApplicationId, SchedulerApplication> schedulerApps =
      ((AbstractYarnScheduler) rm2.getResourceScheduler())
          .getSchedulerApplications();
  SchedulerApplication schedulerApp1_1 =
      schedulerApps.get(app1_1.getApplicationId());
  // Assert queue A state.
  checkCSLeafQueue(rm2, schedulerApp1_1, clusterResource, q1Resource,
      q1UsedResource, 4);
  QueueMetrics queue1Metrics = schedulerApp1_1.getQueue().getMetrics();
  assertMetrics(queue1Metrics, 2, 0, 2, 0, 4,
      q1availableResources.getMemorySize(),
      q1availableResources.getVirtualCores(),
      q1UsedResource.getMemorySize(), q1UsedResource.getVirtualCores());
  // Assert queue B state.
  SchedulerApplication schedulerApp2 =
      schedulerApps.get(app2.getApplicationId());
  checkCSLeafQueue(rm2, schedulerApp2, clusterResource, q2Resource,
      q2UsedResource, 2);
  QueueMetrics queue2Metrics = schedulerApp2.getQueue().getMetrics();
  assertMetrics(queue2Metrics, 1, 0, 1, 0, 2,
      q2availableResources.getMemorySize(),
      q2availableResources.getVirtualCores(),
      q2UsedResource.getMemorySize(), q2UsedResource.getVirtualCores());
  // Assert parent queue state.
  LeafQueue leafQueue = (LeafQueue) schedulerApp2.getQueue();
  ParentQueue parentQueue = (ParentQueue) leafQueue.getParent();
  checkParentQueue(parentQueue, 6, totalUsedResource, (float) 6 / 16,
      (float) 6 / 16);
  assertMetrics(parentQueue.getMetrics(), 3, 0, 3, 0, 6,
      totalAvailableResource.getMemorySize(),
      totalAvailableResource.getVirtualCores(),
      totalUsedResource.getMemorySize(),
      totalUsedResource.getVirtualCores());
}