Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in the Apache Hadoop project.
Class RMAppManager, method recoverApplication.
/**
 * Re-creates an application from its stored state and kicks off its recovery.
 *
 * <p>The application object is rebuilt first. When the AM is managed, the
 * rebuilt application carries no AM resource request, and node labels are
 * disabled in the configuration, the submission context is searched for a
 * node label expression (the AM container request first, then the
 * application itself). If an expression other than
 * {@code RMNodeLabelsManager.NO_LABEL} is found, the application receives an
 * {@code APP_REJECTED} event with an explanatory message and recovery stops.
 * In every other case the application receives an {@code RMAppRecoverEvent}.
 *
 * @param appState stored state of the application being recovered
 * @param rmState state handed to the application in the recover event
 * @throws Exception as declared by the signature; nothing is thrown
 *     explicitly here, so any exception originates from the invoked helpers
 */
protected void recoverApplication(ApplicationStateData appState, RMState rmState) throws Exception {
  final ApplicationSubmissionContext submissionContext = appState.getApplicationSubmissionContext();
  final ApplicationId appId = submissionContext.getApplicationId();

  // Rebuild the application object from the stored submission data.
  RMAppImpl recoveredApp = createAndPopulateNewRMApp(submissionContext,
      appState.getSubmitTime(), appState.getUser(), true, appState.getStartTime());

  // A managed AM without an AM resource request, on a cluster with node
  // labels disabled, may have been submitted with a label expression that
  // can no longer be honoured. Detect that and reject with a clear message.
  boolean labelCheckNeeded = !submissionContext.getUnmanagedAM()
      && recoveredApp.getAMResourceRequest() == null
      && !YarnConfiguration.areNodeLabelsEnabled(this.conf);

  if (labelCheckNeeded) {
    // Prefer the expression on the AM container request; fall back to the
    // one set on the application itself.
    String expression = null;
    ResourceRequest amRequest = submissionContext.getAMContainerResourceRequest();
    if (amRequest != null) {
      expression = amRequest.getNodeLabelExpression();
    }
    if (expression == null) {
      expression = submissionContext.getNodeLabelExpression();
    }

    boolean usesNodeLabel = expression != null
        && !expression.equals(RMNodeLabelsManager.NO_LABEL);
    if (usesNodeLabel) {
      String diagnostics = "Failed to recover application " + appId
          + ". NodeLabel is not enabled in cluster, but AM resource request "
          + "contains a label expression.";
      LOG.warn(diagnostics);
      recoveredApp.handle(new RMAppEvent(appId, RMAppEventType.APP_REJECTED, diagnostics));
      return;
    }
  }

  recoveredApp.handle(new RMAppRecoverEvent(appId, rmState));
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in the Apache Hadoop project.
Class CapacityScheduler, method addApplicationOnRecovery.
/**
 * Re-registers a recovered application with the scheduler.
 *
 * <p>If the target queue no longer exists, or is no longer a leaf queue, the
 * outcome depends on {@code YarnConfiguration.shouldRMFailFast}: when it is
 * false a {@code KILL} event is dispatched for the application and the
 * method returns; when it is true the problem is logged as fatal and a
 * {@code QueueInvalidException} is thrown. Otherwise the application is
 * submitted to the queue, counted in the queue metrics and stored in
 * {@code applications}.
 *
 * @param applicationId id of the application being recovered
 * @param queueName name of the queue the application was submitted to
 * @param user user who submitted the application
 * @param priority priority recorded for the scheduler application
 */
private void addApplicationOnRecovery(ApplicationId applicationId, String queueName, String user, Priority priority) {
  // Acquire the lock BEFORE entering the try block: if lock() itself fails,
  // the finally clause must not try to release a lock this thread never held
  // (that would raise IllegalMonitorStateException and mask the real error).
  writeLock.lock();
  try {
    CSQueue queue = getQueue(queueName);
    if (queue == null) {
      // The queue the app was submitted to is gone after the restart, which
      // is not presently supported.
      if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
        this.rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.KILL, "Application killed on recovery as it was submitted to queue " + queueName + " which no longer exists after restart."));
        return;
      } else {
        String queueErrorMsg = "Queue named " + queueName + " missing during application recovery." + " Queue removal during recovery is not presently " + "supported by the capacity scheduler, please " + "restart with all queues configured" + " which were present before shutdown/restart.";
        LOG.fatal(queueErrorMsg);
        throw new QueueInvalidException(queueErrorMsg);
      }
    }
    if (!(queue instanceof LeafQueue)) {
      // The queue still exists but is no longer a leaf queue, which is not
      // supported for running apps.
      if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
        this.rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.KILL, "Application killed on recovery as it was submitted to queue " + queueName + " which is no longer a leaf queue after restart."));
        return;
      } else {
        String queueErrorMsg = "Queue named " + queueName + " is no longer a leaf queue during application recovery." + " Changing a leaf queue to a parent queue during recovery is" + " not presently supported by the capacity scheduler. Please" + " restart with leaf queues before shutdown/restart continuing" + " as leaf queues.";
        LOG.fatal(queueErrorMsg);
        throw new QueueInvalidException(queueErrorMsg);
      }
    }
    // Submit to the queue
    try {
      queue.submitApplication(applicationId, user, queueName);
    } catch (AccessControlException ignored) {
      // Deliberately ignored: a recovered app was already accepted before
      // the restart, so an access-control failure must not reject it now.
    }
    queue.getMetrics().submitApp(user);
    SchedulerApplication<FiCaSchedulerApp> application = new SchedulerApplication<FiCaSchedulerApp>(queue, user, priority);
    applications.put(applicationId, application);
    LOG.info("Accepted application " + applicationId + " from user: " + user + ", in queue: " + queueName);
    if (LOG.isDebugEnabled()) {
      LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED");
    }
  } finally {
    writeLock.unlock();
  }
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in the Apache Hadoop project.
Class FairScheduler, method resolveReservationQueueName.
/**
 * Resolves the queue an application should run in when reservations may be
 * involved.
 *
 * <ul>
 *   <li>If the named queue does not exist or is not reservable, the given
 *       {@code queueName} is returned unchanged.</li>
 *   <li>With a reservation id, the reservation queue
 *       {@code <queue>.<reservationID>} is looked up. If it is missing and
 *       the app is recovering with move-on-expiry configured for the queue,
 *       the plan queue's default child queue is returned. If it is missing
 *       otherwise, or its parent is not the specified queue, an
 *       {@code APP_REJECTED} event is dispatched and {@code null} is
 *       returned. Otherwise the reservation queue name is returned.</li>
 *   <li>Without a reservation id, the plan queue's default child queue is
 *       returned.</li>
 * </ul>
 *
 * @param queueName queue name supplied for the application
 * @param applicationId id of the application, used in rejection events
 * @param reservationID reservation the app was submitted to, or {@code null}
 * @param isRecovering whether the application is being recovered
 * @return the resolved queue name, or {@code null} if the application was
 *     rejected
 */
private String resolveReservationQueueName(String queueName, ApplicationId applicationId, ReservationId reservationID, boolean isRecovering) {
  // Acquire the lock BEFORE entering the try block so the finally clause
  // only ever releases a lock that this thread actually holds.
  readLock.lock();
  try {
    FSQueue queue = queueMgr.getQueue(queueName);
    if ((queue == null) || !allocConf.isReservable(queue.getQueueName())) {
      return queueName;
    }
    // Use fully specified name from now on (including root. prefix)
    queueName = queue.getQueueName();
    if (reservationID != null) {
      String resQName = queueName + "." + reservationID.toString();
      queue = queueMgr.getQueue(resQName);
      if (queue == null) {
        // reservation has terminated during failover
        if (isRecovering && allocConf.getMoveOnExpiry(queueName)) {
          // move to the default child queue of the plan
          return getDefaultQueueForPlanQueue(queueName);
        }
        String message = "Application " + applicationId + " submitted to a reservation which is not yet " + "currently active: " + resQName;
        this.rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
        return null;
      }
      if (!queue.getParent().getQueueName().equals(queueName)) {
        String message = "Application: " + applicationId + " submitted to a reservation " + resQName + " which does not belong to the specified queue: " + queueName;
        this.rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
        return null;
      }
      // use the reservation queue to run the app
      queueName = resQName;
    } else {
      // use the default child queue of the plan for unreserved apps
      queueName = getDefaultQueueForPlanQueue(queueName);
    }
    return queueName;
  } finally {
    readLock.unlock();
  }
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in the Apache Hadoop project.
Class AbstractYarnScheduler, method killAllAppsInQueue.
/**
 * Dispatches a {@code KILL} event for every application attempt that
 * {@code getAppsInQueue} reports for the given queue.
 *
 * @param queueName name of the queue whose applications should be killed
 * @throws YarnException if {@code getAppsInQueue} returns {@code null} for
 *     the queue, which is treated as the queue not existing
 */
@Override
public void killAllAppsInQueue(String queueName) throws YarnException {
  // Acquire the lock BEFORE entering the try block so the finally clause
  // only ever releases a lock that this thread actually holds.
  writeLock.lock();
  try {
    // check that the queue is valid
    List<ApplicationAttemptId> apps = getAppsInQueue(queueName);
    if (apps == null) {
      String errMsg = "The specified Queue: " + queueName + " doesn't exist";
      LOG.warn(errMsg);
      throw new YarnException(errMsg);
    }
    // generate kill events for each pending/running app
    for (ApplicationAttemptId app : apps) {
      this.rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(app.getApplicationId(), RMAppEventType.KILL, "Application killed due to expiry of reservation queue " + queueName + "."));
    }
  } finally {
    writeLock.unlock();
  }
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in the Apache Hadoop project.
Class RMAppLifetimeMonitor, method expire.
/**
 * Handles expiry of a monitored application by dispatching a {@code KILL}
 * event for it. Nothing is dispatched when the application id is not
 * present in the RM context's application map.
 *
 * @param monitoredAppKey monitor key identifying the expired application
 */
@SuppressWarnings("unchecked")
@Override
protected synchronized void expire(RMAppToMonitor monitoredAppKey) {
  final ApplicationId expiredAppId = monitoredAppKey.getApplicationId();

  // Only applications still known to the RM context are killed.
  RMApp knownApp = rmContext.getRMApps().get(expiredAppId);
  if (knownApp != null) {
    String killReason = "Application is killed by ResourceManager as it"
        + " has exceeded the lifetime period.";
    RMAppEvent killEvent = new RMAppEvent(expiredAppId, RMAppEventType.KILL, killReason);
    rmContext.getDispatcher().getEventHandler().handle(killEvent);
  }
}
Aggregations