use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class MockRM method launchAMWhenAsyncSchedulingEnabled.
public static MockAM launchAMWhenAsyncSchedulingEnabled(RMApp app, MockRM rm) throws Exception {
int i = 0;
while (app.getCurrentAppAttempt() == null) {
if (i < 100) {
i++;
}
Thread.sleep(50);
}
RMAppAttempt attempt = app.getCurrentAppAttempt();
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.ALLOCATED);
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED);
return am;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class MockRM method launchAM.
/**
* NOTE: nm.nodeHeartbeat is explicitly invoked,
* don't invoke it before calling launchAM
*/
public static MockAM launchAM(RMApp app, MockRM rm, MockNM nm) throws Exception {
rm.drainEventsImplicitly();
RMAppAttempt attempt = waitForAttemptScheduled(app, rm);
LOG.info("Launch AM " + attempt.getAppAttemptId());
nm.nodeHeartbeat(true);
rm.drainEventsImplicitly();
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED);
return am;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class MockRM method waitForState.
/**
* Wait until an attempt has reached a specified state.
* The timeout can be specified by the parameter.
* @param attemptId the id of an attempt
* @param finalState the attempt state waited
* @param timeoutMsecs the length of timeout in milliseconds
* @throws InterruptedException
* if interrupted while waiting for the state transition
*/
public void waitForState(ApplicationAttemptId attemptId, RMAppAttemptState finalState, int timeoutMsecs) throws InterruptedException {
drainEventsImplicitly();
RMApp app = getRMContext().getRMApps().get(attemptId.getApplicationId());
Assert.assertNotNull("app shouldn't be null", app);
RMAppAttempt attempt = app.getRMAppAttempt(attemptId);
MockRM.waitForState(attempt, finalState, timeoutMsecs);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class MockAM method waitForState.
/**
* Wait until an attempt has reached a specified state.
* The timeout is 40 seconds.
* @param finalState the attempt state waited
* @throws InterruptedException
* if interrupted while waiting for the state transition
*/
private void waitForState(RMAppAttemptState finalState) throws InterruptedException {
RMApp app = context.getRMApps().get(attemptId.getApplicationId());
RMAppAttempt attempt = app.getRMAppAttempt(attemptId);
MockRM.waitForState(attempt, finalState);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.
the class AbstractYarnScheduler method recoverContainersOnNode.
public void recoverContainersOnNode(List<NMContainerStatus> containerReports, RMNode nm) {
try {
writeLock.lock();
if (!rmContext.isWorkPreservingRecoveryEnabled() || containerReports == null || (containerReports != null && containerReports.isEmpty())) {
return;
}
for (NMContainerStatus container : containerReports) {
ApplicationId appId = container.getContainerId().getApplicationAttemptId().getApplicationId();
RMApp rmApp = rmContext.getRMApps().get(appId);
if (rmApp == null) {
LOG.error("Skip recovering container " + container + " for unknown application.");
killOrphanContainerOnNode(nm, container);
continue;
}
SchedulerApplication<T> schedulerApp = applications.get(appId);
if (schedulerApp == null) {
LOG.info("Skip recovering container " + container + " for unknown SchedulerApplication. " + "Application current state is " + rmApp.getState());
killOrphanContainerOnNode(nm, container);
continue;
}
LOG.info("Recovering container " + container);
SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
if (!rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
// Do not recover containers for stopped attempt or previous attempt.
if (schedulerAttempt.isStopped() || !schedulerAttempt.getApplicationAttemptId().equals(container.getContainerId().getApplicationAttemptId())) {
LOG.info("Skip recovering container " + container + " for already stopped attempt.");
killOrphanContainerOnNode(nm, container);
continue;
}
}
// create container
RMContainer rmContainer = recoverAndCreateContainer(container, nm);
// recover RMContainer
rmContainer.handle(new RMContainerRecoverEvent(container.getContainerId(), container));
// recover scheduler node
SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
schedulerNode.recoverContainer(rmContainer);
// recover queue: update headroom etc.
Queue queue = schedulerAttempt.getQueue();
queue.recoverContainer(getClusterResource(), schedulerAttempt, rmContainer);
// recover scheduler attempt
schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
// set master container for the current running AMContainer for this
// attempt.
RMAppAttempt appAttempt = rmApp.getCurrentAppAttempt();
if (appAttempt != null) {
Container masterContainer = appAttempt.getMasterContainer();
// container ID stored in AppAttempt.
if (masterContainer != null && masterContainer.getId().equals(rmContainer.getContainerId())) {
((RMContainerImpl) rmContainer).setAMContainer(true);
}
}
if (schedulerAttempt.getPendingRelease().remove(container.getContainerId())) {
// release the container
rmContainer.handle(new RMContainerFinishedEvent(container.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED));
LOG.info(container.getContainerId() + " is released by application.");
}
}
} finally {
writeLock.unlock();
}
}
Aggregations