use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class FifoScheduler method removeNode.
private synchronized void removeNode(RMNode nodeInfo) {
FiCaSchedulerNode node = nodeTracker.getNode(nodeInfo.getNodeID());
if (node == null) {
return;
}
// Kill running containers
for (RMContainer container : node.getCopiedListOfRunningContainers()) {
super.completedContainer(container, SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
}
nodeTracker.removeNode(nodeInfo.getNodeID());
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class FifoScheduler method completedContainerInternal.
@Lock(FifoScheduler.class)
@Override
protected synchronized void completedContainerInternal(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
// Get the application for the finished container
Container container = rmContainer.getContainer();
FifoAppAttempt application = getCurrentAttemptForContainer(container.getId());
ApplicationId appId = container.getId().getApplicationAttemptId().getApplicationId();
// Get the node on which the container was allocated
FiCaSchedulerNode node = (FiCaSchedulerNode) getNode(container.getNodeId());
if (application == null) {
LOG.info("Unknown application: " + appId + " released container " + container.getId() + " on node: " + node + " with event: " + event);
return;
}
// Inform the application
application.containerCompleted(rmContainer, containerStatus, event, RMNodeLabelsManager.NO_LABEL);
// Inform the node
node.releaseContainer(rmContainer.getContainerId(), false);
// Update total usage
Resources.subtractFrom(usedResource, container.getResource());
LOG.info("Application attempt " + application.getApplicationAttemptId() + " released container " + container.getId() + " on node: " + node + " with event: " + event);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class FSPreemptionThread method preemptContainers.
private void preemptContainers(List<RMContainer> containers) {
// Warn application about containers to be killed
for (RMContainer container : containers) {
ApplicationAttemptId appAttemptId = container.getApplicationAttemptId();
FSAppAttempt app = scheduler.getSchedulerApp(appAttemptId);
LOG.info("Preempting container " + container + " from queue " + app.getQueueName());
app.trackContainerForPreemption(container);
}
// Schedule timer task to kill containers
preemptionTimer.schedule(new PreemptContainersTask(containers), warnTimeBeforeKill);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class AbstractYarnScheduler method recoverContainersOnNode.
public void recoverContainersOnNode(List<NMContainerStatus> containerReports, RMNode nm) {
try {
writeLock.lock();
if (!rmContext.isWorkPreservingRecoveryEnabled() || containerReports == null || (containerReports != null && containerReports.isEmpty())) {
return;
}
for (NMContainerStatus container : containerReports) {
ApplicationId appId = container.getContainerId().getApplicationAttemptId().getApplicationId();
RMApp rmApp = rmContext.getRMApps().get(appId);
if (rmApp == null) {
LOG.error("Skip recovering container " + container + " for unknown application.");
killOrphanContainerOnNode(nm, container);
continue;
}
SchedulerApplication<T> schedulerApp = applications.get(appId);
if (schedulerApp == null) {
LOG.info("Skip recovering container " + container + " for unknown SchedulerApplication. " + "Application current state is " + rmApp.getState());
killOrphanContainerOnNode(nm, container);
continue;
}
LOG.info("Recovering container " + container);
SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
if (!rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
// Do not recover containers for stopped attempt or previous attempt.
if (schedulerAttempt.isStopped() || !schedulerAttempt.getApplicationAttemptId().equals(container.getContainerId().getApplicationAttemptId())) {
LOG.info("Skip recovering container " + container + " for already stopped attempt.");
killOrphanContainerOnNode(nm, container);
continue;
}
}
// create container
RMContainer rmContainer = recoverAndCreateContainer(container, nm);
// recover RMContainer
rmContainer.handle(new RMContainerRecoverEvent(container.getContainerId(), container));
// recover scheduler node
SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
schedulerNode.recoverContainer(rmContainer);
// recover queue: update headroom etc.
Queue queue = schedulerAttempt.getQueue();
queue.recoverContainer(getClusterResource(), schedulerAttempt, rmContainer);
// recover scheduler attempt
schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
// set master container for the current running AMContainer for this
// attempt.
RMAppAttempt appAttempt = rmApp.getCurrentAppAttempt();
if (appAttempt != null) {
Container masterContainer = appAttempt.getMasterContainer();
// container ID stored in AppAttempt.
if (masterContainer != null && masterContainer.getId().equals(rmContainer.getContainerId())) {
((RMContainerImpl) rmContainer).setAMContainer(true);
}
}
if (schedulerAttempt.getPendingRelease().remove(container.getContainerId())) {
// release the container
rmContainer.handle(new RMContainerFinishedEvent(container.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED));
LOG.info(container.getContainerId() + " is released by application.");
}
}
} finally {
writeLock.unlock();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class AbstractYarnScheduler method createSchedContainerChangeRequest.
/**
* Sanity check increase/decrease request, and return
* SchedulerContainerResourceChangeRequest according to given
* UpdateContainerRequest.
*
* <pre>
* - Returns non-null value means validation succeeded
* - Throw exception when any other error happens
* </pre>
*/
private SchedContainerChangeRequest createSchedContainerChangeRequest(UpdateContainerRequest request, boolean increase) throws YarnException {
ContainerId containerId = request.getContainerId();
RMContainer rmContainer = getRMContainer(containerId);
if (null == rmContainer) {
String msg = "Failed to get rmContainer for " + (increase ? "increase" : "decrease") + " request, with container-id=" + containerId;
throw new InvalidResourceRequestException(msg);
}
SchedulerNode schedulerNode = getSchedulerNode(rmContainer.getAllocatedNode());
return new SchedContainerChangeRequest(this.rmContext, schedulerNode, rmContainer, request.getCapability());
}
Aggregations