use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl in project hadoop by apache.
the class FSAppAttempt method allocate.
public RMContainer allocate(NodeType type, FSSchedulerNode node, SchedulerRequestKey schedulerKey, PendingAsk pendingAsk, Container reservedContainer) {
RMContainer rmContainer;
Container container;
try {
writeLock.lock();
// Update allowed locality level
NodeType allowed = allowedLocalityLevel.get(schedulerKey);
if (allowed != null) {
if (allowed.equals(NodeType.OFF_SWITCH) && (type.equals(NodeType.NODE_LOCAL) || type.equals(NodeType.RACK_LOCAL))) {
this.resetAllowedLocalityLevel(schedulerKey, type);
} else if (allowed.equals(NodeType.RACK_LOCAL) && type.equals(NodeType.NODE_LOCAL)) {
this.resetAllowedLocalityLevel(schedulerKey, type);
}
}
// request without locking the scheduler, hence we need to check
if (getOutstandingAsksCount(schedulerKey) <= 0) {
return null;
}
container = reservedContainer;
if (container == null) {
container = createContainer(node, pendingAsk.getPerAllocationResource(), schedulerKey);
}
// Create RMContainer
rmContainer = new RMContainerImpl(container, schedulerKey, getApplicationAttemptId(), node.getNodeID(), appSchedulingInfo.getUser(), rmContext);
((RMContainerImpl) rmContainer).setQueueName(this.getQueueName());
// Add it to allContainers list.
addToNewlyAllocatedContainers(node, rmContainer);
liveContainers.put(container.getId(), rmContainer);
// Update consumption and track allocations
List<ResourceRequest> resourceRequestList = appSchedulingInfo.allocate(type, node, schedulerKey, container);
this.attemptResourceUsage.incUsed(container.getResource());
// Update resource requests related to "request" and store in RMContainer
((RMContainerImpl) rmContainer).setResourceRequests(resourceRequestList);
// Inform the container
rmContainer.handle(new RMContainerEvent(container.getId(), RMContainerEventType.START));
if (LOG.isDebugEnabled()) {
LOG.debug("allocate: applicationAttemptId=" + container.getId().getApplicationAttemptId() + " container=" + container.getId() + " host=" + container.getNodeId().getHost() + " type=" + type);
}
RMAuditLogger.logSuccess(getUser(), AuditConstants.ALLOC_CONTAINER, "SchedulerApp", getApplicationId(), container.getId(), container.getResource());
} finally {
writeLock.unlock();
}
return rmContainer;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl in project hadoop by apache.
the class AbstractYarnScheduler method recoverContainersOnNode.
public void recoverContainersOnNode(List<NMContainerStatus> containerReports, RMNode nm) {
try {
writeLock.lock();
if (!rmContext.isWorkPreservingRecoveryEnabled() || containerReports == null || (containerReports != null && containerReports.isEmpty())) {
return;
}
for (NMContainerStatus container : containerReports) {
ApplicationId appId = container.getContainerId().getApplicationAttemptId().getApplicationId();
RMApp rmApp = rmContext.getRMApps().get(appId);
if (rmApp == null) {
LOG.error("Skip recovering container " + container + " for unknown application.");
killOrphanContainerOnNode(nm, container);
continue;
}
SchedulerApplication<T> schedulerApp = applications.get(appId);
if (schedulerApp == null) {
LOG.info("Skip recovering container " + container + " for unknown SchedulerApplication. " + "Application current state is " + rmApp.getState());
killOrphanContainerOnNode(nm, container);
continue;
}
LOG.info("Recovering container " + container);
SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
if (!rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
// Do not recover containers for stopped attempt or previous attempt.
if (schedulerAttempt.isStopped() || !schedulerAttempt.getApplicationAttemptId().equals(container.getContainerId().getApplicationAttemptId())) {
LOG.info("Skip recovering container " + container + " for already stopped attempt.");
killOrphanContainerOnNode(nm, container);
continue;
}
}
// create container
RMContainer rmContainer = recoverAndCreateContainer(container, nm);
// recover RMContainer
rmContainer.handle(new RMContainerRecoverEvent(container.getContainerId(), container));
// recover scheduler node
SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
schedulerNode.recoverContainer(rmContainer);
// recover queue: update headroom etc.
Queue queue = schedulerAttempt.getQueue();
queue.recoverContainer(getClusterResource(), schedulerAttempt, rmContainer);
// recover scheduler attempt
schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
// set master container for the current running AMContainer for this
// attempt.
RMAppAttempt appAttempt = rmApp.getCurrentAppAttempt();
if (appAttempt != null) {
Container masterContainer = appAttempt.getMasterContainer();
// container ID stored in AppAttempt.
if (masterContainer != null && masterContainer.getId().equals(rmContainer.getContainerId())) {
((RMContainerImpl) rmContainer).setAMContainer(true);
}
}
if (schedulerAttempt.getPendingRelease().remove(container.getContainerId())) {
// release the container
rmContainer.handle(new RMContainerFinishedEvent(container.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED));
LOG.info(container.getContainerId() + " is released by application.");
}
}
} finally {
writeLock.unlock();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl in project hadoop by apache.
the class AbstractYarnScheduler method createDecreasedRMContainer.
private RMContainer createDecreasedRMContainer(SchedulerApplicationAttempt appAttempt, UpdateContainerRequest uReq, RMContainer rmContainer) {
SchedulerRequestKey sk = SchedulerRequestKey.extractFrom(rmContainer.getContainer());
Container decreasedContainer = BuilderUtils.newContainer(ContainerId.newContainerId(appAttempt.getApplicationAttemptId(), appAttempt.getNewContainerId()), rmContainer.getContainer().getNodeId(), rmContainer.getContainer().getNodeHttpAddress(), Resources.none(), sk.getPriority(), null, rmContainer.getExecutionType(), sk.getAllocationRequestId());
decreasedContainer.setVersion(rmContainer.getContainer().getVersion());
RMContainer newRmContainer = new RMContainerImpl(decreasedContainer, sk, appAttempt.getApplicationAttemptId(), decreasedContainer.getNodeId(), appAttempt.getUser(), rmContext, rmContainer.isRemotelyAllocated());
appAttempt.addRMContainer(decreasedContainer.getId(), rmContainer);
((AbstractYarnScheduler) rmContext.getScheduler()).getNode(decreasedContainer.getNodeId()).allocateContainer(newRmContainer);
return newRmContainer;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl in project hadoop by apache.
the class TestReservations method testFindNodeToUnreserve.
@Test
public void testFindNodeToUnreserve() throws Exception {
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
setup(csConf);
final String user_0 = "user_0";
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
String host_1 = "host_1";
FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB);
// Setup resource-requests
Priority p = TestUtils.createMockPriority(5);
SchedulerRequestKey priorityMap = toSchedulerKey(p);
Resource capability = Resources.createResource(2 * GB, 0);
RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
RMContext rmContext = mock(RMContext.class);
ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class);
DrainDispatcher drainDispatcher = new DrainDispatcher();
when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer);
when(rmContext.getDispatcher()).thenReturn(drainDispatcher);
when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(app_0.getApplicationId(), 1);
ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1);
Container container = TestUtils.getMockContainer(containerId, node_1.getNodeID(), Resources.createResource(2 * GB), priorityMap.getPriority());
RMContainer rmContainer = new RMContainerImpl(container, SchedulerRequestKey.extractFrom(container), appAttemptId, node_1.getNodeID(), "user", rmContext);
// nothing reserved
RMContainer toUnreserveContainer = app_0.findNodeToUnreserve(csContext.getClusterResource(), node_1, priorityMap, capability);
assertTrue(toUnreserveContainer == null);
// reserved but scheduler doesn't know about that node.
app_0.reserve(node_1, priorityMap, rmContainer, container);
node_1.reserveResource(app_0, priorityMap, rmContainer);
toUnreserveContainer = app_0.findNodeToUnreserve(csContext.getClusterResource(), node_1, priorityMap, capability);
assertTrue(toUnreserveContainer == null);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl in project hadoop by apache.
the class TestChildQueueOrder method testSortedQueues.
@Test
@SuppressWarnings("unchecked")
public void testSortedQueues() throws Exception {
// Setup queue configs
setupSortedQueues(csConf);
Map<String, CSQueue> queues = new HashMap<String, CSQueue>();
CSQueue root = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, CapacitySchedulerConfiguration.ROOT, queues, queues, TestUtils.spyHook);
// Setup some nodes
final int memoryPerNode = 10;
final int coresPerNode = 16;
final int numNodes = 1;
FiCaSchedulerNode node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, memoryPerNode * GB);
doNothing().when(node_0).releaseContainer(any(ContainerId.class), anyBoolean());
final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode * GB), numNodes * coresPerNode);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Start testing
CSQueue a = queues.get(A);
CSQueue b = queues.get(B);
CSQueue c = queues.get(C);
CSQueue d = queues.get(D);
// Make a/b/c/d has >0 pending resource, so that allocation will continue.
queues.get(CapacitySchedulerConfiguration.ROOT).getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
a.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
b.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
c.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
d.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
final String user_0 = "user_0";
// Stub an App and its containerCompleted
FiCaSchedulerApp app_0 = getMockApplication(0, user_0);
doReturn(true).when(app_0).containerCompleted(any(RMContainer.class), any(ContainerStatus.class), any(RMContainerEventType.class), any(String.class));
Priority priority = TestUtils.createMockPriority(1);
ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class);
DrainDispatcher drainDispatcher = new DrainDispatcher();
RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
RMContext rmContext = mock(RMContext.class);
when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer);
when(rmContext.getDispatcher()).thenReturn(drainDispatcher);
when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(app_0.getApplicationId(), 1);
ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1);
Container container = TestUtils.getMockContainer(containerId, node_0.getNodeID(), Resources.createResource(1 * GB), priority);
RMContainer rmContainer = new RMContainerImpl(container, SchedulerRequestKey.extractFrom(container), appAttemptId, node_0.getNodeID(), "user", rmContext);
// Assign {1,2,3,4} 1GB containers respectively to queues
stubQueueAllocation(a, clusterResource, node_0, 1 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
for (int i = 0; i < 2; i++) {
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 1 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
}
for (int i = 0; i < 3; i++) {
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 1 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
}
for (int i = 0; i < 4; i++) {
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 1 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
}
verifyQueueMetrics(a, 1 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 4 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
//Release 3 x 1GB containers from D
for (int i = 0; i < 3; i++) {
d.completedContainer(clusterResource, app_0, node_0, rmContainer, null, RMContainerEventType.KILL, null, true);
}
verifyQueueMetrics(a, 1 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
//reset manually resources on node
node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, (memoryPerNode - 1 - 2 - 3 - 1) * GB);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
// Assign 2 x 1GB Containers to A
for (int i = 0; i < 2; i++) {
stubQueueAllocation(a, clusterResource, node_0, 1 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
}
verifyQueueMetrics(a, 3 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
//Release 1GB Container from A
a.completedContainer(clusterResource, app_0, node_0, rmContainer, null, RMContainerEventType.KILL, null, true);
verifyQueueMetrics(a, 2 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
//reset manually resources on node
node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, (memoryPerNode - 2 - 2 - 3 - 1) * GB);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
// Assign 1GB container to B
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 1 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
verifyQueueMetrics(a, 2 * GB, clusterResource);
verifyQueueMetrics(b, 3 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
//Release 1GB container resources from B
b.completedContainer(clusterResource, app_0, node_0, rmContainer, null, RMContainerEventType.KILL, null, true);
verifyQueueMetrics(a, 2 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
//reset manually resources on node
node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, (memoryPerNode - 2 - 2 - 3 - 1) * GB);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
// Assign 1GB container to A
stubQueueAllocation(a, clusterResource, node_0, 1 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
verifyQueueMetrics(a, 3 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
// Now do the real test, where B and D request a 1GB container
// D should should get the next container if the order is correct
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 1 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 1 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
InOrder allocationOrder = inOrder(d, b);
allocationOrder.verify(d).assignContainers(eq(clusterResource), any(PlacementSet.class), any(ResourceLimits.class), any(SchedulingMode.class));
allocationOrder.verify(b).assignContainers(eq(clusterResource), any(PlacementSet.class), any(ResourceLimits.class), any(SchedulingMode.class));
verifyQueueMetrics(a, 3 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
//D got the container
verifyQueueMetrics(d, 2 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
}
Aggregations