use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class FifoIntraQueuePreemptionPlugin method getAlreadySelectedPreemptionCandidatesResource.
/*
* Previous policies would have already selected few containers from an
* application. Calculate total resource from these selected containers.
*/
private void getAlreadySelectedPreemptionCandidatesResource(Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates, TempAppPerPartition tmpApp, String partition) {
tmpApp.selected = Resources.createResource(0, 0);
Set<RMContainer> containers = selectedCandidates.get(tmpApp.app.getApplicationAttemptId());
if (containers == null) {
return;
}
for (RMContainer cont : containers) {
if (partition.equals(cont.getNodeLabelExpression())) {
Resources.addTo(tmpApp.selected, cont.getAllocatedResource());
}
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class ProportionalCapacityPreemptionPolicy method syncKillableContainersFromScheduler.
private void syncKillableContainersFromScheduler() {
// sync preemptable entities from scheduler
preemptableQueues = scheduler.getPreemptionManager().getShallowCopyOfPreemptableQueues();
killableContainers = new HashSet<>();
for (Map.Entry<String, PreemptableQueue> entry : preemptableQueues.entrySet()) {
PreemptableQueue entity = entry.getValue();
for (Map<ContainerId, RMContainer> map : entity.getKillableContainers().values()) {
killableContainers.addAll(map.keySet());
}
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class TestClientRMService method getRMApp.
private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler, ApplicationId applicationId3, YarnConfiguration config, String queueName, final long memorySeconds, final long vcoreSeconds, String appNodeLabelExpression, String amNodeLabelExpression) {
ApplicationSubmissionContext asContext = mock(ApplicationSubmissionContext.class);
when(asContext.getMaxAppAttempts()).thenReturn(1);
when(asContext.getNodeLabelExpression()).thenReturn(appNodeLabelExpression);
when(asContext.getPriority()).thenReturn(Priority.newInstance(0));
RMAppImpl app = spy(new RMAppImpl(applicationId3, rmContext, config, null, null, queueName, asContext, yarnScheduler, null, System.currentTimeMillis(), "YARN", null, BuilderUtils.newResourceRequest(RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, Resource.newInstance(1024, 1), 1)) {
@Override
public ApplicationReport createAndGetApplicationReport(String clientUserName, boolean allowAccess) {
ApplicationReport report = super.createAndGetApplicationReport(clientUserName, allowAccess);
ApplicationResourceUsageReport usageReport = report.getApplicationResourceUsageReport();
usageReport.setMemorySeconds(memorySeconds);
usageReport.setVcoreSeconds(vcoreSeconds);
report.setApplicationResourceUsageReport(usageReport);
return report;
}
});
app.getAMResourceRequest().setNodeLabelExpression(amNodeLabelExpression);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(123456, 1), 1);
RMAppAttemptImpl rmAppAttemptImpl = spy(new RMAppAttemptImpl(attemptId, rmContext, yarnScheduler, null, asContext, config, null, app));
Container container = Container.newInstance(ContainerId.newContainerId(attemptId, 1), null, "", null, null, null);
RMContainerImpl containerimpl = spy(new RMContainerImpl(container, SchedulerRequestKey.extractFrom(container), attemptId, null, "", rmContext));
Map<ApplicationAttemptId, RMAppAttempt> attempts = new HashMap<ApplicationAttemptId, RMAppAttempt>();
attempts.put(attemptId, rmAppAttemptImpl);
when(app.getCurrentAppAttempt()).thenReturn(rmAppAttemptImpl);
when(app.getAppAttempts()).thenReturn(attempts);
when(app.getApplicationPriority()).thenReturn(Priority.newInstance(0));
when(rmAppAttemptImpl.getMasterContainer()).thenReturn(container);
ResourceScheduler rs = mock(ResourceScheduler.class);
when(rmContext.getScheduler()).thenReturn(rs);
when(rmContext.getScheduler().getRMContainer(any(ContainerId.class))).thenReturn(containerimpl);
SchedulerAppReport sAppReport = mock(SchedulerAppReport.class);
when(rmContext.getScheduler().getSchedulerAppInfo(any(ApplicationAttemptId.class))).thenReturn(sAppReport);
List<RMContainer> rmContainers = new ArrayList<RMContainer>();
rmContainers.add(containerimpl);
when(rmContext.getScheduler().getSchedulerAppInfo(attemptId).getLiveContainers()).thenReturn(rmContainers);
ContainerStatus cs = mock(ContainerStatus.class);
when(containerimpl.completed()).thenReturn(false);
when(containerimpl.getDiagnosticsInfo()).thenReturn("N/A");
when(containerimpl.getContainerExitStatus()).thenReturn(0);
when(containerimpl.getContainerState()).thenReturn(ContainerState.COMPLETE);
return app;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class TestNodeBlacklistingOnAMFailures method testNodeBlacklistingOnAMFailure.
@Test(timeout = 100000)
public void testNodeBlacklistingOnAMFailure() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, true);
DrainDispatcher dispatcher = new DrainDispatcher();
MockRM rm = startRM(conf, dispatcher);
CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler();
// Register 5 nodes, so that we can blacklist atleast one if AM container
// is failed. As per calculation it will be like, 5nodes * 0.2 (default)=1.
// First register 2 nodes, and after AM lauched register 3 more nodes.
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm.getResourceTrackerService());
nm1.registerNode();
MockNM nm2 = new MockNM("127.0.0.2:2345", 8000, rm.getResourceTrackerService());
nm2.registerNode();
RMApp app = rm.submitApp(200);
MockAM am1 = MockRM.launchAndRegisterAM(app, rm, nm1);
ContainerId amContainerId = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
RMContainer rmContainer = scheduler.getRMContainer(amContainerId);
NodeId nodeWhereAMRan = rmContainer.getAllocatedNode();
MockNM currentNode, otherNode;
if (nodeWhereAMRan.equals(nm1.getNodeId())) {
currentNode = nm1;
otherNode = nm2;
} else {
currentNode = nm2;
otherNode = nm1;
}
// register 3 nodes now
MockNM nm3 = new MockNM("127.0.0.3:2345", 8000, rm.getResourceTrackerService());
nm3.registerNode();
MockNM nm4 = new MockNM("127.0.0.4:2345", 8000, rm.getResourceTrackerService());
nm4.registerNode();
MockNM nm5 = new MockNM("127.0.0.5:2345", 8000, rm.getResourceTrackerService());
nm5.registerNode();
// Set the exist status to INVALID so that we can verify that the system
// automatically blacklisting the node
makeAMContainerExit(rm, amContainerId, currentNode, ContainerExitStatus.INVALID);
// restart the am
RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, rm);
System.out.println("New AppAttempt launched " + attempt.getAppAttemptId());
// Try the current node a few times
for (int i = 0; i <= 2; i++) {
currentNode.nodeHeartbeat(true);
dispatcher.await();
Assert.assertEquals("AppAttemptState should still be SCHEDULED if currentNode is " + "blacklisted correctly", RMAppAttemptState.SCHEDULED, attempt.getAppAttemptState());
}
// Now try the other node
otherNode.nodeHeartbeat(true);
dispatcher.await();
// Now the AM container should be allocated
MockRM.waitForState(attempt, RMAppAttemptState.ALLOCATED, 20000);
MockAM am2 = rm.sendAMLaunched(attempt.getAppAttemptId());
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED);
amContainerId = ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
rmContainer = scheduler.getRMContainer(amContainerId);
nodeWhereAMRan = rmContainer.getAllocatedNode();
// The other node should now receive the assignment
Assert.assertEquals("After blacklisting, AM should have run on the other node", otherNode.getNodeId(), nodeWhereAMRan);
am2.registerAppAttempt();
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
List<Container> allocatedContainers = TestAMRestart.allocateContainers(currentNode, am2, 1);
Assert.assertEquals("Even though AM is blacklisted from the node, application can " + "still allocate non-AM containers there", currentNode.getNodeId(), allocatedContainers.get(0).getNodeId());
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAndDemoteBeforeContainerStart.
@Test(timeout = 600000)
public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm3.getNodeId(), nm3);
MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm4.getNodeId(), nm4);
nm1.registerNode();
nm2.registerNode();
nm3.registerNode();
nm4.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode3).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode4).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
// All nodes 1 - 4 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
MockNM sameHostDiffNode = null;
for (NodeId n : nodes.keySet()) {
if (n.getHost().equals(allocNode.getNodeId().getHost()) && n.getPort() != allocNode.getNodeId().getPort()) {
sameHostDiffNode = nodes.get(n);
}
}
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
// Node on same host should not result in allocation
sameHostDiffNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
// Send Promotion req again... this should result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Send Promotion req again with incorrect version...
// this should also result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(1, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(0, allocateResponse.getUpdateErrors().get(0).getCurrentContainerVersion());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Ensure after correct node heartbeats, we should get the allocation
allocNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
// Verify Metrics After OPP allocation :
// Allocated cores+mem should have increased, available should decrease
verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(200);
// Verify that the container is still in ACQUIRED state wrt the RM.
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
// Now demote the container back..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(), uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC)));
// This should happen in the same heartbeat..
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 2);
// Verify Metrics After OPP allocation :
// Everything should have reverted to what it was
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
}
Aggregations