use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestAMRMClient method triggerSchedulingWithNMHeartBeat.
/**
* Make sure we get allocations regardless of timing issues.
*/
private void triggerSchedulingWithNMHeartBeat() {
// Simulate fair scheduler update thread
RMContext context = yarnCluster.getResourceManager().getRMContext();
if (context.getScheduler() instanceof FairScheduler) {
FairScheduler scheduler = (FairScheduler) context.getScheduler();
scheduler.update();
}
// Trigger NM's heartbeat to RM and trigger allocations
for (RMNode rmNode : context.getRMNodes().values()) {
context.getScheduler().handle(new NodeUpdateSchedulerEvent(rmNode));
}
if (context.getScheduler() instanceof FairScheduler) {
FairScheduler scheduler = (FairScheduler) context.getScheduler();
scheduler.update();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class SLSCapacityScheduler method handle.
@Override
public void handle(SchedulerEvent schedulerEvent) {
// metrics off
if (!metricsON) {
super.handle(schedulerEvent);
return;
}
if (!running)
running = true;
// metrics on
Timer.Context handlerTimer = null;
Timer.Context operationTimer = null;
NodeUpdateSchedulerEventWrapper eventWrapper;
try {
//if (schedulerEvent instanceof NodeUpdateSchedulerEvent) {
if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE && schedulerEvent instanceof NodeUpdateSchedulerEvent) {
eventWrapper = new NodeUpdateSchedulerEventWrapper((NodeUpdateSchedulerEvent) schedulerEvent);
schedulerEvent = eventWrapper;
updateQueueWithNodeUpdate(eventWrapper);
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
// check if having AM Container, update resource usage information
AppAttemptRemovedSchedulerEvent appRemoveEvent = (AppAttemptRemovedSchedulerEvent) schedulerEvent;
ApplicationAttemptId appAttemptId = appRemoveEvent.getApplicationAttemptID();
String queue = appQueueMap.get(appAttemptId);
SchedulerAppReport app = super.getSchedulerAppInfo(appAttemptId);
if (!app.getLiveContainers().isEmpty()) {
// have 0 or 1
// should have one container which is AM container
RMContainer rmc = app.getLiveContainers().iterator().next();
updateQueueMetrics(queue, rmc.getContainer().getResource().getMemorySize(), rmc.getContainer().getResource().getVirtualCores());
}
}
handlerTimer = schedulerHandleTimer.time();
operationTimer = schedulerHandleTimerMap.get(schedulerEvent.getType()).time();
super.handle(schedulerEvent);
} finally {
if (handlerTimer != null)
handlerTimer.stop();
if (operationTimer != null)
operationTimer.stop();
schedulerHandleCounter.inc();
schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
SLSRunner.decreaseRemainingApps();
AppAttemptRemovedSchedulerEvent appRemoveEvent = (AppAttemptRemovedSchedulerEvent) schedulerEvent;
ApplicationAttemptId appAttemptId = appRemoveEvent.getApplicationAttemptID();
appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED && schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
AppAttemptAddedSchedulerEvent appAddEvent = (AppAttemptAddedSchedulerEvent) schedulerEvent;
SchedulerApplication app = applications.get(appAddEvent.getApplicationAttemptId().getApplicationId());
appQueueMap.put(appAddEvent.getApplicationAttemptId(), app.getQueue().getQueueName());
}
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class ResourceSchedulerWrapper method handle.
@Override
public void handle(SchedulerEvent schedulerEvent) {
// metrics off
if (!metricsON) {
scheduler.handle(schedulerEvent);
return;
}
if (!running)
running = true;
// metrics on
Timer.Context handlerTimer = null;
Timer.Context operationTimer = null;
NodeUpdateSchedulerEventWrapper eventWrapper;
try {
//if (schedulerEvent instanceof NodeUpdateSchedulerEvent) {
if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE && schedulerEvent instanceof NodeUpdateSchedulerEvent) {
eventWrapper = new NodeUpdateSchedulerEventWrapper((NodeUpdateSchedulerEvent) schedulerEvent);
schedulerEvent = eventWrapper;
updateQueueWithNodeUpdate(eventWrapper);
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
// check if having AM Container, update resource usage information
AppAttemptRemovedSchedulerEvent appRemoveEvent = (AppAttemptRemovedSchedulerEvent) schedulerEvent;
ApplicationAttemptId appAttemptId = appRemoveEvent.getApplicationAttemptID();
String queue = appQueueMap.get(appAttemptId.getApplicationId());
SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
if (!app.getLiveContainers().isEmpty()) {
// have 0 or 1
// should have one container which is AM container
RMContainer rmc = app.getLiveContainers().iterator().next();
updateQueueMetrics(queue, rmc.getContainer().getResource().getMemorySize(), rmc.getContainer().getResource().getVirtualCores());
}
}
handlerTimer = schedulerHandleTimer.time();
operationTimer = schedulerHandleTimerMap.get(schedulerEvent.getType()).time();
scheduler.handle(schedulerEvent);
} finally {
if (handlerTimer != null)
handlerTimer.stop();
if (operationTimer != null)
operationTimer.stop();
schedulerHandleCounter.inc();
schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED && schedulerEvent instanceof AppRemovedSchedulerEvent) {
SLSRunner.decreaseRemainingApps();
AppRemovedSchedulerEvent appRemoveEvent = (AppRemovedSchedulerEvent) schedulerEvent;
appQueueMap.remove(appRemoveEvent.getApplicationID());
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED && schedulerEvent instanceof AppAddedSchedulerEvent) {
AppAddedSchedulerEvent appAddEvent = (AppAddedSchedulerEvent) schedulerEvent;
String queueName = appAddEvent.getQueue();
appQueueMap.put(appAddEvent.getApplicationId(), queueName);
}
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAndDemoteBeforeContainerStart.
@Test(timeout = 600000)
public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm3.getNodeId(), nm3);
MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm4.getNodeId(), nm4);
nm1.registerNode();
nm2.registerNode();
nm3.registerNode();
nm4.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode3).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode4).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
// All nodes 1 - 4 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
MockNM sameHostDiffNode = null;
for (NodeId n : nodes.keySet()) {
if (n.getHost().equals(allocNode.getNodeId().getHost()) && n.getPort() != allocNode.getNodeId().getPort()) {
sameHostDiffNode = nodes.get(n);
}
}
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
// Node on same host should not result in allocation
sameHostDiffNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
// Send Promotion req again... this should result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Send Promotion req again with incorrect version...
// this should also result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(1, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(0, allocateResponse.getUpdateErrors().get(0).getCurrentContainerVersion());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Ensure after correct node heartbeats, we should get the allocation
allocNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
// Verify Metrics After OPP allocation :
// Allocated cores+mem should have increased, available should decrease
verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(200);
// Verify that the container is still in ACQUIRED state wrt the RM.
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
// Now demote the container back..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(), uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC)));
// This should happen in the same heartbeat..
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 2);
// Verify Metrics After OPP allocation :
// Everything should have reverted to what it was
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testNodeRemovalDuringAllocate.
@Test(timeout = 60000)
public void testNodeRemovalDuringAllocate() throws Exception {
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nm1.registerNode();
nm2.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
// Both node 1 and node 2 will be applicable for scheduling.
for (int i = 0; i < 10; i++) {
am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2)), null);
if (ctxt.getNodeMap().size() == 2) {
break;
}
Thread.sleep(50);
}
Assert.assertEquals(2, ctxt.getNodeMap().size());
// Remove node from scheduler but not from AM Service.
scheduler.handle(new NodeRemovedSchedulerEvent(rmNode1));
// After removal of node 1, only 1 node will be applicable for scheduling.
for (int i = 0; i < 10; i++) {
try {
am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2)), null);
} catch (Exception e) {
Assert.fail("Allocate request should be handled on node removal");
}
if (ctxt.getNodeMap().size() == 1) {
break;
}
Thread.sleep(50);
}
Assert.assertEquals(1, ctxt.getNodeMap().size());
}
Aggregations