use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestWorkPreservingRMRestart method testReleasedContainerNotRecovered.
// Test if RM on recovery receives the container release request from AM
// before it receives the container status reported by NM for recovery. this
// container should not be recovered.
@Test(timeout = 50000)
public void testReleasedContainerNotRecovered() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
rm1 = new MockRM(conf, memStore);
MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
rm1.start();
RMApp app1 = rm1.submitApp(1024);
final MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// Re-start RM
conf.setInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS, 8000);
rm2 = new MockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
am1.registerAppAttempt(true);
// try to release a container before the container is actually recovered.
final ContainerId runningContainer = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
am1.allocate(null, Arrays.asList(runningContainer));
// send container statuses to recover the containers
List<NMContainerStatus> containerStatuses = createNMContainerStatusForApp(am1);
nm1.registerNode(containerStatuses, null);
// only the am container should be recovered.
waitForNumContainersToRecover(1, rm2, am1.getApplicationAttemptId());
final AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm2.getResourceScheduler();
// cached release request is cleaned.
// assertFalse(scheduler.getPendingRelease().contains(runningContainer));
AllocateResponse response = am1.allocate(null, null);
// AM gets notified of the completed container.
boolean receivedCompletedContainer = false;
for (ContainerStatus status : response.getCompletedContainersStatuses()) {
if (status.getContainerId().equals(runningContainer)) {
receivedCompletedContainer = true;
}
}
assertTrue(receivedCompletedContainer);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
public Boolean get() {
// recovered
return scheduler.getApplicationAttempt(am1.getApplicationAttemptId()).getPendingRelease().isEmpty() && scheduler.getRMContainer(runningContainer) == null;
}
}, 1000, 20000);
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestSchedulingWithAllocationRequestId method testMultipleAllocationRequestDiffPriority.
@Test
public void testMultipleAllocationRequestDiffPriority() throws Exception {
configureScheduler();
YarnConfiguration conf = getConf();
MockRM rm = new MockRM(conf);
try {
rm.start();
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
MockNM nm2 = rm.registerNode("127.0.0.2:5678", 4 * GB);
RMApp app1 = rm.submitApp(2048);
// kick the scheduling
nm1.nodeHeartbeat(true);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
// add request for containers with id 10 & 20
am1.addRequests(new String[] { "127.0.0.1" }, 2 * GB, 2, 1, 10L);
// send the request
AllocateResponse allocResponse = am1.schedule();
am1.addRequests(new String[] { "127.0.0.2" }, 2 * GB, 1, 2, 20L);
// send the request
allocResponse = am1.schedule();
// check if request id 20 is satisfied first
nm2.nodeHeartbeat(true);
while (allocResponse.getAllocatedContainers().size() < 2) {
LOG.info("Waiting for containers to be created for app 1...");
Thread.sleep(100);
allocResponse = am1.schedule();
}
List<Container> allocated = allocResponse.getAllocatedContainers();
Assert.assertEquals(2, allocated.size());
for (Container container : allocated) {
checkAllocatedContainer(container, 2 * GB, nm2.getNodeId(), 20);
}
// check now if request id 10 is satisfied
nm1.nodeHeartbeat(true);
// send the request
allocResponse = am1.schedule();
while (allocResponse.getAllocatedContainers().size() < 1) {
LOG.info("Waiting for containers to be created for app 1...");
Thread.sleep(100);
allocResponse = am1.schedule();
}
allocated = allocResponse.getAllocatedContainers();
Assert.assertEquals(1, allocated.size());
checkAllocatedContainer(allocated.get(0), 2 * GB, nm1.getNodeId(), 10);
} finally {
if (rm != null) {
rm.stop();
}
}
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerStart.
@Test(timeout = 60000)
public void testContainerPromoteAfterContainerStart() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
nm1.registerNode();
nm2.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
// All nodes 1 to 2 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
// Start Container in NM
allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
Thread.sleep(200);
// Verify that container is actually running wrt the RM..
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
// Send Promotion req again... this should result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Start Container in NM
allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
// Verify that the Container is still in RUNNING state wrt RM..
rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
// Verify Metrics After OPP allocation :
// Allocated cores+mem should have increased, available should decrease
verifyMetrics(metrics, 6144, 6, 2048, 2, 2);
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestAMRMProxyService method testAllocateRequestWithNullValues.
@Test
public void testAllocateRequestWithNullValues() throws Exception {
int testAppId = 1;
RegisterApplicationMasterResponse registerResponse = registerApplicationMaster(testAppId);
Assert.assertNotNull(registerResponse);
Assert.assertEquals(Integer.toString(testAppId), registerResponse.getQueue());
AllocateResponse allocateResponse = allocate(testAppId);
Assert.assertNotNull(allocateResponse);
FinishApplicationMasterResponse finshResponse = finishApplicationMaster(testAppId, FinalApplicationStatus.SUCCEEDED);
Assert.assertNotNull(finshResponse);
Assert.assertEquals(true, finshResponse.getIsUnregistered());
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestAMRMProxyService method releaseContainersAndAssert.
private void releaseContainersAndAssert(int appId, List<Container> containers) throws Exception {
Assert.assertTrue(containers.size() > 0);
AllocateRequest allocateRequest = Records.newRecord(AllocateRequest.class);
allocateRequest.setResponseId(1);
List<ContainerId> relList = new ArrayList<ContainerId>(containers.size());
for (Container container : containers) {
relList.add(container.getId());
}
allocateRequest.setReleaseList(relList);
AllocateResponse allocateResponse = allocate(appId, allocateRequest);
Assert.assertNotNull(allocateResponse);
Assert.assertNull("new AMRMToken from RM should have been nulled by AMRMProxyService", allocateResponse.getAMRMToken());
// The way the mock resource manager is setup, it will return the containers
// that were released in the response. This is done because the UAMs run
// asynchronously and we need to if all the resource managers received the
// release it. The containers sent by the mock resource managers will be
// aggregated and returned back to us and we can assert if all the release
// lists reached the sub-clusters
List<Container> containersForReleasedContainerIds = new ArrayList<Container>();
containersForReleasedContainerIds.addAll(allocateResponse.getAllocatedContainers());
// Send max 10 heart beats to receive all the containers. If not, we will
// fail the test
int numHeartbeat = 0;
while (containersForReleasedContainerIds.size() < relList.size() && numHeartbeat++ < 10) {
allocateResponse = allocate(appId, Records.newRecord(AllocateRequest.class));
Assert.assertNotNull(allocateResponse);
Assert.assertNull("new AMRMToken from RM should have been nulled by AMRMProxyService", allocateResponse.getAMRMToken());
containersForReleasedContainerIds.addAll(allocateResponse.getAllocatedContainers());
LOG.info("Number of containers received in this request: " + Integer.toString(allocateResponse.getAllocatedContainers().size()));
LOG.info("Total number of containers received: " + Integer.toString(containersForReleasedContainerIds.size()));
Thread.sleep(10);
}
Assert.assertEquals(relList.size(), containersForReleasedContainerIds.size());
}
Aggregations