Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in the Apache Hadoop project.
Class TestAMRestart, method testAMRestartNotLostContainerCompleteMsg.
/**
 * Checks that a container-completed message is still delivered after the AM
 * restarts.
 *
 * <p>Flow: container 2 of the first attempt is reported RUNNING and then
 * COMPLETE; the first AM polls until it sees that completion; the first
 * attempt is then failed and a second attempt is launched. The second AM's
 * first allocate call must report container 2 as completed again, and its
 * second allocate call must not.
 *
 * @throws Exception if any mock RM/NM/AM interaction or state wait fails
 */
@Test(timeout = 30000)
public void testAMRestartNotLostContainerCompleteMsg() throws Exception {
  YarnConfiguration conf = new YarnConfiguration();
  // Allow exactly one retry so that a second attempt is created after the failure.
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
  MockRM rm1 = new MockRM(conf);
  try {
    rm1.start();
    RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false, true);
    MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.getResourceTrackerService());
    nm1.registerNode();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    allocateContainers(nm1, am1, 1);
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);

    // container complete
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.COMPLETE);
    rm1.waitForState(nm1, containerId2, RMContainerState.COMPLETED);

    // Poll the first AM until the completion of container 2 shows up in an
    // allocate response. The AM "crashes" right after receiving it; the
    // @Test timeout bounds this loop.
    List<ContainerStatus> containerStatuses = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getCompletedContainersStatuses();
    while (!isContainerIdInContainerStatus(containerStatuses, containerId2)) {
      Thread.sleep(100);
      containerStatuses = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getCompletedContainersStatuses();
    }
    // Once handed to the AM, the status must no longer sit in justFinishedContainers.
    containerStatuses = app1.getCurrentAppAttempt().getJustFinishedContainers();
    Assert.assertFalse("container 2 is still in justFinishedContainers of the first attempt", isContainerIdInContainerStatus(containerStatuses, containerId2));

    // Fail the first attempt by reporting container 1 (presumably the AM's
    // own container -- confirm against MockRM) as COMPLETE.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    // wait for app to start a new attempt.
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    // assert this is a new AM.
    ApplicationAttemptId newAttemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));

    // launch the new AM
    RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
    MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);

    // The new AM's first allocate must carry the container-complete message.
    AllocateResponse allocateResponse = am2.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
    containerStatuses = allocateResponse.getCompletedContainersStatuses();
    Assert.assertTrue("new AM did not receive the completion of container 2", isContainerIdInContainerStatus(containerStatuses, containerId2));
    containerStatuses = attempt2.getJustFinishedContainers();
    Assert.assertFalse("container 2 is still in justFinishedContainers of the second attempt", isContainerIdInContainerStatus(containerStatuses, containerId2));

    // the second allocate should not get container complete msg
    allocateResponse = am2.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
    containerStatuses = allocateResponse.getCompletedContainersStatuses();
    Assert.assertFalse("completion of container 2 was delivered twice to the new AM", isContainerIdInContainerStatus(containerStatuses, containerId2));
  } finally {
    // Always stop the RM, even when an assertion above fails.
    rm1.stop();
  }
}
Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in the Apache Hadoop project.
Class TestCapacityScheduler, method testAppReservationWithDominantResourceCalculator.
// With the DominantResourceCalculator configured, a node that has no CPU
// available must not get a reservation; the request has to be served by
// another node instead.
@Test(timeout = 30000)
public void testAppReservationWithDominantResourceCalculator() throws Exception {
  CapacitySchedulerConfiguration schedulerConf = new CapacitySchedulerConfiguration();
  schedulerConf.setResourceComparator(DominantResourceCalculator.class);
  YarnConfiguration conf = new YarnConfiguration(schedulerConf);
  conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);

  MockRM rm = new MockRM(conf);
  rm.start();

  // nm1 is registered with a single vcore; the other two nodes get four each
  // and mainly serve to enlarge the cluster resource.
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10 * GB, 1);
  MockNM nm2 = rm.registerNode("127.0.0.1:1235", 10 * GB, 4);
  rm.registerNode("127.0.0.1:1236", 10 * GB, 4);

  RMApp app1 = rm.submitApp(1024);
  // A heartbeat from nm1 triggers scheduling.
  nm1.nodeHeartbeat(true);
  RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
  MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
  am1.registerAppAttempt();

  // Node report for nm1 right after the AM registered: 1 GB used, 9 GB free.
  ResourceScheduler scheduler = rm.getResourceScheduler();
  SchedulerNodeReport nm1Report = scheduler.getNodeReport(nm1.getNodeId());
  Assert.assertEquals(1 * GB, nm1Report.getUsedResource().getMemorySize());
  Assert.assertEquals(9 * GB, nm1Report.getAvailableResource().getMemorySize());

  // Queue a container request and send it to the RM.
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 1 * GB, 1, 1);
  am1.schedule();

  // Heartbeat from nm1: neither an allocation nor a reservation is expected.
  nm1.nodeHeartbeat(true);
  Thread.sleep(1000);
  AllocateResponse response = am1.schedule();
  ApplicationResourceUsageReport usage = scheduler.getAppResourceUsageReport(attempt1.getAppAttemptId());
  Assert.assertEquals(0, response.getAllocatedContainers().size());
  Assert.assertEquals(0, usage.getNumReservedContainers());

  // Heartbeat from nm2: the container should be allocated on this node.
  nm2.nodeHeartbeat(true);
  while (response.getAllocatedContainers().isEmpty()) {
    Thread.sleep(100);
    response = am1.schedule();
  }
  usage = scheduler.getAppResourceUsageReport(attempt1.getAppAttemptId());
  Assert.assertEquals(1, response.getAllocatedContainers().size());
  Assert.assertEquals(0, usage.getNumReservedContainers());

  rm.stop();
}
Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in the Apache Hadoop project.
Class TestCapacityScheduler, method testResourceOverCommit.
/**
 * Exercises the scheduler when a node's resource is reduced below what is
 * already in use (over-commit).
 *
 * <p>Flow: a 4 GB node is filled by a 2 GB AM plus a 2 GB container; the node
 * is then resized to 2 GB through the admin service, so available memory
 * becomes -2 GB. The test then checks that the container can still complete,
 * that the node report recovers to 0 GB available, and that a further 3 GB
 * request is not allocated.
 *
 * @throws Exception if any mock RM/NM/AM interaction fails
 */
@Test
public void testResourceOverCommit() throws Exception {
  int waitCount;
  Configuration conf = new Configuration();
  conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
  MockRM rm = new MockRM(conf);
  try {
    rm.start();
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
    RMApp app1 = rm.submitApp(2048);
    // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 2 GB used and 2 GB available
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());

    // add request for containers
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
    // send the request
    AllocateResponse alloc1Response = am1.schedule();
    // kick the scheduler, 2 GB given to AM1, resource remaining 0
    nm1.nodeHeartbeat(true);
    while (alloc1Response.getAllocatedContainers().size() < 1) {
      LOG.info("Waiting for containers to be created for app 1...");
      Thread.sleep(100);
      alloc1Response = am1.schedule();
    }
    List<Container> allocated1 = alloc1Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated1.size());
    Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
    Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 4 GB used and 0 GB available
    Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    // check container is assigned with 2 GB.
    Container c1 = allocated1.get(0);
    Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());

    // update node resource to 2 GB, so resource is over-consumed.
    Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<NodeId, ResourceOption>();
    nodeResourceMap.put(nm1.getNodeId(), ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
    UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    AdminService as = rm.getAdminService();
    as.updateNodeResource(request);
    // The update is handled asynchronously: poll (up to 20 times, 1 s apart)
    // until the available memory is no longer the 0 GB seen before the update.
    waitCount = 0;
    while (waitCount++ != 20) {
      report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
      if (report_nm1.getAvailableResource().getMemorySize() != 0) {
        break;
      }
      LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried " + waitCount + " times already..");
      Thread.sleep(1000);
    }
    // Now, the used resource is still 4 GB, and available resource is minus value.
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());

    // Check container can complete successfully in case of resource over-commitment.
    ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
    nm1.containerStatus(containerStatus);
    waitCount = 0;
    while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
      LOG.info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
      Thread.sleep(100);
    }
    Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
    Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    // As container return 2 GB back, the available resource becomes 0 again.
    Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());

    // Verify no NPE is trigger in schedule after resource is updated.
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 3 * GB, 1, 1);
    alloc1Response = am1.schedule();
    Assert.assertEquals("Shouldn't have enough resource to allocate containers", 0, alloc1Response.getAllocatedContainers().size());
    int times = 0;
    // try 10 times as scheduling is async process.
    while (alloc1Response.getAllocatedContainers().size() < 1 && times++ < 10) {
      LOG.info("Waiting for containers to be allocated for app 1... Tried " + times + " times already..");
      Thread.sleep(100);
      // Re-poll the RM on every try; without this the loop would keep
      // inspecting the same stale response and the assertion below would
      // only repeat the one above.
      alloc1Response = am1.schedule();
    }
    Assert.assertEquals("Shouldn't have enough resource to allocate containers", 0, alloc1Response.getAllocatedContainers().size());
  } finally {
    // Always stop the RM, even when an assertion above fails.
    rm.stop();
  }
}
Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in the Apache Hadoop project.
Class TestSchedulingWithAllocationRequestId, method testMultipleAllocationRequestIds.
/**
 * Submits two resource requests with different allocation request ids from
 * one application and checks each allocated container carries the id of the
 * request that produced it.
 *
 * <p>Request id 10 asks for one 2 GB container on 127.0.0.1 (nm1); request
 * id 20 asks for two 2 GB containers on 127.0.0.2 (nm2).
 *
 * @throws Exception if any mock RM/NM/AM interaction fails
 */
@Test
public void testMultipleAllocationRequestIds() throws Exception {
  configureScheduler();
  YarnConfiguration conf = getConf();
  MockRM rm = new MockRM(conf);
  try {
    rm.start();
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
    MockNM nm2 = rm.registerNode("127.0.0.2:5678", 4 * GB);
    RMApp app1 = rm.submitApp(2048);
    // kick the scheduling
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();

    // add request for containers with id 10 & 20, sending each one as it is added
    am1.addRequests(new String[] { "127.0.0.1" }, 2 * GB, 1, 1, 10L);
    am1.schedule();
    am1.addRequests(new String[] { "127.0.0.2" }, 2 * GB, 1, 2, 20L);
    am1.schedule();

    // check if request id 10 is satisfied
    nm1.nodeHeartbeat(true);
    AllocateResponse allocResponse = am1.schedule();
    while (allocResponse.getAllocatedContainers().size() < 1) {
      LOG.info("Waiting for containers to be created for app 1...");
      Thread.sleep(100);
      allocResponse = am1.schedule();
    }
    List<Container> allocated = allocResponse.getAllocatedContainers();
    Assert.assertEquals(1, allocated.size());
    checkAllocatedContainer(allocated.get(0), 2 * GB, nm1.getNodeId(), 10);

    // check now if request id 20 is satisfied
    nm2.nodeHeartbeat(true);
    // An allocate response reports only containers newly allocated since the
    // previous call, so the two containers may arrive in separate responses.
    // Count them across responses instead of requiring both in one response.
    int satisfiedForId20 = 0;
    while (satisfiedForId20 < 2) {
      LOG.info("Waiting for containers to be created for app 1...");
      Thread.sleep(100);
      for (Container container : am1.schedule().getAllocatedContainers()) {
        checkAllocatedContainer(container, 2 * GB, nm2.getNodeId(), 20);
        satisfiedForId20++;
      }
    }
    Assert.assertEquals(2, satisfiedForId20);
  } finally {
    rm.stop();
  }
}
Use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in the Apache Hadoop project.
Class TestSchedulingWithAllocationRequestId, method testMultipleAppsWithAllocationReqId.
/**
 * Runs two applications that reuse the same allocation request ids (5 and
 * 10) and checks that each allocated container has the size, node and
 * request id of the request issued by its own application.
 *
 * @throws Exception if any mock RM/NM/AM interaction fails
 */
@Test
public void testMultipleAppsWithAllocationReqId() throws Exception {
  configureScheduler();
  YarnConfiguration conf = getConf();
  MockRM rm = new MockRM(conf);
  try {
    rm.start();
    // Register node1
    String host0 = "host_0";
    String host1 = "host_1";
    MockNM nm1 = new MockNM(host0 + ":1234", 8 * GB, rm.getResourceTrackerService());
    nm1.registerNode();
    // Register node2
    MockNM nm2 = new MockNM(host1 + ":2351", 8 * GB, rm.getResourceTrackerService());
    nm2.registerNode();

    // submit 1st app
    RMApp app1 = rm.submitApp(1 * GB, "user_0", "a1");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
    // Submit app1 RR with allocationReqId = 5
    int numContainers = 1;
    am1.addRequests(new String[] { host0, host1 }, 1 * GB, 1, numContainers, 5L);
    am1.schedule();
    // a heartbeat from nm1 triggers the allocation
    nm1.nodeHeartbeat(true);
    List<Container> allocated = awaitAllocatedContainers(am1, "app 1");
    Assert.assertEquals(1, allocated.size());
    checkAllocatedContainer(allocated.get(0), 1 * GB, nm1.getNodeId(), 5L);

    // Submit another application
    RMApp app2 = rm.submitApp(1 * GB, "user_1", "a2");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm, nm2);
    // Submit app2 RR with allocationReqId = 5
    am2.addRequests(new String[] { host0, host1 }, 2 * GB, 1, numContainers, 5L);
    am2.schedule();
    nm2.nodeHeartbeat(true);
    allocated = awaitAllocatedContainers(am2, "app 2");
    Assert.assertEquals(1, allocated.size());
    checkAllocatedContainer(allocated.get(0), 2 * GB, nm2.getNodeId(), 5L);

    // Now submit app2 RR with allocationReqId = 10
    am2.addRequests(new String[] { host0, host1 }, 3 * GB, 1, numContainers, 10L);
    am2.schedule();
    nm1.nodeHeartbeat(true);
    allocated = awaitAllocatedContainers(am2, "app 2");
    Assert.assertEquals(1, allocated.size());
    checkAllocatedContainer(allocated.get(0), 3 * GB, nm1.getNodeId(), 10L);

    // Now submit app1 RR with allocationReqId = 10
    am1.addRequests(new String[] { host0, host1 }, 4 * GB, 1, numContainers, 10L);
    am1.schedule();
    nm2.nodeHeartbeat(true);
    allocated = awaitAllocatedContainers(am1, "app 1");
    Assert.assertEquals(1, allocated.size());
    checkAllocatedContainer(allocated.get(0), 4 * GB, nm2.getNodeId(), 10L);
  } finally {
    rm.stop();
  }
}

/**
 * Polls the given AM until one allocate response contains at least one
 * allocated container, sleeping 100 ms between polls.
 *
 * @param am the mock AM to poll through {@code schedule()}
 * @param appLabel label used in the progress log message, e.g. "app 1"
 * @return the allocated containers of the first non-empty response
 * @throws Exception if polling the RM fails or the sleep is interrupted
 */
private List<Container> awaitAllocatedContainers(MockAM am, String appLabel) throws Exception {
  AllocateResponse response = am.schedule();
  while (response.getAllocatedContainers().size() < 1) {
    LOG.info("Waiting for containers to be created for " + appLabel + "...");
    Thread.sleep(100);
    response = am.schedule();
  }
  return response.getAllocatedContainers();
}
Aggregations