use of org.apache.hadoop.yarn.api.records.ContainerId in project hadoop by apache.
the class ProportionalCapacityPreemptionPolicy method syncKillableContainersFromScheduler.
private void syncKillableContainersFromScheduler() {
// sync preemptable entities from scheduler
preemptableQueues = scheduler.getPreemptionManager().getShallowCopyOfPreemptableQueues();
killableContainers = new HashSet<>();
for (Map.Entry<String, PreemptableQueue> entry : preemptableQueues.entrySet()) {
PreemptableQueue entity = entry.getValue();
for (Map<ContainerId, RMContainer> map : entity.getKillableContainers().values()) {
killableContainers.addAll(map.keySet());
}
}
}
use of org.apache.hadoop.yarn.api.records.ContainerId in project hadoop by apache.
the class TestNodeBlacklistingOnAMFailures method testNodeBlacklistingOnAMFailure.
@Test(timeout = 100000)
public void testNodeBlacklistingOnAMFailure() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, true);
DrainDispatcher dispatcher = new DrainDispatcher();
MockRM rm = startRM(conf, dispatcher);
CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler();
// Register 5 nodes, so that we can blacklist atleast one if AM container
// is failed. As per calculation it will be like, 5nodes * 0.2 (default)=1.
// First register 2 nodes, and after AM lauched register 3 more nodes.
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm.getResourceTrackerService());
nm1.registerNode();
MockNM nm2 = new MockNM("127.0.0.2:2345", 8000, rm.getResourceTrackerService());
nm2.registerNode();
RMApp app = rm.submitApp(200);
MockAM am1 = MockRM.launchAndRegisterAM(app, rm, nm1);
ContainerId amContainerId = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
RMContainer rmContainer = scheduler.getRMContainer(amContainerId);
NodeId nodeWhereAMRan = rmContainer.getAllocatedNode();
MockNM currentNode, otherNode;
if (nodeWhereAMRan.equals(nm1.getNodeId())) {
currentNode = nm1;
otherNode = nm2;
} else {
currentNode = nm2;
otherNode = nm1;
}
// register 3 nodes now
MockNM nm3 = new MockNM("127.0.0.3:2345", 8000, rm.getResourceTrackerService());
nm3.registerNode();
MockNM nm4 = new MockNM("127.0.0.4:2345", 8000, rm.getResourceTrackerService());
nm4.registerNode();
MockNM nm5 = new MockNM("127.0.0.5:2345", 8000, rm.getResourceTrackerService());
nm5.registerNode();
// Set the exist status to INVALID so that we can verify that the system
// automatically blacklisting the node
makeAMContainerExit(rm, amContainerId, currentNode, ContainerExitStatus.INVALID);
// restart the am
RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, rm);
System.out.println("New AppAttempt launched " + attempt.getAppAttemptId());
// Try the current node a few times
for (int i = 0; i <= 2; i++) {
currentNode.nodeHeartbeat(true);
dispatcher.await();
Assert.assertEquals("AppAttemptState should still be SCHEDULED if currentNode is " + "blacklisted correctly", RMAppAttemptState.SCHEDULED, attempt.getAppAttemptState());
}
// Now try the other node
otherNode.nodeHeartbeat(true);
dispatcher.await();
// Now the AM container should be allocated
MockRM.waitForState(attempt, RMAppAttemptState.ALLOCATED, 20000);
MockAM am2 = rm.sendAMLaunched(attempt.getAppAttemptId());
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED);
amContainerId = ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
rmContainer = scheduler.getRMContainer(amContainerId);
nodeWhereAMRan = rmContainer.getAllocatedNode();
// The other node should now receive the assignment
Assert.assertEquals("After blacklisting, AM should have run on the other node", otherNode.getNodeId(), nodeWhereAMRan);
am2.registerAppAttempt();
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
List<Container> allocatedContainers = TestAMRestart.allocateContainers(currentNode, am2, 1);
Assert.assertEquals("Even though AM is blacklisted from the node, application can " + "still allocate non-AM containers there", currentNode.getNodeId(), allocatedContainers.get(0).getNodeId());
}
use of org.apache.hadoop.yarn.api.records.ContainerId in project hadoop by apache.
the class TestApplicationMasterLauncher method testSetupTokens.
@Test
public void testSetupTokens() throws Exception {
MockRM rm = new MockRM();
rm.start();
MockNM nm1 = rm.registerNode("h1:1234", 5000);
RMApp app = rm.submitApp(2000);
/// kick the scheduling
nm1.nodeHeartbeat(true);
RMAppAttempt attempt = app.getCurrentAppAttempt();
MyAMLauncher launcher = new MyAMLauncher(rm.getRMContext(), attempt, AMLauncherEventType.LAUNCH, rm.getConfig());
DataOutputBuffer dob = new DataOutputBuffer();
Credentials ts = new Credentials();
ts.writeTokenStorageToStream(dob);
ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(null, null, null, null, securityTokens, null);
ContainerId containerId = ContainerId.newContainerId(attempt.getAppAttemptId(), 0L);
try {
launcher.setupTokens(amContainer, containerId);
} catch (Exception e) {
// ignore the first fake exception
}
try {
launcher.setupTokens(amContainer, containerId);
} catch (java.io.EOFException e) {
Assert.fail("EOFException should not happen.");
}
}
use of org.apache.hadoop.yarn.api.records.ContainerId in project hadoop by apache.
the class TestApplicationMasterService method testInvalidContainerReleaseRequest.
@Test(timeout = 600000)
public void testInvalidContainerReleaseRequest() throws Exception {
MockRM rm = new MockRM(conf);
try {
rm.start();
// Register node1
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB);
// Submit an application
RMApp app1 = rm.submitApp(1024);
// kick the scheduling
nm1.nodeHeartbeat(true);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
am1.addRequests(new String[] { "127.0.0.1" }, GB, 1, 1);
// send the request
AllocateResponse alloc1Response = am1.schedule();
// kick the scheduler
nm1.nodeHeartbeat(true);
while (alloc1Response.getAllocatedContainers().size() < 1) {
LOG.info("Waiting for containers to be created for app 1...");
sleep(1000);
alloc1Response = am1.schedule();
}
Assert.assertTrue(alloc1Response.getAllocatedContainers().size() > 0);
RMApp app2 = rm.submitApp(1024);
nm1.nodeHeartbeat(true);
RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId());
am2.registerAppAttempt();
// Now trying to release container allocated for app1 -> appAttempt1.
ContainerId cId = alloc1Response.getAllocatedContainers().get(0).getId();
am2.addContainerToBeReleased(cId);
try {
am2.schedule();
Assert.fail("Exception was expected!!");
} catch (InvalidContainerReleaseException e) {
StringBuilder sb = new StringBuilder("Cannot release container : ");
sb.append(cId.toString());
sb.append(" not belonging to this application attempt : ");
sb.append(attempt2.getAppAttemptId().toString());
Assert.assertTrue(e.getMessage().contains(sb.toString()));
}
} finally {
if (rm != null) {
rm.stop();
}
}
}
use of org.apache.hadoop.yarn.api.records.ContainerId in project hadoop by apache.
the class TestClientRMService method testGetContainerReport.
@Test
public void testGetContainerReport() throws YarnException, IOException {
ClientRMService rmService = createRMService();
RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
GetContainerReportRequest request = recordFactory.newRecordInstance(GetContainerReportRequest.class);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(123456, 1), 1);
ContainerId containerId = ContainerId.newContainerId(attemptId, 1);
request.setContainerId(containerId);
try {
GetContainerReportResponse response = rmService.getContainerReport(request);
Assert.assertEquals(containerId, response.getContainerReport().getContainerId());
} catch (ApplicationNotFoundException ex) {
Assert.fail(ex.getMessage());
}
}
Aggregations