Use of org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest in project hadoop by apache.
From the class TestContainerManager, method testMultipleContainersStopAndGetStatus.
@Test
public void testMultipleContainersStopAndGetStatus() throws Exception {
  containerManager.start();
  List<StartContainerRequest> startRequest = new ArrayList<>();
  ContainerLaunchContext containerLaunchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  List<ContainerId> containerIds = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    ContainerId cId;
    if ((i & 1) == 0) {
      // Containers with even id belong to an unauthorized app
      cId = createContainerId(i, 1);
    } else {
      cId = createContainerId(i, 0);
    }
    Token containerToken = createContainerToken(cId, DUMMY_RM_IDENTIFIER,
        context.getNodeId(), user, context.getContainerTokenSecretManager());
    StartContainerRequest request =
        StartContainerRequest.newInstance(containerLaunchContext, containerToken);
    startRequest.add(request);
    containerIds.add(cId);
  }
  // Start the containers.
  StartContainersRequest requestList = StartContainersRequest.newInstance(startRequest);
  containerManager.startContainers(requestList);
  Thread.sleep(5000);
  // Get container statuses.
  GetContainerStatusesRequest statusRequest =
      GetContainerStatusesRequest.newInstance(containerIds);
  GetContainerStatusesResponse statusResponse =
      containerManager.getContainerStatuses(statusRequest);
  Assert.assertEquals(5, statusResponse.getContainerStatuses().size());
  for (ContainerStatus status : statusResponse.getContainerStatuses()) {
    // Containers with odd id should succeed.
    Assert.assertEquals(1, status.getContainerId().getContainerId() & 1);
  }
  Assert.assertEquals(5, statusResponse.getFailedRequests().size());
  for (Map.Entry<ContainerId, SerializedException> entry :
      statusResponse.getFailedRequests().entrySet()) {
    // Containers with even id should fail.
    Assert.assertEquals(0, entry.getKey().getContainerId() & 1);
    Assert.assertTrue(entry.getValue().getMessage()
        .contains("attempted to get status for non-application container"));
  }
  // Stop the containers.
  StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds);
  StopContainersResponse stopResponse = containerManager.stopContainers(stopRequest);
  Assert.assertEquals(5, stopResponse.getSuccessfullyStoppedContainers().size());
  for (ContainerId id : stopResponse.getSuccessfullyStoppedContainers()) {
    // Containers with odd id should succeed.
    Assert.assertEquals(1, id.getContainerId() & 1);
  }
  Assert.assertEquals(5, stopResponse.getFailedRequests().size());
  for (Map.Entry<ContainerId, SerializedException> entry :
      stopResponse.getFailedRequests().entrySet()) {
    // Containers with even id should fail.
    Assert.assertEquals(0, entry.getKey().getContainerId() & 1);
    Assert.assertTrue(entry.getValue().getMessage()
        .contains("attempted to stop non-application container"));
  }
}
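Stripped of the test scaffolding, the batch stop-and-inspect pattern exercised above reduces to a few protocol calls. The sketch below is illustrative only: the class name, the method name, and the idea that the caller supplies an already-initialized ContainerManagementProtocol proxy ("cm") and a list of container ids are assumptions, not part of the Hadoop test code.

import java.util.List;
import java.util.Map;

import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.SerializedException;

public final class StopContainersExample {

  /**
   * Stops the given containers in one batch call and reports which ones the
   * NodeManager rejected. "cm" and "containerIds" are assumed to be provided
   * by the caller (for example, a ContainerManager started in-process as in
   * the test above).
   */
  static void stopAndReport(ContainerManagementProtocol cm,
      List<ContainerId> containerIds) throws Exception {
    // One request can carry many container ids; the NM answers per container.
    StopContainersRequest request = StopContainersRequest.newInstance(containerIds);
    StopContainersResponse response = cm.stopContainers(request);

    for (ContainerId id : response.getSuccessfullyStoppedContainers()) {
      System.out.println("Stopped: " + id);
    }
    // Failures come back per container as serialized exceptions, so one bad id
    // (e.g. a container owned by another application) does not fail the whole batch.
    for (Map.Entry<ContainerId, SerializedException> failed
        : response.getFailedRequests().entrySet()) {
      System.out.println("Failed to stop " + failed.getKey() + ": "
          + failed.getValue().getMessage());
    }
  }
}

This per-container partitioning into getSuccessfullyStoppedContainers() and getFailedRequests() is exactly what the test asserts on above: the odd-numbered containers land in the success list, the even-numbered ones in the failure map.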
Use of org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest in project hadoop by apache.
From the class TestContainerSchedulerQueuing, method testStopQueuedContainer.
/**
* Start running one GUARANTEED container and queue two OPPORTUNISTIC ones.
* Try killing one of the two queued containers.
* @throws Exception
*/
@Test
public void testStopQueuedContainer() throws Exception {
  containerManager.start();
  ContainerLaunchContext containerLaunchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  List<StartContainerRequest> list = new ArrayList<>();
  list.add(StartContainerRequest.newInstance(containerLaunchContext,
      createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
          context.getNodeId(), user, BuilderUtils.newResource(2048, 1),
          context.getContainerTokenSecretManager(), null,
          ExecutionType.GUARANTEED)));
  list.add(StartContainerRequest.newInstance(containerLaunchContext,
      createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
          context.getNodeId(), user, BuilderUtils.newResource(512, 1),
          context.getContainerTokenSecretManager(), null,
          ExecutionType.OPPORTUNISTIC)));
  list.add(StartContainerRequest.newInstance(containerLaunchContext,
      createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
          context.getNodeId(), user, BuilderUtils.newResource(512, 1),
          context.getContainerTokenSecretManager(), null,
          ExecutionType.OPPORTUNISTIC)));
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  containerManager.startContainers(allRequests);
  Thread.sleep(2000);
  // Assert there is initially one container running and two queued.
  int runningContainersNo = 0;
  int queuedContainersNo = 0;
  List<ContainerId> statList = new ArrayList<ContainerId>();
  for (int i = 0; i < 3; i++) {
    statList.add(createContainerId(i));
  }
  GetContainerStatusesRequest statRequest =
      GetContainerStatusesRequest.newInstance(statList);
  List<ContainerStatus> containerStatuses =
      containerManager.getContainerStatuses(statRequest).getContainerStatuses();
  for (ContainerStatus status : containerStatuses) {
    if (status.getState() ==
        org.apache.hadoop.yarn.api.records.ContainerState.RUNNING) {
      runningContainersNo++;
    } else if (status.getState() ==
        org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED) {
      queuedContainersNo++;
    }
    System.out.println("\nStatus : [" + status + "]\n");
  }
  Assert.assertEquals(1, runningContainersNo);
  Assert.assertEquals(2, queuedContainersNo);
  // Stop one of the two queued containers.
  StopContainersRequest stopRequest =
      StopContainersRequest.newInstance(Arrays.asList(createContainerId(1)));
  containerManager.stopContainers(stopRequest);
  Thread.sleep(2000);
  // Assert the queued container got properly stopped.
  statList.clear();
  for (int i = 0; i < 3; i++) {
    statList.add(createContainerId(i));
  }
  statRequest = GetContainerStatusesRequest.newInstance(statList);
  HashMap<org.apache.hadoop.yarn.api.records.ContainerState, ContainerStatus> map =
      new HashMap<>();
  for (int i = 0; i < 10; i++) {
    containerStatuses =
        containerManager.getContainerStatuses(statRequest).getContainerStatuses();
    for (ContainerStatus status : containerStatuses) {
      System.out.println("\nStatus : [" + status + "]\n");
      map.put(status.getState(), status);
      if (map.containsKey(org.apache.hadoop.yarn.api.records.ContainerState.RUNNING)
          && map.containsKey(org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED)
          && map.containsKey(org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE)) {
        break;
      }
      Thread.sleep(1000);
    }
  }
  Assert.assertEquals(createContainerId(0),
      map.get(org.apache.hadoop.yarn.api.records.ContainerState.RUNNING).getContainerId());
  Assert.assertEquals(createContainerId(1),
      map.get(org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE).getContainerId());
  Assert.assertEquals(createContainerId(2),
      map.get(org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED).getContainerId());
}
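The test above waits for the stopped container to transition by repeatedly calling getContainerStatuses with fixed Thread.sleep delays. A small, self-contained variant of that polling idea is sketched below; the class name, method name, and the externally supplied ContainerManagementProtocol proxy ("cm") are illustrative assumptions, not part of the Hadoop test code.

import java.util.Arrays;

import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;

public final class ContainerStateWaiter {

  /**
   * Polls the NodeManager until the given container reaches the expected
   * state or the timeout elapses. Returns true if the state was observed.
   * The protocol proxy "cm" is assumed to be supplied by the caller.
   */
  static boolean waitForState(ContainerManagementProtocol cm, ContainerId id,
      ContainerState expected, long timeoutMs) throws Exception {
    long deadline = System.currentTimeMillis() + timeoutMs;
    GetContainerStatusesRequest request =
        GetContainerStatusesRequest.newInstance(Arrays.asList(id));
    while (System.currentTimeMillis() < deadline) {
      for (ContainerStatus status : cm.getContainerStatuses(request).getContainerStatuses()) {
        if (id.equals(status.getContainerId()) && status.getState() == expected) {
          return true;
        }
      }
      // Back off briefly between polls instead of spinning.
      Thread.sleep(500);
    }
    return false;
  }
}

With such a helper, the assertion that container 1 reaches COMPLETE after the StopContainersRequest could be written as a single bounded wait instead of nested loops over a state map, at the cost of one RPC per container of interest.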
Use of org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest in project flink by apache.
From the class YARNSessionCapacitySchedulerITCase, method testTaskManagerFailure.
/**
* Test TaskManager failure and whether the vcores are set correctly (see issue FLINK-2213).
*/
// timeout after 100 seconds
@Test(timeout = 100000)
public void testTaskManagerFailure() {
  LOG.info("Starting testTaskManagerFailure()");
  Runner runner = startWithArgs(new String[] {
      "-j", flinkUberjar.getAbsolutePath(),
      "-t", flinkLibFolder.getAbsolutePath(),
      "-n", "1",
      "-jm", "768",
      "-tm", "1024",
      // set the slots to 3 to check if the vCores are set properly!
      "-s", "3",
      "-nm", "customName",
      "-Dfancy-configuration-value=veryFancy",
      "-Dyarn.maximum-failed-containers=3",
      "-D" + ConfigConstants.YARN_VCORES + "=2" },
      "Number of connected TaskManagers changed to 1. Slots available: 3",
      RunTypes.YARN_SESSION);
  Assert.assertEquals(2, getRunningContainers());
  // ------------------------ Test if JobManager web interface is accessible -------
  YarnClient yc = null;
  try {
    yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);
    yc.start();
    List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
    // Only one application should be running.
    Assert.assertEquals(1, apps.size());
    ApplicationReport app = apps.get(0);
    Assert.assertEquals("customName", app.getName());
    String url = app.getTrackingUrl();
    if (!url.endsWith("/")) {
      url += "/";
    }
    if (!url.startsWith("http://")) {
      url = "http://" + url;
    }
    LOG.info("Got application URL from YARN {}", url);
    String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
    JsonNode parsedTMs = new ObjectMapper().readTree(response);
    ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
    Assert.assertNotNull(taskManagers);
    Assert.assertEquals(1, taskManagers.size());
    Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());
    // Get the configuration from the web interface and check whether the
    // dynamic properties from YARN show up there.
    String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
    Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);
    Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
    Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
    Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));
    // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
    // First, get the hostname/port from the session output.
    String oC = outContent.toString();
    Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
    Matcher matches = p.matcher(oC);
    String hostname = null;
    String port = null;
    while (matches.find()) {
      hostname = matches.group(1).toLowerCase();
      port = matches.group(2);
    }
    LOG.info("Extracted hostname:port: {} {}", hostname, port);
    Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname,
        parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
    Assert.assertEquals("unable to find port in " + jsonConfig, port,
        parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));
    // Test logfile access.
    String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
    Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
    Assert.assertTrue(logs.contains("Starting JobManager"));
    Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
  } catch (Throwable e) {
    LOG.warn("Error while running test", e);
    Assert.fail(e.getMessage());
  }
  // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------
  // Find the container id of the TaskManager.
  ContainerId taskManagerContainer = null;
  NodeManager nodeManager = null;
  UserGroupInformation remoteUgi = null;
  NMTokenIdentifier nmIdent = null;
  try {
    remoteUgi = UserGroupInformation.getCurrentUser();
  } catch (IOException e) {
    LOG.warn("Unable to get curr user", e);
    Assert.fail();
  }
  for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
    NodeManager nm = yarnCluster.getNodeManager(nmId);
    ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
    for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
      String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
      if (command.contains(YarnTaskManager.class.getSimpleName())) {
        taskManagerContainer = entry.getKey();
        nodeManager = nm;
        nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
        // Allow this user to act on the container.
        // remoteUgi.addCredentials(entry.getValue().getCredentials());
        remoteUgi.addTokenIdentifier(nmIdent);
      }
    }
    sleep(500);
  }
  Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
  Assert.assertNotNull("Illegal state", nodeManager);
  yc.stop();
  List<ContainerId> toStop = new LinkedList<ContainerId>();
  toStop.add(taskManagerContainer);
  StopContainersRequest scr = StopContainersRequest.newInstance(toStop);
  try {
    nodeManager.getNMContext().getContainerManager().stopContainers(scr);
  } catch (Throwable e) {
    LOG.warn("Error stopping container", e);
    Assert.fail("Error stopping container: " + e.getMessage());
  }
  // Stateful termination check:
  // wait until we saw a container being killed and AFTERWARDS a new one launched.
  boolean ok = false;
  do {
    LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());
    String o = errContent.toString();
    int killedOff = o.indexOf("Container killed by the ApplicationMaster");
    if (killedOff != -1) {
      o = o.substring(killedOff);
      ok = o.indexOf("Launching TaskManager") > 0;
    }
    sleep(1000);
  } while (!ok);
  // Send the "stop" command to the command line interface.
  runner.sendStop();
  // Wait for the runner thread to stop.
  try {
    runner.join(1000);
  } catch (InterruptedException e) {
    LOG.warn("Interrupted while stopping runner", e);
  }
  LOG.warn("stopped");
  // ----------- Send output to logger
  System.setOut(originalStdout);
  System.setErr(originalStderr);
  String oC = outContent.toString();
  String eC = errContent.toString();
  LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
  LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);
  // ------ Check if everything happened correctly
  Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));
  Assert.assertTrue("Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
  Assert.assertTrue("Expect to see new container started", eC.contains("Launching TaskManager") && eC.contains("on host"));
  // Clean up auth for the subsequent tests.
  remoteUgi.getTokenIdentifiers().remove(nmIdent);
  LOG.info("Finished testTaskManagerFailure()");
}
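The core of the container-kill step in this Flink test is a single StopContainersRequest sent to the NodeManager embedded in the MiniYARNCluster. The sketch below isolates that step; the class and method names are hypothetical, and it assumes the caller has already located the NodeManager and the ContainerId (and, as the test does, registered an NMTokenIdentifier for the current user, since the NM may otherwise reject the request).

import java.util.Collections;

import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.server.nodemanager.NodeManager;

public final class KillContainerOnNodeManager {

  /**
   * Asks an in-process MiniYARNCluster NodeManager to stop one container,
   * which is the call the Flink test issues to kill its TaskManager container
   * before verifying that the ApplicationMaster launches a replacement.
   */
  static StopContainersResponse killContainer(NodeManager nm, ContainerId containerId)
      throws Exception {
    StopContainersRequest request =
        StopContainersRequest.newInstance(Collections.singletonList(containerId));
    // In a mini cluster the NM-side ContainerManager is reachable directly
    // through the NM context, so no RPC client setup is needed here.
    return nm.getNMContext().getContainerManager().stopContainers(request);
  }
}

Checking getSuccessfullyStoppedContainers() on the returned response (as the Hadoop tests above do) would confirm the kill was accepted before waiting for the replacement container to appear.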