use of org.apache.hadoop.yarn.client.api.YarnClient in project flink by apache.
the class AbstractYarnClusterDescriptor method getYarnClient.
/**
 * Creates a Hadoop YARN client, initializes it with {@code conf} and starts it.
 *
 * @return a started {@link YarnClient}; the caller has to shut it down manually
 */
protected YarnClient getYarnClient() {
    final YarnClient client = YarnClient.createYarnClient();
    // init must precede start
    client.init(conf);
    client.start();
    return client;
}
use of org.apache.hadoop.yarn.client.api.YarnClient in project flink by apache.
the class YARNSessionFIFOITCase method testDetachedMode.
/**
 * Test regular operation, including command line parameter parsing.
 *
 * <p>Starts a detached YARN session with the custom name "MyCustomName", waits until two
 * containers are running, kills the application "externally" through a {@link YarnClient}
 * and finally removes the yarn-properties file so it does not leak into later tests.
 *
 * @throws InterruptedException if waiting for the CLI runner thread is interrupted
 */
// timeout after a minute.
@Test(timeout = 60000)
public void testDetachedMode() throws InterruptedException {
    LOG.info("Starting testDetachedMode()");
    addTestAppender(FlinkYarnSessionCli.class, Level.INFO);
    Runner runner = startWithArgs(new String[] {
            "-j", flinkUberjar.getAbsolutePath(),
            "-t", flinkLibFolder.getAbsolutePath(),
            "-n", "1",
            "-jm", "768",
            "-tm", "1024",
            // test setting a custom name
            "--name", "MyCustomName",
            "--detached" },
        "Flink JobManager is now running on", RunTypes.YARN_SESSION);
    // before checking any strings outputted by the CLI, first give it time to return
    runner.join();
    checkForLogString("The Flink YARN client has been started in detached mode");
    LOG.info("Waiting until two containers are running");
    // wait until two containers are running
    while (getRunningContainers() < 2) {
        sleep(500);
    }
    //additional sleep for the JM/TM to start and establish connection
    sleep(2000);
    LOG.info("Two containers are running. Killing the application");
    // kill application "externally".
    // declared outside the try so that the finally block can release it
    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        // Only one running
        Assert.assertEquals(1, apps.size());
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("MyCustomName", app.getName());
        ApplicationId id = app.getApplicationId();
        yc.killApplication(id);
        // poll until YARN reports at least one application in state KILLED
        while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) {
            sleep(500);
        }
    } catch (Throwable t) {
        LOG.warn("Killing failed", t);
        // carry the cause in the failure message; a bare fail() hides which step broke
        Assert.fail("Killing failed: " + t);
    } finally {
        // always release the YARN client, even when the kill sequence above failed
        if (yc != null) {
            try {
                yc.stop();
            } catch (Exception e) {
                // best effort: a failing stop must not prevent the file cleanup below
                LOG.warn("Stopping the YarnClient failed", e);
            }
        }
        //cleanup the yarn-properties file
        String confDirPath = System.getenv("FLINK_CONF_DIR");
        File configDirectory = new File(confDirPath);
        LOG.info("testDetachedPerJobYarnClusterInternal: Using configuration directory " + configDirectory.getAbsolutePath());
        // load the configuration
        LOG.info("testDetachedPerJobYarnClusterInternal: Trying to load configuration file");
        GlobalConfiguration.loadConfiguration(configDirectory.getAbsolutePath());
        try {
            File yarnPropertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(GlobalConfiguration.loadConfiguration());
            if (yarnPropertiesFile.exists()) {
                LOG.info("testDetachedPerJobYarnClusterInternal: Cleaning up temporary Yarn address reference: {}", yarnPropertiesFile.getAbsolutePath());
                yarnPropertiesFile.delete();
            }
        } catch (Exception e) {
            LOG.warn("testDetachedPerJobYarnClusterInternal: Exception while deleting the JobManager address file", e);
        }
    }
    LOG.info("Finished testDetachedMode()");
}
use of org.apache.hadoop.yarn.client.api.YarnClient in project flink by apache.
the class YARNSessionCapacitySchedulerITCase method testTaskManagerFailure.
/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *
 * <p>The test runs in three phases:
 * <ol>
 *   <li>start a YARN session and verify, through the JobManager web interface, the number of
 *       TaskManagers/slots, the dynamic properties, the JobManager hostname/port (FLINK-1902)
 *       and the log file access;</li>
 *   <li>locate the container running the TaskManager and stop it via its NodeManager;</li>
 *   <li>wait until the captured stderr shows the container being killed and afterwards a new
 *       TaskManager being launched, then stop the session and check the collected output.</li>
 * </ol>
 */
// timeout after 100 seconds
@Test(timeout = 100000)
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(new String[] {
            "-j", flinkUberjar.getAbsolutePath(),
            "-t", flinkLibFolder.getAbsolutePath(),
            "-n", "1",
            "-jm", "768",
            "-tm", "1024",
            // set the slots 3 to check if the vCores are set properly!
            "-s", "3",
            "-nm", "customName",
            "-Dfancy-configuration-value=veryFancy",
            "-Dyarn.maximum-failed-containers=3",
            "-D" + ConfigConstants.YARN_VCORES + "=2" },
        "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);
    Assert.assertEquals(2, getRunningContainers());
    // ------------------------ Test if JobManager web interface is accessible -------
    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        // Only one running
        Assert.assertEquals(1, apps.size());
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        // normalize the tracking URL so that relative REST paths can simply be appended
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);
        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());
        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);
        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));
        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        // keep iterating so that the LAST match in the captured stdout wins
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);
        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));
        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    } finally {
        // the client is not used past this point; stopping it here also releases it
        // when one of the checks above fails
        if (yc != null) {
            yc.stop();
        }
    }
    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------
    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            // the TaskManager container is identified by its launch command line
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }
    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);
    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);
    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }
    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());
        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            // only look at the output that follows the "killed" message, so a launch
            // message is accepted only if it appears after the kill
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);
    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
        // restore the interrupt flag so that the surrounding test harness can observe it
        Thread.currentThread().interrupt();
    }
    LOG.warn("stopped");
    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);
    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));
    Assert.assertTrue("Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
    Assert.assertTrue("Expect to see new container started", eC.contains("Launching TaskManager") && eC.contains("on host"));
    // cleanup auth for the subsequent tests.
    // NOTE(review): getTokenIdentifiers() may return a copy of the credential set, in which
    // case this remove() would not affect the UGI itself - verify against the Hadoop version in use.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);
    LOG.info("Finished testTaskManagerFailure()");
}
use of org.apache.hadoop.yarn.client.api.YarnClient in project hadoop by apache.
the class TestLogsCLI method testFetchRunningApplicationLogs.
/**
 * Checks that the logs CLI, when pointed at a RUNNING application, requests the logs of
 * every reported container with log type "ALL", and that it still requests the logs of an
 * explicitly given container when the YarnClient mock is the "with exception" variant.
 */
@Test(timeout = 5000)
public void testFetchRunningApplicationLogs() throws Exception {
    final UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    final String userName = currentUser.getShortUserName();
    final NodeId node = NodeId.newInstance("localhost", 1234);
    final ApplicationId applicationId = ApplicationId.newInstance(0, 1);
    final ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId, 1);

    // A single mocked application attempt report
    ApplicationAttemptReport attemptReport = mock(ApplicationAttemptReport.class);
    doReturn(attemptId).when(attemptReport).getApplicationAttemptId();
    List<ApplicationAttemptReport> attemptReports = Arrays.asList(attemptReport);

    // Two mocked container reports, both assigned to the same node
    ContainerId firstContainerId = ContainerId.newContainerId(attemptId, 1);
    ContainerReport firstReport = mock(ContainerReport.class);
    doReturn(firstContainerId).when(firstReport).getContainerId();
    doReturn(node).when(firstReport).getAssignedNode();
    doReturn("http://localhost:2345").when(firstReport).getNodeHttpAddress();

    ContainerId secondContainerId = ContainerId.newContainerId(attemptId, 2);
    ContainerReport secondReport = mock(ContainerReport.class);
    doReturn(secondContainerId).when(secondReport).getContainerId();
    doReturn(node).when(secondReport).getAssignedNode();
    doReturn("http://localhost:2345").when(secondReport).getNodeHttpAddress();

    List<ContainerReport> containerReports = Arrays.asList(firstReport, secondReport);

    // The mocked YarnClient hands out the attempt report and the two container
    // reports created above
    YarnClient mockYarnClient = createMockYarnClient(
        YarnApplicationState.RUNNING, userName, true, attemptReports, containerReports);
    LogsCLI cli = spy(new LogsCLIForTest(mockYarnClient));
    doReturn(0).when(cli).printContainerLogsFromRunningApplication(
        any(Configuration.class), any(ContainerLogsRequest.class),
        any(LogCLIHelpers.class), anyBoolean());
    cli.setConf(new YarnConfiguration());

    String[] appOnlyArgs = new String[] { "-applicationId", applicationId.toString() };
    int exitCode = cli.run(appOnlyArgs);
    assertTrue(exitCode == 0);

    ArgumentCaptor<ContainerLogsRequest> logsRequestCaptor =
        ArgumentCaptor.forClass(ContainerLogsRequest.class);
    // two container reports => printContainerLogsFromRunningApplication
    // must have been invoked exactly twice
    verify(cli, times(2)).printContainerLogsFromRunningApplication(
        any(Configuration.class), logsRequestCaptor.capture(),
        any(LogCLIHelpers.class), anyBoolean());

    // Every captured request must ask for exactly the log type "ALL"
    List<ContainerLogsRequest> capturedRequests = logsRequestCaptor.getAllValues();
    Assert.assertEquals(2, capturedRequests.size());
    for (ContainerLogsRequest captured : capturedRequests) {
        Set<String> logTypes = captured.getLogTypes();
        Assert.assertTrue(logTypes.contains("ALL") && (logTypes.size() == 1));
    }

    // Second scenario: YarnClient mock created by the "with exception" factory,
    // container and node address passed explicitly on the command line
    mockYarnClient = createMockYarnClientWithException(YarnApplicationState.RUNNING, userName);
    LogsCLI cli2 = spy(new LogsCLIForTest(mockYarnClient));
    doReturn(0).when(cli2).printContainerLogsFromRunningApplication(
        any(Configuration.class), any(ContainerLogsRequest.class),
        any(LogCLIHelpers.class), anyBoolean());
    doReturn("123").when(cli2).getNodeHttpAddressFromRMWebString(any(ContainerLogsRequest.class));
    cli2.setConf(new YarnConfiguration());

    ContainerId containerId100 = ContainerId.newContainerId(attemptId, 100);
    String[] explicitContainerArgs = new String[] {
        "-applicationId", applicationId.toString(),
        "-containerId", containerId100.toString(),
        "-nodeAddress", "NM:1234" };
    exitCode = cli2.run(explicitContainerArgs);
    assertTrue(exitCode == 0);
    verify(cli2, times(1)).printContainerLogsFromRunningApplication(
        any(Configuration.class), logsRequestCaptor.capture(),
        any(LogCLIHelpers.class), anyBoolean());
}
use of org.apache.hadoop.yarn.client.api.YarnClient in project hadoop by apache.
the class TestLogsCLI method testFetchApplictionLogsHar.
/**
 * Checks that the logs CLI can print aggregated logs that are stored in a HAR archive.
 *
 * <p>The archive {@code application_1440536969523_0001.har} is taken from the classpath and
 * copied into the per-user remote log directory below {@code target/logs/}. The CLI is then
 * run for that application id and its stdout is checked for the expected container headers
 * and log contents. The captured stdout and the remote log directory are cleaned up even
 * when an assertion fails, so that a failure here does not leave state behind for other tests.
 *
 * @throws Exception if file system access or the CLI run fails
 */
@Test(timeout = 15000)
public void testFetchApplictionLogsHar() throws Exception {
    String remoteLogRootDir = "target/logs/";
    Configuration configuration = new Configuration();
    configuration.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
    configuration.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogRootDir);
    configuration.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
    configuration.set(YarnConfiguration.YARN_ADMIN_ACL, "admin");
    FileSystem fs = FileSystem.get(configuration);
    try {
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        URL harUrl = ClassLoader.getSystemClassLoader().getResource("application_1440536969523_0001.har");
        assertNotNull(harUrl);
        Path path = new Path(remoteLogRootDir + ugi.getShortUserName() + "/logs/application_1440536969523_0001");
        // start from a clean application log directory
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        assertTrue(fs.mkdirs(path));
        Path harPath = new Path(path, "application_1440536969523_0001.har");
        fs.copyFromLocalFile(false, new Path(harUrl.toURI()), harPath);
        assertTrue(fs.exists(harPath));
        YarnClient mockYarnClient = createMockYarnClient(YarnApplicationState.FINISHED, ugi.getShortUserName());
        LogsCLI cli = new LogsCLIForTest(mockYarnClient);
        cli.setConf(configuration);
        int exitCode = cli.run(new String[] { "-applicationId", "application_1440536969523_0001" });
        assertTrue(exitCode == 0);
        String out = sysOutStream.toString();
        assertTrue(out.contains("container_1440536969523_0001_01_000001 on host1_1111"));
        assertTrue(out.contains("Hello stderr"));
        assertTrue(out.contains("Hello stdout"));
        assertTrue(out.contains("Hello syslog"));
        assertTrue(out.contains("container_1440536969523_0001_01_000002 on host2_2222"));
        assertTrue(out.contains("Goodbye stderr"));
        assertTrue(out.contains("Goodbye stdout"));
        assertTrue(out.contains("Goodbye syslog"));
    } finally {
        // run the cleanup on failure as well: reset captured stdout and remove the log root
        sysOutStream.reset();
        fs.delete(new Path(remoteLogRootDir), true);
    }
}
Aggregations