use of org.apache.apex.api.Launcher.AppHandle in project beam by apache.
the class ApexRunner method run.
/**
 * Translates the given pipeline into an Apex DAG and launches it, either with the embedded
 * launcher or on YARN, depending on the runner options.
 *
 * <p>Launch configuration is read from {@code options.getConfigFile()} when set. A URI with the
 * {@code CLASSPATH_SCHEME} scheme is looked up as a classpath resource (a missing resource is
 * silently ignored); any other value is opened as a URL, with non-absolute values resolved as a
 * local file name.
 *
 * @param pipeline the pipeline to translate and run
 * @return the result holding the DAG and the application handle; the handle is {@code null}
 *     when {@code translateOnly} is set in embedded mode
 * @throws RuntimeException if the configuration cannot be loaded or the launch fails
 */
@Override
public ApexRunnerResult run(final Pipeline pipeline) {
  pipeline.replaceAll(getOverrides());
  final ApexPipelineTranslator translator = new ApexPipelineTranslator(options);
  // Captures the DAG instance handed to populateDAG so it can be returned with the result.
  final AtomicReference<DAG> apexDAG = new AtomicReference<>();
  StreamingApplication apexApp = new StreamingApplication() {
    @Override
    public void populateDAG(DAG dag, Configuration conf) {
      apexDAG.set(dag);
      dag.setAttribute(DAGContext.APPLICATION_NAME, options.getApplicationName());
      translator.translate(pipeline, dag);
    }
  };
  Properties configProperties = new Properties();
  try {
    if (options.getConfigFile() != null) {
      URI configURL = new URI(options.getConfigFile());
      if (CLASSPATH_SCHEME.equals(configURL.getScheme())) {
        // try-with-resources tolerates a null resource and guarantees the stream is closed
        // even when loading the properties fails.
        try (InputStream is = this.getClass().getResourceAsStream(configURL.getPath())) {
          if (is != null) {
            configProperties.load(is);
          }
        }
      } else {
        if (!configURL.isAbsolute()) {
          // resolve as local file name
          File f = new File(options.getConfigFile());
          configURL = f.toURI();
        }
        try (InputStream is = configURL.toURL().openStream()) {
          configProperties.load(is);
        }
      }
    }
  } catch (IOException | URISyntaxException ex) {
    throw new RuntimeException("Error loading properties", ex);
  }
  if (options.isEmbeddedExecution()) {
    EmbeddedAppLauncher<?> launcher = Launcher.getLauncher(LaunchMode.EMBEDDED);
    Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
    launchAttributes.put(EmbeddedAppLauncher.RUN_ASYNC, true);
    if (options.isEmbeddedExecutionDebugMode()) {
      // turns off timeout checking for operator progress
      launchAttributes.put(EmbeddedAppLauncher.HEARTBEAT_MONITORING, false);
    }
    Configuration conf = new Configuration(false);
    ApexYarnLauncher.addProperties(conf, configProperties);
    try {
      if (translateOnly) {
        // Only build the DAG; nothing is launched, hence no application handle.
        launcher.prepareDAG(apexApp, conf);
        return new ApexRunnerResult(launcher.getDAG(), null);
      }
      ApexRunner.ASSERTION_ERROR.set(null);
      AppHandle apexAppResult = launcher.launchApp(apexApp, conf, launchAttributes);
      return new ApexRunnerResult(apexDAG.get(), apexAppResult);
    } catch (Exception e) {
      Throwables.throwIfUnchecked(e);
      throw new RuntimeException(e);
    }
  } else {
    try {
      ApexYarnLauncher yarnLauncher = new ApexYarnLauncher();
      AppHandle apexAppResult = yarnLauncher.launchApp(apexApp, configProperties);
      return new ApexRunnerResult(apexDAG.get(), apexAppResult);
    } catch (IOException e) {
      throw new RuntimeException("Failed to launch the application on YARN.", e);
    }
  }
}
use of org.apache.apex.api.Launcher.AppHandle in project apex-malhar by apache.
the class ExactlyOnceJdbcOutputTest method testApplication.
/**
 * Publishes a handful of words to Kafka, runs {@code ExactlyOnceJdbcOutputApp} in embedded mode
 * and polls the result table until the row count matches the number of distinct words or a 30
 * second timeout expires.
 *
 * <p>The JDBC resources are closed and the application is killed even when the assertion fails,
 * so a failing run does not leave the embedded application or open connections behind.
 */
@Test
public void testApplication() throws Exception {
  KafkaUnit ku = kafkaUnitRule.getKafkaUnit();
  String topicName = "testTopic";
  // topic creation is async and the producer may also auto-create it
  ku.createTopic(topicName, 1);
  // produce test data
  String[] words = "count the words from kafka and store them in the db".split("\\s+");
  for (String word : words) {
    ku.sendMessages(new KeyedMessage<String, String>(topicName, word));
  }
  Configuration conf = new Configuration(false);
  conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
  conf.set("apex.operator.kafkaInput.prop.topics", topicName);
  conf.set("apex.operator.kafkaInput.prop.clusters", "localhost:" + brokerPort);
  // consume one word per window
  conf.set("apex.operator.kafkaInput.prop.maxTuplesPerWindow", "1");
  conf.set("apex.operator.kafkaInput.prop.initialOffset", "EARLIEST");
  conf.set("apex.operator.store.prop.store.databaseDriver", DB_DRIVER);
  conf.set("apex.operator.store.prop.store.databaseUrl", DB_URL);
  EmbeddedAppLauncher<?> launcher = Launcher.getLauncher(LaunchMode.EMBEDDED);
  Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
  // terminate after results are available
  launchAttributes.put(EmbeddedAppLauncher.RUN_ASYNC, true);
  AppHandle appHandle = launcher.launchApp(new ExactlyOnceJdbcOutputApp(), conf, launchAttributes);
  try {
    HashSet<String> wordsSet = Sets.newHashSet(words);
    int rowCount = 0;
    try (Connection con = DriverManager.getConnection(DB_URL);
        Statement stmt = con.createStatement()) {
      // 30s timeout
      long timeout = System.currentTimeMillis() + 30000;
      while (rowCount < wordsSet.size() && timeout > System.currentTimeMillis()) {
        Thread.sleep(500);
        String countQuery = "SELECT count(*) from " + TABLE_NAME;
        try (ResultSet resultSet = stmt.executeQuery(countQuery)) {
          resultSet.next();
          rowCount = resultSet.getInt(1);
        }
        LOG.info("current row count in {} is {}", TABLE_NAME, rowCount);
      }
    }
    Assert.assertEquals("number of words", wordsSet.size(), rowCount);
  } finally {
    // always stop the asynchronously running application, also when the assertion fails
    appHandle.shutdown(ShutdownMode.KILL);
  }
}
use of org.apache.apex.api.Launcher.AppHandle in project beam by apache.
the class ApexYarnLauncher method launchApp.
/**
 * Serializes the launch parameters to a temporary file and launches the application with them.
 *
 * <p>When {@code params.getCmd()} is {@code null}, {@link #main(String[])} is invoked in this
 * process. Otherwise the command, with the parameter file path appended, is run through
 * {@code bash -c} with the environment from {@code params.getEnv()} added, and its console
 * output is captured for the error message.
 *
 * @param params launch parameters to hand over to the launcher
 * @return a handle whose {@code isFinished()} always returns {@code true} and whose
 *     {@code shutdown} is not supported
 * @throws IOException if the parameter file cannot be written or the process cannot be started
 * @throws RuntimeException if the external command exits with a non-zero code
 */
protected AppHandle launchApp(LaunchParams params) throws IOException {
  File tmpFile = File.createTempFile("beam-runner-apex", "params");
  tmpFile.deleteOnExit();
  try (FileOutputStream fos = new FileOutputStream(tmpFile)) {
    SerializationUtils.serialize(params, fos);
  }
  if (params.getCmd() == null) {
    ApexYarnLauncher.main(new String[] { tmpFile.getAbsolutePath() });
  } else {
    String cmd = params.getCmd() + " " + tmpFile.getAbsolutePath();
    ByteArrayOutputStream consoleOutput = new ByteArrayOutputStream();
    LOG.info("Executing: {} with {}", cmd, params.getEnv());
    ProcessBuilder pb = new ProcessBuilder("bash", "-c", cmd);
    // Merge stderr into stdout. Draining stdout to EOF before touching stderr can deadlock:
    // a child that fills the stderr pipe blocks on write while this thread still waits for
    // stdout to end.
    pb.redirectErrorStream(true);
    Map<String, String> env = pb.environment();
    env.putAll(params.getEnv());
    Process p = pb.start();
    ProcessWatcher pw = new ProcessWatcher(p);
    InputStream output = p.getInputStream();
    while (!pw.isFinished()) {
      IOUtils.copy(output, consoleOutput);
    }
    if (pw.rc != 0) {
      String msg = "The Beam Apex runner in non-embedded mode requires the Hadoop client" + " to be installed on the machine from which you launch the job" + " and the 'hadoop' script in $PATH";
      LOG.error(msg);
      throw new RuntimeException("Failed to run: " + cmd + " (exit code " + pw.rc + ")" + "\n" + consoleOutput.toString());
    }
  }
  return new AppHandle() {
    @Override
    public boolean isFinished() {
      // TODO (future PR): interaction with child process
      LOG.warn("YARN application runs asynchronously and status check not implemented.");
      return true;
    }

    @Override
    public void shutdown(ShutdownMode arg0) throws LauncherException {
      // TODO (future PR): interaction with child process
      throw new UnsupportedOperationException();
    }
  };
}
use of org.apache.apex.api.Launcher.AppHandle in project beam by apache.
the class ApexYarnLauncher method main.
/**
 * The main method expects the serialized DAG and will launch the YARN application.
 *
 * <p>The single argument is the path of a file holding the serialized {@code LaunchParams}.
 * The DAG contained in the parameters is shallow-copied into the DAG of the launched
 * application, and the configuration properties from the parameters are added on top of the
 * default Hadoop client configuration.
 *
 * @param args location of launch parameters
 * @throws IOException when parameters cannot be read
 * @throws IllegalArgumentException when not exactly one argument is given or it is not a file
 * @throws AssertionError when the launcher returns a {@code null} handle
 */
public static void main(String[] args) throws IOException {
  checkArgument(args.length == 1, "exactly one argument expected");
  File file = new File(args[0]);
  checkArgument(file.exists() && file.isFile(), "invalid file path %s", file);
  final LaunchParams params;
  // Own the stream here so it is released even if deserialization fails early,
  // mirroring how the parameter file is written in launchApp.
  try (FileInputStream in = new FileInputStream(file)) {
    params = (LaunchParams) SerializationUtils.deserialize(in);
  }
  StreamingApplication apexApp = new StreamingApplication() {
    @Override
    public void populateDAG(DAG dag, Configuration conf) {
      copyShallow(params.dag, dag);
    }
  };
  // configuration from Hadoop client
  Configuration conf = new Configuration();
  addProperties(conf, params.configProperties);
  AppHandle appHandle = params.getApexLauncher().launchApp(apexApp, conf, params.launchAttributes);
  if (appHandle == null) {
    throw new AssertionError("Launch returns null handle.");
  }
  // TODO (future PR)
  // At this point the application is running, but this process should remain active to
  // allow the parent to implement the runner result.
}
use of org.apache.apex.api.Launcher.AppHandle in project apex-malhar by apache.
the class ExactlyOnceFileOutputAppTest method testApplication.
/**
 * Publishes words to Kafka, runs {@code ExactlyOnceFileOutputApp} in embedded mode and waits up
 * to 60 seconds for the output file to appear, then checks that it contains {@code count=2}.
 *
 * <p>The application is killed in a {@code finally} block so that a failed assertion does not
 * leave the asynchronously running embedded application behind.
 */
@Test
public void testApplication() throws Exception {
  File targetDir = new File(TARGET_DIR);
  FileUtils.deleteDirectory(targetDir);
  FileUtils.forceMkdir(targetDir);
  KafkaUnit ku = kafkaUnitRule.getKafkaUnit();
  String topicName = "testTopic";
  // topic creation is async and the producer may also auto-create it
  ku.createTopic(topicName, 1);
  // produce test data
  String[] words = "count count the words from kafka and store them in a file".split("\\s+");
  for (String word : words) {
    ku.sendMessages(new KeyedMessage<String, String>(topicName, word));
  }
  Configuration conf = new Configuration(false);
  conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
  conf.set("apex.operator.kafkaInput.prop.topics", topicName);
  conf.set("apex.operator.kafkaInput.prop.clusters", "localhost:" + brokerPort);
  // consume at most two words per window
  conf.set("apex.operator.kafkaInput.prop.maxTuplesPerWindow", "2");
  conf.set("apex.operator.kafkaInput.prop.initialOffset", "EARLIEST");
  conf.set("apex.operator.fileWriter.prop.filePath", TARGET_DIR);
  EmbeddedAppLauncher<?> launcher = Launcher.getLauncher(LaunchMode.EMBEDDED);
  Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
  // terminate after results are available
  launchAttributes.put(EmbeddedAppLauncher.RUN_ASYNC, true);
  AppHandle appHandle = launcher.launchApp(new ExactlyOnceFileOutputApp(), conf, launchAttributes);
  try {
    // 60s timeout
    long timeout = System.currentTimeMillis() + 60000;
    File outputFile = new File(TARGET_DIR, ExactlyOnceFileOutputApp.FileWriter.FILE_NAME_PREFIX);
    while (!outputFile.exists() && timeout > System.currentTimeMillis()) {
      Thread.sleep(1000);
      LOG.debug("Waiting for {}", outputFile);
    }
    Assert.assertTrue("output file exists " + ExactlyOnceFileOutputApp.FileWriter.FILE_NAME_PREFIX, outputFile.exists() && outputFile.isFile());
    String result = FileUtils.readFileToString(outputFile);
    Assert.assertTrue(result.contains("count=2"));
  } finally {
    // always stop the asynchronously running application, also when an assertion fails
    appHandle.shutdown(ShutdownMode.KILL);
  }
}
Aggregations