Search in sources :

Example 1 with AppHandle

use of org.apache.apex.api.Launcher.AppHandle in project beam by apache.

the class ApexRunner method run.

/**
 * Translates the given pipeline into an Apex DAG and launches it.
 *
 * <p>The pipeline overrides are applied first. The translation itself happens inside
 * {@code populateDAG}, i.e. when the launcher asks the application to populate its DAG.
 * If a config file option is set, its properties are loaded either from the classpath
 * (when the URI uses {@code CLASSPATH_SCHEME}) or from the URL / local file it names.
 * Depending on the options the application is then launched embedded or on YARN.
 *
 * @param pipeline the pipeline to translate and run
 * @return the result holding the DAG and the application handle; the handle is
 *     {@code null} when {@code translateOnly} is set in embedded mode
 * @throws RuntimeException if the properties cannot be loaded or the launch fails
 */
@Override
public ApexRunnerResult run(final Pipeline pipeline) {
    pipeline.replaceAll(getOverrides());
    final ApexPipelineTranslator translator = new ApexPipelineTranslator(options);
    // captures the DAG handed to populateDAG so it can be returned with the result
    final AtomicReference<DAG> apexDAG = new AtomicReference<>();
    StreamingApplication apexApp = new StreamingApplication() {

        @Override
        public void populateDAG(DAG dag, Configuration conf) {
            apexDAG.set(dag);
            dag.setAttribute(DAGContext.APPLICATION_NAME, options.getApplicationName());
            translator.translate(pipeline, dag);
        }
    };
    Properties configProperties = new Properties();
    try {
        if (options.getConfigFile() != null) {
            URI configURL = new URI(options.getConfigFile());
            if (CLASSPATH_SCHEME.equals(configURL.getScheme())) {
                // try-with-resources tolerates a null resource and guarantees the stream is
                // closed even when load() throws; a missing resource is skipped silently
                try (InputStream is = this.getClass().getResourceAsStream(configURL.getPath())) {
                    if (is != null) {
                        configProperties.load(is);
                    }
                }
            } else {
                if (!configURL.isAbsolute()) {
                    // resolve as local file name
                    File f = new File(options.getConfigFile());
                    configURL = f.toURI();
                }
                try (InputStream is = configURL.toURL().openStream()) {
                    configProperties.load(is);
                }
            }
        }
    } catch (IOException | URISyntaxException ex) {
        throw new RuntimeException("Error loading properties", ex);
    }
    if (options.isEmbeddedExecution()) {
        EmbeddedAppLauncher<?> launcher = Launcher.getLauncher(LaunchMode.EMBEDDED);
        Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
        launchAttributes.put(EmbeddedAppLauncher.RUN_ASYNC, true);
        if (options.isEmbeddedExecutionDebugMode()) {
            // turns off timeout checking for operator progress
            launchAttributes.put(EmbeddedAppLauncher.HEARTBEAT_MONITORING, false);
        }
        Configuration conf = new Configuration(false);
        ApexYarnLauncher.addProperties(conf, configProperties);
        try {
            if (translateOnly) {
                // only build the DAG; nothing is launched, hence no application handle
                launcher.prepareDAG(apexApp, conf);
                return new ApexRunnerResult(launcher.getDAG(), null);
            }
            ApexRunner.ASSERTION_ERROR.set(null);
            AppHandle apexAppResult = launcher.launchApp(apexApp, conf, launchAttributes);
            return new ApexRunnerResult(apexDAG.get(), apexAppResult);
        } catch (Exception e) {
            // rethrow unchecked exceptions as-is, wrap checked ones
            Throwables.throwIfUnchecked(e);
            throw new RuntimeException(e);
        }
    } else {
        try {
            ApexYarnLauncher yarnLauncher = new ApexYarnLauncher();
            AppHandle apexAppResult = yarnLauncher.launchApp(apexApp, configProperties);
            return new ApexRunnerResult(apexDAG.get(), apexAppResult);
        } catch (IOException e) {
            throw new RuntimeException("Failed to launch the application on YARN.", e);
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Attribute(com.datatorrent.api.Attribute) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) DAG(com.datatorrent.api.DAG) StreamingApplication(com.datatorrent.api.StreamingApplication) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) Properties(java.util.Properties) AppHandle(org.apache.apex.api.Launcher.AppHandle) URI(java.net.URI) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) ApexPipelineTranslator(org.apache.beam.runners.apex.translation.ApexPipelineTranslator) File(java.io.File) PTransformOverride(org.apache.beam.sdk.runners.PTransformOverride)

Example 2 with AppHandle

use of org.apache.apex.api.Launcher.AppHandle in project apex-malhar by apache.

the class ExactlyOnceJdbcOutputTest method testApplication.

/**
 * Sends a sentence word by word to a Kafka topic, runs {@code ExactlyOnceJdbcOutputApp}
 * in embedded mode and polls the target table until it holds one row per distinct word
 * or a 30 second timeout expires.
 *
 * <p>The JDBC resources are closed and the application is shut down even when a query
 * or the final assertion fails.
 *
 * @throws Exception if setup, launch, polling or shutdown fails
 */
@Test
public void testApplication() throws Exception {
    KafkaUnit ku = kafkaUnitRule.getKafkaUnit();
    String topicName = "testTopic";
    // topic creation is async and the producer may also auto-create it
    ku.createTopic(topicName, 1);
    // produce test data
    String[] words = "count the words from kafka and store them in the db".split("\\s+");
    for (String word : words) {
        ku.sendMessages(new KeyedMessage<String, String>(topicName, word));
    }
    Configuration conf = new Configuration(false);
    conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
    conf.set("apex.operator.kafkaInput.prop.topics", topicName);
    conf.set("apex.operator.kafkaInput.prop.clusters", "localhost:" + brokerPort);
    // consume one word per window
    conf.set("apex.operator.kafkaInput.prop.maxTuplesPerWindow", "1");
    conf.set("apex.operator.kafkaInput.prop.initialOffset", "EARLIEST");
    conf.set("apex.operator.store.prop.store.databaseDriver", DB_DRIVER);
    conf.set("apex.operator.store.prop.store.databaseUrl", DB_URL);
    EmbeddedAppLauncher<?> launcher = Launcher.getLauncher(LaunchMode.EMBEDDED);
    Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
    // terminate after results are available
    launchAttributes.put(EmbeddedAppLauncher.RUN_ASYNC, true);
    AppHandle appHandle = launcher.launchApp(new ExactlyOnceJdbcOutputApp(), conf, launchAttributes);
    try {
        // distinct words: the expected number of rows
        HashSet<String> wordsSet = Sets.newHashSet(words);
        int rowCount = 0;
        try (Connection con = DriverManager.getConnection(DB_URL);
                Statement stmt = con.createStatement()) {
            String countQuery = "SELECT count(*) from " + TABLE_NAME;
            // 30s timeout
            long timeout = System.currentTimeMillis() + 30000;
            while (rowCount < wordsSet.size() && timeout > System.currentTimeMillis()) {
                Thread.sleep(500);
                try (ResultSet resultSet = stmt.executeQuery(countQuery)) {
                    resultSet.next();
                    rowCount = resultSet.getInt(1);
                }
                LOG.info("current row count in {} is {}", TABLE_NAME, rowCount);
            }
        }
        Assert.assertEquals("number of words", wordsSet.size(), rowCount);
    } finally {
        // always stop the asynchronously running application, also on test failure
        appHandle.shutdown(ShutdownMode.KILL);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) KafkaUnit(info.batey.kafka.unit.KafkaUnit) Attribute(com.datatorrent.api.Attribute) Statement(java.sql.Statement) Connection(java.sql.Connection) AppHandle(org.apache.apex.api.Launcher.AppHandle) ResultSet(java.sql.ResultSet) Test(org.junit.Test)

Example 3 with AppHandle

use of org.apache.apex.api.Launcher.AppHandle in project beam by apache.

the class ApexYarnLauncher method launchApp.

/**
 * Serializes the launch parameters to a temporary file and launches the application,
 * either in-process via {@link #main(String[])} (when no command is configured) or
 * through the configured command run with {@code bash -c}.
 *
 * <p>The child's stderr is merged into stdout so that a single copy drains both
 * streams. Reading one stream to EOF before touching the other can block forever
 * once the child fills the pipe of the stream that is not being read.
 *
 * @param params the launch parameters to hand to the launcher process
 * @return a handle that reports the application as finished and does not support
 *     shutdown (interaction with the child process is not implemented)
 * @throws IOException if the parameter file cannot be written or the process I/O fails
 * @throws RuntimeException if the external command exits with a non-zero code
 */
protected AppHandle launchApp(LaunchParams params) throws IOException {
    File tmpFile = File.createTempFile("beam-runner-apex", "params");
    tmpFile.deleteOnExit();
    try (FileOutputStream fos = new FileOutputStream(tmpFile)) {
        SerializationUtils.serialize(params, fos);
    }
    if (params.getCmd() == null) {
        ApexYarnLauncher.main(new String[] { tmpFile.getAbsolutePath() });
    } else {
        String cmd = params.getCmd() + " " + tmpFile.getAbsolutePath();
        ByteArrayOutputStream consoleOutput = new ByteArrayOutputStream();
        LOG.info("Executing: {} with {}", cmd, params.getEnv());
        ProcessBuilder pb = new ProcessBuilder("bash", "-c", cmd);
        // merge stderr into stdout: avoids blocking on one pipe while the other fills up
        pb.redirectErrorStream(true);
        Map<String, String> env = pb.environment();
        env.putAll(params.getEnv());
        Process p = pb.start();
        ProcessWatcher pw = new ProcessWatcher(p);
        InputStream output = p.getInputStream();
        while (!pw.isFinished()) {
            IOUtils.copy(output, consoleOutput);
        }
        // pick up anything still buffered, e.g. when the process had already finished
        // before the loop above was entered
        IOUtils.copy(output, consoleOutput);
        if (pw.rc != 0) {
            String msg = "The Beam Apex runner in non-embedded mode requires the Hadoop client" + " to be installed on the machine from which you launch the job" + " and the 'hadoop' script in $PATH";
            LOG.error(msg);
            throw new RuntimeException("Failed to run: " + cmd + " (exit code " + pw.rc + ")" + "\n" + consoleOutput.toString());
        }
    }
    return new AppHandle() {

        @Override
        public boolean isFinished() {
            // TODO (future PR): interaction with child process
            LOG.warn("YARN application runs asynchronously and status check not implemented.");
            return true;
        }

        @Override
        public void shutdown(ShutdownMode arg0) throws LauncherException {
            // TODO (future PR): interaction with child process
            throw new UnsupportedOperationException();
        }
    };
}
Also used : FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) AppHandle(org.apache.apex.api.Launcher.AppHandle) FileOutputStream(java.io.FileOutputStream) ShutdownMode(org.apache.apex.api.Launcher.ShutdownMode) JarFile(java.util.jar.JarFile) File(java.io.File)

Example 4 with AppHandle

use of org.apache.apex.api.Launcher.AppHandle in project beam by apache.

the class ApexYarnLauncher method main.

/**
 * The main method expects the serialized launch parameters (including the DAG) and will
 * launch the YARN application.
 *
 * <p>The parameter file is read inside try-with-resources so the stream is closed
 * regardless of whether deserialization succeeds.
 *
 * @param args location of launch parameters; exactly one file path is expected
 * @throws IOException when parameters cannot be read
 * @throws IllegalArgumentException when the argument count or the file path is invalid
 * @throws AssertionError when the launcher returns no application handle
 */
public static void main(String[] args) throws IOException {
    checkArgument(args.length == 1, "exactly one argument expected");
    File file = new File(args[0]);
    checkArgument(file.exists() && file.isFile(), "invalid file path %s", file);
    final LaunchParams params;
    try (FileInputStream paramsStream = new FileInputStream(file)) {
        params = (LaunchParams) SerializationUtils.deserialize(paramsStream);
    }
    StreamingApplication apexApp = new StreamingApplication() {

        @Override
        public void populateDAG(DAG dag, Configuration conf) {
            // the DAG was built by the parent; copy it into the one provided by the launcher
            copyShallow(params.dag, dag);
        }
    };
    // configuration from Hadoop client
    Configuration conf = new Configuration();
    addProperties(conf, params.configProperties);
    AppHandle appHandle = params.getApexLauncher().launchApp(apexApp, conf, params.launchAttributes);
    if (appHandle == null) {
        throw new AssertionError("Launch returns null handle.");
    }
    // TODO (future PR)
    // At this point the application is running, but this process should remain active to
    // allow the parent to implement the runner result.
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) StreamingApplication(com.datatorrent.api.StreamingApplication) DAG(com.datatorrent.api.DAG) AppHandle(org.apache.apex.api.Launcher.AppHandle) JarFile(java.util.jar.JarFile) File(java.io.File) FileInputStream(java.io.FileInputStream)

Example 5 with AppHandle

use of org.apache.apex.api.Launcher.AppHandle in project apex-malhar by apache.

the class ExactlyOnceFileOutputAppTest method testApplication.

/**
 * Sends a sentence word by word to a Kafka topic, runs {@code ExactlyOnceFileOutputApp}
 * in embedded mode and waits up to 60 seconds for the output file to appear, then checks
 * that it contains {@code count=2} (the word "count" is sent twice).
 *
 * <p>The application is shut down even when one of the assertions fails.
 *
 * @throws Exception if setup, launch, waiting or shutdown fails
 */
@Test
public void testApplication() throws Exception {
    File targetDir = new File(TARGET_DIR);
    // start from an empty target directory
    FileUtils.deleteDirectory(targetDir);
    FileUtils.forceMkdir(targetDir);
    KafkaUnit ku = kafkaUnitRule.getKafkaUnit();
    String topicName = "testTopic";
    // topic creation is async and the producer may also auto-create it
    ku.createTopic(topicName, 1);
    // produce test data
    String[] words = "count count the words from kafka and store them in a file".split("\\s+");
    for (String word : words) {
        ku.sendMessages(new KeyedMessage<String, String>(topicName, word));
    }
    Configuration conf = new Configuration(false);
    conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
    conf.set("apex.operator.kafkaInput.prop.topics", topicName);
    conf.set("apex.operator.kafkaInput.prop.clusters", "localhost:" + brokerPort);
    // limit consumption via maxTuplesPerWindow (set to 2 here)
    conf.set("apex.operator.kafkaInput.prop.maxTuplesPerWindow", "2");
    conf.set("apex.operator.kafkaInput.prop.initialOffset", "EARLIEST");
    conf.set("apex.operator.fileWriter.prop.filePath", TARGET_DIR);
    EmbeddedAppLauncher<?> launcher = Launcher.getLauncher(LaunchMode.EMBEDDED);
    Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap();
    // terminate after results are available
    launchAttributes.put(EmbeddedAppLauncher.RUN_ASYNC, true);
    AppHandle appHandle = launcher.launchApp(new ExactlyOnceFileOutputApp(), conf, launchAttributes);
    try {
        // 60s timeout
        long timeout = System.currentTimeMillis() + 60000;
        File outputFile = new File(TARGET_DIR, ExactlyOnceFileOutputApp.FileWriter.FILE_NAME_PREFIX);
        while (!outputFile.exists() && timeout > System.currentTimeMillis()) {
            Thread.sleep(1000);
            LOG.debug("Waiting for {}", outputFile);
        }
        Assert.assertTrue("output file exists " + ExactlyOnceFileOutputApp.FileWriter.FILE_NAME_PREFIX, outputFile.exists() && outputFile.isFile());
        String result = FileUtils.readFileToString(outputFile);
        Assert.assertTrue(result.contains("count=2"));
    } finally {
        // always stop the asynchronously running application, also on test failure
        appHandle.shutdown(ShutdownMode.KILL);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) KafkaUnit(info.batey.kafka.unit.KafkaUnit) Attribute(com.datatorrent.api.Attribute) AppHandle(org.apache.apex.api.Launcher.AppHandle) File(java.io.File) Test(org.junit.Test)

Aggregations

AppHandle (org.apache.apex.api.Launcher.AppHandle)5 File (java.io.File)4 Configuration (org.apache.hadoop.conf.Configuration)4 Attribute (com.datatorrent.api.Attribute)3 DAG (com.datatorrent.api.DAG)2 StreamingApplication (com.datatorrent.api.StreamingApplication)2 KafkaUnit (info.batey.kafka.unit.KafkaUnit)2 FileInputStream (java.io.FileInputStream)2 InputStream (java.io.InputStream)2 JarFile (java.util.jar.JarFile)2 Test (org.junit.Test)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 Connection (java.sql.Connection)1 ResultSet (java.sql.ResultSet)1 Statement (java.sql.Statement)1 Properties (java.util.Properties)1