Search in sources :

Example 1 with CollectionEnvironment

Use of org.apache.flink.api.java.CollectionEnvironment in the Apache Beam project.

From the class FlinkPipelineExecutionEnvironment, method createBatchExecutionEnvironment.

/**
 * Creates the Flink batch {@link org.apache.flink.api.java.ExecutionEnvironment} selected by
 * the user-specified Flink master option, then applies the parallelism and object-reuse
 * settings from the options.
 *
 * <p>Recognized master values are {@code [local]}, {@code [collection]}, {@code [auto]} and
 * {@code host:port} (the port must contain at least one digit). Any other value, including
 * {@code null}, is logged as a warning and handled like {@code [auto]}.
 *
 * <p>Side effect: the parallelism reported by the created environment is written back into
 * the options.
 *
 * @return the configured batch execution environment
 * @throws NumberFormatException if the port of a {@code host:port} master does not fit an int
 */
private ExecutionEnvironment createBatchExecutionEnvironment() {
    LOG.info("Creating the required Batch Execution Environment.");
    String masterUrl = options.getFlinkMaster();
    ExecutionEnvironment flinkBatchEnv;
    // depending on the master, create the right environment.
    // Constant-first equals keeps a null master from throwing; it falls through to the default.
    if ("[local]".equals(masterUrl)) {
        flinkBatchEnv = ExecutionEnvironment.createLocalEnvironment();
    } else if ("[collection]".equals(masterUrl)) {
        flinkBatchEnv = new CollectionEnvironment();
    } else if ("[auto]".equals(masterUrl)) {
        flinkBatchEnv = ExecutionEnvironment.getExecutionEnvironment();
    } else if (masterUrl != null && masterUrl.matches(".*:\\d+")) {
        // "\\d+" (not "\\d*") rejects an empty port such as "host:", which previously matched
        // and then failed on a missing array element. Splitting at the LAST colon guarantees
        // that the text parsed as the port is exactly the digit run the pattern matched.
        int portSeparator = masterUrl.lastIndexOf(':');
        String host = masterUrl.substring(0, portSeparator);
        int port = Integer.parseInt(masterUrl.substring(portSeparator + 1));
        List<String> stagingFiles = options.getFilesToStage();
        flinkBatchEnv = ExecutionEnvironment.createRemoteEnvironment(host, port, stagingFiles.toArray(new String[0]));
    } else {
        LOG.warn("Unrecognized Flink Master URL {}. Defaulting to [auto].", masterUrl);
        flinkBatchEnv = ExecutionEnvironment.getExecutionEnvironment();
    }
    // set the correct parallelism; -1 means "not specified", and the collection environment
    // is deliberately left at its own parallelism.
    if (options.getParallelism() != -1 && !(flinkBatchEnv instanceof CollectionEnvironment)) {
        flinkBatchEnv.setParallelism(options.getParallelism());
    }
    // set parallelism in the options (required by some execution code)
    options.setParallelism(flinkBatchEnv.getParallelism());
    if (options.getObjectReuse()) {
        flinkBatchEnv.getConfig().enableObjectReuse();
    } else {
        flinkBatchEnv.getConfig().disableObjectReuse();
    }
    return flinkBatchEnv;
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) CollectionEnvironment(org.apache.flink.api.java.CollectionEnvironment) List(java.util.List)

Example 2 with CollectionEnvironment

Use of org.apache.flink.api.java.CollectionEnvironment in the Apache Beam project.

From the class FlinkExecutionEnvironments, method createBatchExecutionEnvironment.

/**
 * Builds a Flink batch {@link ExecutionEnvironment} for the master named in the options and
 * applies execution mode, parallelism, object-reuse and latency-tracking settings to it.
 *
 * <p>The master (with any http scheme stripped) selects the environment: {@code [local]},
 * {@code [collection]}, {@code [auto]}, or otherwise a remote {@code host[:port]} address
 * whose missing port defaults to the REST port from the Flink configuration.
 *
 * <p>Side effect: the parallelism finally chosen is written back into {@code options}.
 *
 * @param options pipeline options supplying the master, execution mode, parallelism and
 *     object-reuse flag
 * @param filesToStage files handed to the remote environment when one is created
 * @param confDir directory passed to the Flink configuration loader; may be {@code null}
 * @return the configured batch execution environment
 */
static ExecutionEnvironment createBatchExecutionEnvironment(FlinkPipelineOptions options, List<String> filesToStage, @Nullable String confDir) {
    LOG.info("Creating a Batch Execution Environment.");
    // Flink speaks REST to the master but wants the address without an http scheme.
    final String master = stripHttpSchema(options.getFlinkMaster());
    final Configuration conf = getFlinkConfiguration(confDir);

    // Pick the environment that corresponds to the requested master.
    final ExecutionEnvironment env;
    if ("[local]".equals(master)) {
        setManagedMemoryByFraction(conf);
        disableClassLoaderLeakCheck(conf);
        env = ExecutionEnvironment.createLocalEnvironment(conf);
    } else if ("[collection]".equals(master)) {
        env = new CollectionEnvironment();
    } else if ("[auto]".equals(master)) {
        final ExecutionEnvironment detected = ExecutionEnvironment.getExecutionEnvironment();
        if (detected instanceof LocalEnvironment) {
            // Swap the auto-detected local environment for one built from our configuration.
            disableClassLoaderLeakCheck(conf);
            final ExecutionEnvironment local = ExecutionEnvironment.createLocalEnvironment(conf);
            local.setParallelism(getDefaultLocalParallelism());
            env = local;
        } else {
            env = detected;
        }
    } else {
        final int fallbackPort = conf.getInteger(RestOptions.PORT);
        final HostAndPort target = HostAndPort.fromString(master).withDefaultPort(fallbackPort);
        final String host = target.getHost();
        final int port = target.getPort();
        // Keep the configuration's REST port in sync with the port actually used.
        conf.setInteger(RestOptions.PORT, port);
        env = ExecutionEnvironment.createRemoteEnvironment(host, port, conf, filesToStage.toArray(new String[0]));
        LOG.info("Using Flink Master URL {}:{}.", host, port);
    }

    // Execution mode governs how data is exchanged between operators.
    env.getConfig().setExecutionMode(ExecutionMode.valueOf(options.getExecutionModeForBatch()));

    final int requestedParallelism = options.getParallelism();
    final boolean collectionBacked = env instanceof CollectionEnvironment;
    // -1 means no parallelism was requested; the collection environment is never overridden here.
    if (requestedParallelism != -1 && !collectionBacked) {
        env.setParallelism(requestedParallelism);
    }

    // The collection environment is pinned to 1; everything else is resolved by the helper.
    final int parallelism = collectionBacked
        ? 1
        : determineParallelism(requestedParallelism, env.getParallelism(), conf);
    env.setParallelism(parallelism);
    // Publish the effective value back into the options (required by some execution code).
    options.setParallelism(parallelism);

    if (!options.getObjectReuse()) {
        env.getConfig().disableObjectReuse();
    } else {
        env.getConfig().enableObjectReuse();
    }
    applyLatencyTrackingInterval(env.getConfig(), options);
    return env;
}
Also used : HostAndPort(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) CollectionEnvironment(org.apache.flink.api.java.CollectionEnvironment) LocalEnvironment(org.apache.flink.api.java.LocalEnvironment)

Aggregations

CollectionEnvironment (org.apache.flink.api.java.CollectionEnvironment): 2, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 2, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 2, List (java.util.List): 1, HostAndPort (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort): 1, LocalEnvironment (org.apache.flink.api.java.LocalEnvironment): 1, Configuration (org.apache.flink.configuration.Configuration): 1, GlobalConfiguration (org.apache.flink.configuration.GlobalConfiguration): 1