Search in sources :

Example 1 with SparkContextOptions

use of org.apache.beam.runners.spark.SparkContextOptions in project beam by apache.

From the class SparkContextFactory, the method getSparkContext.

/**
 * Returns a {@link JavaSparkContext} for the given pipeline options.
 *
 * <p>When context reuse is enabled via the {@code TEST_REUSE_SPARK_CONTEXT} system
 * property and no externally provided context is requested, a cached context is
 * returned (re-created if it was never built or has been stopped). Reuse across
 * different Spark master URLs is rejected. Synchronized to guard the shared static
 * context state.
 *
 * @param options pipeline options; viewed as {@link SparkContextOptions} here
 * @return a live Spark context for this pipeline
 * @throws IllegalArgumentException if a cached context exists but was created for a
 *     different Spark master URL
 */
public static synchronized JavaSparkContext getSparkContext(SparkPipelineOptions options) {
    SparkContextOptions contextOptions = options.as(SparkContextOptions.class);
    usesProvidedSparkContext = contextOptions.getUsesProvidedSparkContext();
    // Reuse only applies when the context is NOT externally provided.
    boolean reuseRequested = Boolean.getBoolean(TEST_REUSE_SPARK_CONTEXT);
    if (!reuseRequested || usesProvidedSparkContext) {
        return createSparkContext(contextOptions);
    }
    // Re-create the cached context if it was never built or has been stopped.
    if (sparkContext == null || sparkContext.sc().isStopped()) {
        sparkContext = createSparkContext(contextOptions);
        sparkMaster = options.getSparkMaster();
        return sparkContext;
    }
    // A live cached context can only be reused against the same master URL.
    if (!options.getSparkMaster().equals(sparkMaster)) {
        throw new IllegalArgumentException(String.format("Cannot reuse spark context " + "with different spark master URL. Existing: %s, requested: %s.", sparkMaster, options.getSparkMaster()));
    }
    return sparkContext;
}
Also used : SparkContextOptions(org.apache.beam.runners.spark.SparkContextOptions)

Example 2 with SparkContextOptions

use of org.apache.beam.runners.spark.SparkContextOptions in project components by Talend.

From the class BigQueryBeamRuntimeTestIT, the method createSparkRunnerPipeline.

// TODO extract this to utils
/**
 * Builds a Beam pipeline configured to run on the Spark runner, using the
 * test's externally provided Spark context ({@code jsc}). Also initializes
 * {@code runtimeContainer} from the same options.
 */
private Pipeline createSparkRunnerPipeline() {
    SparkContextOptions sparkOptions = PipelineOptionsFactory.create().as(SparkContextOptions.class);
    sparkOptions.setRunner(SparkRunner.class);
    // Hand the pre-built Spark context to the runner instead of letting it create one.
    sparkOptions.setProvidedSparkContext(jsc);
    sparkOptions.setUsesProvidedSparkContext(true);
    runtimeContainer = new BeamJobRuntimeContainer(sparkOptions);
    return Pipeline.create(sparkOptions);
}
Also used : BeamJobRuntimeContainer(org.talend.components.adapter.beam.BeamJobRuntimeContainer) SparkContextOptions(org.apache.beam.runners.spark.SparkContextOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions)

Example 3 with SparkContextOptions

use of org.apache.beam.runners.spark.SparkContextOptions in project components by Talend.

From the class PubSubInputRuntimeTestIT, the method createSparkRunnerPipeline.

// TODO extract this to utils
/**
 * Builds a Beam pipeline on the Spark runner backed by a freshly created local
 * Spark context (two worker threads). Also initializes {@code runtimeContainer}
 * from the same options.
 */
private Pipeline createSparkRunnerPipeline() {
    SparkContextOptions sparkOptions = PipelineOptionsFactory.create().as(SparkContextOptions.class);
    // Local two-thread Spark context, handed to the runner as "provided".
    JavaSparkContext localContext = new JavaSparkContext("local[2]", "PubSubInput");
    sparkOptions.setProvidedSparkContext(localContext);
    sparkOptions.setUsesProvidedSparkContext(true);
    sparkOptions.setRunner(SparkRunner.class);
    runtimeContainer = new BeamJobRuntimeContainer(sparkOptions);
    return Pipeline.create(sparkOptions);
}
Also used : BeamJobRuntimeContainer(org.talend.components.adapter.beam.BeamJobRuntimeContainer) SparkContextOptions(org.apache.beam.runners.spark.SparkContextOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Example 4 with SparkContextOptions

use of org.apache.beam.runners.spark.SparkContextOptions in project components by Talend.

From the class KinesisInputRuntimeTestIT, the method createSparkRunnerPipeline.

/**
 * Builds a Beam pipeline on the Spark runner backed by a freshly created local
 * Spark context (two worker threads), allowing multiple contexts per JVM so the
 * test can coexist with other Spark-based tests.
 */
private Pipeline createSparkRunnerPipeline() {
    SparkContextOptions sparkOptions = PipelineOptionsFactory.create().as(SparkContextOptions.class);
    // SparkConf setters return `this`, so the configuration can be chained.
    SparkConf sparkConf = new SparkConf()
            .setAppName("KinesisInput")
            .setMaster("local[2]")
            .set("spark.driver.allowMultipleContexts", "true");
    JavaSparkContext localContext = new JavaSparkContext(new SparkContext(sparkConf));
    sparkOptions.setProvidedSparkContext(localContext);
    sparkOptions.setUsesProvidedSparkContext(true);
    sparkOptions.setRunner(SparkRunner.class);
    return Pipeline.create(sparkOptions);
}
Also used : SparkContextOptions(org.apache.beam.runners.spark.SparkContextOptions) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkContext(org.apache.spark.SparkContext) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkConf(org.apache.spark.SparkConf)

Example 5 with SparkContextOptions

use of org.apache.beam.runners.spark.SparkContextOptions in project components by Talend.

From the class AggregateRuntimeTest, the method createSparkRunnerPipeline.

/**
 * Builds a Beam pipeline on the Spark runner backed by a freshly created local
 * Spark context (two worker threads), allowing multiple contexts per JVM so the
 * test can coexist with other Spark-based tests.
 */
private Pipeline createSparkRunnerPipeline() {
    SparkContextOptions sparkOptions = PipelineOptionsFactory.create().as(SparkContextOptions.class);
    // SparkConf setters return `this`, so the configuration can be chained.
    SparkConf sparkConf = new SparkConf()
            .setAppName("Aggregate")
            .setMaster("local[2]")
            .set("spark.driver.allowMultipleContexts", "true");
    JavaSparkContext localContext = new JavaSparkContext(new SparkContext(sparkConf));
    sparkOptions.setProvidedSparkContext(localContext);
    sparkOptions.setUsesProvidedSparkContext(true);
    sparkOptions.setRunner(SparkRunner.class);
    return Pipeline.create(sparkOptions);
}
Also used : SparkContextOptions(org.apache.beam.runners.spark.SparkContextOptions) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkContext(org.apache.spark.SparkContext) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkConf(org.apache.spark.SparkConf)

Aggregations

SparkContextOptions (org.apache.beam.runners.spark.SparkContextOptions)6 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)5 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4 BeamJobRuntimeContainer (org.talend.components.adapter.beam.BeamJobRuntimeContainer)3 SparkConf (org.apache.spark.SparkConf)2 SparkContext (org.apache.spark.SparkContext)2