Use of org.apache.spark.SparkConf in project geode by apache.
The class PairRDDSaveJavaDemo, method main.
public static void main(String[] argv) {
    if (argv.length != 1) {
        System.err.printf("Usage: PairRDDSaveJavaDemo <locators>\n");
        return;
    }

    SparkConf conf = new SparkConf().setAppName("PairRDDSaveJavaDemo");
    conf.set(GeodeLocatorPropKey, argv[0]);
    JavaSparkContext sc = new JavaSparkContext(conf);
    GeodeConnectionConf connConf = GeodeConnectionConf.apply(conf);

    List<Tuple2<String, String>> data = new ArrayList<>();
    data.add(new Tuple2<>("7", "seven"));
    data.add(new Tuple2<>("8", "eight"));
    data.add(new Tuple2<>("9", "nine"));

    List<Tuple2<String, String>> data2 = new ArrayList<>();
    data2.add(new Tuple2<>("11", "eleven"));
    data2.add(new Tuple2<>("12", "twelve"));
    data2.add(new Tuple2<>("13", "thirteen"));

    // Method 1: generate a JavaPairRDD directly.
    JavaPairRDD<String, String> rdd1 = sc.parallelizePairs(data);
    javaFunctions(rdd1).saveToGeode("str_str_region", connConf);

    // Method 2: convert a JavaRDD<Tuple2<K, V>> to a JavaPairRDD<K, V>.
    JavaRDD<Tuple2<String, String>> rdd2 = sc.parallelize(data2);
    javaFunctions(toJavaPairRDD(rdd2)).saveToGeode("str_str_region", connConf);

    sc.stop();
}
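The snippet assumes the Geode Spark connector's static helpers (javaFunctions, toJavaPairRDD, GeodeLocatorPropKey) are imported elsewhere in the class. As a minimal sketch of a further save path, reusing sc and connConf from the demo above (the Arrays.asList data and the key/value mapping are illustrative, not part of the original demo):

    // Illustrative only: derive pairs from a plain JavaRDD<String> with a lambda, then save.
    JavaRDD<String> words = sc.parallelize(Arrays.asList("ten", "fourteen", "fifteen"));
    JavaRDD<Tuple2<String, String>> extraPairs =
        words.map(w -> new Tuple2<String, String>(w, w.toUpperCase()));
    javaFunctions(toJavaPairRDD(extraPairs)).saveToGeode("str_str_region", connConf);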
Use of org.apache.spark.SparkConf in project geode by apache.
The class RegionToRDDJavaDemo, method main.
public static void main(String[] argv) {
    if (argv.length != 1) {
        System.err.printf("Usage: RegionToRDDJavaDemo <locators>\n");
        return;
    }

    SparkConf conf = new SparkConf().setAppName("RegionToRDDJavaDemo");
    conf.set(GeodeLocatorPropKey, argv[0]);
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Expose the Geode region as a JavaPairRDD and collect it to the driver for printing.
    JavaPairRDD<String, String> rdd = javaFunctions(sc).geodeRegion("str_str_region");
    System.out.println("=== geodeRegion =======\n" + rdd.collect() + "\n=========================");

    sc.stop();
}
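Collecting an entire region to the driver is fine for a demo but not for large regions; as a sketch, the returned pair RDD can first be reduced with ordinary Spark operations (reusing rdd from above; the filter predicate is illustrative):

    // Illustrative: count entries and keep only short keys without collecting the full region.
    long total = rdd.count();
    JavaPairRDD<String, String> shortKeys = rdd.filter(t -> t._1().length() <= 2);
    System.out.println("total=" + total + ", shortKeys=" + shortKeys.count());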
Use of org.apache.spark.SparkConf in project ignite by apache.
The class SharedRDDExample, method main.
/**
 * Executes the example.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    // Spark configuration.
    SparkConf sparkConf = new SparkConf()
        .setAppName("JavaIgniteRDDExample")
        .setMaster("local")
        .set("spark.executor.instances", "2");

    // Spark context.
    JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);

    // Adjust the loggers to exclude logs of no interest.
    Logger.getRootLogger().setLevel(Level.ERROR);
    Logger.getLogger("org.apache.ignite").setLevel(Level.INFO);

    // Create an Ignite context with the given configuration and run Ignite in embedded mode.
    JavaIgniteContext<Integer, Integer> igniteContext =
        new JavaIgniteContext<>(sparkContext, "examples/config/spark/example-shared-rdd.xml", false);

    // Create a JavaIgniteRDD of (Integer, Integer) pairs backed by the "sharedRDD" cache.
    JavaIgniteRDD<Integer, Integer> sharedRDD = igniteContext.fromCache("sharedRDD");

    // Define the data to be stored in the Ignite RDD (cache).
    List<Integer> data = new ArrayList<>(20);
    for (int i = 0; i < 20; i++) {
        data.add(i);
    }

    // Prepare a Java RDD.
    JavaRDD<Integer> javaRDD = sparkContext.parallelize(data);

    // Fill the Ignite RDD with Integer pairs; pairs are represented as Scala Tuple2.
    sharedRDD.savePairs(javaRDD.<Integer, Integer>mapToPair(new PairFunction<Integer, Integer, Integer>() {
        @Override
        public Tuple2<Integer, Integer> call(Integer val) throws Exception {
            return new Tuple2<>(val, val);
        }
    }));

    System.out.println(">>> Iterating over Ignite Shared RDD...");

    // Iterate over the Ignite RDD.
    sharedRDD.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
        @Override
        public void call(Tuple2<Integer, Integer> tuple) throws Exception {
            System.out.println("(" + tuple._1 + "," + tuple._2 + ")");
        }
    });

    System.out.println(">>> Transforming values stored in Ignite Shared RDD...");

    // Keep only the pairs whose value is even (filter retains elements matching the predicate).
    JavaPairRDD<Integer, Integer> transformedValues =
        sharedRDD.filter(new Function<Tuple2<Integer, Integer>, Boolean>() {
            @Override
            public Boolean call(Tuple2<Integer, Integer> tuple) throws Exception {
                return tuple._2() % 2 == 0;
            }
        });

    // Print out the transformed values.
    transformedValues.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
        @Override
        public void call(Tuple2<Integer, Integer> tuple) throws Exception {
            System.out.println("(" + tuple._1 + "," + tuple._2 + ")");
        }
    });

    System.out.println(">>> Executing SQL query over Ignite Shared RDD...");

    // Execute a SQL query over the Ignite RDD.
    Dataset df = sharedRDD.sql("select _val from Integer where _key < 9");

    // Show the result of the execution.
    df.show();

    // Close the IgniteContext on all workers.
    igniteContext.close(true);
}
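The anonymous inner classes above follow the older Spark Java style; on Java 8+ the same calls can be written with lambdas. A minimal equivalent sketch, reusing sharedRDD and javaRDD from the example:

    // Same save, iterate, and filter steps as above, expressed as lambdas.
    sharedRDD.savePairs(javaRDD.<Integer, Integer>mapToPair(val -> new Tuple2<>(val, val)));
    sharedRDD.foreach(t -> System.out.println("(" + t._1 + "," + t._2 + ")"));
    JavaPairRDD<Integer, Integer> evenValues = sharedRDD.filter(t -> t._2() % 2 == 0);
    evenValues.foreach(t -> System.out.println("(" + t._1 + "," + t._2 + ")"));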
Use of org.apache.spark.SparkConf in project cdap by caskdata.
The class ETLSpark, method initialize.
@Override
public void initialize() throws Exception {
    SparkClientContext context = getContext();
    cleanupFiles = new ArrayList<>();
    CompositeFinisher.Builder finishers = CompositeFinisher.builder();

    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.driver.extraJavaOptions", "-XX:MaxPermSize=256m");
    sparkConf.set("spark.executor.extraJavaOptions", "-XX:MaxPermSize=256m");
    sparkConf.set("spark.speculation", "false");
    context.setSparkConf(sparkConf);

    Map<String, String> properties = context.getSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
        sparkConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
    }

    MacroEvaluator evaluator = new DefaultMacroEvaluator(context.getWorkflowToken(), context.getRuntimeArguments(),
                                                         context.getLogicalStartTime(), context, context.getNamespace());
    SparkBatchSourceFactory sourceFactory = new SparkBatchSourceFactory();
    SparkBatchSinkFactory sinkFactory = new SparkBatchSinkFactory();
    Map<String, Integer> stagePartitions = new HashMap<>();
    PluginContext pluginContext = new SparkPipelinePluginContext(context, context.getMetrics(),
                                                                 phaseSpec.isStageLoggingEnabled(),
                                                                 phaseSpec.isProcessTimingEnabled());

    // Call prepareRun on each stage of the pipeline phase and register it with the finisher.
    for (StageInfo stageInfo : phaseSpec.getPhase()) {
        String stageName = stageInfo.getName();
        String pluginType = stageInfo.getPluginType();
        if (BatchSource.PLUGIN_TYPE.equals(pluginType)) {
            BatchConfigurable<BatchSourceContext> batchSource = pluginContext.newPluginInstance(stageName, evaluator);
            BatchSourceContext sourceContext = new SparkBatchSourceContext(sourceFactory, context, stageInfo);
            batchSource.prepareRun(sourceContext);
            finishers.add(batchSource, sourceContext);
        } else if (BatchSink.PLUGIN_TYPE.equals(pluginType)) {
            BatchConfigurable<BatchSinkContext> batchSink = pluginContext.newPluginInstance(stageName, evaluator);
            BatchSinkContext sinkContext = new SparkBatchSinkContext(sinkFactory, context, null, stageInfo);
            batchSink.prepareRun(sinkContext);
            finishers.add(batchSink, sinkContext);
        } else if (SparkSink.PLUGIN_TYPE.equals(pluginType)) {
            BatchConfigurable<SparkPluginContext> sparkSink = pluginContext.newPluginInstance(stageName, evaluator);
            SparkPluginContext sparkPluginContext = new BasicSparkPluginContext(context, stageInfo);
            sparkSink.prepareRun(sparkPluginContext);
            finishers.add(sparkSink, sparkPluginContext);
        } else if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
            BatchAggregator aggregator = pluginContext.newPluginInstance(stageName, evaluator);
            DefaultAggregatorContext aggregatorContext = new DefaultAggregatorContext(context, stageInfo);
            aggregator.prepareRun(aggregatorContext);
            finishers.add(aggregator, aggregatorContext);
            stagePartitions.put(stageName, aggregatorContext.getNumPartitions());
        } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
            BatchJoiner joiner = pluginContext.newPluginInstance(stageName, evaluator);
            DefaultJoinerContext sparkJoinerContext = new DefaultJoinerContext(context, stageInfo);
            joiner.prepareRun(sparkJoinerContext);
            finishers.add(joiner, sparkJoinerContext);
            stagePartitions.put(stageName, sparkJoinerContext.getNumPartitions());
        }
    }

    // Serialize the source/sink factories and stage partition counts to a temp file and localize it.
    File configFile = File.createTempFile("HydratorSpark", ".config");
    cleanupFiles.add(configFile);
    try (Writer writer = Files.newBufferedWriter(configFile.toPath(), StandardCharsets.UTF_8)) {
        SparkBatchSourceSinkFactoryInfo sourceSinkInfo = new SparkBatchSourceSinkFactoryInfo(sourceFactory, sinkFactory, stagePartitions);
        writer.write(GSON.toJson(sourceSinkInfo));
    }

    finisher = finishers.build();
    context.localize("HydratorSpark.config", configFile.toURI());
}
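The localized HydratorSpark.config file is presumably deserialized again on the Spark side; a generic sketch of reading such a GSON-serialized file back, with a hypothetical ConfigInfo class standing in for SparkBatchSourceSinkFactoryInfo:

    // Generic GSON round trip; ConfigInfo and the local file name are stand-ins, not CDAP API.
    try (Reader reader = Files.newBufferedReader(new File("HydratorSpark.config").toPath(), StandardCharsets.UTF_8)) {
        ConfigInfo info = new Gson().fromJson(reader, ConfigInfo.class);
        // ... use info ...
    }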
Use of org.apache.spark.SparkConf in project cdap by caskdata.
The class ExternalSparkProgram, method initialize.
@Override
protected void initialize() throws Exception {
    SparkClientContext context = getContext();
    String stageName = context.getSpecification().getProperty(STAGE_NAME);
    Map<String, String> pluginProperties = context.getPluginProperties(stageName).getProperties();

    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.driver.extraJavaOptions", "-XX:MaxPermSize=256m");
    sparkConf.set("spark.executor.extraJavaOptions", "-XX:MaxPermSize=256m");

    // Copy every plugin property onto the SparkConf, except the program arguments.
    for (Map.Entry<String, String> pluginProperty : pluginProperties.entrySet()) {
        String key = pluginProperty.getKey();
        String val = pluginProperty.getValue();
        if (!key.equals(PROGRAM_ARGS)) {
            sparkConf.set(key, val);
        }
    }
    context.setSparkConf(sparkConf);
}
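A small, hypothetical illustration of what the loop above copies (the property values are made up; PROGRAM_ARGS is the same constant used in the method):

    // Hypothetical plugin properties: only the non-PROGRAM_ARGS entry reaches the SparkConf.
    Map<String, String> example = new HashMap<>();
    example.put("spark.executor.memory", "2g");      // copied by the loop
    example.put(PROGRAM_ARGS, "--input /tmp/data");  // skipped by the key check
    SparkConf illustration = new SparkConf();
    example.forEach((k, v) -> { if (!k.equals(PROGRAM_ARGS)) illustration.set(k, v); });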