Use of org.apache.spark.api.java.JavaSparkContext in project geode by apache.
The class RDDSaveJavaDemo, method main.
public static void main(String[] argv) {
    if (argv.length != 1) {
        System.err.printf("Usage: RDDSaveJavaDemo <locators>\n");
        return;
    }
    SparkConf conf = new SparkConf().setAppName("RDDSaveJavaDemo");
    conf.set(GeodeLocatorPropKey, argv[0]);
    JavaSparkContext sc = new JavaSparkContext(conf);
    List<String> data = new ArrayList<String>();
    data.add("abcdefg");
    data.add("abcdefgh");
    data.add("abcdefghi");
    JavaRDD<String> rdd = sc.parallelize(data);
    GeodeConnectionConf connConf = GeodeConnectionConf.apply(conf);
    PairFunction<String, String, Integer> func = new PairFunction<String, String, Integer>() {

        @Override
        public Tuple2<String, Integer> call(String s) throws Exception {
            return new Tuple2<String, Integer>(s, s.length());
        }
    };
    javaFunctions(rdd).saveToGeode("str_int_region", func, connConf);
    sc.stop();
}
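Since PairFunction is a functional interface, the anonymous class above can also be written as a Java 8 lambda. A minimal sketch, assuming the same connector entry points (javaFunctions, saveToGeode) and the same region as above; the explicit cast pins the generic key/value types:

    // Same save expressed with a lambda (assumes Java 8); the cast fixes the
    // generic key/value types of the PairFunction.
    javaFunctions(rdd).saveToGeode("str_int_region",
            (PairFunction<String, String, Integer>) s -> new Tuple2<>(s, s.length()), connConf);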
Use of org.apache.spark.api.java.JavaSparkContext in project gora by apache.
The class SparkWordCount, method wordCount.
public int wordCount(DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws IOException {
    // Spark engine initialization
    GoraSparkEngine<String, WebPage> goraSparkEngine = new GoraSparkEngine<>(String.class, WebPage.class);
    SparkConf sparkConf = new SparkConf().setAppName("Gora Spark Word Count Application").setMaster("local");
    Class[] c = new Class[1];
    c[0] = inStore.getPersistentClass();
    sparkConf.registerKryoClasses(c);
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    JavaPairRDD<String, WebPage> goraRDD = goraSparkEngine.initialize(sc, inStore);
    long count = goraRDD.count();
    log.info("Total Web page count: {}", count);
    JavaRDD<Tuple2<String, Long>> mappedGoraRdd = goraRDD.values().map(mapFunc);
    JavaPairRDD<String, Long> reducedGoraRdd = JavaPairRDD.fromJavaRDD(mappedGoraRdd).reduceByKey(redFunc);
    // Print output for debugging purposes
    log.info("SparkWordCount debug purpose TokenDatum print starts:");
    Map<String, Long> tokenDatumMap = reducedGoraRdd.collectAsMap();
    for (String key : tokenDatumMap.keySet()) {
        log.info(key);
        log.info(tokenDatumMap.get(key).toString());
    }
    log.info("SparkWordCount debug purpose TokenDatum print ends:");
    // Write output to the datastore
    Configuration sparkHadoopConf = goraSparkEngine.generateOutputConf(outStore);
    reducedGoraRdd.saveAsNewAPIHadoopDataset(sparkHadoopConf);
    return 1;
}
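mapFunc and redFunc are referenced but not shown in this excerpt. From the RDD types above (values() yields JavaRDD<WebPage>, the map produces Tuple2<String, Long>, and reduceByKey folds the Long counts), one plausible shape is sketched below; the getUrl() accessor is an assumption about the WebPage bean, not taken from the snippet:

    // Hypothetical definitions consistent with the types used in wordCount();
    // the actual Gora example may differ.
    static org.apache.spark.api.java.function.Function<WebPage, Tuple2<String, Long>> mapFunc =
            page -> new Tuple2<>(page.getUrl().toString(), 1L);   // assumed accessor: getUrl()

    static org.apache.spark.api.java.function.Function2<Long, Long, Long> redFunc =
            (a, b) -> a + b;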
Use of org.apache.spark.api.java.JavaSparkContext in project geode by apache.
The class JavaApiIntegrationTest, method setUpBeforeClass.
@BeforeClass
public static void setUpBeforeClass() throws Exception {
    // start geode cluster, and spark context
    Properties settings = new Properties();
    settings.setProperty(ConfigurationProperties.CACHE_XML_FILE, "src/it/resources/test-retrieve-regions.xml");
    settings.setProperty("num-of-servers", Integer.toString(numServers));
    int locatorPort = GeodeCluster$.MODULE$.start(settings);
    // start spark context in local mode
    Properties props = new Properties();
    props.put("log4j.logger.org.apache.spark", "INFO");
    props.put("log4j.logger.org.apache.geode.spark.connector", "DEBUG");
    IOUtils.configTestLog4j("ERROR", props);
    SparkConf conf = new SparkConf().setAppName("RetrieveRegionIntegrationTest").setMaster("local[2]").set(package$.MODULE$.GeodeLocatorPropKey(), "localhost:" + locatorPort);
    // sc = new SparkContext(conf);
    jsc = new JavaSparkContext(conf);
    connConf = GeodeConnectionConf.apply(jsc.getConf());
}
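A matching teardown normally stops the Spark context and the test cluster. A minimal sketch, assuming the GeodeCluster helper exposes a stop() counterpart to the start() call above:

    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        // stop the Spark context first, then the embedded Geode cluster
        if (jsc != null) {
            jsc.stop();
            jsc = null;
        }
        GeodeCluster$.MODULE$.stop();   // assumption: a stop() matching start() above
    }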
Use of org.apache.spark.api.java.JavaSparkContext in project beam by apache.
The class TrackStreamingSourcesTest, method testTrackSingle.
@Test
public void testTrackSingle() {
    options.setRunner(SparkRunner.class);
    JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
    JavaStreamingContext jssc = new JavaStreamingContext(jsc, new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));
    Pipeline p = Pipeline.create(options);
    CreateStream<Integer> emptyStream = CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis())).emptyBatch();
    p.apply(emptyStream).apply(ParDo.of(new PassthroughFn<>()));
    p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class, 0));
    assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
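PassthroughFn is used but not defined in this excerpt; a plausible stand-in, assuming it is simply a DoFn that re-emits each element so the ParDo gives the tracker a MultiOutput transform to visit:

    // Hypothetical PassthroughFn: forwards every element unchanged.
    private static class PassthroughFn<T> extends DoFn<T, T> {

        @ProcessElement
        public void processElement(ProcessContext c) {
            c.output(c.element());
        }
    }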
Use of org.apache.spark.api.java.JavaSparkContext in project beam by apache.
The class SparkRunnerStreamingContextFactory, method call.
@Override
public JavaStreamingContext call() throws Exception {
    LOG.info("Creating a new Spark Streaming Context");
    // validate unbounded read properties.
    checkArgument(options.getMinReadTimeMillis() < options.getBatchIntervalMillis(), "Minimum read time has to be less than batch time.");
    checkArgument(options.getReadTimePercentage() > 0 && options.getReadTimePercentage() < 1, "Read time percentage is bound to (0, 1).");
    SparkPipelineTranslator translator = new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
    Duration batchDuration = new Duration(options.getBatchIntervalMillis());
    LOG.info("Setting Spark streaming batchDuration to {} msec", batchDuration.milliseconds());
    JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
    JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);
    // We must first init accumulators since translators expect them to be instantiated.
    SparkRunner.initAccumulators(options, jsc);
    EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options, jssc);
    // update cache candidates
    SparkRunner.updateCacheCandidates(pipeline, translator, ctxt);
    pipeline.traverseTopologically(new SparkRunner.Evaluator(translator, ctxt));
    ctxt.computeOutputs();
    checkpoint(jssc, checkpointDir);
    return jssc;
}
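Because call() returns a JavaStreamingContext, this factory fits Spark's Function0-based checkpoint recovery. A sketch of the typical wiring, assuming the constructor takes the pipeline, options, and checkpoint directory seen in this method and that the same directory is also available as a plain path string:

    // Sketch: reuse the checkpointed context if one exists, otherwise build a
    // fresh one through call() above. Constructor arguments are assumptions.
    SparkRunnerStreamingContextFactory factory =
            new SparkRunnerStreamingContextFactory(pipeline, options, checkpointDir);
    JavaStreamingContext jssc =
            JavaStreamingContext.getOrCreate(checkpointPathString, factory);   // checkpointPathString: assumed String form of checkpointDir
    jssc.start();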