Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project learning-spark by databricks.
The class LogAnalyzerAppMain, method main.
public static void main(String[] args) throws IOException {
    Flags.setFromCommandLineArgs(THE_OPTIONS, args);
    // Set up the Spark configuration.
    SparkConf conf = new SparkConf().setAppName(
        "A Databricks Reference Application: Logs Analysis with Spark");
    JavaStreamingContext jssc = new JavaStreamingContext(
        conf, Flags.getInstance().getSlideInterval());
    // Checkpointing must be enabled to use the updateStateByKey function
    // and windowed operations.
    jssc.checkpoint(Flags.getInstance().getCheckpointDirectory());
    // This method monitors a directory for new files to read in for streaming.
    JavaDStream<String> logData = jssc.textFileStream(Flags.getInstance().getLogsDirectory());
    JavaDStream<ApacheAccessLog> accessLogsDStream =
        logData.map(new Functions.ParseFromLogLine()).cache();
    final LogAnalyzerTotal logAnalyzerTotal = new LogAnalyzerTotal();
    final LogAnalyzerWindowed logAnalyzerWindowed = new LogAnalyzerWindowed();
    // Process the DStream which gathers stats for all of time.
    logAnalyzerTotal.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);
    // Calculate statistics for the last time interval.
    logAnalyzerWindowed.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);
    // Render the output each time there is a new RDD in the accessLogsDStream.
    final Renderer renderer = new Renderer();
    accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
        public Void call(JavaRDD<ApacheAccessLog> rdd) {
            // Output the cumulative and windowed stats.
            try {
                renderer.render(logAnalyzerTotal.getLogStatistics(),
                    logAnalyzerWindowed.getLogStatistics());
            } catch (Exception e) {
                // Don't let a rendering failure kill the streaming job,
                // but don't swallow it silently either.
                e.printStackTrace();
            }
            return null;
        }
    });
    // Start the streaming computation.
    jssc.start();
    // Wait for the computation to terminate.
    jssc.awaitTermination();
}
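On Spark 2.x with Java 8, the same hook can be written with a lambda, since foreachRDD there takes a VoidFunction<JavaRDD<T>> (the Void-returning Function overload was removed). A minimal sketch reusing the variables above:

    // Lambda form of the foreachRDD hook, assuming Spark 2.x.
    accessLogsDStream.foreachRDD(rdd -> {
        try {
            renderer.render(logAnalyzerTotal.getLogStatistics(),
                logAnalyzerWindowed.getLogStatistics());
        } catch (Exception e) {
            e.printStackTrace();
        }
    });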
Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project learning-spark by databricks.
The class KafkaInput, method main.
public static void main(String[] args) throws Exception {
    String zkQuorum = args[0];
    String group = args[1];
    SparkConf conf = new SparkConf().setAppName("KafkaInput");
    // Create a StreamingContext with a 1-second batch size.
    JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));
    // Map of topic name to the number of receiver threads for that topic.
    Map<String, Integer> topics = new HashMap<String, Integer>();
    topics.put("pandas", 1);
    JavaPairDStream<String, String> input =
        KafkaUtils.createStream(jssc, zkQuorum, group, topics);
    input.print();
    // Start our streaming context and wait for it to "finish".
    jssc.start();
    // Wait for 10 seconds, then exit. To run forever, call without a timeout.
    jssc.awaitTermination(10000);
    // Stop the streaming context.
    jssc.stop();
}
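The receiver-based KafkaUtils.createStream delivers (message key, message value) pairs. A minimal sketch of transforming the stream instead of just printing it, counting words in the message values per batch; it assumes Spark 2.x with Java 8 lambdas (on Spark 1.x, FlatMapFunction returns an Iterable, so the .iterator() call would be dropped):

    import java.util.Arrays;
    import scala.Tuple2;
    import org.apache.spark.streaming.api.java.JavaDStream;
    import org.apache.spark.streaming.api.java.JavaPairDStream;

    // Extract the message values, split into words, and count per batch.
    JavaDStream<String> words = input
        .map(kv -> kv._2())
        .flatMap(line -> Arrays.asList(line.split(" ")).iterator());
    JavaPairDStream<String, Integer> counts = words
        .mapToPair(w -> new Tuple2<>(w, 1))
        .reduceByKey((a, b) -> a + b);
    counts.print();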
Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project beam by apache.
The class TrackStreamingSourcesTest, method testTrackSingle.
@Test
public void testTrackSingle() {
    options.setRunner(SparkRunner.class);
    JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
    JavaStreamingContext jssc = new JavaStreamingContext(
        jsc, new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));
    Pipeline p = Pipeline.create(options);
    CreateStream<Integer> emptyStream =
        CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
            .emptyBatch();
    p.apply(emptyStream).apply(ParDo.of(new PassthroughFn<>()));
    p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class, 0));
    assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
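Since Pipeline.traverseTopologically accepts a PipelineVisitor, StreamingSourceTracker (defined elsewhere in this test class) is a visitor that, presumably, checks for each transform of the given class that the expected streaming source (here index 0) was registered with the supplied JavaStreamingContext; the final assertion verifies that exactly one such check ran.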
Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project beam by apache.
The class SparkRunnerStreamingContextFactory, method call.
@Override
public JavaStreamingContext call() throws Exception {
    LOG.info("Creating a new Spark Streaming Context");
    // Validate unbounded read properties.
    checkArgument(options.getMinReadTimeMillis() < options.getBatchIntervalMillis(),
        "Minimum read time has to be less than batch time.");
    checkArgument(options.getReadTimePercentage() > 0 && options.getReadTimePercentage() < 1,
        "Read time percentage is bound to (0, 1).");
    SparkPipelineTranslator translator =
        new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
    Duration batchDuration = new Duration(options.getBatchIntervalMillis());
    LOG.info("Setting Spark streaming batchDuration to {} msec", batchDuration.milliseconds());
    JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
    JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);
    // We must first init accumulators since translators expect them to be instantiated.
    SparkRunner.initAccumulators(options, jsc);
    EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options, jssc);
    // Update cache candidates.
    SparkRunner.updateCacheCandidates(pipeline, translator, ctxt);
    pipeline.traverseTopologically(new SparkRunner.Evaluator(translator, ctxt));
    ctxt.computeOutputs();
    checkpoint(jssc, checkpointDir);
    return jssc;
}
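This call() method is the creation half of Spark's checkpoint-recovery pattern: a Function0 factory like this is normally handed to JavaStreamingContext.getOrCreate, which rebuilds the context from the checkpoint directory on restart and only invokes the factory on a cold start. A minimal sketch of that usage; the checkpoint path and the factory's constructor arguments are assumptions for illustration, not Beam's exact API:

    import org.apache.spark.streaming.api.java.JavaStreamingContext;

    // Recover from the checkpoint if one exists; otherwise invoke the factory.
    // "checkpointDir" and the constructor arguments here are hypothetical.
    String checkpointDir = "hdfs:///checkpoints/my-beam-app";
    JavaStreamingContext jssc = JavaStreamingContext.getOrCreate(
        checkpointDir,
        new SparkRunnerStreamingContextFactory(pipeline, options, checkpointDir));
    jssc.start();
    jssc.awaitTermination();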
Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project cdap by caskdata.
The class SparkStreamingPipelineDriver, method run.
private JavaStreamingContext run(final DataStreamsPipelineSpec pipelineSpec,
    final PipelinePhase pipelinePhase, final JavaSparkExecutionContext sec,
    @Nullable final String checkpointDir) throws Exception {
    Function0<JavaStreamingContext> contextFunction = new Function0<JavaStreamingContext>() {

        @Override
        public JavaStreamingContext call() throws Exception {
            JavaStreamingContext jssc = new JavaStreamingContext(new JavaSparkContext(),
                Durations.milliseconds(pipelineSpec.getBatchIntervalMillis()));
            SparkStreamingPipelineRunner runner =
                new SparkStreamingPipelineRunner(sec, jssc, pipelineSpec, false);
            PipelinePluginContext pluginContext = new PipelinePluginContext(
                sec.getPluginContext(), sec.getMetrics(),
                pipelineSpec.isStageLoggingEnabled(), pipelineSpec.isProcessTimingEnabled());
            // It seems like these should be set at configure time instead of runtime,
            // but that requires an API change.
            try {
                runner.runPipeline(pipelinePhase, StreamingSource.PLUGIN_TYPE, sec,
                    new HashMap<String, Integer>(), pluginContext);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            if (checkpointDir != null) {
                jssc.checkpoint(checkpointDir);
            }
            return jssc;
        }
    };
    return checkpointDir == null
        ? contextFunction.call()
        : StreamingCompat.getOrCreate(checkpointDir, contextFunction);
}
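Note the placement of the checkpoint call: it is made inside the factory, so that when getOrCreate falls back to creating a fresh context, the new context is already checkpoint-enabled before it is returned; when checkpoint data exists, getOrCreate restores the fully built context and the factory is never invoked. When no checkpoint directory is configured at all, the factory is simply called directly and the recovery machinery is bypassed.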