use of org.apache.beam.runners.spark.translation.streaming.SparkRunnerStreamingContextFactory in project beam by apache.
the class SparkRunner method run.
@Override
public SparkPipelineResult run(final Pipeline pipeline) {
LOG.info("Executing pipeline using the SparkRunner.");
final SparkPipelineResult result;
final Future<?> startPipeline;
final SparkPipelineTranslator translator;
final ExecutorService executorService = Executors.newSingleThreadExecutor();
MetricsEnvironment.setMetricsSupported(true);
// visit the pipeline to determine the translation mode
detectTranslationMode(pipeline);
if (mOptions.isStreaming()) {
CheckpointDir checkpointDir = new CheckpointDir(mOptions.getCheckpointDir());
SparkRunnerStreamingContextFactory streamingContextFactory = new SparkRunnerStreamingContextFactory(pipeline, mOptions, checkpointDir);
final JavaStreamingContext jssc = JavaStreamingContext.getOrCreate(checkpointDir.getSparkCheckpointDir().toString(), streamingContextFactory);
// Checkpoint aggregator/metrics values
jssc.addStreamingListener(new JavaStreamingListenerWrapper(new AggregatorsAccumulator.AccumulatorCheckpointingSparkListener()));
jssc.addStreamingListener(new JavaStreamingListenerWrapper(new MetricsAccumulator.AccumulatorCheckpointingSparkListener()));
// register user-defined listeners.
for (JavaStreamingListener listener : mOptions.as(SparkContextOptions.class).getListeners()) {
LOG.info("Registered listener {}." + listener.getClass().getSimpleName());
jssc.addStreamingListener(new JavaStreamingListenerWrapper(listener));
}
// register Watermarks listener to broadcast the advanced WMs.
jssc.addStreamingListener(new JavaStreamingListenerWrapper(new WatermarksListener(jssc)));
// The reason we call initAccumulators here even though it is called in
// SparkRunnerStreamingContextFactory is because the factory is not called when resuming
// from checkpoint (When not resuming from checkpoint initAccumulators will be called twice
// but this is fine since it is idempotent).
initAccumulators(mOptions, jssc.sparkContext());
startPipeline = executorService.submit(new Runnable() {
@Override
public void run() {
LOG.info("Starting streaming pipeline execution.");
jssc.start();
}
});
result = new SparkPipelineResult.StreamingMode(startPipeline, jssc);
} else {
// create the evaluation context
final JavaSparkContext jsc = SparkContextFactory.getSparkContext(mOptions);
final EvaluationContext evaluationContext = new EvaluationContext(jsc, pipeline, mOptions);
translator = new TransformTranslator.Translator();
// update the cache candidates
updateCacheCandidates(pipeline, translator, evaluationContext);
initAccumulators(mOptions, jsc);
startPipeline = executorService.submit(new Runnable() {
@Override
public void run() {
pipeline.traverseTopologically(new Evaluator(translator, evaluationContext));
evaluationContext.computeOutputs();
LOG.info("Batch pipeline execution complete.");
}
});
result = new SparkPipelineResult.BatchMode(startPipeline, jsc);
}
if (mOptions.getEnableSparkMetricSinks()) {
registerMetricsSource(mOptions.getAppName());
}
return result;
}
Aggregations