Use of org.apache.beam.runners.jobsubmission.PortablePipelineResult in project beam by apache.
The class SparkPipelineRunner, method run.
@Override
public PortablePipelineResult run(RunnerApi.Pipeline pipeline, JobInfo jobInfo) {
  SparkPortablePipelineTranslator translator;
  boolean isStreaming = pipelineOptions.isStreaming() || hasUnboundedPCollections(pipeline);
  if (isStreaming) {
    translator = new SparkStreamingPortablePipelineTranslator();
  } else {
    translator = new SparkBatchPortablePipelineTranslator();
  }
  // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles.
  RunnerApi.Pipeline pipelineWithSdfExpanded =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN,
          pipeline,
          SplittableParDoExpander.createSizedReplacement());
  // Don't let the fuser fuse any subcomponents of native transforms.
  RunnerApi.Pipeline trimmedPipeline =
      TrivialNativeTransformExpander.forKnownUrns(pipelineWithSdfExpanded, translator.knownUrns());
  // Produce the fused pipeline proto, unless the input already contains fused stages.
  // TODO: Consider supporting partially-fused graphs.
  RunnerApi.Pipeline fusedPipeline =
      trimmedPipeline.getComponents().getTransformsMap().values().stream()
              .anyMatch(proto -> ExecutableStage.URN.equals(proto.getSpec().getUrn()))
          ? trimmedPipeline
          : GreedyPipelineFuser.fuse(trimmedPipeline).toPipeline();
  prepareFilesToStage(pipelineOptions);
  PortablePipelineResult result;
  final JavaSparkContext jsc = SparkContextFactory.getSparkContext(pipelineOptions);
  final long startTime = Instant.now().getMillis();
  EventLoggingListener eventLoggingListener =
      startEventLoggingListener(jsc, pipelineOptions, startTime);
  // Initialize accumulators.
  AggregatorsAccumulator.init(pipelineOptions, jsc);
  MetricsEnvironment.setMetricsSupported(true);
  MetricsAccumulator.init(pipelineOptions, jsc);
  final SparkTranslationContext context =
      translator.createTranslationContext(jsc, pipelineOptions, jobInfo);
  final ExecutorService executorService = Executors.newSingleThreadExecutor();
  LOG.info(String.format("Running job %s on Spark master %s", jobInfo.jobId(), jsc.master()));
  if (isStreaming) {
    final JavaStreamingContext jssc =
        ((SparkStreamingTranslationContext) context).getStreamingContext();
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(
            new AggregatorsAccumulator.AccumulatorCheckpointingSparkListener()));
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(
            new MetricsAccumulator.AccumulatorCheckpointingSparkListener()));
    // Register user-defined listeners.
    for (JavaStreamingListener listener :
        pipelineOptions.as(SparkContextOptions.class).getListeners()) {
LOG.info("Registered listener {}." + listener.getClass().getSimpleName());
      jssc.addStreamingListener(new JavaStreamingListenerWrapper(listener));
    }
    // Register the watermarks listener to broadcast the advanced watermarks.
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(
            new GlobalWatermarkHolder.WatermarkAdvancingStreamingListener()));
    jssc.checkpoint(pipelineOptions.getCheckpointDir());
    // Obtain the streaming timeout from options.
    Long timeout =
        pipelineOptions.as(SparkPortableStreamingPipelineOptions.class).getStreamingTimeoutMs();
    final Future<?> submissionFuture =
        executorService.submit(
            () -> {
              translator.translate(fusedPipeline, context);
              LOG.info(
                  String.format(
                      "Job %s: Pipeline translated successfully. Computing outputs",
                      jobInfo.jobId()));
              context.computeOutputs();
              jssc.start();
              try {
                jssc.awaitTerminationOrTimeout(timeout);
              } catch (InterruptedException e) {
                LOG.warn("Streaming context interrupted, shutting down.", e);
              }
              jssc.stop();
              LOG.info(String.format("Job %s finished.", jobInfo.jobId()));
            });
    result = new SparkPipelineResult.PortableStreamingMode(submissionFuture, jssc);
  } else {
    final Future<?> submissionFuture =
        executorService.submit(
            () -> {
              translator.translate(fusedPipeline, context);
              LOG.info(
                  String.format(
                      "Job %s: Pipeline translated successfully. Computing outputs",
                      jobInfo.jobId()));
              context.computeOutputs();
              LOG.info(String.format("Job %s finished.", jobInfo.jobId()));
            });
    result = new SparkPipelineResult.PortableBatchMode(submissionFuture, jsc);
  }
  executorService.shutdown();
  result.waitUntilFinish();
  MetricsPusher metricsPusher =
      new MetricsPusher(
          MetricsAccumulator.getInstance().value(),
          pipelineOptions.as(MetricsOptions.class),
          result);
  metricsPusher.start();
  if (eventLoggingListener != null) {
    eventLoggingListener.onApplicationStart(
        SparkCompat.buildSparkListenerApplicationStart(jsc, pipelineOptions, startTime, result));
    eventLoggingListener.onApplicationEnd(
        new SparkListenerApplicationEnd(Instant.now().getMillis()));
    eventLoggingListener.stop();
  }
  return result;
}
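All three runners in this listing share the same expand-trim-fuse preamble before execution. As a hedged restatement (the helper name fuseIfNeeded is hypothetical, not Beam API), the fusion guard amounts to: greedily fuse the proto unless it already contains ExecutableStage transforms, i.e. fusion already happened upstream.

// Hypothetical helper restating the guard used in the methods above; not part of Beam.
static RunnerApi.Pipeline fuseIfNeeded(RunnerApi.Pipeline p) {
  boolean alreadyFused =
      p.getComponents().getTransformsMap().values().stream()
          .anyMatch(t -> ExecutableStage.URN.equals(t.getSpec().getUrn()));
  // Partially-fused graphs are not supported (see the TODO above), so this is all-or-nothing.
  return alreadyFused ? p : GreedyPipelineFuser.fuse(p).toPipeline();
}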
Use of org.apache.beam.runners.jobsubmission.PortablePipelineResult in project beam by apache.
The class FlinkPipelineRunner, method runPipelineWithTranslator.
private <T extends FlinkPortablePipelineTranslator.TranslationContext>
    PortablePipelineResult runPipelineWithTranslator(
        final Pipeline pipeline, JobInfo jobInfo, FlinkPortablePipelineTranslator<T> translator)
        throws Exception {
  LOG.info("Translating pipeline to Flink program.");
  // Expand any splittable ParDos within the graph to enable sizing and splitting of bundles.
  Pipeline pipelineWithSdfExpanded =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN,
          pipeline,
          SplittableParDoExpander.createSizedReplacement());
  // Don't let the fuser fuse any subcomponents of native transforms.
  Pipeline trimmedPipeline =
      TrivialNativeTransformExpander.forKnownUrns(pipelineWithSdfExpanded, translator.knownUrns());
  // Produce the fused pipeline proto, unless the input already contains fused stages.
  // TODO: Consider supporting partially-fused graphs.
  RunnerApi.Pipeline fusedPipeline =
      trimmedPipeline.getComponents().getTransformsMap().values().stream()
              .anyMatch(proto -> ExecutableStage.URN.equals(proto.getSpec().getUrn()))
          ? trimmedPipeline
          : GreedyPipelineFuser.fuse(trimmedPipeline).toPipeline();
  FlinkPortablePipelineTranslator.Executor executor =
      translator.translate(
          translator.createTranslationContext(jobInfo, pipelineOptions, confDir, filesToStage),
          fusedPipeline);
  final JobExecutionResult result = executor.execute(pipelineOptions.getJobName());
  return createPortablePipelineResult(result, pipelineOptions);
}
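For orientation, a minimal driver sketch for this runner; an assumption-laden example, not Beam documentation. It assumes a RunnerApi.Pipeline proto named pipelineProto already produced by an SDK, uses placeholder values for the job id, job name, and retrieval token, and reaches the private method above through the public run(pipeline, jobInfo), which picks a batch or streaming translator. JobInfo.create, PipelineOptionsTranslation.toProto, and PipelineOptionsFactory.as are real Beam APIs; the constructor arguments shown follow the fields used above.

// Sketch only: in production the job server builds JobInfo from a submitted job.
FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
JobInfo jobInfo =
    JobInfo.create(
        "job-1",           // placeholder job id
        "portable-job",    // placeholder job name
        "",                // placeholder artifact retrieval token
        PipelineOptionsTranslation.toProto(options));
PortablePipelineResult result =
    new FlinkPipelineRunner(options, /* confDir */ null, Collections.emptyList())
        .run(pipelineProto, jobInfo); // declares `throws Exception`, like the method above
result.waitUntilFinish();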
Use of org.apache.beam.runners.jobsubmission.PortablePipelineResult in project beam by apache.
The class SamzaPipelineRunner, method run.
@Override
public PortablePipelineResult run(final RunnerApi.Pipeline pipeline, JobInfo jobInfo) {
  // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles.
  RunnerApi.Pipeline pipelineWithSdfExpanded =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN,
          pipeline,
          SplittableParDoExpander.createSizedReplacement());
  // Don't let the fuser fuse any subcomponents of native transforms.
  RunnerApi.Pipeline trimmedPipeline =
      TrivialNativeTransformExpander.forKnownUrns(
          pipelineWithSdfExpanded, SamzaPortablePipelineTranslator.knownUrns());
  // Produce the fused pipeline proto, unless the input already contains fused stages.
  // TODO: Consider supporting partially-fused graphs.
  RunnerApi.Pipeline fusedPipeline =
      trimmedPipeline.getComponents().getTransformsMap().values().stream()
              .anyMatch(proto -> ExecutableStage.URN.equals(proto.getSpec().getUrn()))
          ? trimmedPipeline
          : GreedyPipelineFuser.fuse(trimmedPipeline).toPipeline();
  LOG.info("Portable pipeline to run:");
  LOG.info(PipelineDotRenderer.toDotString(fusedPipeline));
  // The pipeline options coming from the SDK set an SDK-specific runner, which would break
  // serialization, so reset the runner here to a valid Java runner class.
  options.setRunner(SamzaRunner.class);
  try {
    final SamzaRunner runner = SamzaRunner.fromOptions(options);
    final PortablePipelineResult result = runner.runPortablePipeline(fusedPipeline, jobInfo);
    final SamzaExecutionEnvironment exeEnv = options.getSamzaExecutionEnvironment();
    if (exeEnv == SamzaExecutionEnvironment.LOCAL
        || exeEnv == SamzaExecutionEnvironment.STANDALONE) {
      // Make run() sync for local mode
      result.waitUntilFinish();
    }
    return result;
  } catch (Exception e) {
    throw new RuntimeException("Failed to invoke samza job", e);
  }
}
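A similar hedged sketch for the Samza entry point, reusing the jobInfo construction from the Flink sketch above and again assuming a proto named pipelineProto. The setter name mirrors the getSamzaExecutionEnvironment getter used in the code, and an options-only SamzaPipelineRunner constructor is assumed.

SamzaPipelineOptions options = PipelineOptionsFactory.as(SamzaPipelineOptions.class);
// LOCAL (or STANDALONE) makes run() block on waitUntilFinish(), per the branch above.
options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.LOCAL);
PortablePipelineResult result = new SamzaPipelineRunner(options).run(pipelineProto, jobInfo);
// In other environments run() returns immediately; callers can still block explicitly:
result.waitUntilFinish(Duration.standardMinutes(10));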
Use of org.apache.beam.runners.jobsubmission.PortablePipelineResult in project beam by apache.
The class SamzaRunner, method runPortablePipeline.
public PortablePipelineResult runPortablePipeline(RunnerApi.Pipeline pipeline, JobInfo jobInfo) {
  final String dotGraph = PipelineDotRenderer.toDotString(pipeline);
  LOG.info("Portable pipeline to run DOT graph:\n{}", dotGraph);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPortablePipelineTranslator.createConfig(pipeline, configBuilder, options);
  configBuilder.put(BEAM_DOT_GRAPH, dotGraph);
  final Config config = configBuilder.build();
  options.setConfigOverride(config);
  if (listener != null) {
    listener.onInit(config, options);
  }
  final SamzaExecutionContext executionContext = new SamzaExecutionContext(options);
  final Map<String, MetricsReporterFactory> reporterFactories = getMetricsReporters();
  final StreamApplication app =
      appDescriptor -> {
        appDescriptor
            .withApplicationContainerContextFactory(executionContext.new Factory())
            .withMetricsReporterFactories(reporterFactories);
        SamzaPortablePipelineTranslator.translate(
            pipeline, new PortableTranslationContext(appDescriptor, options, jobInfo));
      };
  ApplicationRunner runner = runSamzaApp(app, config);
  return new SamzaPortablePipelineResult(app, runner, executionContext, listener, config);
}
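One construct above worth unpacking is executionContext.new Factory(): Java's qualified inner-class creation expression, which instantiates a non-static inner class bound to a specific outer instance. A self-contained illustration with hypothetical classes (not Beam API):

class Outer {
  private final String name;
  Outer(String name) { this.name = name; }

  // A non-static inner class captures the enclosing Outer instance.
  class Inner {
    String describe() { return "inner of " + name; }
  }
}

Outer outer = new Outer("executionContext");
Outer.Inner inner = outer.new Inner(); // same shape as executionContext.new Factory()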