Use of org.apache.beam.runners.fnexecution.provisioning.JobInfo in project beam by apache.
The class ReferenceCountingExecutableStageContextFactoryTest, method testCreateReuseReleaseCreate.
@Test
public void testCreateReuseReleaseCreate() throws Exception {
  Creator creator = mock(Creator.class);
  ExecutableStageContext c1 = mock(ExecutableStageContext.class);
  ExecutableStageContext c2 = mock(ExecutableStageContext.class);
  ExecutableStageContext c3 = mock(ExecutableStageContext.class);
  ExecutableStageContext c4 = mock(ExecutableStageContext.class);
  when(creator.apply(any(JobInfo.class)))
      .thenReturn(c1)
      .thenReturn(c2)
      .thenReturn(c3)
      .thenReturn(c4);
  ReferenceCountingExecutableStageContextFactory factory =
      ReferenceCountingExecutableStageContextFactory.create(creator, (x) -> true);
  JobInfo jobA = mock(JobInfo.class);
  when(jobA.jobId()).thenReturn("jobA");
  JobInfo jobB = mock(JobInfo.class);
  when(jobB.jobId()).thenReturn("jobB");
  ExecutableStageContext ac1A = factory.get(jobA); // 1 open jobA
  ExecutableStageContext ac2B = factory.get(jobB); // 1 open jobB
  Assert.assertSame(
      "Context should be cached and reused.", ac1A, factory.get(jobA)); // 2 open jobA
  Assert.assertSame(
      "Context should be cached and reused.", ac2B, factory.get(jobB)); // 2 open jobB
  factory.release(ac1A); // 1 open jobA
  Assert.assertSame(
      "Context should be cached and reused.", ac1A, factory.get(jobA)); // 2 open jobA
  factory.release(ac1A); // 1 open jobA
  factory.release(ac1A); // 0 open jobA
  ExecutableStageContext ac3A = factory.get(jobA); // 1 open jobA
  Assert.assertNotSame("We should get a new instance.", ac1A, ac3A);
  Assert.assertSame(
      "Context should be cached and reused.", ac3A, factory.get(jobA)); // 2 open jobA
  factory.release(ac3A); // 1 open jobA
  factory.release(ac3A); // 0 open jobA
  Assert.assertSame(
      "Context should be cached and reused.", ac2B, factory.get(jobB)); // 3 open jobB
  factory.release(ac2B); // 2 open jobB
  factory.release(ac2B); // 1 open jobB
  factory.release(ac2B); // 0 open jobB
  ExecutableStageContext ac4B = factory.get(jobB); // 1 open jobB
  Assert.assertNotSame("We should get a new instance.", ac2B, ac4B);
  factory.release(ac4B); // 0 open jobB
}
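For contrast with the mocked Creator and JobInfo above, here is a minimal sketch of the same get/release pattern with a JobInfo built via JobInfo.create; myCreator, the job id, and the retrieval token are placeholders rather than values from the Beam test.

// Hedged sketch: acquire and release a context for a concrete (non-mock) JobInfo.
Struct optionsStruct = PipelineOptionsTranslation.toProto(PipelineOptionsFactory.create());
JobInfo jobInfo = JobInfo.create("job-1", "myJob", "placeholder-retrieval-token", optionsStruct);
ReferenceCountingExecutableStageContextFactory factory =
    ReferenceCountingExecutableStageContextFactory.create(myCreator, (x) -> true); // myCreator: a Creator supplied by the runner
ExecutableStageContext context = factory.get(jobInfo); // reference count for "job-1" becomes 1
try {
  // ... execute bundles against the context ...
} finally {
  factory.release(context); // count drops to 0; a later get(jobInfo) yields a new instance
}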
Use of org.apache.beam.runners.fnexecution.provisioning.JobInfo in project beam by apache.
The class SamzaJobInvoker, method invokeWithExecutor.
@Override
protected JobInvocation invokeWithExecutor(
    RunnerApi.Pipeline pipeline,
    Struct options,
    @Nullable String retrievalToken,
    ListeningExecutorService executorService) {
  LOG.trace("Parsing pipeline options");
  final SamzaPortablePipelineOptions samzaOptions =
      PipelineOptionsTranslation.fromProto(options).as(SamzaPortablePipelineOptions.class);
  final PortablePipelineRunner pipelineRunner;
  if (Strings.isNullOrEmpty(samzaOptions.getOutputExecutablePath())) {
    pipelineRunner = new SamzaPipelineRunner(samzaOptions);
  } else {
    /*
     * Support --output_executable_path, which bundles the input pipeline along with all
     * artifacts required to run it into a jar that can be executed later.
     */
    pipelineRunner = new PortablePipelineJarCreator(SamzaPipelineRunner.class);
  }
  final String invocationId =
      String.format("%s_%s", samzaOptions.getJobName(), UUID.randomUUID().toString());
  final JobInfo jobInfo =
      JobInfo.create(invocationId, samzaOptions.getJobName(), retrievalToken, options);
  return new JobInvocation(jobInfo, executorService, pipeline, pipelineRunner);
}
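The Struct handed to invokeWithExecutor is the proto form of the pipeline options. A small sketch of that round trip, using only the translation calls already shown in these snippets (the job name is an illustrative value):

// Hedged sketch: serialize options to the portable Struct form and read them back.
SamzaPortablePipelineOptions samzaOptions =
    PipelineOptionsFactory.create().as(SamzaPortablePipelineOptions.class);
samzaOptions.setJobName("my-samza-job"); // illustrative value
Struct optionsProto = PipelineOptionsTranslation.toProto(samzaOptions);
SamzaPortablePipelineOptions roundTripped =
    PipelineOptionsTranslation.fromProto(optionsProto).as(SamzaPortablePipelineOptions.class);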
Use of org.apache.beam.runners.fnexecution.provisioning.JobInfo in project beam by apache.
The class SparkPipelineRunner, method run.
@Override
public PortablePipelineResult run(RunnerApi.Pipeline pipeline, JobInfo jobInfo) {
  SparkPortablePipelineTranslator translator;
  boolean isStreaming = pipelineOptions.isStreaming() || hasUnboundedPCollections(pipeline);
  if (isStreaming) {
    translator = new SparkStreamingPortablePipelineTranslator();
  } else {
    translator = new SparkBatchPortablePipelineTranslator();
  }
  // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles.
  Pipeline pipelineWithSdfExpanded =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN,
          pipeline,
          SplittableParDoExpander.createSizedReplacement());
  // Don't let the fuser fuse any subcomponents of native transforms.
  Pipeline trimmedPipeline =
      TrivialNativeTransformExpander.forKnownUrns(pipelineWithSdfExpanded, translator.knownUrns());
  // Fused pipeline proto.
  // TODO: Consider supporting partially-fused graphs.
  RunnerApi.Pipeline fusedPipeline =
      trimmedPipeline.getComponents().getTransformsMap().values().stream()
              .anyMatch(proto -> ExecutableStage.URN.equals(proto.getSpec().getUrn()))
          ? trimmedPipeline
          : GreedyPipelineFuser.fuse(trimmedPipeline).toPipeline();
  prepareFilesToStage(pipelineOptions);
  PortablePipelineResult result;
  final JavaSparkContext jsc = SparkContextFactory.getSparkContext(pipelineOptions);
  final long startTime = Instant.now().getMillis();
  EventLoggingListener eventLoggingListener =
      startEventLoggingListener(jsc, pipelineOptions, startTime);
  // Initialize accumulators.
  AggregatorsAccumulator.init(pipelineOptions, jsc);
  MetricsEnvironment.setMetricsSupported(true);
  MetricsAccumulator.init(pipelineOptions, jsc);
  final SparkTranslationContext context =
      translator.createTranslationContext(jsc, pipelineOptions, jobInfo);
  final ExecutorService executorService = Executors.newSingleThreadExecutor();
  LOG.info(String.format("Running job %s on Spark master %s", jobInfo.jobId(), jsc.master()));
  if (isStreaming) {
    final JavaStreamingContext jssc =
        ((SparkStreamingTranslationContext) context).getStreamingContext();
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(
            new AggregatorsAccumulator.AccumulatorCheckpointingSparkListener()));
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(
            new MetricsAccumulator.AccumulatorCheckpointingSparkListener()));
    // Register user-defined listeners.
    for (JavaStreamingListener listener :
        pipelineOptions.as(SparkContextOptions.class).getListeners()) {
      LOG.info("Registered listener {}.", listener.getClass().getSimpleName());
      jssc.addStreamingListener(new JavaStreamingListenerWrapper(listener));
    }
    // Register Watermarks listener to broadcast the advanced WMs.
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(
            new GlobalWatermarkHolder.WatermarkAdvancingStreamingListener()));
    jssc.checkpoint(pipelineOptions.getCheckpointDir());
    // Obtain timeout from options.
    Long timeout =
        pipelineOptions.as(SparkPortableStreamingPipelineOptions.class).getStreamingTimeoutMs();
    final Future<?> submissionFuture =
        executorService.submit(
            () -> {
              translator.translate(fusedPipeline, context);
              LOG.info(
                  String.format(
                      "Job %s: Pipeline translated successfully. Computing outputs",
                      jobInfo.jobId()));
              context.computeOutputs();
              jssc.start();
              try {
                jssc.awaitTerminationOrTimeout(timeout);
              } catch (InterruptedException e) {
                LOG.warn("Streaming context interrupted, shutting down.", e);
              }
              jssc.stop();
              LOG.info(String.format("Job %s finished.", jobInfo.jobId()));
            });
    result = new SparkPipelineResult.PortableStreamingMode(submissionFuture, jssc);
  } else {
    final Future<?> submissionFuture =
        executorService.submit(
            () -> {
              translator.translate(fusedPipeline, context);
              LOG.info(
                  String.format(
                      "Job %s: Pipeline translated successfully. Computing outputs",
                      jobInfo.jobId()));
              context.computeOutputs();
              LOG.info(String.format("Job %s finished.", jobInfo.jobId()));
            });
    result = new SparkPipelineResult.PortableBatchMode(submissionFuture, jsc);
  }
  executorService.shutdown();
  result.waitUntilFinish();
  MetricsPusher metricsPusher =
      new MetricsPusher(
          MetricsAccumulator.getInstance().value(),
          pipelineOptions.as(MetricsOptions.class),
          result);
  metricsPusher.start();
  if (eventLoggingListener != null) {
    eventLoggingListener.onApplicationStart(
        SparkCompat.buildSparkListenerApplicationStart(jsc, pipelineOptions, startTime, result));
    eventLoggingListener.onApplicationEnd(
        new SparkListenerApplicationEnd(Instant.now().getMillis()));
    eventLoggingListener.stop();
  }
  return result;
}
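A hedged sketch of driving this run method from a caller; the JobInfo is assembled the same way the Samza and Flink snippets do it, while runner, pipelineProto, and retrievalToken are assumed to already exist in the calling code.

// Assumed caller-side wiring; "runner" is an already-constructed SparkPipelineRunner,
// "pipelineProto" a RunnerApi.Pipeline, and "retrievalToken" e.g. the no-artifacts-staged token.
SparkPipelineOptions sparkOptions = PipelineOptionsFactory.create().as(SparkPipelineOptions.class);
String invocationId = String.format("%s_%s", sparkOptions.getJobName(), UUID.randomUUID());
JobInfo jobInfo =
    JobInfo.create(
        invocationId,
        sparkOptions.getJobName(),
        retrievalToken,
        PipelineOptionsTranslation.toProto(sparkOptions));
PortablePipelineResult result = runner.run(pipelineProto, jobInfo);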
Use of org.apache.beam.runners.fnexecution.provisioning.JobInfo in project beam by apache.
The class FlinkPipelineRunner, method runPipelineWithTranslator.
private <T extends FlinkPortablePipelineTranslator.TranslationContext>
    PortablePipelineResult runPipelineWithTranslator(
        final Pipeline pipeline, JobInfo jobInfo, FlinkPortablePipelineTranslator<T> translator)
        throws Exception {
  LOG.info("Translating pipeline to Flink program.");
  // Expand any splittable ParDos within the graph to enable sizing and splitting of bundles.
  Pipeline pipelineWithSdfExpanded =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN, pipeline, SplittableParDoExpander.createSizedReplacement());
  // Don't let the fuser fuse any subcomponents of native transforms.
  Pipeline trimmedPipeline =
      TrivialNativeTransformExpander.forKnownUrns(pipelineWithSdfExpanded, translator.knownUrns());
  // Fused pipeline proto.
  // TODO: Consider supporting partially-fused graphs.
  RunnerApi.Pipeline fusedPipeline =
      trimmedPipeline.getComponents().getTransformsMap().values().stream()
              .anyMatch(proto -> ExecutableStage.URN.equals(proto.getSpec().getUrn()))
          ? trimmedPipeline
          : GreedyPipelineFuser.fuse(trimmedPipeline).toPipeline();
  FlinkPortablePipelineTranslator.Executor executor =
      translator.translate(
          translator.createTranslationContext(jobInfo, pipelineOptions, confDir, filesToStage), fusedPipeline);
  final JobExecutionResult result = executor.execute(pipelineOptions.getJobName());
  return createPortablePipelineResult(result, pipelineOptions);
}
Use of org.apache.beam.runners.fnexecution.provisioning.JobInfo in project beam by apache.
The class FlinkPipelineRunner, method main.
/**
* Main method to be called only as the entry point to an executable jar with structure as defined
* in {@link PortablePipelineJarUtils}.
*/
public static void main(String[] args) throws Exception {
  // Register standard file systems.
  FileSystems.setDefaultPipelineOptions(PipelineOptionsFactory.create());
  FlinkPipelineRunnerConfiguration configuration = parseArgs(args);
  String baseJobName =
      configuration.baseJobName == null
          ? PortablePipelineJarUtils.getDefaultJobName()
          : configuration.baseJobName;
  Preconditions.checkArgument(
      baseJobName != null, "No default job name found. Job name must be set using --base-job-name.");
  Pipeline pipeline = PortablePipelineJarUtils.getPipelineFromClasspath(baseJobName);
  Struct originalOptions = PortablePipelineJarUtils.getPipelineOptionsFromClasspath(baseJobName);
  // The retrieval token is only required by the legacy artifact service, which the Flink runner
  // no longer uses.
  String retrievalToken =
      ArtifactApi.CommitManifestResponse.Constants.NO_ARTIFACTS_STAGED_TOKEN
          .getValueDescriptor()
          .getOptions()
          .getExtension(RunnerApi.beamConstant);
  FlinkPipelineOptions flinkOptions =
      PipelineOptionsTranslation.fromProto(originalOptions).as(FlinkPipelineOptions.class);
  String invocationId =
      String.format("%s_%s", flinkOptions.getJobName(), UUID.randomUUID().toString());
  FlinkPipelineRunner runner =
      new FlinkPipelineRunner(
          flinkOptions,
          configuration.flinkConfDir,
          detectClassPathResourcesToStage(FlinkPipelineRunner.class.getClassLoader(), flinkOptions));
  JobInfo jobInfo =
      JobInfo.create(
          invocationId,
          flinkOptions.getJobName(),
          retrievalToken,
          PipelineOptionsTranslation.toProto(flinkOptions));
  try {
    runner.run(pipeline, jobInfo);
  } catch (Exception e) {
    throw new RuntimeException(String.format("Job %s failed.", invocationId), e);
  }
  LOG.info("Job {} finished successfully.", invocationId);
}
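Usage note: this main method is the entry point of the jar that PortablePipelineJarCreator writes when --output_executable_path is set (see the Samza snippet above), so the bundled pipeline would be launched by running that jar directly (for example with java -jar), with --base-job-name available to select the job name when no default is found on the classpath.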