Search in sources :

Example 1 with JobInfo

use of org.apache.beam.model.jobmanagement.v1.JobApi.JobInfo in project beam by apache.

the class InMemoryJobService method getJobs.

/**
 * Answers a {@code GetJobsRequest} with one {@code JobInfo} entry per known invocation.
 *
 * <p>Every value in {@code invocations} is converted with {@code toProto()} and added to a
 * single {@code GetJobsResponse}, which is sent to the observer before the call is completed.
 * Any exception raised along the way is logged and reported to the observer as an
 * {@code INTERNAL} status carrying the original cause.
 *
 * @param request the incoming request; only used for trace logging here
 * @param responseObserver receives the response, or the error if building it fails
 */
@Override
public void getJobs(GetJobsRequest request, StreamObserver<GetJobsResponse> responseObserver) {
    LOG.trace("{} {}", GetJobsRequest.class.getSimpleName(), request);
    try {
        // Accumulate the job descriptions directly on the response builder.
        GetJobsResponse.Builder responseBuilder = GetJobsResponse.newBuilder();
        for (JobInvocation jobInvocation : invocations.values()) {
            responseBuilder.addJobInfo(jobInvocation.toProto());
        }
        responseObserver.onNext(responseBuilder.build());
        responseObserver.onCompleted();
    } catch (Exception e) {
        LOG.error("Encountered Unexpected Exception", e);
        responseObserver.onError(Status.INTERNAL.withCause(e).asException());
    }
}
Also used : GetJobsResponse(org.apache.beam.model.jobmanagement.v1.JobApi.GetJobsResponse) JobInfo(org.apache.beam.model.jobmanagement.v1.JobApi.JobInfo) ArrayList(java.util.ArrayList) GetJobsRequest(org.apache.beam.model.jobmanagement.v1.JobApi.GetJobsRequest) StatusRuntimeException(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.StatusRuntimeException) StatusException(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.StatusException)

Example 2 with JobInfo

use of org.apache.beam.runners.fnexecution.provisioning.JobInfo in project beam by apache.

the class SparkPipelineRunner method run.

/**
 * Translates the given portable pipeline for Spark, executes it, and waits for it to finish.
 *
 * <p>Steps, in order:
 *
 * <ol>
 *   <li>Choose the streaming translator when {@code pipelineOptions.isStreaming()} is set or
 *       {@code hasUnboundedPCollections(pipeline)} returns true; otherwise the batch translator.
 *   <li>Expand splittable DoFns, trim sub-components of native transforms, and fuse the
 *       pipeline unless it already contains an {@code ExecutableStage} transform.
 *   <li>Stage files, obtain the Spark context, and initialise the aggregator and metrics
 *       accumulators.
 *   <li>Submit translation and execution to a single-thread executor and block on
 *       {@code result.waitUntilFinish()}.
 *   <li>Start a {@code MetricsPusher} and, if an event logging listener was created, emit the
 *       application start/end events and stop the listener.
 * </ol>
 *
 * @param pipeline the portable pipeline proto to run
 * @param jobInfo job metadata; its {@code jobId()} is used in log messages and it is handed to
 *     the translation context
 * @return the result handle for the (already awaited) job
 */
@Override
public PortablePipelineResult run(RunnerApi.Pipeline pipeline, JobInfo jobInfo) {
    SparkPortablePipelineTranslator translator;
    boolean isStreaming = pipelineOptions.isStreaming() || hasUnboundedPCollections(pipeline);
    if (isStreaming) {
        translator = new SparkStreamingPortablePipelineTranslator();
    } else {
        translator = new SparkBatchPortablePipelineTranslator();
    }
    // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles.
    Pipeline pipelineWithSdfExpanded = ProtoOverrides.updateTransform(PTransformTranslation.PAR_DO_TRANSFORM_URN, pipeline, SplittableParDoExpander.createSizedReplacement());
    // Don't let the fuser fuse any subcomponents of native transforms.
    Pipeline trimmedPipeline = TrivialNativeTransformExpander.forKnownUrns(pipelineWithSdfExpanded, translator.knownUrns());
    // Fused pipeline proto. A pipeline that already contains an ExecutableStage is used as is.
    // TODO: Consider supporting partially-fused graphs.
    RunnerApi.Pipeline fusedPipeline = trimmedPipeline.getComponents().getTransformsMap().values().stream().anyMatch(proto -> ExecutableStage.URN.equals(proto.getSpec().getUrn())) ? trimmedPipeline : GreedyPipelineFuser.fuse(trimmedPipeline).toPipeline();
    prepareFilesToStage(pipelineOptions);
    PortablePipelineResult result;
    final JavaSparkContext jsc = SparkContextFactory.getSparkContext(pipelineOptions);
    final long startTime = Instant.now().getMillis();
    EventLoggingListener eventLoggingListener = startEventLoggingListener(jsc, pipelineOptions, startTime);
    // Initialize accumulators.
    AggregatorsAccumulator.init(pipelineOptions, jsc);
    MetricsEnvironment.setMetricsSupported(true);
    MetricsAccumulator.init(pipelineOptions, jsc);
    final SparkTranslationContext context = translator.createTranslationContext(jsc, pipelineOptions, jobInfo);
    final ExecutorService executorService = Executors.newSingleThreadExecutor();
    LOG.info("Running job {} on Spark master {}", jobInfo.jobId(), jsc.master());
    if (isStreaming) {
        final JavaStreamingContext jssc = ((SparkStreamingTranslationContext) context).getStreamingContext();
        jssc.addStreamingListener(new JavaStreamingListenerWrapper(new AggregatorsAccumulator.AccumulatorCheckpointingSparkListener()));
        jssc.addStreamingListener(new JavaStreamingListenerWrapper(new MetricsAccumulator.AccumulatorCheckpointingSparkListener()));
        // Register user-defined listeners.
        for (JavaStreamingListener listener : pipelineOptions.as(SparkContextOptions.class).getListeners()) {
            // Pass the name as a logging argument so the {} placeholder is actually substituted.
            LOG.info("Registered listener {}.", listener.getClass().getSimpleName());
            jssc.addStreamingListener(new JavaStreamingListenerWrapper(listener));
        }
        // Register Watermarks listener to broadcast the advanced WMs.
        jssc.addStreamingListener(new JavaStreamingListenerWrapper(new GlobalWatermarkHolder.WatermarkAdvancingStreamingListener()));
        jssc.checkpoint(pipelineOptions.getCheckpointDir());
        // Obtain timeout from options.
        Long timeout = pipelineOptions.as(SparkPortableStreamingPipelineOptions.class).getStreamingTimeoutMs();
        final Future<?> submissionFuture = executorService.submit(() -> {
            translator.translate(fusedPipeline, context);
            LOG.info("Job {}: Pipeline translated successfully. Computing outputs", jobInfo.jobId());
            context.computeOutputs();
            jssc.start();
            try {
                jssc.awaitTerminationOrTimeout(timeout);
            } catch (InterruptedException e) {
                // NOTE(review): the interrupt flag is not restored here; confirm whether
                // re-interrupting before jssc.stop() would be safe before changing this.
                LOG.warn("Streaming context interrupted, shutting down.", e);
            }
            jssc.stop();
            LOG.info("Job {} finished.", jobInfo.jobId());
        });
        result = new SparkPipelineResult.PortableStreamingMode(submissionFuture, jssc);
    } else {
        final Future<?> submissionFuture = executorService.submit(() -> {
            translator.translate(fusedPipeline, context);
            LOG.info("Job {}: Pipeline translated successfully. Computing outputs", jobInfo.jobId());
            context.computeOutputs();
            LOG.info("Job {} finished.", jobInfo.jobId());
        });
        result = new SparkPipelineResult.PortableBatchMode(submissionFuture, jsc);
    }
    // No further tasks are submitted; the already-submitted task keeps running.
    executorService.shutdown();
    result.waitUntilFinish();
    MetricsPusher metricsPusher = new MetricsPusher(MetricsAccumulator.getInstance().value(), pipelineOptions.as(MetricsOptions.class), result);
    metricsPusher.start();
    if (eventLoggingListener != null) {
        eventLoggingListener.onApplicationStart(SparkCompat.buildSparkListenerApplicationStart(jsc, pipelineOptions, startTime, result));
        eventLoggingListener.onApplicationEnd(new SparkListenerApplicationEnd(Instant.now().getMillis()));
        eventLoggingListener.stop();
    }
    return result;
}
Also used : MetricsAccumulator(org.apache.beam.runners.spark.metrics.MetricsAccumulator) ArtifactApi(org.apache.beam.model.jobmanagement.v1.ArtifactApi) LoggerFactory(org.slf4j.LoggerFactory) GreedyPipelineFuser(org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser) PortablePipelineRunner(org.apache.beam.runners.jobsubmission.PortablePipelineRunner) SparkCompat(org.apache.beam.runners.spark.util.SparkCompat) Future(java.util.concurrent.Future) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) SparkListenerApplicationEnd(org.apache.spark.scheduler.SparkListenerApplicationEnd) SparkStreamingPortablePipelineTranslator(org.apache.beam.runners.spark.translation.SparkStreamingPortablePipelineTranslator) CmdLineParser(org.kohsuke.args4j.CmdLineParser) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) SparkPortablePipelineTranslator(org.apache.beam.runners.spark.translation.SparkPortablePipelineTranslator) Struct(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct) UUID(java.util.UUID) TrivialNativeTransformExpander(org.apache.beam.runners.core.construction.graph.TrivialNativeTransformExpander) Option(org.kohsuke.args4j.Option) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Executors(java.util.concurrent.Executors) MetricsPusher(org.apache.beam.runners.core.metrics.MetricsPusher) CmdLineException(org.kohsuke.args4j.CmdLineException) ProtoOverrides(org.apache.beam.runners.core.construction.graph.ProtoOverrides) AggregatorsAccumulator(org.apache.beam.runners.spark.aggregators.AggregatorsAccumulator) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) PipelineOptionsTranslation(org.apache.beam.runners.core.construction.PipelineOptionsTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) 
SparkCommon.startEventLoggingListener(org.apache.beam.runners.spark.util.SparkCommon.startEventLoggingListener) SparkBatchPortablePipelineTranslator(org.apache.beam.runners.spark.translation.SparkBatchPortablePipelineTranslator) PortablePipelineResult(org.apache.beam.runners.jobsubmission.PortablePipelineResult) SparkTranslationContext(org.apache.beam.runners.spark.translation.SparkTranslationContext) PipelineTranslatorUtils.hasUnboundedPCollections(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.hasUnboundedPCollections) GlobalWatermarkHolder(org.apache.beam.runners.spark.util.GlobalWatermarkHolder) JavaStreamingListenerWrapper(org.apache.spark.streaming.api.java.JavaStreamingListenerWrapper) ExecutorService(java.util.concurrent.ExecutorService) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) JavaStreamingListener(org.apache.spark.streaming.api.java.JavaStreamingListener) Logger(org.slf4j.Logger) PortablePipelineJarUtils(org.apache.beam.runners.jobsubmission.PortablePipelineJarUtils) SparkStreamingTranslationContext(org.apache.beam.runners.spark.translation.SparkStreamingTranslationContext) SparkContextFactory(org.apache.beam.runners.spark.translation.SparkContextFactory) SplittableParDoExpander(org.apache.beam.runners.core.construction.graph.SplittableParDoExpander) MetricsEnvironment(org.apache.beam.sdk.metrics.MetricsEnvironment) MetricsOptions(org.apache.beam.sdk.metrics.MetricsOptions) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) SparkCommonPipelineOptions.prepareFilesToStage(org.apache.beam.runners.spark.SparkCommonPipelineOptions.prepareFilesToStage) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) Instant(org.joda.time.Instant) Nullable(edu.umd.cs.findbugs.annotations.Nullable) EventLoggingListener(org.apache.spark.scheduler.EventLoggingListener) FileSystems(org.apache.beam.sdk.io.FileSystems) MetricsOptions(org.apache.beam.sdk.metrics.MetricsOptions) 
SparkPortablePipelineTranslator(org.apache.beam.runners.spark.translation.SparkPortablePipelineTranslator) SparkBatchPortablePipelineTranslator(org.apache.beam.runners.spark.translation.SparkBatchPortablePipelineTranslator) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) SparkListenerApplicationEnd(org.apache.spark.scheduler.SparkListenerApplicationEnd) PortablePipelineResult(org.apache.beam.runners.jobsubmission.PortablePipelineResult) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) MetricsPusher(org.apache.beam.runners.core.metrics.MetricsPusher) SparkCommon.startEventLoggingListener(org.apache.beam.runners.spark.util.SparkCommon.startEventLoggingListener) EventLoggingListener(org.apache.spark.scheduler.EventLoggingListener) SparkTranslationContext(org.apache.beam.runners.spark.translation.SparkTranslationContext) SparkStreamingPortablePipelineTranslator(org.apache.beam.runners.spark.translation.SparkStreamingPortablePipelineTranslator) JavaStreamingListenerWrapper(org.apache.spark.streaming.api.java.JavaStreamingListenerWrapper) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) JavaStreamingListener(org.apache.spark.streaming.api.java.JavaStreamingListener) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) SparkStreamingTranslationContext(org.apache.beam.runners.spark.translation.SparkStreamingTranslationContext) ExecutorService(java.util.concurrent.ExecutorService)

Example 3 with JobInfo

use of org.apache.beam.runners.fnexecution.provisioning.JobInfo in project beam by apache.

the class FlinkPipelineRunner method runPipelineWithTranslator.

/**
 * Prepares a portable pipeline for Flink, translates it with the supplied translator, and
 * executes the resulting program.
 *
 * <p>The pipeline has its splittable ParDos expanded and sub-components of native transforms
 * trimmed. It is then fused, unless it already contains an {@code ExecutableStage} transform,
 * in which case it is used unchanged.
 *
 * @param pipeline the portable pipeline proto to run
 * @param jobInfo job metadata passed to the translation context
 * @param translator translator that supplies the known URNs, the translation context, and the
 *     executor
 * @return the portable result built from the Flink execution result
 * @throws Exception if translation or execution fails
 */
private <T extends FlinkPortablePipelineTranslator.TranslationContext> PortablePipelineResult runPipelineWithTranslator(final Pipeline pipeline, JobInfo jobInfo, FlinkPortablePipelineTranslator<T> translator) throws Exception {
    LOG.info("Translating pipeline to Flink program.");
    // Splittable ParDos are expanded first so bundles can be sized and split.
    Pipeline sdfExpanded = ProtoOverrides.updateTransform(PTransformTranslation.PAR_DO_TRANSFORM_URN, pipeline, SplittableParDoExpander.createSizedReplacement());
    // Native transforms keep their subcomponents out of the fuser's reach.
    Pipeline nativeTrimmed = TrivialNativeTransformExpander.forKnownUrns(sdfExpanded, translator.knownUrns());
    // A pipeline that already carries an ExecutableStage is treated as fused.
    // TODO: Consider supporting partially-fused graphs.
    boolean alreadyFused = nativeTrimmed.getComponents().getTransformsMap().values().stream().anyMatch(transform -> ExecutableStage.URN.equals(transform.getSpec().getUrn()));
    RunnerApi.Pipeline pipelineToTranslate;
    if (alreadyFused) {
        pipelineToTranslate = nativeTrimmed;
    } else {
        pipelineToTranslate = GreedyPipelineFuser.fuse(nativeTrimmed).toPipeline();
    }
    T translationContext = translator.createTranslationContext(jobInfo, pipelineOptions, confDir, filesToStage);
    FlinkPortablePipelineTranslator.Executor flinkExecutor = translator.translate(translationContext, pipelineToTranslate);
    final JobExecutionResult executionResult = flinkExecutor.execute(pipelineOptions.getJobName());
    return createPortablePipelineResult(executionResult, pipelineOptions);
}
Also used : ArtifactApi(org.apache.beam.model.jobmanagement.v1.ArtifactApi) LoggerFactory(org.slf4j.LoggerFactory) PipelineOptionsTranslation(org.apache.beam.runners.core.construction.PipelineOptionsTranslation) GreedyPipelineFuser(org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) PortablePipelineRunner(org.apache.beam.runners.jobsubmission.PortablePipelineRunner) Map(java.util.Map) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) PortablePipelineResult(org.apache.beam.runners.jobsubmission.PortablePipelineResult) PipelineTranslatorUtils.hasUnboundedPCollections(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.hasUnboundedPCollections) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) CmdLineParser(org.kohsuke.args4j.CmdLineParser) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) Logger(org.slf4j.Logger) PortablePipelineJarUtils(org.apache.beam.runners.jobsubmission.PortablePipelineJarUtils) PipelineResources.detectClassPathResourcesToStage(org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage) Struct(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct) UUID(java.util.UUID) TrivialNativeTransformExpander(org.apache.beam.runners.core.construction.graph.TrivialNativeTransformExpander) Option(org.kohsuke.args4j.Option) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) SplittableParDoExpander(org.apache.beam.runners.core.construction.graph.SplittableParDoExpander) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) MetricsPusher(org.apache.beam.runners.core.metrics.MetricsPusher) MetricsEnvironment(org.apache.beam.sdk.metrics.MetricsEnvironment) 
MetricsOptions(org.apache.beam.sdk.metrics.MetricsOptions) CmdLineException(org.kohsuke.args4j.CmdLineException) List(java.util.List) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) ProtoOverrides(org.apache.beam.runners.core.construction.graph.ProtoOverrides) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) FileSystems(org.apache.beam.sdk.io.FileSystems) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline)

Aggregations

UUID (java.util.UUID)2 ArtifactApi (org.apache.beam.model.jobmanagement.v1.ArtifactApi)2 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)2 Pipeline (org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline)2 PTransformTranslation (org.apache.beam.runners.core.construction.PTransformTranslation)2 PipelineOptionsTranslation (org.apache.beam.runners.core.construction.PipelineOptionsTranslation)2 ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage)2 GreedyPipelineFuser (org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser)2 ProtoOverrides (org.apache.beam.runners.core.construction.graph.ProtoOverrides)2 SplittableParDoExpander (org.apache.beam.runners.core.construction.graph.SplittableParDoExpander)2 TrivialNativeTransformExpander (org.apache.beam.runners.core.construction.graph.TrivialNativeTransformExpander)2 MetricsPusher (org.apache.beam.runners.core.metrics.MetricsPusher)2 JobInfo (org.apache.beam.runners.fnexecution.provisioning.JobInfo)2 PipelineTranslatorUtils.hasUnboundedPCollections (org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.hasUnboundedPCollections)2 PortablePipelineJarUtils (org.apache.beam.runners.jobsubmission.PortablePipelineJarUtils)2 PortablePipelineResult (org.apache.beam.runners.jobsubmission.PortablePipelineResult)2 PortablePipelineRunner (org.apache.beam.runners.jobsubmission.PortablePipelineRunner)2 FileSystems (org.apache.beam.sdk.io.FileSystems)2 MetricsEnvironment (org.apache.beam.sdk.metrics.MetricsEnvironment)2 MetricsOptions (org.apache.beam.sdk.metrics.MetricsOptions)2