Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class PortableRunner, method run.
@Override
public PipelineResult run(Pipeline pipeline) {
  Runnable cleanup;
  if (Environments.ENVIRONMENT_LOOPBACK.equals(
      options.as(PortablePipelineOptions.class).getDefaultEnvironmentType())) {
    GrpcFnServer<ExternalWorkerService> workerService;
    try {
      workerService = new ExternalWorkerService(options).start();
    } catch (Exception exn) {
      throw new RuntimeException("Failed to start GrpcFnServer for ExternalWorkerService", exn);
    }
    LOG.info("Starting worker service at {}", workerService.getApiServiceDescriptor().getUrl());
    options
        .as(PortablePipelineOptions.class)
        .setDefaultEnvironmentConfig(workerService.getApiServiceDescriptor().getUrl());
    cleanup =
        () -> {
          try {
            LOG.warn("closing worker service {}", workerService);
            workerService.close();
          } catch (Exception exn) {
            throw new RuntimeException(exn);
          }
        };
  } else {
    cleanup = null;
  }
  ImmutableList.Builder<String> filesToStageBuilder = ImmutableList.builder();
  List<String> stagingFiles = options.as(PortablePipelineOptions.class).getFilesToStage();
  if (stagingFiles == null) {
    List<String> classpathResources =
        detectClassPathResourcesToStage(Environments.class.getClassLoader(), options);
    if (classpathResources.isEmpty()) {
      throw new IllegalArgumentException("No classpath elements found.");
    }
    LOG.debug(
        "PortablePipelineOptions.filesToStage was not specified. "
            + "Defaulting to files from the classpath: {}",
        classpathResources.size());
    filesToStageBuilder.addAll(classpathResources);
  } else {
    filesToStageBuilder.addAll(stagingFiles);
  }
  // TODO(heejong): remove jar_packages experimental flag when cross-language dependency
  // management is implemented for all runners.
  List<String> experiments = options.as(ExperimentalOptions.class).getExperiments();
  if (experiments != null) {
    Optional<String> jarPackages =
        experiments.stream()
            .filter((String flag) -> flag.startsWith("jar_packages="))
            .findFirst();
    jarPackages.ifPresent(
        s ->
            filesToStageBuilder.addAll(
                Arrays.asList(s.replaceFirst("jar_packages=", "").split(","))));
  }
  options.as(PortablePipelineOptions.class).setFilesToStage(filesToStageBuilder.build());
  RunnerApi.Pipeline pipelineProto =
      PipelineTranslation.toProto(pipeline, SdkComponents.create(options));
  pipelineProto = DefaultArtifactResolver.INSTANCE.resolveArtifacts(pipelineProto);
  PrepareJobRequest prepareJobRequest =
      PrepareJobRequest.newBuilder()
          .setJobName(options.getJobName())
          .setPipeline(pipelineProto)
          .setPipelineOptions(PipelineOptionsTranslation.toProto(options))
          .build();
  LOG.info("Using job server endpoint: {}", endpoint);
  ManagedChannel jobServiceChannel =
      channelFactory.forDescriptor(ApiServiceDescriptor.newBuilder().setUrl(endpoint).build());
  JobServiceBlockingStub jobService = JobServiceGrpc.newBlockingStub(jobServiceChannel);
  try (CloseableResource<JobServiceBlockingStub> wrappedJobService =
      CloseableResource.of(jobService, unused -> jobServiceChannel.shutdown())) {
    final int jobServerTimeout = options.as(PortablePipelineOptions.class).getJobServerTimeout();
    PrepareJobResponse prepareJobResponse =
        jobService
            .withDeadlineAfter(jobServerTimeout, TimeUnit.SECONDS)
            .withWaitForReady()
            .prepare(prepareJobRequest);
    LOG.info("PrepareJobResponse: {}", prepareJobResponse);
    ApiServiceDescriptor artifactStagingEndpoint =
        prepareJobResponse.getArtifactStagingEndpoint();
    String stagingSessionToken = prepareJobResponse.getStagingSessionToken();
    try (CloseableResource<ManagedChannel> artifactChannel =
        CloseableResource.of(
            channelFactory.forDescriptor(artifactStagingEndpoint), ManagedChannel::shutdown)) {
      ArtifactStagingService.offer(
          new ArtifactRetrievalService(),
          ArtifactStagingServiceGrpc.newStub(artifactChannel.get()),
          stagingSessionToken);
    } catch (CloseableResource.CloseException e) {
      // CloseExceptions should only be thrown while closing the channel.
      LOG.warn("Error closing artifact staging channel", e);
    } catch (Exception e) {
      throw new RuntimeException("Error staging files.", e);
    }
    RunJobRequest runJobRequest =
        RunJobRequest.newBuilder().setPreparationId(prepareJobResponse.getPreparationId()).build();
    // Run the job and wait for a result. We don't set a timeout here because it may
    // take a long time for a job to complete, and streaming jobs never return a response.
    RunJobResponse runJobResponse = jobService.run(runJobRequest);
    LOG.info("RunJobResponse: {}", runJobResponse);
    ByteString jobId = runJobResponse.getJobIdBytes();
    return new JobServicePipelineResult(
        jobId, jobServerTimeout, wrappedJobService.transfer(), cleanup);
  } catch (CloseException e) {
    throw new RuntimeException(e);
  }
}
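For orientation, a client typically reaches this run method by selecting the portable runner through pipeline options. Below is a minimal sketch, assuming a job server is already listening on localhost:8099; the endpoint and flag values are illustrative, not taken from the snippet above.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.PortablePipelineOptions;
import org.apache.beam.sdk.transforms.Create;

public class PortableRunnerExample {
  public static void main(String[] args) {
    // Select the portable runner and point it at an assumed local job server.
    PortablePipelineOptions options =
        PipelineOptionsFactory.fromArgs(
                "--runner=PortableRunner",
                "--jobEndpoint=localhost:8099",
                "--defaultEnvironmentType=LOOPBACK")
            .as(PortablePipelineOptions.class);
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of("a", "b", "c"));
    // run() eventually dispatches to PortableRunner#run shown above; with
    // LOOPBACK, that method also starts the ExternalWorkerService.
    p.run().waitUntilFinish();
  }
}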
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class SamzaRunner, method run.
@Override
public SamzaPipelineResult run(Pipeline pipeline) {
  // Convert Read-based splittable DoFns to primitive reads when not using the
  // Fn API, to work around a performance issue.
  if (!ExperimentalOptions.hasExperiment(pipeline.getOptions(), "beam_fn_api")) {
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
  }
  MetricsEnvironment.setMetricsSupported(true);
  if (LOG.isDebugEnabled()) {
    LOG.debug(
        "Pre-processed Beam pipeline in dot format:\n{}",
        PipelineDotRenderer.toDotString(pipeline));
    LOG.debug(
        "Pre-processed Beam pipeline in json format:\n{}",
        PipelineJsonRenderer.toJsonString(pipeline));
  }
  pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());
  final String dotGraph = PipelineDotRenderer.toDotString(pipeline);
  LOG.info("Beam pipeline DOT graph:\n{}", dotGraph);
  final String jsonGraph = PipelineJsonRenderer.toJsonString(pipeline);
  LOG.info("Beam pipeline JSON graph:\n{}", jsonGraph);
  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  configBuilder.put(BEAM_DOT_GRAPH, dotGraph);
  configBuilder.put(BEAM_JSON_GRAPH, jsonGraph);
  final Config config = configBuilder.build();
  options.setConfigOverride(config);
  if (listener != null) {
    listener.onInit(config, options);
  }
  final SamzaExecutionContext executionContext = new SamzaExecutionContext(options);
  final Map<String, MetricsReporterFactory> reporterFactories = getMetricsReporters();
  final StreamApplication app =
      appDescriptor -> {
        appDescriptor.withApplicationContainerContextFactory(executionContext.new Factory());
        appDescriptor.withMetricsReporterFactories(reporterFactories);
        SamzaPipelineTranslator.translate(
            pipeline, new TranslationContext(appDescriptor, idMap, options));
      };
  // Perform a final round of validation for the pipeline options now that all
  // configs are generated.
  SamzaPipelineOptionsValidator.validate(options);
  ApplicationRunner runner = runSamzaApp(app, config);
  return new SamzaPipelineResult(runner, executionContext, listener, config);
}
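As a usage sketch, SamzaRunner is invoked like any other runner, by selecting it in the pipeline options; the setup below is a minimal assumed example, with illustrative flag values.

import org.apache.beam.runners.samza.SamzaPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.TypeDescriptors;

public class SamzaRunnerExample {
  public static void main(String[] args) {
    SamzaPipelineOptions options =
        PipelineOptionsFactory.fromArgs("--runner=SamzaRunner").as(SamzaPipelineOptions.class);
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of(1, 2, 3))
        .apply(MapElements.into(TypeDescriptors.integers()).via(x -> x * 2));
    // run() dispatches to SamzaRunner#run shown above and returns a SamzaPipelineResult.
    p.run().waitUntilFinish();
  }
}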
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class ReadSourcePortableTest, method testExecution.
@Test(timeout = 120_000)
public void testExecution() throws Exception {
  PipelineOptions options =
      PipelineOptionsFactory.fromArgs("--experiments=use_deprecated_read").create();
  options.setRunner(CrashingRunner.class);
  options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
  options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
  options.as(FlinkPipelineOptions.class).setParallelism(2);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline p = Pipeline.create(options);
  PCollection<Long> result =
      p.apply(Read.from(new Source(10)))
          .apply(Window.into(FixedWindows.of(Duration.millis(1))));
  PAssert.that(result)
      .containsInAnyOrder(ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L));
  SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReads(p);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  List<RunnerApi.PTransform> readTransforms =
      pipelineProto.getComponents().getTransformsMap().values().stream()
          .filter(
              transform ->
                  transform.getSpec().getUrn().equals(PTransformTranslation.READ_TRANSFORM_URN))
          .collect(Collectors.toList());
  assertThat(readTransforms, not(empty()));
  // execute the pipeline
  JobInvocation jobInvocation =
      FlinkJobInvoker.create(null)
          .createJobInvocation(
              "fakeId",
              "fakeRetrievalToken",
              flinkJobExecutor,
              pipelineProto,
              options.as(FlinkPipelineOptions.class),
              new FlinkPipelineRunner(
                  options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
  jobInvocation.start();
  while (jobInvocation.getState() != JobState.Enum.DONE) {
    assertThat(jobInvocation.getState(), not(JobState.Enum.FAILED));
    Thread.sleep(100);
  }
}
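The URN filter in this test doubles as a useful inspection pattern: once a pipeline is translated to its proto form, every transform's URN is available from the components map. Below is a small sketch built on the same APIs the test uses; package locations may vary across Beam versions.

import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.PipelineTranslation;
import org.apache.beam.sdk.Pipeline;

public class TransformUrnDump {
  // Print each transform id together with the URN of its payload spec.
  static void dumpTransformUrns(Pipeline p) {
    RunnerApi.Pipeline proto = PipelineTranslation.toProto(p);
    proto.getComponents()
        .getTransformsMap()
        .forEach((id, t) -> System.out.println(id + " -> " + t.getSpec().getUrn()));
  }
}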
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class PortablePipelineJarCreator, method run.
/**
* <em>Does not actually run the pipeline.</em> Instead bundles the input pipeline along with all
* dependencies, artifacts, etc. required to run the pipeline into a jar that can be executed
* later.
*/
@Override
public PortablePipelineResult run(Pipeline pipeline, JobInfo jobInfo) throws Exception {
  PortablePipelineOptions pipelineOptions =
      PipelineOptionsTranslation.fromProto(jobInfo.pipelineOptions())
          .as(PortablePipelineOptions.class);
  final String jobName = jobInfo.jobName();
  File outputFile = new File(checkArgumentNotNull(pipelineOptions.getOutputExecutablePath()));
  LOG.info("Creating jar {} for job {}", outputFile.getAbsolutePath(), jobName);
  outputStream =
      new JarOutputStream(new FileOutputStream(outputFile), createManifest(mainClass, jobName));
  outputChannel = Channels.newChannel(outputStream);
  PortablePipelineJarUtils.writeDefaultJobName(outputStream, jobName);
  copyResourcesFromJar(
      new JarFile(mainClass.getProtectionDomain().getCodeSource().getLocation().getPath()));
  writeAsJson(
      PipelineOptionsTranslation.toProto(pipelineOptions),
      PortablePipelineJarUtils.getPipelineOptionsUri(jobName));
  Pipeline pipelineWithClasspathArtifacts = writeArtifacts(pipeline, jobName);
  writeAsJson(pipelineWithClasspathArtifacts, PortablePipelineJarUtils.getPipelineUri(jobName));
  // Closing the channel also closes the underlying stream.
  outputChannel.close();
  LOG.info("Jar {} created successfully.", outputFile.getAbsolutePath());
  return new JarCreatorPipelineResult();
}
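A hedged sketch of the caller's side: the output path read by this method comes from PortablePipelineOptions#getOutputExecutablePath, so a client only needs to set that option before handing the pipeline to a jar-creating runner. The path below is illustrative.

import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.PortablePipelineOptions;

public class JarOutputSetup {
  public static void main(String[] args) {
    PortablePipelineOptions options =
        PipelineOptionsFactory.create().as(PortablePipelineOptions.class);
    // Illustrative path; this is the value getOutputExecutablePath() returns above.
    options.setOutputExecutablePath("/tmp/my-pipeline.jar");
    // A runner backed by PortablePipelineJarCreator then bundles the pipeline,
    // its options, and staged artifacts into this jar instead of running it;
    // the resulting jar can later be launched with java -jar.
  }
}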
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class InMemoryJobService, method prepare.
@Override
public void prepare(
    PrepareJobRequest request, StreamObserver<PrepareJobResponse> responseObserver) {
  try {
    LOG.trace("{} {}", PrepareJobRequest.class.getSimpleName(), request);
    // insert preparation
    String preparationId =
        String.format("%s_%s", request.getJobName(), UUID.randomUUID().toString());
    Struct pipelineOptions = request.getPipelineOptions();
    if (pipelineOptions == null) {
      throw new NullPointerException("Encountered null pipeline options.");
    }
    LOG.trace("PIPELINE OPTIONS {} {}", pipelineOptions.getClass(), pipelineOptions);
    JobPreparation preparation =
        JobPreparation.builder()
            .setId(preparationId)
            .setPipeline(request.getPipeline())
            .setOptions(pipelineOptions)
            .build();
    JobPreparation previous = preparations.putIfAbsent(preparationId, preparation);
    if (previous != null) {
      // this should never happen with a UUID
      String errMessage =
          String.format("A job with the preparation ID \"%s\" already exists.", preparationId);
      StatusException exception = Status.NOT_FOUND.withDescription(errMessage).asException();
      responseObserver.onError(exception);
      return;
    }
    String stagingSessionToken = stagingServiceTokenProvider.apply(preparationId);
    stagingSessionTokens.putIfAbsent(preparationId, stagingSessionToken);
    stagingService
        .getService()
        .registerJob(
            stagingSessionToken,
            Maps.transformValues(
                request.getPipeline().getComponents().getEnvironmentsMap(),
                RunnerApi.Environment::getDependenciesList));
    // send response
    PrepareJobResponse response =
        PrepareJobResponse.newBuilder()
            .setPreparationId(preparationId)
            .setArtifactStagingEndpoint(stagingServiceDescriptor)
            .setStagingSessionToken(stagingSessionToken)
            .build();
    responseObserver.onNext(response);
    responseObserver.onCompleted();
  } catch (Exception e) {
    LOG.error("Could not prepare job with name {}", request.getJobName(), e);
    responseObserver.onError(Status.INTERNAL.withCause(e).asException());
  }
}
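For reference, this prepare handler is the server side of the call the PortableRunner snippet at the top of this page makes. Below is a minimal standalone client sketch with an assumed endpoint; note that Beam's released artifacts expose gRPC under a vendored namespace, so the plain io.grpc imports here are for readability only.

import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;
import org.apache.beam.model.jobmanagement.v1.JobApi.PrepareJobRequest;
import org.apache.beam.model.jobmanagement.v1.JobApi.PrepareJobResponse;
import org.apache.beam.model.jobmanagement.v1.JobServiceGrpc;
import org.apache.beam.model.pipeline.v1.RunnerApi;

public class PrepareClientSketch {
  public static void main(String[] args) {
    // Assumed endpoint of a locally running job service such as InMemoryJobService.
    ManagedChannel channel =
        ManagedChannelBuilder.forTarget("localhost:8099").usePlaintext().build();
    JobServiceGrpc.JobServiceBlockingStub stub = JobServiceGrpc.newBlockingStub(channel);
    PrepareJobResponse response =
        stub.prepare(
            PrepareJobRequest.newBuilder()
                .setJobName("my-job")
                // An empty pipeline proto for illustration; a real client sends
                // the translated pipeline, as PortableRunner#run does above.
                .setPipeline(RunnerApi.Pipeline.getDefaultInstance())
                .build());
    System.out.println("Preparation id: " + response.getPreparationId());
    System.out.println("Staging endpoint: " + response.getArtifactStagingEndpoint().getUrl());
    channel.shutdown();
  }
}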