Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class PortableRunner, method run.
@Override
public PipelineResult run(Pipeline pipeline) {
  Runnable cleanup;
  if (Environments.ENVIRONMENT_LOOPBACK.equals(
      options.as(PortablePipelineOptions.class).getDefaultEnvironmentType())) {
    GrpcFnServer<ExternalWorkerService> workerService;
    try {
      workerService = new ExternalWorkerService(options).start();
    } catch (Exception exn) {
      throw new RuntimeException("Failed to start GrpcFnServer for ExternalWorkerService", exn);
    }
    LOG.info("Starting worker service at {}", workerService.getApiServiceDescriptor().getUrl());
    options
        .as(PortablePipelineOptions.class)
        .setDefaultEnvironmentConfig(workerService.getApiServiceDescriptor().getUrl());
    cleanup =
        () -> {
          try {
            LOG.warn("closing worker service {}", workerService);
            workerService.close();
          } catch (Exception exn) {
            throw new RuntimeException(exn);
          }
        };
  } else {
    cleanup = null;
  }
  ImmutableList.Builder<String> filesToStageBuilder = ImmutableList.builder();
  List<String> stagingFiles = options.as(PortablePipelineOptions.class).getFilesToStage();
  if (stagingFiles == null) {
    List<String> classpathResources =
        detectClassPathResourcesToStage(Environments.class.getClassLoader(), options);
    if (classpathResources.isEmpty()) {
      throw new IllegalArgumentException("No classpath elements found.");
    }
    LOG.debug(
        "PortablePipelineOptions.filesToStage was not specified. "
            + "Defaulting to files from the classpath: {}",
        classpathResources.size());
    filesToStageBuilder.addAll(classpathResources);
  } else {
    filesToStageBuilder.addAll(stagingFiles);
  }
  // TODO(heejong): remove jar_packages experimental flag when cross-language dependency
  // management is implemented for all runners.
  List<String> experiments = options.as(ExperimentalOptions.class).getExperiments();
  if (experiments != null) {
    Optional<String> jarPackages =
        experiments.stream()
            .filter((String flag) -> flag.startsWith("jar_packages="))
            .findFirst();
    jarPackages.ifPresent(
        s ->
            filesToStageBuilder.addAll(
                Arrays.asList(s.replaceFirst("jar_packages=", "").split(","))));
  }
  options.as(PortablePipelineOptions.class).setFilesToStage(filesToStageBuilder.build());
  RunnerApi.Pipeline pipelineProto =
      PipelineTranslation.toProto(pipeline, SdkComponents.create(options));
  pipelineProto = DefaultArtifactResolver.INSTANCE.resolveArtifacts(pipelineProto);
  PrepareJobRequest prepareJobRequest =
      PrepareJobRequest.newBuilder()
          .setJobName(options.getJobName())
          .setPipeline(pipelineProto)
          .setPipelineOptions(PipelineOptionsTranslation.toProto(options))
          .build();
  LOG.info("Using job server endpoint: {}", endpoint);
  ManagedChannel jobServiceChannel =
      channelFactory.forDescriptor(ApiServiceDescriptor.newBuilder().setUrl(endpoint).build());
  JobServiceBlockingStub jobService = JobServiceGrpc.newBlockingStub(jobServiceChannel);
  try (CloseableResource<JobServiceBlockingStub> wrappedJobService =
      CloseableResource.of(jobService, unused -> jobServiceChannel.shutdown())) {
    final int jobServerTimeout = options.as(PortablePipelineOptions.class).getJobServerTimeout();
    PrepareJobResponse prepareJobResponse =
        jobService
            .withDeadlineAfter(jobServerTimeout, TimeUnit.SECONDS)
            .withWaitForReady()
            .prepare(prepareJobRequest);
    LOG.info("PrepareJobResponse: {}", prepareJobResponse);
    ApiServiceDescriptor artifactStagingEndpoint =
        prepareJobResponse.getArtifactStagingEndpoint();
    String stagingSessionToken = prepareJobResponse.getStagingSessionToken();
    try (CloseableResource<ManagedChannel> artifactChannel =
        CloseableResource.of(
            channelFactory.forDescriptor(artifactStagingEndpoint), ManagedChannel::shutdown)) {
      ArtifactStagingService.offer(
          new ArtifactRetrievalService(),
          ArtifactStagingServiceGrpc.newStub(artifactChannel.get()),
          stagingSessionToken);
    } catch (CloseableResource.CloseException e) {
      // CloseExceptions should only be thrown while closing the channel.
      LOG.warn("Error closing artifact staging channel", e);
    } catch (Exception e) {
      throw new RuntimeException("Error staging files.", e);
    }
    RunJobRequest runJobRequest =
        RunJobRequest.newBuilder().setPreparationId(prepareJobResponse.getPreparationId()).build();
    // Run the job and wait for a result. We don't set a timeout here because it may
    // take a long time for a job to complete, and streaming jobs never return a response.
    RunJobResponse runJobResponse = jobService.run(runJobRequest);
    LOG.info("RunJobResponse: {}", runJobResponse);
    ByteString jobId = runJobResponse.getJobIdBytes();
    return new JobServicePipelineResult(
        jobId, jobServerTimeout, wrappedJobService.transfer(), cleanup);
  } catch (CloseException e) {
    throw new RuntimeException(e);
  }
}
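For orientation, a client typically reaches this run method by selecting the portable runner through pipeline options. Below is a minimal sketch, assuming a job server is already listening on localhost:8099; the endpoint and flag values are illustrative, not taken from the snippet above.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.PortablePipelineOptions;
import org.apache.beam.sdk.transforms.Create;

public class PortableRunnerExample {
  public static void main(String[] args) {
    // Select the portable runner and point it at an assumed local job server.
    PortablePipelineOptions options =
        PipelineOptionsFactory.fromArgs(
                "--runner=PortableRunner",
                "--jobEndpoint=localhost:8099",
                "--defaultEnvironmentType=LOOPBACK")
            .as(PortablePipelineOptions.class);
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of("a", "b", "c"));
    // run() eventually dispatches to PortableRunner#run shown above; with
    // LOOPBACK, that method also starts the ExternalWorkerService.
    p.run().waitUntilFinish();
  }
}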
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class SamzaRunner, method run.
@Override
public SamzaPipelineResult run(Pipeline pipeline) {
  // Convert Read-based splittable DoFns to primitive reads when not using the
  // Fn API, to work around a performance issue.
  if (!ExperimentalOptions.hasExperiment(pipeline.getOptions(), "beam_fn_api")) {
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
  }
  MetricsEnvironment.setMetricsSupported(true);
  if (LOG.isDebugEnabled()) {
    LOG.debug(
        "Pre-processed Beam pipeline in dot format:\n{}",
        PipelineDotRenderer.toDotString(pipeline));
    LOG.debug(
        "Pre-processed Beam pipeline in json format:\n{}",
        PipelineJsonRenderer.toJsonString(pipeline));
  }
  pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());
  final String dotGraph = PipelineDotRenderer.toDotString(pipeline);
  LOG.info("Beam pipeline DOT graph:\n{}", dotGraph);
  final String jsonGraph = PipelineJsonRenderer.toJsonString(pipeline);
  LOG.info("Beam pipeline JSON graph:\n{}", jsonGraph);
  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  configBuilder.put(BEAM_DOT_GRAPH, dotGraph);
  configBuilder.put(BEAM_JSON_GRAPH, jsonGraph);
  final Config config = configBuilder.build();
  options.setConfigOverride(config);
  if (listener != null) {
    listener.onInit(config, options);
  }
  final SamzaExecutionContext executionContext = new SamzaExecutionContext(options);
  final Map<String, MetricsReporterFactory> reporterFactories = getMetricsReporters();
  final StreamApplication app =
      appDescriptor -> {
        appDescriptor.withApplicationContainerContextFactory(executionContext.new Factory());
        appDescriptor.withMetricsReporterFactories(reporterFactories);
        SamzaPipelineTranslator.translate(
            pipeline, new TranslationContext(appDescriptor, idMap, options));
      };
  // Perform a final round of validation for the pipeline options now that all
  // configs are generated.
  SamzaPipelineOptionsValidator.validate(options);
  ApplicationRunner runner = runSamzaApp(app, config);
  return new SamzaPipelineResult(runner, executionContext, listener, config);
}
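As a usage sketch, SamzaRunner is invoked like any other runner, by selecting it in the pipeline options; the setup below is a minimal assumed example, with illustrative flag values.

import org.apache.beam.runners.samza.SamzaPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.TypeDescriptors;

public class SamzaRunnerExample {
  public static void main(String[] args) {
    SamzaPipelineOptions options =
        PipelineOptionsFactory.fromArgs("--runner=SamzaRunner").as(SamzaPipelineOptions.class);
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of(1, 2, 3))
        .apply(MapElements.into(TypeDescriptors.integers()).via(x -> x * 2));
    // run() dispatches to SamzaRunner#run shown above and returns a SamzaPipelineResult.
    p.run().waitUntilFinish();
  }
}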
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class ReadSourcePortableTest, method testExecution.
@Test(timeout = 120_000)
public void testExecution() throws Exception {
  PipelineOptions options =
      PipelineOptionsFactory.fromArgs("--experiments=use_deprecated_read").create();
  options.setRunner(CrashingRunner.class);
  options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
  options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
  options.as(FlinkPipelineOptions.class).setParallelism(2);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline p = Pipeline.create(options);
  PCollection<Long> result =
      p.apply(Read.from(new Source(10)))
          .apply(Window.into(FixedWindows.of(Duration.millis(1))));
  PAssert.that(result)
      .containsInAnyOrder(ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L));
  SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReads(p);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  List<RunnerApi.PTransform> readTransforms =
      pipelineProto.getComponents().getTransformsMap().values().stream()
          .filter(
              transform ->
                  transform.getSpec().getUrn().equals(PTransformTranslation.READ_TRANSFORM_URN))
          .collect(Collectors.toList());
  assertThat(readTransforms, not(empty()));
  // execute the pipeline
  JobInvocation jobInvocation =
      FlinkJobInvoker.create(null)
          .createJobInvocation(
              "fakeId",
              "fakeRetrievalToken",
              flinkJobExecutor,
              pipelineProto,
              options.as(FlinkPipelineOptions.class),
              new FlinkPipelineRunner(
                  options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
  jobInvocation.start();
  while (jobInvocation.getState() != JobState.Enum.DONE) {
    assertThat(jobInvocation.getState(), not(JobState.Enum.FAILED));
    Thread.sleep(100);
  }
}
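The URN filter in this test doubles as a useful inspection pattern: once a pipeline is translated to its proto form, every transform's URN is available from the components map. Below is a small sketch built on the same APIs the test uses; package locations may vary across Beam versions.

import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.PipelineTranslation;
import org.apache.beam.sdk.Pipeline;

public class TransformUrnDump {
  // Print each transform id together with the URN of its payload spec.
  static void dumpTransformUrns(Pipeline p) {
    RunnerApi.Pipeline proto = PipelineTranslation.toProto(p);
    proto.getComponents()
        .getTransformsMap()
        .forEach((id, t) -> System.out.println(id + " -> " + t.getSpec().getUrn()));
  }
}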
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class PortablePipelineJarCreator, method run.
/**
* <em>Does not actually run the pipeline.</em> Instead bundles the input pipeline along with all
* dependencies, artifacts, etc. required to run the pipeline into a jar that can be executed
* later.
*/
@Override
public PortablePipelineResult run(Pipeline pipeline, JobInfo jobInfo) throws Exception {
  PortablePipelineOptions pipelineOptions =
      PipelineOptionsTranslation.fromProto(jobInfo.pipelineOptions())
          .as(PortablePipelineOptions.class);
  final String jobName = jobInfo.jobName();
  File outputFile = new File(checkArgumentNotNull(pipelineOptions.getOutputExecutablePath()));
  LOG.info("Creating jar {} for job {}", outputFile.getAbsolutePath(), jobName);
  outputStream =
      new JarOutputStream(new FileOutputStream(outputFile), createManifest(mainClass, jobName));
  outputChannel = Channels.newChannel(outputStream);
  PortablePipelineJarUtils.writeDefaultJobName(outputStream, jobName);
  copyResourcesFromJar(
      new JarFile(mainClass.getProtectionDomain().getCodeSource().getLocation().getPath()));
  writeAsJson(
      PipelineOptionsTranslation.toProto(pipelineOptions),
      PortablePipelineJarUtils.getPipelineOptionsUri(jobName));
  Pipeline pipelineWithClasspathArtifacts = writeArtifacts(pipeline, jobName);
  writeAsJson(pipelineWithClasspathArtifacts, PortablePipelineJarUtils.getPipelineUri(jobName));
  // Closing the channel also closes the underlying stream.
  outputChannel.close();
  LOG.info("Jar {} created successfully.", outputFile.getAbsolutePath());
  return new JarCreatorPipelineResult();
}
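A hedged sketch of the caller's side: the output path read by this method comes from PortablePipelineOptions#getOutputExecutablePath, so a client only needs to set that option before handing the pipeline to a jar-creating runner. The path below is illustrative.

import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.PortablePipelineOptions;

public class JarOutputSetup {
  public static void main(String[] args) {
    PortablePipelineOptions options =
        PipelineOptionsFactory.create().as(PortablePipelineOptions.class);
    // Illustrative path; this is the value getOutputExecutablePath() returns above.
    options.setOutputExecutablePath("/tmp/my-pipeline.jar");
    // A runner backed by PortablePipelineJarCreator then bundles the pipeline,
    // its options, and staged artifacts into this jar instead of running it;
    // the resulting jar can later be launched with java -jar.
  }
}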
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class InMemoryJobService, method prepare.
@Override
public void prepare(
    PrepareJobRequest request, StreamObserver<PrepareJobResponse> responseObserver) {
  try {
    LOG.trace("{} {}", PrepareJobRequest.class.getSimpleName(), request);
    // insert preparation
    String preparationId =
        String.format("%s_%s", request.getJobName(), UUID.randomUUID().toString());
    Struct pipelineOptions = request.getPipelineOptions();
    if (pipelineOptions == null) {
      throw new NullPointerException("Encountered null pipeline options.");
    }
    LOG.trace("PIPELINE OPTIONS {} {}", pipelineOptions.getClass(), pipelineOptions);
    JobPreparation preparation =
        JobPreparation.builder()
            .setId(preparationId)
            .setPipeline(request.getPipeline())
            .setOptions(pipelineOptions)
            .build();
    JobPreparation previous = preparations.putIfAbsent(preparationId, preparation);
    if (previous != null) {
      // this should never happen with a UUID
      String errMessage =
          String.format("A job with the preparation ID \"%s\" already exists.", preparationId);
      StatusException exception = Status.NOT_FOUND.withDescription(errMessage).asException();
      responseObserver.onError(exception);
      return;
    }
    String stagingSessionToken = stagingServiceTokenProvider.apply(preparationId);
    stagingSessionTokens.putIfAbsent(preparationId, stagingSessionToken);
    stagingService
        .getService()
        .registerJob(
            stagingSessionToken,
            Maps.transformValues(
                request.getPipeline().getComponents().getEnvironmentsMap(),
                RunnerApi.Environment::getDependenciesList));
    // send response
    PrepareJobResponse response =
        PrepareJobResponse.newBuilder()
            .setPreparationId(preparationId)
            .setArtifactStagingEndpoint(stagingServiceDescriptor)
            .setStagingSessionToken(stagingSessionToken)
            .build();
    responseObserver.onNext(response);
    responseObserver.onCompleted();
  } catch (Exception e) {
    LOG.error("Could not prepare job with name {}", request.getJobName(), e);
    responseObserver.onError(Status.INTERNAL.withCause(e).asException());
  }
}
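For reference, this prepare handler is the server side of the call the PortableRunner snippet at the top of this page makes. Below is a minimal standalone client sketch with an assumed endpoint; note that Beam's released artifacts expose gRPC under a vendored namespace, so the plain io.grpc imports here are for readability only.

import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;
import org.apache.beam.model.jobmanagement.v1.JobApi.PrepareJobRequest;
import org.apache.beam.model.jobmanagement.v1.JobApi.PrepareJobResponse;
import org.apache.beam.model.jobmanagement.v1.JobServiceGrpc;
import org.apache.beam.model.pipeline.v1.RunnerApi;

public class PrepareClientSketch {
  public static void main(String[] args) {
    // Assumed endpoint of a locally running job service such as InMemoryJobService.
    ManagedChannel channel =
        ManagedChannelBuilder.forTarget("localhost:8099").usePlaintext().build();
    JobServiceGrpc.JobServiceBlockingStub stub = JobServiceGrpc.newBlockingStub(channel);
    PrepareJobResponse response =
        stub.prepare(
            PrepareJobRequest.newBuilder()
                .setJobName("my-job")
                // An empty pipeline proto for illustration; a real client sends
                // the translated pipeline, as PortableRunner#run does above.
                .setPipeline(RunnerApi.Pipeline.getDefaultInstance())
                .build());
    System.out.println("Preparation id: " + response.getPreparationId());
    System.out.println("Staging endpoint: " + response.getArtifactStagingEndpoint().getUrl());
    channel.shutdown();
  }
}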