Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The expand method of the ExpansionService class.
@VisibleForTesting
/*package*/ ExpansionApi.ExpansionResponse expand(ExpansionApi.ExpansionRequest request) {
  LOG.info(
      "Expanding '{}' with URN '{}'",
      request.getTransform().getUniqueName(),
      request.getTransform().getSpec().getUrn());
  LOG.debug("Full transform: {}", request.getTransform());
  Set<String> existingTransformIds = request.getComponents().getTransformsMap().keySet();
  Pipeline pipeline = createPipeline();
  boolean isUseDeprecatedRead =
      ExperimentalOptions.hasExperiment(pipelineOptions, "use_deprecated_read")
          || ExperimentalOptions.hasExperiment(pipelineOptions, "beam_fn_api_use_deprecated_read");
  if (!isUseDeprecatedRead) {
    ExperimentalOptions.addExperiment(
        pipeline.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
    // TODO(BEAM-10670): Remove this when we address performance issue.
    ExperimentalOptions.addExperiment(
        pipeline.getOptions().as(ExperimentalOptions.class), "use_sdf_read");
  } else {
    LOG.warn(
        "Using use_deprecated_read in portable runners is runner-dependent. The "
            + "ExpansionService will respect that, but if your runner does not have support for "
            + "native Read transform, your Pipeline will fail during Pipeline submission.");
  }
  RehydratedComponents rehydratedComponents =
      RehydratedComponents.forComponents(request.getComponents()).withPipeline(pipeline);
  Map<String, PCollection<?>> inputs =
      request.getTransform().getInputsMap().entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  input -> {
                    try {
                      return rehydratedComponents.getPCollection(input.getValue());
                    } catch (IOException exn) {
                      throw new RuntimeException(exn);
                    }
                  }));
  String urn = request.getTransform().getSpec().getUrn();
  TransformProvider transformProvider = null;
  if (getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP).equals(urn)) {
    AllowList allowList =
        pipelineOptions.as(ExpansionServiceOptions.class).getJavaClassLookupAllowlist();
    assert allowList != null;
    transformProvider = new JavaClassLookupTransformProvider(allowList);
  } else {
    transformProvider = getRegisteredTransforms().get(urn);
    if (transformProvider == null) {
      throw new UnsupportedOperationException("Unknown urn: " + urn);
    }
  }
  List<String> classpathResources =
      transformProvider.getDependencies(request.getTransform().getSpec(), pipeline.getOptions());
  pipeline.getOptions().as(PortablePipelineOptions.class).setFilesToStage(classpathResources);
  Map<String, PCollection<?>> outputs =
      transformProvider.apply(
          pipeline,
          request.getTransform().getUniqueName(),
          request.getTransform().getSpec(),
          inputs);
  // Needed to find which transform was new...
  SdkComponents sdkComponents =
      rehydratedComponents
          .getSdkComponents(Collections.emptyList())
          .withNewIdPrefix(request.getNamespace());
  sdkComponents.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(
          pipeline.getOptions().as(PortablePipelineOptions.class)));
  Map<String, String> outputMap =
      outputs.entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  output -> {
                    try {
                      return sdkComponents.registerPCollection(output.getValue());
                    } catch (IOException exn) {
                      throw new RuntimeException(exn);
                    }
                  }));
  if (isUseDeprecatedRead) {
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
  }
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents);
  String expandedTransformId =
      Iterables.getOnlyElement(
          pipelineProto.getRootTransformIdsList().stream()
              .filter(id -> !existingTransformIds.contains(id))
              .collect(Collectors.toList()));
  RunnerApi.Components components = pipelineProto.getComponents();
  RunnerApi.PTransform expandedTransform =
      components
          .getTransformsOrThrow(expandedTransformId)
          .toBuilder()
          .setUniqueName(expandedTransformId)
          .clearOutputs()
          .putAllOutputs(outputMap)
          .build();
  LOG.debug("Expanded to {}", expandedTransform);
  return ExpansionApi.ExpansionResponse.newBuilder()
      .setComponents(components.toBuilder().removeTransforms(expandedTransformId))
      .setTransform(expandedTransform)
      .addAllRequirements(pipelineProto.getRequirementsList())
      .build();
}
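For context, here is a minimal sketch of how an expand() implementation like this one is typically exposed to remote SDKs over gRPC. The port and the wrapper class name are illustrative, and the vendored gRPC package segment (v1p36p0 below) varies by Beam release:

import org.apache.beam.vendor.grpc.v1p36p0.io.grpc.Server;
import org.apache.beam.vendor.grpc.v1p36p0.io.grpc.ServerBuilder;

// Hypothetical bootstrap: serve expansion requests on a local port so that a
// remote SDK (e.g. the Python SDK) can ask this JVM to expand Java transforms.
public class ExpansionServerMain {
  public static void main(String[] args) throws Exception {
    int port = 8097; // illustrative port
    Server server =
        ServerBuilder.forPort(port)
            .addService(new ExpansionService()) // the service whose expand() is shown above
            .build()
            .start();
    System.out.println("Expansion service listening on localhost:" + port);
    server.awaitTermination();
  }
}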
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The testConstruct method of the ExpansionServiceTest class.
@Test
public void testConstruct() {
  Pipeline p = Pipeline.create();
  p.apply(Impulse.create());
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  String inputPcollId =
      Iterables.getOnlyElement(
          Iterables.getOnlyElement(pipelineProto.getComponents().getTransformsMap().values())
              .getOutputsMap()
              .values());
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName(TEST_NAME)
                  .setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(TEST_URN))
                  .putInputs("input", inputPcollId))
          .setNamespace(TEST_NAMESPACE)
          .build();
  ExpansionApi.ExpansionResponse response = expansionService.expand(request);
  RunnerApi.PTransform expandedTransform = response.getTransform();
  assertEquals(TEST_NAMESPACE + TEST_NAME, expandedTransform.getUniqueName());
  // Verify it has the right input.
  assertThat(expandedTransform.getInputsMap().values(), contains(inputPcollId));
  // Verify it has the right output.
  assertThat(expandedTransform.getOutputsMap().keySet(), contains("output"));
  // Loose check that it's composite, and its children are represented.
  assertThat(expandedTransform.getSubtransformsCount(), greaterThan(0));
  for (String subtransform : expandedTransform.getSubtransformsList()) {
    assertTrue(response.getComponents().containsTransforms(subtransform));
  }
  // Check that any newly generated components are properly namespaced.
  Set<String> originalIds = allIds(request.getComponents());
  for (String id : allIds(response.getComponents())) {
    assertTrue(id, id.startsWith(TEST_NAMESPACE) || originalIds.contains(id));
  }
}
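The assertions above rely on an allIds helper that is not reproduced on this page. A plausible sketch, assuming it simply unions the ID sets of every component map in the proto:

// Sketch of the allIds helper: collect every component ID in a Components
// proto so the test can separate newly generated IDs from pre-existing ones.
private static Set<String> allIds(RunnerApi.Components components) {
  Set<String> all = new HashSet<>();
  all.addAll(components.getTransformsMap().keySet());
  all.addAll(components.getPcollectionsMap().keySet());
  all.addAll(components.getCodersMap().keySet());
  all.addAll(components.getWindowingStrategiesMap().keySet());
  all.addAll(components.getEnvironmentsMap().keySet());
  return all;
}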
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The testConstructGenerateSequenceWithRegistration method of the ExpansionServiceTest class.
@Test
public void testConstructGenerateSequenceWithRegistration() {
  ExternalTransforms.ExternalConfigurationPayload payload =
      encodeRowIntoExternalConfigurationPayload(
          Row.withSchema(
                  Schema.of(
                      Field.of("start", FieldType.INT64),
                      Field.nullable("stop", FieldType.INT64)))
              .withFieldValue("start", 0L)
              .withFieldValue("stop", 1L)
              .build());
  Pipeline p = Pipeline.create();
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName(TEST_NAME)
                  .setSpec(
                      RunnerApi.FunctionSpec.newBuilder()
                          .setUrn(GenerateSequence.External.URN)
                          .setPayload(payload.toByteString())))
          .setNamespace(TEST_NAMESPACE)
          .build();
  ExpansionApi.ExpansionResponse response = expansionService.expand(request);
  RunnerApi.PTransform expandedTransform = response.getTransform();
  assertEquals(TEST_NAMESPACE + TEST_NAME, expandedTransform.getUniqueName());
  assertThat(expandedTransform.getInputsCount(), Matchers.is(0));
  assertThat(expandedTransform.getOutputsCount(), Matchers.is(1));
  assertThat(expandedTransform.getSubtransformsCount(), greaterThan(0));
}
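The encodeRowIntoExternalConfigurationPayload helper is likewise not reproduced here. A hedged sketch of what such a helper plausibly does, assuming the payload pairs the Row's schema (as a proto) with the RowCoder-encoded bytes of the Row; this is an illustration, not necessarily the test's exact helper:

// Plausible implementation sketch of the helper named above.
private static ExternalTransforms.ExternalConfigurationPayload
    encodeRowIntoExternalConfigurationPayload(Row row) {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  try {
    RowCoder.of(row.getSchema()).encode(row, out); // schema-aware Row encoding
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return ExternalTransforms.ExternalConfigurationPayload.newBuilder()
      .setSchema(SchemaTranslation.schemaToProto(row.getSchema(), true))
      .setPayload(ByteString.copyFrom(out.toByteArray()))
      .build();
}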
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The main method of the DataflowRunnerHarness class.
/**
 * Fetches and processes work units from the Dataflow service.
 */
public static void main(String[] unusedArgs) throws Exception {
  RunnerApi.@Nullable Pipeline pipeline = DataflowWorkerHarnessHelper.getPipelineFromEnv();
  // This descriptor is used for all services except logging. They are isolated to keep
  // critical traffic protected from best effort traffic.
  ApiServiceDescriptor controlApiService = DataflowWorkerHarnessHelper.getControlDescriptor();
  ApiServiceDescriptor loggingApiService = DataflowWorkerHarnessHelper.getLoggingDescriptor();
  ApiServiceDescriptor statusApiService = DataflowWorkerHarnessHelper.getStatusDescriptor();
  LOG.info(
      "{} started, using port {} for control, {} for logging.",
      DataflowRunnerHarness.class,
      controlApiService,
      loggingApiService);
  DataflowWorkerHarnessHelper.initializeLogging(DataflowRunnerHarness.class);
  DataflowWorkerHarnessOptions pipelineOptions =
      DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions(
          DataflowRunnerHarness.class);
  DataflowWorkerHarnessHelper.configureLogging(pipelineOptions);
  // Initialize registered file systems.
  FileSystems.setDefaultPipelineOptions(pipelineOptions);
  DataflowPipelineDebugOptions dataflowOptions =
      pipelineOptions.as(DataflowPipelineDebugOptions.class);
  ServerFactory serverFactory;
  if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll_domain_socket")) {
    serverFactory = ServerFactory.createEpollDomainSocket();
  } else if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll")) {
    serverFactory = ServerFactory.createEpollSocket();
  } else {
    serverFactory = ServerFactory.createDefault();
  }
  ServerStreamObserverFactory streamObserverFactory =
      ServerStreamObserverFactory.fromOptions(pipelineOptions);
  Server servicesServer = null;
  Server loggingServer = null;
  Server statusServer = null;
  try (BeamFnLoggingService beamFnLoggingService =
          new BeamFnLoggingService(
              loggingApiService,
              DataflowWorkerLoggingInitializer.getSdkLoggingHandler()::publish,
              streamObserverFactory::from,
              GrpcContextHeaderAccessorProvider.getHeaderAccessor());
      BeamFnControlService beamFnControlService =
          new BeamFnControlService(
              controlApiService,
              streamObserverFactory::from,
              GrpcContextHeaderAccessorProvider.getHeaderAccessor());
      BeamFnDataGrpcService beamFnDataService =
          new BeamFnDataGrpcService(
              pipelineOptions,
              controlApiService,
              streamObserverFactory::from,
              GrpcContextHeaderAccessorProvider.getHeaderAccessor());
      BeamWorkerStatusGrpcService beamWorkerStatusGrpcService =
          statusApiService == null
              ? null
              : BeamWorkerStatusGrpcService.create(
                  statusApiService, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
      GrpcStateService beamFnStateService = GrpcStateService.create()) {
    servicesServer =
        serverFactory.create(
            ImmutableList.of(beamFnControlService, beamFnDataService, beamFnStateService),
            controlApiService);
    loggingServer =
        serverFactory.create(ImmutableList.of(beamFnLoggingService), loggingApiService);
    // gRPC server for obtaining SDK harness runtime status information.
    if (beamWorkerStatusGrpcService != null) {
      statusServer =
          serverFactory.create(ImmutableList.of(beamWorkerStatusGrpcService), statusApiService);
    }
    start(
        pipeline,
        pipelineOptions,
        beamFnControlService,
        beamFnDataService,
        controlApiService,
        beamFnStateService,
        beamWorkerStatusGrpcService);
    if (statusServer != null) {
      statusServer.shutdown();
    }
    servicesServer.shutdown();
    loggingServer.shutdown();
    // Wait 30 seconds for outstanding requests to finish.
    if (statusServer != null) {
      statusServer.awaitTermination(30, TimeUnit.SECONDS);
    }
    servicesServer.awaitTermination(30, TimeUnit.SECONDS);
    loggingServer.awaitTermination(30, TimeUnit.SECONDS);
  } finally {
    if (statusServer != null && !statusServer.isTerminated()) {
      statusServer.shutdownNow();
    }
    if (servicesServer != null && !servicesServer.isTerminated()) {
      servicesServer.shutdownNow();
    }
    if (loggingServer != null && !loggingServer.isTerminated()) {
      loggingServer.shutdownNow();
    }
  }
}
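The experiment strings checked when choosing a ServerFactory above are ordinary pipeline-option experiments. A small illustration of how one would be enabled programmatically; the local options construction here is purely illustrative, since in the harness the options arrive from the environment:

// Illustrative only: turn on the epoll-based server selection checked above.
DataflowWorkerHarnessOptions options =
    PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class);
ExperimentalOptions.addExperiment(options.as(ExperimentalOptions.class), "beam_fn_api_epoll");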
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The getPipelineFromEnv method of the DataflowWorkerHarnessHelper class.
// TODO: make env logic private to main() so it is never done outside of initializing the process
public static RunnerApi.@Nullable Pipeline getPipelineFromEnv() throws IOException {
String pipelinePath = System.getenv(PIPELINE_PATH);
if (pipelinePath == null) {
LOG.warn("Missing pipeline environment variable '{}'", PIPELINE_PATH);
return null;
}
File pipelineFile = new File(System.getenv(PIPELINE_PATH));
if (!pipelineFile.exists()) {
LOG.warn("Pipeline path '{}' does not exist", pipelineFile);
return null;
}
try (FileInputStream inputStream = new FileInputStream(pipelineFile)) {
RunnerApi.Pipeline pipelineProto = RunnerApi.Pipeline.parseFrom(inputStream);
LOG.info("Found portable pipeline:\n{}", TextFormat.printToString(pipelineProto));
return pipelineProto;
}
}
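For the producer side of this contract, a minimal sketch of serializing a pipeline to the file that getPipelineFromEnv() later parses. The file path is illustrative, and writeTo is the standard protobuf binary serialization:

// Counterpart sketch: write a RunnerApi.Pipeline proto to a file whose path
// would then be exported via the PIPELINE_PATH environment variable.
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
File pipelineFile = new File("/tmp/pipeline.pb"); // illustrative path
try (FileOutputStream out = new FileOutputStream(pipelineFile)) {
  pipelineProto.writeTo(out);
}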