use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in project beam by apache.
the class QueryablePipelineTest method forTransformsWithSubgraph.
@Test
public void forTransformsWithSubgraph() {
Components components = Components.newBuilder().putTransforms("root", PTransform.newBuilder().putOutputs("output", "output.out").build()).putPcollections("output.out", RunnerApi.PCollection.newBuilder().setUniqueName("output.out").build()).putTransforms("consumer", PTransform.newBuilder().putInputs("input", "output.out").build()).putTransforms("ignored", PTransform.newBuilder().putInputs("input", "output.out").build()).build();
QueryablePipeline pipeline = QueryablePipeline.forTransforms(ImmutableSet.of("root", "consumer"), components);
assertThat(pipeline.getRootTransforms(), contains(PipelineNode.pTransform("root", components.getTransformsOrThrow("root"))));
Set<PTransformNode> consumers = pipeline.getPerElementConsumers(PipelineNode.pCollection("output.out", components.getPcollectionsOrThrow("output.out")));
assertThat(consumers, contains(PipelineNode.pTransform("consumer", components.getTransformsOrThrow("consumer"))));
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in project beam by apache.
the class ProcessBundleDescriptors method fromExecutableStageInternal.
private static ExecutableProcessBundleDescriptor fromExecutableStageInternal(String id, ExecutableStage stage, ApiServiceDescriptor dataEndpoint, @Nullable ApiServiceDescriptor stateEndpoint) throws IOException {
// Create with all of the processing transforms, and all of the components.
// TODO: Remove the unreachable subcomponents if the size of the descriptor matters.
Map<String, PTransform> stageTransforms = stage.getTransforms().stream().collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform));
Components.Builder components = stage.getComponents().toBuilder().clearTransforms().putAllTransforms(stageTransforms);
ImmutableList.Builder<RemoteInputDestination> inputDestinationsBuilder = ImmutableList.builder();
ImmutableMap.Builder<String, Coder> remoteOutputCodersBuilder = ImmutableMap.builder();
WireCoderSetting wireCoderSetting = stage.getWireCoderSettings().stream().filter(ws -> ws.getInputOrOutputId().equals(stage.getInputPCollection().getId())).findAny().orElse(WireCoderSetting.getDefaultInstance());
// The order of these does not matter.
inputDestinationsBuilder.add(addStageInput(dataEndpoint, stage.getInputPCollection(), components, wireCoderSetting));
remoteOutputCodersBuilder.putAll(addStageOutputs(dataEndpoint, stage.getOutputPCollections(), components, stage.getWireCoderSettings()));
Map<String, Map<String, SideInputSpec>> sideInputSpecs = addSideInputs(stage, components);
Map<String, Map<String, BagUserStateSpec>> bagUserStateSpecs = forBagUserStates(stage, components.build());
Map<String, Map<String, TimerSpec>> timerSpecs = forTimerSpecs(stage, components);
lengthPrefixAnyInputCoder(stage.getInputPCollection().getId(), components);
// Copy data from components to ProcessBundleDescriptor.
ProcessBundleDescriptor.Builder bundleDescriptorBuilder = ProcessBundleDescriptor.newBuilder().setId(id);
if (stateEndpoint != null) {
bundleDescriptorBuilder.setStateApiServiceDescriptor(stateEndpoint);
}
if (timerSpecs.size() > 0) {
// By default use the data endpoint for timers, in the future considering enabling specifying
// a different ApiServiceDescriptor for timers.
bundleDescriptorBuilder.setTimerApiServiceDescriptor(dataEndpoint);
}
bundleDescriptorBuilder.putAllCoders(components.getCodersMap()).putAllEnvironments(components.getEnvironmentsMap()).putAllPcollections(components.getPcollectionsMap()).putAllWindowingStrategies(components.getWindowingStrategiesMap()).putAllTransforms(components.getTransformsMap());
return ExecutableProcessBundleDescriptor.of(bundleDescriptorBuilder.build(), inputDestinationsBuilder.build(), remoteOutputCodersBuilder.build(), sideInputSpecs, bagUserStateSpecs, timerSpecs);
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in project beam by apache.
the class ParDoTranslation method translateParDo.
public static ParDoPayload translateParDo(AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components) throws IOException {
final ParDo.MultiOutput<?, ?> parDo = appliedPTransform.getTransform();
final Pipeline pipeline = appliedPTransform.getPipeline();
final DoFn<?, ?> doFn = parDo.getFn();
// Get main input.
Set<String> allInputs = appliedPTransform.getInputs().keySet().stream().map(TupleTag::getId).collect(Collectors.toSet());
Set<String> sideInputs = parDo.getSideInputs().values().stream().map(s -> s.getTagInternal().getId()).collect(Collectors.toSet());
String mainInputName = Iterables.getOnlyElement(Sets.difference(allInputs, sideInputs));
PCollection<?> mainInput = (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputName));
final DoFnSchemaInformation doFnSchemaInformation = ParDo.getDoFnSchemaInformation(doFn, mainInput);
return translateParDo((ParDo.MultiOutput) parDo, mainInput, doFnSchemaInformation, pipeline, components);
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in project beam by apache.
the class ExecutableStage method fromPayload.
/**
* Return an {@link ExecutableStage} constructed from the provided {@link FunctionSpec}
* representation.
*
* <p>See {@link #toPTransform} for how the payload is constructed.
*
* <p>Note: The payload contains some information redundant with the {@link PTransform} it is the
* payload of. The {@link ExecutableStagePayload} should be sufficiently rich to construct a
* {@code ProcessBundleDescriptor} using only the payload.
*/
static ExecutableStage fromPayload(ExecutableStagePayload payload) {
Components components = payload.getComponents();
Environment environment = payload.getEnvironment();
Collection<WireCoderSetting> wireCoderSettings = payload.getWireCoderSettingsList();
PCollectionNode input = PipelineNode.pCollection(payload.getInput(), components.getPcollectionsOrThrow(payload.getInput()));
List<SideInputReference> sideInputs = payload.getSideInputsList().stream().map(sideInputId -> SideInputReference.fromSideInputId(sideInputId, components)).collect(Collectors.toList());
List<UserStateReference> userStates = payload.getUserStatesList().stream().map(userStateId -> UserStateReference.fromUserStateId(userStateId, components)).collect(Collectors.toList());
List<TimerReference> timers = payload.getTimersList().stream().map(timerId -> TimerReference.fromTimerId(timerId, components)).collect(Collectors.toList());
List<PTransformNode> transforms = payload.getTransformsList().stream().map(id -> PipelineNode.pTransform(id, components.getTransformsOrThrow(id))).collect(Collectors.toList());
List<PCollectionNode> outputs = payload.getOutputsList().stream().map(id -> PipelineNode.pCollection(id, components.getPcollectionsOrThrow(id))).collect(Collectors.toList());
return ImmutableExecutableStage.of(components, environment, input, sideInputs, userStates, timers, transforms, outputs, wireCoderSettings);
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in project beam by apache.
the class FusedPipeline method toPipeline.
/**
* Returns the {@link RunnerApi.Pipeline} representation of this {@link FusedPipeline}.
*
* <p>The {@link Components} of the returned pipeline will contain all of the {@link PTransform
* PTransforms} present in the original Pipeline that this {@link FusedPipeline} was created from,
* plus all of the {@link ExecutableStage ExecutableStages} contained within this {@link
* FusedPipeline}. The {@link Pipeline#getRootTransformIdsList()} will contain all of the runner
* executed transforms and all of the {@link ExecutableStage execuable stages} contained within
* the Pipeline.
*/
public RunnerApi.Pipeline toPipeline() {
Map<String, PTransform> executableStageTransforms = getEnvironmentExecutedTransforms();
Set<String> executableTransformIds = Sets.union(executableStageTransforms.keySet(), getRunnerExecutedTransforms().stream().map(PTransformNode::getId).collect(Collectors.toSet()));
// Augment the initial transforms with all of the executable transforms.
Components fusedComponents = getComponents().toBuilder().putAllTransforms(executableStageTransforms).build();
List<String> rootTransformIds = StreamSupport.stream(QueryablePipeline.forTransforms(executableTransformIds, fusedComponents).getTopologicallyOrderedTransforms().spliterator(), false).map(PTransformNode::getId).collect(Collectors.toList());
Pipeline res = Pipeline.newBuilder().setComponents(fusedComponents).addAllRootTransformIds(rootTransformIds).addAllRequirements(getRequirements()).build();
// Validate that fusion didn't produce a malformed pipeline.
PipelineValidator.validate(res);
return res;
}
Aggregations