Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
The class SideInputReference, method fromSideInputId.
/**
 * Create a side input reference from a SideInputId proto and components.
 */
public static SideInputReference fromSideInputId(
    SideInputId sideInputId, RunnerApi.Components components) {
  String transformId = sideInputId.getTransformId();
  String localName = sideInputId.getLocalName();
  String collectionId =
      components.getTransformsOrThrow(transformId).getInputsOrThrow(localName);
  PTransform transform = components.getTransformsOrThrow(transformId);
  PCollection collection = components.getPcollectionsOrThrow(collectionId);
  return SideInputReference.of(
      PipelineNode.pTransform(transformId, transform),
      localName,
      PipelineNode.pCollection(collectionId, collection));
}
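A minimal usage sketch follows; the transform id "parDoId" and local name "sideInput0" are hypothetical and must match entries that actually exist in the components of some translated pipeline proto (here called pipelineProto).
// Sketch only: build a SideInputId for the consuming ParDo and resolve it against the
// pipeline's components. Both string ids below are hypothetical.
RunnerApi.ExecutableStagePayload.SideInputId sideInputId =
    RunnerApi.ExecutableStagePayload.SideInputId.newBuilder()
        .setTransformId("parDoId")
        .setLocalName("sideInput0")
        .build();
SideInputReference ref =
    SideInputReference.fromSideInputId(sideInputId, pipelineProto.getComponents());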
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
The class InsertFetchAndFilterStreamingSideInputNodesTest, method testSdkParDoWithSideInput.
@Test
public void testSdkParDoWithSideInput() throws Exception {
  Pipeline p = Pipeline.create();
  PCollection<String> pc = p.apply(Create.of("a", "b", "c"));
  PCollectionView<List<String>> pcView = pc.apply(View.asList());
  pc.apply(ParDo.of(new TestDoFn()).withSideInputs(pcView));
  RunnerApi.Pipeline pipeline = PipelineTranslation.toProto(p);

  // Build the input network: predecessor -> mainInput -> ParDo that consumes the side input.
  Node predecessor = createParDoNode("predecessor");
  InstructionOutputNode mainInput =
      InstructionOutputNode.create(new InstructionOutput(), "fakeId");
  Node sideInputParDo = createParDoNode(findParDoWithSideInput(pipeline));
  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(predecessor);
  network.addNode(mainInput);
  network.addNode(sideInputParDo);
  network.addEdge(predecessor, mainInput, DefaultEdge.create());
  network.addEdge(mainInput, sideInputParDo, DefaultEdge.create());

  network = InsertFetchAndFilterStreamingSideInputNodes.with(pipeline).forNetwork(network);

  // The expected network has a fetch-and-filter node spliced in ahead of the side-input ParDo.
  Node mainInputClone =
      InstructionOutputNode.create(mainInput.getInstructionOutput(), "fakeId");
  Node fetchAndFilter =
      FetchAndFilterStreamingSideInputsNode.create(
          pcView.getWindowingStrategyInternal(),
          ImmutableMap.of(
              pcView,
              ParDoTranslation.translateWindowMappingFn(
                  pcView.getWindowMappingFn(),
                  SdkComponents.create(PipelineOptionsFactory.create()))),
          NameContextsForTests.nameContextForTest());
  MutableNetwork<Node, Edge> expectedNetwork = createEmptyNetwork();
  expectedNetwork.addNode(predecessor);
  expectedNetwork.addNode(mainInputClone);
  expectedNetwork.addNode(fetchAndFilter);
  expectedNetwork.addNode(mainInput);
  expectedNetwork.addNode(sideInputParDo);
  expectedNetwork.addEdge(predecessor, mainInputClone, DefaultEdge.create());
  expectedNetwork.addEdge(mainInputClone, fetchAndFilter, DefaultEdge.create());
  expectedNetwork.addEdge(fetchAndFilter, mainInput, DefaultEdge.create());
  expectedNetwork.addEdge(mainInput, sideInputParDo, DefaultEdge.create());

  assertThatNetworksAreIdentical(expectedNetwork, network);
}
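The test relies on a helper, findParDoWithSideInput, that is not shown in this snippet. One possible sketch (an assumption, not the project's actual implementation) scans the pipeline proto for a ParDo whose payload declares at least one side input and returns its transform id:
// Hypothetical sketch of findParDoWithSideInput. Assumes java.util.Map is imported and that
// the pipeline contains at least one ParDo with a side input.
private static String findParDoWithSideInput(RunnerApi.Pipeline pipeline) throws Exception {
  for (Map.Entry<String, RunnerApi.PTransform> entry :
      pipeline.getComponents().getTransformsMap().entrySet()) {
    RunnerApi.PTransform transform = entry.getValue();
    if (!PTransformTranslation.PAR_DO_TRANSFORM_URN.equals(transform.getSpec().getUrn())) {
      continue;
    }
    RunnerApi.ParDoPayload payload =
        RunnerApi.ParDoPayload.parseFrom(transform.getSpec().getPayload());
    if (!payload.getSideInputsMap().isEmpty()) {
      return entry.getKey();
    }
  }
  throw new AssertionError("No ParDo with a side input found in the pipeline");
}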
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
The class RegisterNodeFunction, method transformSideInputForSdk.
/**
 * Modifies the process bundle descriptor and updates the PTransform that the SDK harness will
 * see so that it uses length-prefixed coders for the side input PCollection and its windowing
 * strategy.
 */
private static final void transformSideInputForSdk(
    RunnerApi.Pipeline pipeline,
    RunnerApi.PTransform originalPTransform,
    String sideInputTag,
    ProcessBundleDescriptor.Builder processBundleDescriptor,
    RunnerApi.PTransform.Builder updatedPTransform) {
  RunnerApi.PCollection sideInputPCollection =
      pipeline.getComponents()
          .getPcollectionsOrThrow(originalPTransform.getInputsOrThrow(sideInputTag));
  RunnerApi.WindowingStrategy sideInputWindowingStrategy =
      pipeline.getComponents()
          .getWindowingStrategiesOrThrow(sideInputPCollection.getWindowingStrategyId());
  // TODO: We should not length prefix the window or key for the SDK side since the
  // key and window are already length delimited via protobuf itself. But we need to
  // maintain the length prefixing within the Runner harness to match the bytes that were
  // materialized to the side input sink.
  // We take the original pipeline coders and add any coders we have added when processing side
  // inputs before building new length prefixed variants.
  RunnerApi.Components.Builder componentsBuilder = pipeline.getComponents().toBuilder();
  componentsBuilder.putAllCoders(processBundleDescriptor.getCodersMap());
  String updatedSdkSideInputCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(
      sideInputPCollection.getCoderId(), componentsBuilder, false);
  String updatedSdkSideInputWindowCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(
      sideInputWindowingStrategy.getWindowCoderId(), componentsBuilder, false);
  processBundleDescriptor.putAllCoders(componentsBuilder.getCodersMap());
  String updatedSdkWindowingStrategyId =
      SyntheticComponents.uniqueId(
          sideInputPCollection.getWindowingStrategyId() + "-runner_generated",
          processBundleDescriptor.getWindowingStrategiesMap().keySet()::contains);
  processBundleDescriptor.putWindowingStrategies(
      updatedSdkWindowingStrategyId,
      sideInputWindowingStrategy.toBuilder().setWindowCoderId(updatedSdkSideInputWindowCoderId).build());
  RunnerApi.PCollection updatedSdkSideInputPcollection =
      sideInputPCollection.toBuilder()
          .setCoderId(updatedSdkSideInputCoderId)
          .setWindowingStrategyId(updatedSdkWindowingStrategyId)
          .build();
  // Replace the contents of the PCollection with the updated side input PCollection
  // specification and insert it into the updated PTransform.
  processBundleDescriptor.putPcollections(
      originalPTransform.getInputsOrThrow(sideInputTag), updatedSdkSideInputPcollection);
  updatedPTransform.putInputs(sideInputTag, originalPTransform.getInputsOrThrow(sideInputTag));
}
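The central call here is LengthPrefixUnknownCoders.addLengthPrefixedCoder, which registers a length-prefixed variant of an existing coder in the components builder and returns the new coder's id. A sketch of that call in isolation, where "sideInputCoderId" is a hypothetical id that must already exist in the components and the boolean flag simply mirrors the calls above:
// Sketch only: register a length-prefixed variant of an existing coder and get back its id.
RunnerApi.Components.Builder componentsBuilder = pipeline.getComponents().toBuilder();
String prefixedCoderId =
    LengthPrefixUnknownCoders.addLengthPrefixedCoder("sideInputCoderId", componentsBuilder, false);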
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
The class ReadSourcePortableTest, method testExecution.
@Test(timeout = 120_000)
public void testExecution() throws Exception {
  PipelineOptions options =
      PipelineOptionsFactory.fromArgs("--experiments=use_deprecated_read").create();
  options.setRunner(CrashingRunner.class);
  options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
  options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
  options.as(FlinkPipelineOptions.class).setParallelism(2);
  options.as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline p = Pipeline.create(options);
  PCollection<Long> result =
      p.apply(Read.from(new Source(10)))
          .apply(Window.into(FixedWindows.of(Duration.millis(1))));
  PAssert.that(result)
      .containsInAnyOrder(ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L));
  SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReads(p);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  List<RunnerApi.PTransform> readTransforms =
      pipelineProto.getComponents().getTransformsMap().values().stream()
          .filter(
              transform ->
                  transform.getSpec().getUrn().equals(PTransformTranslation.READ_TRANSFORM_URN))
          .collect(Collectors.toList());
  assertThat(readTransforms, not(empty()));
  // Execute the pipeline and wait until it reaches a terminal state, failing fast if it
  // transitions to FAILED.
  JobInvocation jobInvocation =
      FlinkJobInvoker.create(null)
          .createJobInvocation(
              "fakeId",
              "fakeRetrievalToken",
              flinkJobExecutor,
              pipelineProto,
              options.as(FlinkPipelineOptions.class),
              new FlinkPipelineRunner(
                  options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
  jobInvocation.start();
  while (jobInvocation.getState() != JobState.Enum.DONE) {
    assertThat(jobInvocation.getState(), not(JobState.Enum.FAILED));
    Thread.sleep(100);
  }
}
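The polling loop relies on the surrounding @Test(timeout = 120_000) to terminate if the job never reaches DONE. An equivalent loop with an explicit deadline (a sketch, not part of the original test) could look like this:
// Sketch: poll for a terminal state with an explicit deadline instead of relying solely on
// the JUnit-level timeout. Assumes java.time.Instant and org.junit.Assert.assertTrue are imported.
Instant deadline = Instant.now().plusSeconds(120);
while (jobInvocation.getState() != JobState.Enum.DONE) {
  assertThat(jobInvocation.getState(), not(JobState.Enum.FAILED));
  assertTrue("Job did not reach DONE before the deadline", Instant.now().isBefore(deadline));
  Thread.sleep(100);
}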
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
The class DataflowPipelineTranslatorTest, method testMultiGraphPipelineSerialization.
@Test
public void testMultiGraphPipelineSerialization() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);
  PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));
  input.apply(new UnrelatedOutputCreator());
  input.apply(new UnboundOutputCreator());
  DataflowPipelineTranslator t =
      DataflowPipelineTranslator.fromOptions(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class));
  // Check that translation doesn't fail.
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification jobSpecification =
      t.translate(
          p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
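The helper assertAllStepOutputsHaveUniqueIds is not shown here. As an illustration of the kind of uniqueness check involved, a simplified, hypothetical sketch over step names (the real helper inspects step output ids; the Job type is com.google.api.services.dataflow.model.Job) might look like:
// Simplified sketch only: assert that no two steps in the translated Dataflow job share a name.
// Assumes java.util.HashSet/Set, com.google.api.services.dataflow.model.Step, and
// org.junit.Assert.assertTrue are available.
Set<String> seen = new HashSet<>();
for (Step step : jobSpecification.getJob().getSteps()) {
  assertTrue("Duplicate step name: " + step.getName(), seen.add(step.getName()));
}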