Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache: class GreedyStageFuserTest, method sideInputIncludedInStage.
@Test
public void sideInputIncludedInStage() {
  Environment env = Environments.createDockerEnvironment("common");
  PTransform readTransform = PTransform.newBuilder()
      .setUniqueName("read").putInputs("input", "impulse.out").putOutputs("output", "read.out")
      .setSpec(FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(ParDoPayload.newBuilder()
              .setDoFn(FunctionSpec.newBuilder())
              .build().toByteString()))
      .setEnvironmentId("common")
      .build();
  PTransform parDoTransform = PTransform.newBuilder()
      .setUniqueName("parDo").putInputs("input", "read.out").putInputs("side_input", "side_read.out")
      .putOutputs("output", "parDo.out")
      .setSpec(FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(ParDoPayload.newBuilder()
              .setDoFn(FunctionSpec.newBuilder())
              .putSideInputs("side_input", SideInput.getDefaultInstance())
              .build().toByteString()))
      .setEnvironmentId("common")
      .build();
  PCollection sideInputPCollection = PCollection.newBuilder().setUniqueName("side_read.out").build();
  QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder()
      .putTransforms("read", readTransform)
      .putPcollections("read.out", PCollection.newBuilder().setUniqueName("read.out").build())
      .putTransforms("side_read", PTransform.newBuilder()
          .setUniqueName("side_read")
          .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN))
          .putInputs("input", "impulse.out").putOutputs("output", "side_read.out")
          .build())
      .putPcollections("side_read.out", sideInputPCollection)
      .putTransforms("parDo", parDoTransform)
      .putPcollections("parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
      .putEnvironments("common", env)
      .build());
  PCollectionNode readOutput =
      getOnlyElement(p.getOutputPCollections(PipelineNode.pTransform("read", readTransform)));
  ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(
      p, readOutput, ImmutableSet.of(PipelineNode.pTransform("parDo", parDoTransform)));
  PTransformNode parDoNode = PipelineNode.pTransform("parDo", parDoTransform);
  SideInputReference sideInputRef = SideInputReference.of(
      parDoNode, "side_input", PipelineNode.pCollection("side_read.out", sideInputPCollection));
  assertThat(subgraph.getSideInputs(), contains(sideInputRef));
  assertThat(subgraph.getOutputPCollections(), emptyIterable());
}
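The assertion above depends on SideInputReference exposing the consuming transform, the local name, and the referenced PCollection. A minimal sketch of walking those references on the fused stage (not part of the original test; subgraph is the stage built above):

// Sketch only: list each side input the fused stage consumes.
for (SideInputReference ref : subgraph.getSideInputs()) {
  System.out.printf("transform=%s localName=%s pcollection=%s%n",
      ref.transform().getId(), ref.localName(), ref.collection().getId());
}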
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache: class GreedyStageFuserTest, method materializesWithDifferentEnvConsumer.
@Test
public void materializesWithDifferentEnvConsumer() {
  // (impulse.out) -> parDo -> parDo.out -> window -> window.out
  // Fuses into
  // (impulse.out) -> parDo -> (parDo.out)
  // (parDo.out) -> window -> window.out
  Environment env = Environments.createDockerEnvironment("common");
  PTransform parDoTransform = PTransform.newBuilder()
      .putInputs("input", "impulse.out").putOutputs("out", "parDo.out")
      .setSpec(FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(ParDoPayload.newBuilder()
              .setDoFn(FunctionSpec.newBuilder())
              .build().toByteString()))
      .setEnvironmentId("common")
      .build();
  PCollection parDoOutput = PCollection.newBuilder().setUniqueName("parDo.out").build();
  QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder()
      .putTransforms("parDo", parDoTransform)
      .putPcollections("parDo.out", parDoOutput)
      .putTransforms("window", PTransform.newBuilder()
          .putInputs("input", "parDo.out").putOutputs("output", "window.out")
          .setSpec(FunctionSpec.newBuilder()
              .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
              .setPayload(WindowIntoPayload.newBuilder()
                  .setWindowFn(FunctionSpec.newBuilder())
                  .build().toByteString()))
          .setEnvironmentId("rare")
          .build())
      .putPcollections("window.out", PCollection.newBuilder().setUniqueName("window.out").build())
      .putEnvironments("rare", Environments.createDockerEnvironment("rare"))
      .putEnvironments("common", env)
      .build());
  ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(
      p, impulseOutputNode, p.getPerElementConsumers(impulseOutputNode));
  assertThat(subgraph.getOutputPCollections(), contains(PipelineNode.pCollection("parDo.out", parDoOutput)));
  assertThat(subgraph.getInputPCollection(), equalTo(impulseOutputNode));
  assertThat(subgraph.getEnvironment(), equalTo(env));
  assertThat(subgraph.getTransforms(), contains(PipelineNode.pTransform("parDo", parDoTransform)));
}
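Fusion stops at window because it declares a different environment than the stage. A minimal, illustrative sketch of the comparison that drives that decision (the inline window transform above sets environment id "rare"):

// Sketch only: a consumer fuses into the stage only if it shares the stage's environment.
// "window" declares "rare" while parDo declares "common", so parDo.out is materialized.
boolean sameEnvironment = "rare".equals(parDoTransform.getEnvironmentId()); // false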
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache: class GreedyStageFuserTest, method executableStageProducingSideInputMaterializesIt.
@Test
public void executableStageProducingSideInputMaterializesIt() {
  // impulse -- ParDo(createSide)
  //         \_ ParDo(processMain) with side input from createSide
  // The ExecutableStage executing createSide must have an output.
  Environment env = Environments.createDockerEnvironment("common");
  PTransform impulse = PTransform.newBuilder()
      .setUniqueName("impulse").putOutputs("output", "impulsePC")
      .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
      .build();
  PTransform createSide = PTransform.newBuilder()
      .setUniqueName("createSide").putInputs("input", "impulsePC").putOutputs("output", "sidePC")
      .setSpec(FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(ParDoPayload.newBuilder()
              .setDoFn(FunctionSpec.newBuilder())
              .build().toByteString()))
      .setEnvironmentId("common")
      .build();
  PTransform processMain = PTransform.newBuilder()
      .setUniqueName("processMain").putInputs("main", "impulsePC").putInputs("side", "sidePC")
      .setSpec(FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(ParDoPayload.newBuilder()
              .setDoFn(FunctionSpec.newBuilder())
              .putSideInputs("side", SideInput.getDefaultInstance())
              .build().toByteString()))
      .setEnvironmentId("common")
      .build();
  PCollection sidePC = PCollection.newBuilder().setUniqueName("sidePC").build();
  PCollection impulsePC = PCollection.newBuilder().setUniqueName("impulsePC").build();
  QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder()
      .putTransforms("impulse", impulse)
      .putTransforms("createSide", createSide)
      .putTransforms("processMain", processMain)
      .putPcollections("impulsePC", impulsePC)
      .putPcollections("sidePC", sidePC)
      .putEnvironments("common", env)
      .build());
  PCollectionNode impulseOutput =
      getOnlyElement(p.getOutputPCollections(PipelineNode.pTransform("impulse", impulse)));
  ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(
      p, impulseOutput, ImmutableSet.of(PipelineNode.pTransform("createSide", createSide)));
  assertThat(subgraph.getOutputPCollections(), contains(PipelineNode.pCollection("sidePC", sidePC)));
}
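Nothing inside the stage consumes sidePC, yet the stage must still expose it so processMain, which runs in another stage, can read it as a side input. A minimal sketch of inspecting those materialized outputs (not part of the original test; subgraph is the stage built above):

// Sketch only: print every PCollection the stage materializes for downstream stages.
for (PCollectionNode output : subgraph.getOutputPCollections()) {
  System.out.println("materialized: " + output.getId());
}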
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache: class RemoteExecutionTest, method testExecutionWithMultipleStages.
@Test
public void testExecutionWithMultipleStages() throws Exception {
  launchSdkHarness(PipelineOptionsFactory.create());
  Pipeline p = Pipeline.create();
  Function<String, PCollection<String>> pCollectionGenerator =
      suffix ->
          p.apply("impulse" + suffix, Impulse.create())
              .apply("create" + suffix, ParDo.of(new DoFn<byte[], String>() {
                @ProcessElement
                public void process(ProcessContext c) {
                  try {
                    c.output(CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), c.element()));
                  } catch (CoderException e) {
                    throw new RuntimeException(e);
                  }
                }
              }))
              .setCoder(StringUtf8Coder.of())
              .apply(ParDo.of(new DoFn<String, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  c.output("stream" + suffix + c.element());
                }
              }));
  PCollection<String> input1 = pCollectionGenerator.apply("1");
  PCollection<String> input2 = pCollectionGenerator.apply("2");
  PCollection<String> outputMerged =
      PCollectionList.of(input1).and(input2).apply(Flatten.pCollections());
  outputMerged
      .apply("createKV", ParDo.of(new DoFn<String, KV<String, String>>() {
        @ProcessElement
        public void process(ProcessContext c) {
          c.output(KV.of(c.element(), ""));
        }
      }))
      .setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
      .apply("gbk", GroupByKey.create());
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
  Set<ExecutableStage> stages = fused.getFusedStages();
  assertThat(stages.size(), equalTo(2));
  List<WindowedValue<?>> outputValues = Collections.synchronizedList(new ArrayList<>());
  for (ExecutableStage stage : stages) {
    ExecutableProcessBundleDescriptor descriptor = ProcessBundleDescriptors.fromExecutableStage(
        stage.toString(), stage, dataServer.getApiServiceDescriptor(), stateServer.getApiServiceDescriptor());
    BundleProcessor processor = controlClient.getProcessor(
        descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations(), stateDelegator);
    Map<String, Coder> remoteOutputCoders = descriptor.getRemoteOutputCoders();
    Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
    for (Entry<String, Coder> remoteOutputCoder : remoteOutputCoders.entrySet()) {
      outputReceivers.putIfAbsent(
          remoteOutputCoder.getKey(),
          RemoteOutputReceiver.of((Coder<WindowedValue<?>>) remoteOutputCoder.getValue(), outputValues::add));
    }
    try (RemoteBundle bundle = processor.newBundle(
        outputReceivers, StateRequestHandler.unsupported(), BundleProgressHandler.ignored())) {
      Iterables.getOnlyElement(bundle.getInputReceivers().values())
          .accept(valueInGlobalWindow(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "X")));
    }
  }
  assertThat(
      outputValues,
      containsInAnyOrder(
          valueInGlobalWindow(KV.of("stream1X", "")), valueInGlobalWindow(KV.of("stream2X", ""))));
}
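Each stage is fed a single element: a UTF-8 encoding of "X" wrapped in the global window, which the create1/create2 DoFn inside the stage decodes back into a string. A minimal sketch of building that element in isolation, using the same calls as the test:

// Sketch only: the element handed to each stage's input receiver.
byte[] payload = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "X");
WindowedValue<byte[]> element = WindowedValue.valueInGlobalWindow(payload);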
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache: class ProcessBundleDescriptors, method addSideInputs.
private static Map<String, Map<String, SideInputSpec>> addSideInputs(
    ExecutableStage stage, Components.Builder components) throws IOException {
  ImmutableTable.Builder<String, String, SideInputSpec> idsToSpec = ImmutableTable.builder();
  for (SideInputReference sideInputReference : stage.getSideInputs()) {
    // Update the coder specification for side inputs to be length prefixed so that the
    // SDK and Runner agree on how to encode/decode the key, window, and values for
    // side inputs.
    PCollectionNode pcNode = sideInputReference.collection();
    PCollection pc = pcNode.getPCollection();
    String lengthPrefixedCoderId =
        LengthPrefixUnknownCoders.addLengthPrefixedCoder(pc.getCoderId(), components, false);
    components.putPcollections(pcNode.getId(), pc.toBuilder().setCoderId(lengthPrefixedCoderId).build());
    FullWindowedValueCoder<KV<?, ?>> coder =
        (FullWindowedValueCoder) WireCoders.instantiateRunnerWireCoder(pcNode, components.build());
    idsToSpec.put(
        sideInputReference.transform().getId(),
        sideInputReference.localName(),
        SideInputSpec.of(
            sideInputReference.transform().getId(),
            sideInputReference.localName(),
            getAccessPattern(sideInputReference),
            coder.getValueCoder(),
            coder.getWindowCoder()));
  }
  return idsToSpec.build().rowMap();
}
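The returned map is keyed first by transform id and then by the side input's local name within that transform. A minimal lookup sketch ("parDoId" and "side_input" are placeholder ids, not values from the code above):

// Sketch only: resolve the spec for one side input of one transform.
Map<String, Map<String, SideInputSpec>> specs = addSideInputs(stage, components);
SideInputSpec spec = specs.get("parDoId").get("side_input");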