use of org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode in project beam by apache.
the class GreedyStageFuserTest method materializesWithDifferentEnvSibling.
@Test
public void materializesWithDifferentEnvSibling() {
// (impulse.out) -> read -> read.out -> parDo -> parDo.out
// \
// -> window -> window.out
// Fuses into
// (impulse.out) -> read -> (read.out)
// (read.out) -> parDo -> parDo.out
// (read.out) -> window -> window.out
// The window can't be fused into the stage, which forces the PCollection to be materialized.
// ParDo in this case _could_ be fused into the stage, but is not for simplicity of
// implementation
Environment env = Environments.createDockerEnvironment("common");
PTransform readTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "read.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("read", readTransform).putPcollections("read.out", PCollection.newBuilder().setUniqueName("read.out").build()).putTransforms("parDo", PTransform.newBuilder().putInputs("input", "read.out").putOutputs("output", "parDo.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build()).putPcollections("parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()).putTransforms("window", PTransform.newBuilder().putInputs("input", "read.out").putOutputs("output", "window.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("rare").build()).putPcollections("window.out", PCollection.newBuilder().setUniqueName("window.out").build()).putEnvironments("rare", Environments.createDockerEnvironment("rare")).putEnvironments("common", env).build());
PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));
ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("read", readTransform)));
assertThat(subgraph.getOutputPCollections(), contains(readOutput));
assertThat(subgraph.getTransforms(), contains(readNode));
}
use of org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode in project beam by apache.
the class ExecutableStageTest method testRoundTripToFromTransform.
@Test
public void testRoundTripToFromTransform() throws Exception {
Environment env = org.apache.beam.runners.core.construction.Environments.createDockerEnvironment("foo");
PTransform pt = PTransform.newBuilder().putInputs("input", "input.out").putInputs("side_input", "sideInput.in").putInputs("timer", "timer.out").putOutputs("output", "output.out").putOutputs("timer", "timer.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).putSideInputs("side_input", SideInput.getDefaultInstance()).putStateSpecs("user_state", StateSpec.getDefaultInstance()).putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance()).build().toByteString())).setEnvironmentId("foo").build();
PCollection input = PCollection.newBuilder().setUniqueName("input.out").build();
PCollection sideInput = PCollection.newBuilder().setUniqueName("sideInput.in").build();
PCollection timer = PCollection.newBuilder().setUniqueName("timer.out").build();
PCollection output = PCollection.newBuilder().setUniqueName("output.out").build();
Components components = Components.newBuilder().putTransforms("pt", pt).putPcollections("input.out", input).putPcollections("sideInput.in", sideInput).putPcollections("timer.out", timer).putPcollections("output.out", output).putEnvironments("foo", env).build();
PTransformNode transformNode = PipelineNode.pTransform("pt", pt);
SideInputReference sideInputRef = SideInputReference.of(transformNode, "side_input", PipelineNode.pCollection("sideInput.in", sideInput));
UserStateReference userStateRef = UserStateReference.of(transformNode, "user_state", PipelineNode.pCollection("input.out", input));
TimerReference timerRef = TimerReference.of(transformNode, "timer");
ImmutableExecutableStage stage = ImmutableExecutableStage.of(components, env, PipelineNode.pCollection("input.out", input), Collections.singleton(sideInputRef), Collections.singleton(userStateRef), Collections.singleton(timerRef), Collections.singleton(PipelineNode.pTransform("pt", pt)), Collections.singleton(PipelineNode.pCollection("output.out", output)), DEFAULT_WIRE_CODER_SETTINGS);
PTransform stagePTransform = stage.toPTransform("foo");
assertThat(stagePTransform.getOutputsMap(), hasValue("output.out"));
assertThat(stagePTransform.getOutputsCount(), equalTo(1));
assertThat(stagePTransform.getInputsMap(), allOf(hasValue("input.out"), hasValue("sideInput.in")));
assertThat(stagePTransform.getInputsCount(), equalTo(2));
ExecutableStagePayload payload = ExecutableStagePayload.parseFrom(stagePTransform.getSpec().getPayload());
assertThat(payload.getTransformsList(), contains("pt"));
assertThat(ExecutableStage.fromPayload(payload), equalTo(stage));
}
use of org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode in project beam by apache.
the class RemoteExecutionTest method testSplit.
@Test(timeout = 60000L)
public void testSplit() throws Exception {
launchSdkHarness(PipelineOptionsFactory.create());
Pipeline p = Pipeline.create();
p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], String>() {
@ProcessElement
public void process(ProcessContext ctxt) {
ctxt.output("zero");
ctxt.output(WaitingTillSplitRestrictionTracker.WAIT_TILL_SPLIT);
ctxt.output("two");
}
})).apply("forceSplit", ParDo.of(new DoFn<String, String>() {
@GetInitialRestriction
public String getInitialRestriction(@Element String element) {
return element;
}
@NewTracker
public WaitingTillSplitRestrictionTracker newTracker(@Restriction String restriction) {
return new WaitingTillSplitRestrictionTracker(restriction);
}
@ProcessElement
public void process(RestrictionTracker<String, Void> tracker, ProcessContext context) {
while (tracker.tryClaim(null)) {
}
context.output(tracker.currentRestriction());
}
})).apply("addKeys", WithKeys.of("foo")).setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())).apply("gbk", GroupByKey.create());
RunnerApi.Pipeline pipeline = PipelineTranslation.toProto(p);
// Expand any splittable DoFns within the graph to enable sizing and splitting of bundles.
RunnerApi.Pipeline pipelineWithSdfExpanded = ProtoOverrides.updateTransform(PTransformTranslation.PAR_DO_TRANSFORM_URN, pipeline, SplittableParDoExpander.createSizedReplacement());
FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineWithSdfExpanded);
// Find the fused stage with the SDF ProcessSizedElementAndRestriction transform
Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> Iterables.filter(stage.getTransforms(), (PTransformNode node) -> PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.equals(node.getTransform().getSpec().getUrn())).iterator().hasNext());
checkState(optionalStage.isPresent(), "Expected a stage with SDF ProcessSizedElementAndRestriction.");
ExecutableStage stage = optionalStage.get();
ExecutableProcessBundleDescriptor descriptor = ProcessBundleDescriptors.fromExecutableStage("my_stage", stage, dataServer.getApiServiceDescriptor());
BundleProcessor processor = controlClient.getProcessor(descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations());
Map<String, ? super Coder<WindowedValue<?>>> remoteOutputCoders = descriptor.getRemoteOutputCoders();
Map<String, Collection<? super WindowedValue<?>>> outputValues = new HashMap<>();
Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
for (Entry<String, ? super Coder<WindowedValue<?>>> remoteOutputCoder : remoteOutputCoders.entrySet()) {
List<? super WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
outputValues.put(remoteOutputCoder.getKey(), outputContents);
outputReceivers.put(remoteOutputCoder.getKey(), RemoteOutputReceiver.of((Coder) remoteOutputCoder.getValue(), (FnDataReceiver<? super WindowedValue<?>>) outputContents::add));
}
List<ProcessBundleSplitResponse> splitResponses = new ArrayList<>();
List<ProcessBundleResponse> checkpointResponses = new ArrayList<>();
List<String> requestsFinalization = new ArrayList<>();
ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
ScheduledFuture<Object> future;
// Execute the remote bundle.
try (RemoteBundle bundle = processor.newBundle(outputReceivers, Collections.emptyMap(), StateRequestHandler.unsupported(), BundleProgressHandler.ignored(), splitResponses::add, checkpointResponses::add, requestsFinalization::add)) {
Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow(sdfSizedElementAndRestrictionForTest(WaitingTillSplitRestrictionTracker.WAIT_TILL_SPLIT)));
// Keep sending splits until the bundle terminates.
future = (ScheduledFuture) executor.scheduleWithFixedDelay(() -> bundle.split(0.5), 0L, 100L, TimeUnit.MILLISECONDS);
}
future.cancel(false);
executor.shutdown();
assertTrue(requestsFinalization.isEmpty());
assertTrue(checkpointResponses.isEmpty());
// We only validate the last split response since it is the only one that could possibly
// contain the SDF split, all others will be a reduction in the ChannelSplit range.
assertFalse(splitResponses.isEmpty());
ProcessBundleSplitResponse splitResponse = splitResponses.get(splitResponses.size() - 1);
ChannelSplit channelSplit = Iterables.getOnlyElement(splitResponse.getChannelSplitsList());
// There is only one outcome for the final split that can happen since the SDF is blocking the
// bundle from completing and hence needed to be split.
assertEquals(-1L, channelSplit.getLastPrimaryElement());
assertEquals(1L, channelSplit.getFirstResidualElement());
assertEquals(1, splitResponse.getPrimaryRootsCount());
assertEquals(1, splitResponse.getResidualRootsCount());
assertThat(Iterables.getOnlyElement(outputValues.values()), containsInAnyOrder(valueInGlobalWindow(KV.of("foo", WaitingTillSplitRestrictionTracker.PRIMARY))));
}
Aggregations