Use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitResponse.ChannelSplit in project beam by apache.
From the class RemoteExecutionTest, method testSplit.
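The test builds a pipeline around a splittable DoFn that blocks until it is split, executes the fused SDF stage on a remote SDK harness, repeatedly requests splits while the bundle is in flight, and then asserts on the resulting ChannelSplit and on the primary and residual roots.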
@Test(timeout = 60000L)
public void testSplit() throws Exception {
  launchSdkHarness(PipelineOptionsFactory.create());
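  // Build a pipeline whose "forceSplit" ParDo is a splittable DoFn that keeps claiming work
  // until the runner splits it, at which point it outputs its current (primary) restriction.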
  Pipeline p = Pipeline.create();
  p.apply("impulse", Impulse.create())
      .apply(
          "create",
          ParDo.of(
              new DoFn<byte[], String>() {
                @ProcessElement
                public void process(ProcessContext ctxt) {
                  ctxt.output("zero");
                  ctxt.output(WaitingTillSplitRestrictionTracker.WAIT_TILL_SPLIT);
                  ctxt.output("two");
                }
              }))
      .apply(
          "forceSplit",
          ParDo.of(
              new DoFn<String, String>() {
                @GetInitialRestriction
                public String getInitialRestriction(@Element String element) {
                  return element;
                }

                @NewTracker
                public WaitingTillSplitRestrictionTracker newTracker(
                    @Restriction String restriction) {
                  return new WaitingTillSplitRestrictionTracker(restriction);
                }

                @ProcessElement
                public void process(
                    RestrictionTracker<String, Void> tracker, ProcessContext context) {
                  while (tracker.tryClaim(null)) {}
                  context.output(tracker.currentRestriction());
                }
              }))
      .apply("addKeys", WithKeys.of("foo"))
      .setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
      .apply("gbk", GroupByKey.create());
  RunnerApi.Pipeline pipeline = PipelineTranslation.toProto(p);
  // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles.
  RunnerApi.Pipeline pipelineWithSdfExpanded =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN,
          pipeline,
          SplittableParDoExpander.createSizedReplacement());
  FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineWithSdfExpanded);
  // Find the fused stage with the SDF ProcessSizedElementAndRestriction transform.
  Optional<ExecutableStage> optionalStage =
      Iterables.tryFind(
          fused.getFusedStages(),
          (ExecutableStage stage) ->
              Iterables.filter(
                      stage.getTransforms(),
                      (PTransformNode node) ->
                          PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN
                              .equals(node.getTransform().getSpec().getUrn()))
                  .iterator()
                  .hasNext());
  checkState(optionalStage.isPresent(), "Expected a stage with SDF ProcessSizedElementAndRestriction.");
  ExecutableStage stage = optionalStage.get();
  ExecutableProcessBundleDescriptor descriptor =
      ProcessBundleDescriptors.fromExecutableStage(
          "my_stage", stage, dataServer.getApiServiceDescriptor());
  BundleProcessor processor =
      controlClient.getProcessor(
          descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations());
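  // Register a synchronized output collector and a remote output receiver for each of the
  // stage's remote outputs so the values produced by the bundle can be inspected later.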
  Map<String, ? super Coder<WindowedValue<?>>> remoteOutputCoders = descriptor.getRemoteOutputCoders();
  Map<String, Collection<? super WindowedValue<?>>> outputValues = new HashMap<>();
  Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
  for (Entry<String, ? super Coder<WindowedValue<?>>> remoteOutputCoder : remoteOutputCoders.entrySet()) {
    List<? super WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
    outputValues.put(remoteOutputCoder.getKey(), outputContents);
    outputReceivers.put(
        remoteOutputCoder.getKey(),
        RemoteOutputReceiver.of(
            (Coder) remoteOutputCoder.getValue(),
            (FnDataReceiver<? super WindowedValue<?>>) outputContents::add));
  }
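  // Capture any split responses, checkpoint (residual) responses, and bundle finalization
  // requests observed while the bundle is running.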
  List<ProcessBundleSplitResponse> splitResponses = new ArrayList<>();
  List<ProcessBundleResponse> checkpointResponses = new ArrayList<>();
  List<String> requestsFinalization = new ArrayList<>();
  ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
  ScheduledFuture<Object> future;
  // Execute the remote bundle.
  try (RemoteBundle bundle =
      processor.newBundle(
          outputReceivers,
          Collections.emptyMap(),
          StateRequestHandler.unsupported(),
          BundleProgressHandler.ignored(),
          splitResponses::add,
          checkpointResponses::add,
          requestsFinalization::add)) {
    Iterables.getOnlyElement(bundle.getInputReceivers().values())
        .accept(
            valueInGlobalWindow(
                sdfSizedElementAndRestrictionForTest(WaitingTillSplitRestrictionTracker.WAIT_TILL_SPLIT)));
    // Keep sending splits until the bundle terminates.
    future =
        (ScheduledFuture)
            executor.scheduleWithFixedDelay(() -> bundle.split(0.5), 0L, 100L, TimeUnit.MILLISECONDS);
  }
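  // The try-with-resources close blocks until the SDK harness finishes the bundle, so the bundle
  // is now complete; stop issuing further split requests and shut the scheduler down.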
  future.cancel(false);
  executor.shutdown();
  assertTrue(requestsFinalization.isEmpty());
  assertTrue(checkpointResponses.isEmpty());
  // We only validate the last split response since it is the only one that could possibly
  // contain the SDF split; all earlier ones will only be a reduction in the ChannelSplit range.
  assertFalse(splitResponses.isEmpty());
  ProcessBundleSplitResponse splitResponse = splitResponses.get(splitResponses.size() - 1);
  ChannelSplit channelSplit = Iterables.getOnlyElement(splitResponse.getChannelSplitsList());
  // Only one outcome is possible for the final split, since the SDF was blocking the bundle
  // from completing and therefore had to be split.
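  // A lastPrimaryElement of -1 with a firstResidualElement of 1 means the data channel keeps no
  // whole element in the primary and defers none to the residual: the single element at index 0
  // is the one split inside the SDF, and its pieces are reported as the primary and residual
  // roots asserted below.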
  assertEquals(-1L, channelSplit.getLastPrimaryElement());
  assertEquals(1L, channelSplit.getFirstResidualElement());
  assertEquals(1, splitResponse.getPrimaryRootsCount());
  assertEquals(1, splitResponse.getResidualRootsCount());
  assertThat(
      Iterables.getOnlyElement(outputValues.values()),
      containsInAnyOrder(
          valueInGlobalWindow(KV.of("foo", WaitingTillSplitRestrictionTracker.PRIMARY))));
}
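The test relies on two helpers defined elsewhere in RemoteExecutionTest: sdfSizedElementAndRestrictionForTest, which wraps an element into the sized element-and-restriction form consumed by the expanded SDF stage, and WaitingTillSplitRestrictionTracker, whose WAIT_TILL_SPLIT and PRIMARY constants appear above. Below is a minimal sketch of what such a tracker can look like; the constant values and method bodies are illustrative assumptions, not the exact code from the test.

// Illustrative sketch (not the actual test helper): a RestrictionTracker that refuses to finish
// until it has been split, then reports PRIMARY as its remaining restriction.
private static class WaitingTillSplitRestrictionTracker extends RestrictionTracker<String, Void> {
  // Constant values are assumed for illustration.
  private static final String WAIT_TILL_SPLIT = "WaitTillSplit";
  private static final String PRIMARY = "Primary";
  private static final String RESIDUAL = "Residual";

  private String currentRestriction;

  private WaitingTillSplitRestrictionTracker(String restriction) {
    this.currentRestriction = restriction;
  }

  @Override
  public boolean tryClaim(Void position) {
    // Keep claiming (keeping the bundle busy) as long as no split has happened yet.
    return needsSplitting();
  }

  @Override
  public String currentRestriction() {
    return currentRestriction;
  }

  @Override
  public SplitResult<String> trySplit(double fractionOfRemainder) {
    if (!needsSplitting()) {
      return null;
    }
    // The split turns the blocking restriction into PRIMARY and hands back RESIDUAL.
    this.currentRestriction = PRIMARY;
    return SplitResult.of(currentRestriction, RESIDUAL);
  }

  @Override
  public void checkDone() throws IllegalStateException {
    if (needsSplitting()) {
      throw new IllegalStateException("Expected the restriction to have been split before completion.");
    }
  }

  @Override
  public IsBounded isBounded() {
    return IsBounded.BOUNDED;
  }

  private boolean needsSplitting() {
    return WAIT_TILL_SPLIT.equals(currentRestriction);
  }
}

With a tracker like this, the while (tracker.tryClaim(null)) loop in the "forceSplit" DoFn spins until the runner-initiated split flips the restriction to PRIMARY, which is why the test expects the single output value KV.of("foo", WaitingTillSplitRestrictionTracker.PRIMARY).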