use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testWatermarkHandling.
@Test
public void testWatermarkHandling() throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
ExecutableStageDoFnOperator<KV<String, Integer>, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))), StringUtf8Coder.of(), WindowedValue.getFullCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), IntervalWindow.getCoder()));
KeyedOneInputStreamOperatorTestHarness<String, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(operator, val -> val.getValue().getKey(), new CoderTypeInformation<>(StringUtf8Coder.of(), FlinkPipelineOptions.defaults()));
RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder().put("input", Mockito.mock(FnDataReceiver.class)).build());
when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.<KV<String, String>, FnDataReceiver<WindowedValue>>builder().put(KV.of("transform", "timer"), Mockito.mock(FnDataReceiver.class)).put(KV.of("transform", "timer2"), Mockito.mock(FnDataReceiver.class)).put(KV.of("transform", "timer3"), Mockito.mock(FnDataReceiver.class)).build());
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
testHarness.open();
assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
// No bundle has been started, watermark can be freely advanced
testHarness.processWatermark(0);
assertThat(operator.getCurrentOutputWatermark(), is(0L));
// Trigger a new bundle
IntervalWindow intervalWindow = new IntervalWindow(new Instant(0), new Instant(9));
WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), Instant.now(), intervalWindow, PaneInfo.NO_FIRING);
testHarness.processElement(new StreamRecord<>(windowedValue));
// The output watermark should be held back during the bundle
testHarness.processWatermark(1);
assertThat(operator.getEffectiveInputWatermark(), is(1L));
assertThat(operator.getCurrentOutputWatermark(), is(0L));
// After the bundle has been finished, the watermark should be advanced
operator.invokeFinishBundle();
assertThat(operator.getCurrentOutputWatermark(), is(1L));
// Bundle finished, watermark can be freely advanced
testHarness.processWatermark(2);
assertThat(operator.getEffectiveInputWatermark(), is(2L));
assertThat(operator.getCurrentOutputWatermark(), is(2L));
// Trigger a new bundle
testHarness.processElement(new StreamRecord<>(windowedValue));
// cleanup timer
assertThat(testHarness.numEventTimeTimers(), is(1));
// Set at timer
Instant timerTarget = new Instant(5);
Instant timerTarget2 = new Instant(6);
operator.getLockToAcquireForStateAccessDuringBundles().lock();
BiConsumer<String, Instant> timerConsumer = (timerId, timestamp) -> operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), timestamp, timestamp, PaneInfo.NO_FIRING), TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId("transform", timerId), StateNamespaces.window(IntervalWindow.getCoder(), intervalWindow), timestamp, timestamp, TimeDomain.EVENT_TIME));
timerConsumer.accept("timer", timerTarget);
timerConsumer.accept("timer2", timerTarget2);
assertThat(testHarness.numEventTimeTimers(), is(3));
// Advance input watermark past the timer
// Check the output watermark is held back
long targetWatermark = timerTarget.getMillis() + 100;
testHarness.processWatermark(targetWatermark);
// Do not yet advance the output watermark because we are still processing a bundle
assertThat(testHarness.numEventTimeTimers(), is(3));
assertThat(operator.getCurrentOutputWatermark(), is(2L));
// Check that the timers are fired but the output watermark is advanced no further than
// the minimum timer timestamp of the previous bundle because we are still processing a
// bundle which might contain more timers.
// Timers can create loops if they keep rescheduling themselves when firing
// Thus, we advance the watermark asynchronously to allow for checkpointing to run
operator.invokeFinishBundle();
assertThat(testHarness.numEventTimeTimers(), is(3));
testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(operator.getCurrentOutputWatermark(), is(5L));
// Output watermark is advanced synchronously when the bundle finishes,
// no more timers are scheduled
operator.invokeFinishBundle();
assertThat(operator.getCurrentOutputWatermark(), is(targetWatermark));
assertThat(testHarness.numEventTimeTimers(), is(0));
// Watermark is advanced in a blocking fashion on close, not via a timers
// Create a bundle with a pending timer to simulate that
testHarness.processElement(new StreamRecord<>(windowedValue));
timerConsumer.accept("timer3", new Instant(targetWatermark));
assertThat(testHarness.numEventTimeTimers(), is(1));
// This should be blocking until the watermark reaches Long.MAX_VALUE.
testHarness.close();
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(operator.getCurrentOutputWatermark(), is(Long.MAX_VALUE));
}
use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureDeferredStateCleanupTimerFiring.
private void testEnsureDeferredStateCleanupTimerFiring(boolean withCheckpointing) throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
StringUtf8Coder keyCoder = StringUtf8Coder.of();
WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1000)));
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowingStrategy.getWindowFn().windowCoder()));
@SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
KV<String, String> timerInputKey = KV.of("transformId", "timerId");
AtomicBoolean timerInputReceived = new AtomicBoolean();
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(1000));
IntervalWindow.IntervalWindowCoder windowCoder = IntervalWindow.IntervalWindowCoder.of();
WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), window.maxTimestamp(), ImmutableList.of(window), PaneInfo.NO_FIRING);
FnDataReceiver receiver = Mockito.mock(FnDataReceiver.class);
FnDataReceiver<Timer> timerReceiver = Mockito.mock(FnDataReceiver.class);
doAnswer((invocation) -> {
timerInputReceived.set(true);
return null;
}).when(timerReceiver).accept(any());
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.of(timerInputKey, timerReceiver));
KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.open();
Lock stateBackendLock = Whitebox.getInternalState(operator, "stateBackendLock");
stateBackendLock.lock();
KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
ByteBuffer key = FlinkKeyUtils.encodeKey(windowedValue.getValue().getKey(), keyCoder);
keyedStateBackend.setCurrentKey(key);
DoFnOperator.FlinkTimerInternals timerInternals = Whitebox.getInternalState(operator, "timerInternals");
Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
Object delegate = Whitebox.getInternalState(doFnRunner, "delegate");
Object stateCleaner = Whitebox.getInternalState(delegate, "stateCleaner");
Collection<?> cleanupQueue = Whitebox.getInternalState(stateCleaner, "cleanupQueue");
// create some state which can be cleaned up
assertThat(testHarness.numKeyedStateEntries(), is(0));
StateNamespace stateNamespace = StateNamespaces.window(windowCoder, window);
// State from the SDK Harness is stored as ByteStrings
BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
assertThat(testHarness.numKeyedStateEntries(), is(1));
// user timer that fires after the end of the window and after state cleanup
TimerInternals.TimerData userTimer = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
timerInternals.setTimer(userTimer);
// start of bundle
testHarness.processElement(new StreamRecord<>(windowedValue));
verify(receiver).accept(windowedValue);
// move watermark past user timer while bundle is in progress
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(1)).getMillis()));
// Output watermark is held back and timers do not yet fire (they can still be changed!)
assertThat(timerInputReceived.get(), is(false));
assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
// The timer fires on bundle finish
operator.invokeFinishBundle();
assertThat(timerInputReceived.getAndSet(false), is(true));
// Move watermark past the cleanup timer
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(2)).getMillis()));
operator.invokeFinishBundle();
// Cleanup timer has fired and cleanup queue is prepared for bundle finish
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(testHarness.numKeyedStateEntries(), is(1));
assertThat(cleanupQueue, hasSize(1));
// Cleanup timer are rescheduled if a new timer is created during the bundle
TimerInternals.TimerData userTimer2 = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), window.maxTimestamp(), window.maxTimestamp(), PaneInfo.NO_FIRING), userTimer2);
assertThat(testHarness.numEventTimeTimers(), is(1));
if (withCheckpointing) {
// Upon checkpointing, the bundle will be finished.
testHarness.snapshot(0, 0);
} else {
operator.invokeFinishBundle();
}
// Cleanup queue has been processed and cleanup timer has been re-added due to pending timers
// for the window.
assertThat(cleanupQueue, hasSize(0));
verifyNoMoreInteractions(receiver);
assertThat(testHarness.numKeyedStateEntries(), is(2));
assertThat(testHarness.numEventTimeTimers(), is(2));
// No timer has been fired but bundle should be ended
assertThat(timerInputReceived.get(), is(false));
assertThat(Whitebox.getInternalState(operator, "bundleStarted"), is(false));
// Allow user timer and cleanup timer to fire by triggering watermark advancement
testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
assertThat(timerInputReceived.getAndSet(false), is(true));
assertThat(cleanupQueue, hasSize(1));
// Cleanup will be executed after the bundle is complete because there are no more pending
// timers for the window
operator.invokeFinishBundle();
assertThat(cleanupQueue, hasSize(0));
assertThat(testHarness.numKeyedStateEntries(), is(0));
testHarness.close();
verifyNoMoreInteractions(receiver);
}
use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
the class ExecutableStageDoFnOperatorTest method outputsAreTaggedCorrectly.
@Test
public void outputsAreTaggedCorrectly() throws Exception {
WindowedValue.ValueOnlyWindowedValueCoder<Integer> coder = WindowedValue.getValueOnlyCoder(VarIntCoder.of());
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
TupleTag<Integer> additionalOutput1 = new TupleTag<>("output-1");
TupleTag<Integer> additionalOutput2 = new TupleTag<>("output-2");
ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags = ImmutableMap.<TupleTag<?>, OutputTag<?>>builder().put(additionalOutput1, new OutputTag<WindowedValue<String>>(additionalOutput1.getId()) {
}).put(additionalOutput2, new OutputTag<WindowedValue<String>>(additionalOutput2.getId()) {
}).build();
ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders = ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder().put(mainOutput, (Coder) coder).put(additionalOutput1, coder).put(additionalOutput2, coder).build();
ImmutableMap<TupleTag<?>, Integer> tagsToIds = ImmutableMap.<TupleTag<?>, Integer>builder().put(mainOutput, 0).put(additionalOutput1, 1).put(additionalOutput2, 2).build();
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
WindowedValue<Integer> zero = WindowedValue.valueInGlobalWindow(0);
WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
// We use a real StageBundleFactory here in order to exercise the output receiver factory.
StageBundleFactory stageBundleFactory = new StageBundleFactory() {
private boolean onceEmitted;
@Override
public RemoteBundle getBundle(OutputReceiverFactory receiverFactory, TimerReceiverFactory timerReceiverFactory, StateRequestHandler stateRequestHandler, BundleProgressHandler progressHandler, BundleFinalizationHandler finalizationHandler, BundleCheckpointHandler checkpointHandler) {
return new RemoteBundle() {
@Override
public String getId() {
return "bundle-id";
}
@Override
public Map<String, FnDataReceiver> getInputReceivers() {
return ImmutableMap.of("input", input -> {
/* Ignore input*/
});
}
@Override
public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
return Collections.emptyMap();
}
@Override
public void requestProgress() {
throw new UnsupportedOperationException();
}
@Override
public void split(double fractionOfRemainder) {
throw new UnsupportedOperationException();
}
@Override
public void close() throws Exception {
if (onceEmitted) {
return;
}
// Emit all values to the runner when the bundle is closed.
receiverFactory.create(mainOutput.getId()).accept(three);
receiverFactory.create(additionalOutput1.getId()).accept(four);
receiverFactory.create(additionalOutput2.getId()).accept(five);
onceEmitted = true;
}
};
}
@Override
public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
return processBundleDescriptor;
}
@Override
public InstructionRequestHandler getInstructionRequestHandler() {
return null;
}
@Override
public void close() {
}
};
// Wire the stage bundle factory into our context.
when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, ImmutableList.of(additionalOutput1, additionalOutput2), outputManagerFactory);
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(operator);
long watermark = testHarness.getCurrentWatermark() + 1;
testHarness.open();
testHarness.processElement(new StreamRecord<>(zero));
testHarness.processWatermark(watermark);
watermark++;
testHarness.processWatermark(watermark);
assertEquals(watermark, testHarness.getCurrentWatermark());
// watermark hold until bundle complete
assertEquals(0, testHarness.getOutput().size());
// triggers finish bundle
testHarness.close();
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(three));
assertThat(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1)), contains(new StreamRecord<>(four)));
assertThat(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2)), contains(new StreamRecord<>(five)));
}
use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
the class FlinkStreamingPortablePipelineTranslator method translateExecutableStage.
private <InputT, OutputT> void translateExecutableStage(String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
// TODO: Fail on splittable DoFns.
// TODO: Special-case single outputs to avoid multiplexing PCollections.
RunnerApi.Components components = pipeline.getComponents();
RunnerApi.PTransform transform = components.getTransformsOrThrow(id);
Map<String, String> outputs = transform.getOutputsMap();
final RunnerApi.ExecutableStagePayload stagePayload;
try {
stagePayload = RunnerApi.ExecutableStagePayload.parseFrom(transform.getSpec().getPayload());
} catch (IOException e) {
throw new RuntimeException(e);
}
String inputPCollectionId = stagePayload.getInput();
final TransformedSideInputs transformedSideInputs;
if (stagePayload.getSideInputsCount() > 0) {
transformedSideInputs = transformSideInputs(stagePayload, components, context);
} else {
transformedSideInputs = new TransformedSideInputs(Collections.emptyMap(), null);
}
Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags = Maps.newLinkedHashMap();
Map<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders = Maps.newLinkedHashMap();
// TODO: does it matter which output we designate as "main"
final TupleTag<OutputT> mainOutputTag = outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());
// associate output tags with ids, output manager uses these Integer ids to serialize state
BiMap<String, Integer> outputIndexMap = createOutputMap(outputs.keySet());
Map<String, Coder<WindowedValue<?>>> outputCoders = Maps.newHashMap();
Map<TupleTag<?>, Integer> tagsToIds = Maps.newHashMap();
Map<String, TupleTag<?>> collectionIdToTupleTag = Maps.newHashMap();
// order output names for deterministic mapping
for (String localOutputName : new TreeMap<>(outputIndexMap).keySet()) {
String collectionId = outputs.get(localOutputName);
Coder<WindowedValue<?>> windowCoder = (Coder) instantiateCoder(collectionId, components);
outputCoders.put(localOutputName, windowCoder);
TupleTag<?> tupleTag = new TupleTag<>(localOutputName);
CoderTypeInformation<WindowedValue<?>> typeInformation = new CoderTypeInformation(windowCoder, context.getPipelineOptions());
tagsToOutputTags.put(tupleTag, new OutputTag<>(localOutputName, typeInformation));
tagsToCoders.put(tupleTag, windowCoder);
tagsToIds.put(tupleTag, outputIndexMap.get(localOutputName));
collectionIdToTupleTag.put(collectionId, tupleTag);
}
final SingleOutputStreamOperator<WindowedValue<OutputT>> outputStream;
DataStream<WindowedValue<InputT>> inputDataStream = context.getDataStreamOrThrow(inputPCollectionId);
CoderTypeInformation<WindowedValue<OutputT>> outputTypeInformation = !outputs.isEmpty() ? new CoderTypeInformation(outputCoders.get(mainOutputTag.getId()), context.getPipelineOptions()) : null;
ArrayList<TupleTag<?>> additionalOutputTags = Lists.newArrayList();
for (TupleTag<?> tupleTag : tagsToCoders.keySet()) {
if (!mainOutputTag.getId().equals(tupleTag.getId())) {
additionalOutputTags.add(tupleTag);
}
}
final Coder<WindowedValue<InputT>> windowedInputCoder = instantiateCoder(inputPCollectionId, components);
final boolean stateful = stagePayload.getUserStatesCount() > 0 || stagePayload.getTimersCount() > 0;
final boolean hasSdfProcessFn = stagePayload.getComponents().getTransformsMap().values().stream().anyMatch(pTransform -> pTransform.getSpec().getUrn().equals(PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN));
Coder keyCoder = null;
KeySelector<WindowedValue<InputT>, ?> keySelector = null;
if (stateful || hasSdfProcessFn) {
// Stateful/SDF stages are only allowed of KV input.
Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) windowedInputCoder).getValueCoder();
if (!(valueCoder instanceof KvCoder)) {
throw new IllegalStateException(String.format(Locale.ENGLISH, "The element coder for stateful DoFn '%s' must be KvCoder but is: %s", inputPCollectionId, valueCoder.getClass().getSimpleName()));
}
if (stateful) {
keyCoder = ((KvCoder) valueCoder).getKeyCoder();
keySelector = new KvToByteBufferKeySelector(keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
} else {
// as the key.
if (!(((KvCoder) valueCoder).getKeyCoder() instanceof KvCoder)) {
throw new IllegalStateException(String.format(Locale.ENGLISH, "The element coder for splittable DoFn '%s' must be KVCoder(KvCoder, DoubleCoder) but is: %s", inputPCollectionId, valueCoder.getClass().getSimpleName()));
}
keyCoder = ((KvCoder) ((KvCoder) valueCoder).getKeyCoder()).getKeyCoder();
keySelector = new SdfByteBufferKeySelector(keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
}
inputDataStream = inputDataStream.keyBy(keySelector);
}
DoFnOperator.MultiOutputOutputManagerFactory<OutputT> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutputTag, tagsToOutputTags, tagsToCoders, tagsToIds, new SerializablePipelineOptions(context.getPipelineOptions()));
DoFnOperator<InputT, OutputT> doFnOperator = new ExecutableStageDoFnOperator<>(transform.getUniqueName(), windowedInputCoder, Collections.emptyMap(), mainOutputTag, additionalOutputTags, outputManagerFactory, transformedSideInputs.unionTagToView, new ArrayList<>(transformedSideInputs.unionTagToView.values()), getSideInputIdToPCollectionViewMap(stagePayload, components), context.getPipelineOptions(), stagePayload, context.getJobInfo(), FlinkExecutableStageContextFactory.getInstance(), collectionIdToTupleTag, getWindowingStrategy(inputPCollectionId, components), keyCoder, keySelector);
final String operatorName = generateNameFromStagePayload(stagePayload);
if (transformedSideInputs.unionTagToView.isEmpty()) {
outputStream = inputDataStream.transform(operatorName, outputTypeInformation, doFnOperator);
} else {
DataStream<RawUnionValue> sideInputStream = transformedSideInputs.unionedSideInputs.broadcast();
if (stateful || hasSdfProcessFn) {
// We have to manually construct the two-input transform because we're not
// allowed to have only one input keyed, normally. Since Flink 1.5.0 it's
// possible to use the Broadcast State Pattern which provides a more elegant
// way to process keyed main input with broadcast state, but it's not feasible
// here because it breaks the DoFnOperator abstraction.
TwoInputTransformation<WindowedValue<KV<?, InputT>>, RawUnionValue, WindowedValue<OutputT>> rawFlinkTransform = new TwoInputTransformation(inputDataStream.getTransformation(), sideInputStream.getTransformation(), transform.getUniqueName(), doFnOperator, outputTypeInformation, inputDataStream.getParallelism());
rawFlinkTransform.setStateKeyType(((KeyedStream) inputDataStream).getKeyType());
rawFlinkTransform.setStateKeySelectors(((KeyedStream) inputDataStream).getKeySelector(), null);
outputStream = new SingleOutputStreamOperator(inputDataStream.getExecutionEnvironment(), // we have to cheat around the ctor being protected
rawFlinkTransform) {
};
} else {
outputStream = inputDataStream.connect(sideInputStream).transform(operatorName, outputTypeInformation, doFnOperator);
}
}
// Assign a unique but consistent id to re-map operator state
outputStream.uid(transform.getUniqueName());
if (mainOutputTag != null) {
context.addDataStream(outputs.get(mainOutputTag.getId()), outputStream);
}
for (TupleTag<?> tupleTag : additionalOutputTags) {
context.addDataStream(outputs.get(tupleTag.getId()), outputStream.getSideOutput(tagsToOutputTags.get(tupleTag)));
}
}
use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
the class FlinkStatefulDoFnFunction method open.
@Override
public void open(Configuration parameters) {
// Note that the SerializablePipelineOptions already initialize FileSystems in the readObject()
// deserialization method. However, this is a hack, and we want to properly initialize the
// options where they are needed.
PipelineOptions options = serializedOptions.get();
FileSystems.setDefaultPipelineOptions(options);
metricContainer = new FlinkMetricContainer(getRuntimeContext());
doFnInvoker = DoFnInvokers.tryInvokeSetupFor(dofn, options);
}
Aggregations