use of org.apache.beam.runners.fnexecution.control.RemoteBundle in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testStageBundleClosed.
@Test
public void testStageBundleClosed() throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory);
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(operator);
RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder().put("input", Mockito.mock(FnDataReceiver.class)).build());
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
testHarness.open();
testHarness.close();
verify(stageBundleFactory).getInstructionRequestHandler();
verify(stageBundleFactory).close();
verify(stageContext).close();
verifyNoMoreInteractions(stageBundleFactory);
// close() will also call dispose(), but call again to verify no new bundle
// is created afterwards
operator.cleanUp();
verifyNoMoreInteractions(bundle);
}
use of org.apache.beam.runners.fnexecution.control.RemoteBundle in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupWithKeyedInput.
@Test
@SuppressWarnings("unchecked")
public void testEnsureStateCleanupWithKeyedInput() throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VarIntCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
VarIntCoder keyCoder = VarIntCoder.of();
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, WindowingStrategy.globalDefault(), keyCoder, WindowedValue.getFullCoder(keyCoder, GlobalWindow.Coder.INSTANCE));
KeyedOneInputStreamOperatorTestHarness<Integer, WindowedValue<Integer>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, val -> val, new CoderTypeInformation<>(keyCoder, FlinkPipelineOptions.defaults()));
RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder().put("input", Mockito.mock(FnDataReceiver.class)).build());
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
testHarness.open();
Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
assertThat(doFnRunner, instanceOf(DoFnRunnerWithMetricsUpdate.class));
// There should be a StatefulDoFnRunner installed which takes care of clearing state
Object statefulDoFnRunner = Whitebox.getInternalState(doFnRunner, "delegate");
assertThat(statefulDoFnRunner, instanceOf(StatefulDoFnRunner.class));
}
use of org.apache.beam.runners.fnexecution.control.RemoteBundle in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupOnFinalWatermark.
@Test
public void testEnsureStateCleanupOnFinalWatermark() throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
StringUtf8Coder keyCoder = StringUtf8Coder.of();
WindowingStrategy windowingStrategy = WindowingStrategy.globalDefault();
Coder<BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowCoder));
KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder().put("input", Mockito.mock(FnDataReceiver.class)).build());
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
testHarness.open();
KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
ByteBuffer key = FlinkKeyUtils.encodeKey("key1", keyCoder);
keyedStateBackend.setCurrentKey(key);
// create some state which can be cleaned up
assertThat(testHarness.numKeyedStateEntries(), is(0));
StateNamespace stateNamespace = StateNamespaces.window(windowCoder, GlobalWindow.INSTANCE);
// State from the SDK Harness is stored as ByteStrings
BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
// No timers have been set for cleanup
assertThat(testHarness.numEventTimeTimers(), is(0));
// State has been created
assertThat(testHarness.numKeyedStateEntries(), is(1));
// Generate final watermark to trigger state cleanup
testHarness.processWatermark(new Watermark(BoundedWindow.TIMESTAMP_MAX_VALUE.plus(Duration.millis(1)).getMillis()));
assertThat(testHarness.numKeyedStateEntries(), is(0));
}
use of org.apache.beam.runners.fnexecution.control.RemoteBundle in project beam by apache.
the class ExecutableStageDoFnOperatorTest method expectedInputsAreSent.
@Test
public void expectedInputsAreSent() throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory);
@SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
@SuppressWarnings("unchecked") FnDataReceiver<WindowedValue<?>> receiver = Mockito.mock(FnDataReceiver.class);
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
WindowedValue<Integer> one = WindowedValue.valueInGlobalWindow(1);
WindowedValue<Integer> two = WindowedValue.valueInGlobalWindow(2);
WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(operator);
testHarness.open();
testHarness.processElement(new StreamRecord<>(one));
testHarness.processElement(new StreamRecord<>(two));
testHarness.processElement(new StreamRecord<>(three));
verify(receiver).accept(one);
verify(receiver).accept(two);
verify(receiver).accept(three);
verifyNoMoreInteractions(receiver);
testHarness.close();
}
use of org.apache.beam.runners.fnexecution.control.RemoteBundle in project beam by apache.
the class SparkExecutableStageFunction method call.
@Override
public Iterator<RawUnionValue> call(Iterator<WindowedValue<InputT>> inputs) throws Exception {
SparkPipelineOptions options = pipelineOptions.get().as(SparkPipelineOptions.class);
// Register standard file systems.
FileSystems.setDefaultPipelineOptions(options);
// Otherwise, this may cause validation errors (e.g. ParDoTest)
if (!inputs.hasNext()) {
return Collections.emptyIterator();
}
try (ExecutableStageContext stageContext = contextFactory.get(jobInfo)) {
ExecutableStage executableStage = ExecutableStage.fromPayload(stagePayload);
try (StageBundleFactory stageBundleFactory = stageContext.getStageBundleFactory(executableStage)) {
ConcurrentLinkedQueue<RawUnionValue> collector = new ConcurrentLinkedQueue<>();
StateRequestHandler stateRequestHandler = getStateRequestHandler(executableStage, stageBundleFactory.getProcessBundleDescriptor());
if (executableStage.getTimers().size() == 0) {
ReceiverFactory receiverFactory = new ReceiverFactory(collector, outputMap);
processElements(stateRequestHandler, receiverFactory, null, stageBundleFactory, inputs);
return collector.iterator();
}
// Used with Batch, we know that all the data is available for this key. We can't use the
// timer manager from the context because it doesn't exist. So we create one and advance
// time to the end after processing all elements.
final InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
timerInternals.advanceProcessingTime(Instant.now());
timerInternals.advanceSynchronizedProcessingTime(Instant.now());
ReceiverFactory receiverFactory = new ReceiverFactory(collector, outputMap);
TimerReceiverFactory timerReceiverFactory = new TimerReceiverFactory(stageBundleFactory, (Timer<?> timer, TimerInternals.TimerData timerData) -> {
currentTimerKey = timer.getUserKey();
if (timer.getClearBit()) {
timerInternals.deleteTimer(timerData);
} else {
timerInternals.setTimer(timerData);
}
}, windowCoder);
// Process inputs.
processElements(stateRequestHandler, receiverFactory, timerReceiverFactory, stageBundleFactory, inputs);
// Finish any pending windows by advancing the input watermark to infinity.
timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
// Finally, advance the processing time to infinity to fire any timers.
timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
// itself)
while (timerInternals.hasPendingTimers()) {
try (RemoteBundle bundle = stageBundleFactory.getBundle(receiverFactory, timerReceiverFactory, stateRequestHandler, getBundleProgressHandler())) {
PipelineTranslatorUtils.fireEligibleTimers(timerInternals, bundle.getTimerReceivers(), currentTimerKey);
}
}
return collector.iterator();
}
}
}
Aggregations