use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class SimpleParDoFnTest method executeParDoFnCounterTest.
/**
 * Runs a minimal {@link ParDoFn} over the given inputs so that the counter values it reports can
 * be checked by the caller.
 *
 * @param inputData Elements fed to the DoFn. An element X makes the DoFn emit the same string X
 *     times.
 * @return Delta counter updates extracted after every element has been processed.
 * @throws Exception if starting the bundle or processing an element fails.
 */
private List<CounterUpdate> executeParDoFnCounterTest(int... inputData) throws Exception {
  class RepeaterDoFn extends DoFn<Integer, String> {
    /** Treats the input element as a repetition count and emits one message per repetition. */
    @ProcessElement
    public void processElement(ProcessContext context) {
      int repetitions = context.element();
      int emitted = 0;
      while (emitted < repetitions) {
        context.output(String.format("I will repeat this message %d times", repetitions));
        emitted++;
      }
    }
  }

  DoFn<Integer, String> repeater = new RepeaterDoFn();
  DoFnInfo<?, ?> repeaterInfo =
      DoFnInfo.forFn(
          repeater,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          MAIN_OUTPUT,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  ParDoFn counterParDoFn =
      new SimpleParDoFn<>(
          options,
          DoFnInstanceManagers.singleInstance(repeaterInfo),
          new EmptySideInputReader(),
          MAIN_OUTPUT,
          ImmutableMap.of(MAIN_OUTPUT, 0),
          stepContext,
          operationContext,
          DoFnSchemaInformation.create(),
          Collections.emptyMap(),
          SimpleDoFnRunnerFactory.INSTANCE);

  counterParDoFn.startBundle(new TestReceiver());
  for (int index = 0; index < inputData.length; index++) {
    counterParDoFn.processElement(WindowedValue.valueInGlobalWindow(inputData[index]));
  }

  // Passing true requests delta updates, as promised by the method contract above.
  return operationContext
      .counterSet()
      .extractUpdates(true, DataflowCounterUpdateExtractor.INSTANCE);
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class SimpleParDoFnTest method testErrorPropagation.
/**
 * Checks that errors raised while starting a bundle, processing an element and finishing a
 * bundle each surface as a {@link UserCodeException} whose cause carries only the expected
 * {@code TestErrorDoFn} stack frames.
 */
@Test
public void testErrorPropagation() throws Exception {
  TestErrorDoFn failingFn = new TestErrorDoFn();
  DoFnInfo<?, ?> failingFnInfo =
      DoFnInfo.forFn(
          failingFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          MAIN_OUTPUT,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  TestReceiver receiver = new TestReceiver();

  ParDoFn userParDoFn =
      new SimpleParDoFn<>(
          options,
          DoFnInstanceManagers.singleInstance(failingFnInfo),
          new EmptySideInputReader(),
          MAIN_OUTPUT,
          ImmutableMap.of(MAIN_OUTPUT, 0),
          BatchModeExecutionContext.forTesting(options, "testStage")
              .getStepContext(operationContext),
          operationContext,
          DoFnSchemaInformation.create(),
          Collections.emptyMap(),
          SimpleDoFnRunnerFactory.INSTANCE);

  // Phase 1: starting the bundle and pushing a first element.
  try {
    userParDoFn.startBundle(receiver);
    userParDoFn.processElement(null);
    fail("should have failed");
  } catch (Exception thrown) {
    // The call is made from inside the SDK rather than from a user's program
    // (e.g. through Pipeline.run), so the error must arrive as a UserCodeException.
    // The cause of that UserCodeException must not carry any SDK-internal stack frames:
    // users should not be flooded with frames that are outside of their control.
    assertThat(thrown, instanceOf(UserCodeException.class));

    // Exactly three frames are expected on the cause, in this order:
    //   TestErrorDoFn.nestedFunctionBeta
    //   TestErrorDoFn.nestedFunctionAlpha
    //   TestErrorDoFn.startBundle
    Throwable cause = thrown.getCause();
    assertThat(
        stackTraceFrameStrings(cause),
        contains(
            containsString("TestErrorDoFn.nestedFunctionBeta"),
            containsString("TestErrorDoFn.nestedFunctionAlpha"),
            containsString("TestErrorDoFn.startBundle")));
    assertThat(thrown.toString(), containsString("test error in initialize"));
  }

  // Phase 2: processing a regular element.
  try {
    userParDoFn.processElement(WindowedValue.valueInGlobalWindow(3));
    fail("should have failed");
  } catch (Exception thrown) {
    // Still invoked from inside the SDK, so a UserCodeException is expected.
    assertThat(thrown, instanceOf(UserCodeException.class));

    // Exactly two frames are expected on the cause, in this order:
    //   TestErrorDoFn.nestedFunctionBeta
    //   TestErrorDoFn.processElement
    Throwable cause = thrown.getCause();
    assertThat(
        stackTraceFrameStrings(cause),
        contains(
            containsString("TestErrorDoFn.nestedFunctionBeta"),
            containsString("TestErrorDoFn.processElement")));
    assertThat(thrown.toString(), containsString("test error in process"));
  }

  // Phase 3: finishing the bundle.
  try {
    userParDoFn.finishBundle();
    fail("should have failed");
  } catch (Exception thrown) {
    // Still invoked from inside the SDK, so a UserCodeException is expected.
    assertThat(thrown, instanceOf(UserCodeException.class));

    // A single frame is expected on the cause:
    //   TestErrorDoFn.finishBundle
    Throwable cause = thrown.getCause();
    assertThat(
        stackTraceFrameStrings(cause), contains(containsString("TestErrorDoFn.finishBundle")));
    assertThat(thrown.toString(), containsString("test error in finalize"));
  }
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class UserParDoFnFactoryTest method testCleanupWorks.
/**
 * Verifies that firing a cleanup timer for a window clears the state stored for that window and
 * leaves the state of other windows untouched.
 *
 * <p>State is written for two windows, then a {@code SimpleParDoFn.CLEANUP_TIMER_ID} timer is
 * reported as fired for each window in turn and the remaining state is checked after each
 * {@code processTimers()} call.
 *
 * @throws Exception if creating or driving the {@link ParDoFn} fails.
 */
@Test
public void testCleanupWorks() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  CounterSet counters = new CounterSet();
  DoFn<?, ?> initialFn = new TestStatefulDoFn();
  CloudObject cloudObject =
      getCloudObject(initialFn, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));

  StateInternals stateInternals = InMemoryStateInternals.forKey("dummy");

  // The overarching step context that only ParDoFn gets
  DataflowStepContext stepContext = mock(DataflowStepContext.class);
  // The user step context that the DoFnRunner gets a handle on
  DataflowStepContext userStepContext = mock(DataflowStepContext.class);
  when(stepContext.namespacedToUser()).thenReturn(userStepContext);
  // Both contexts are backed by the same in-memory state so the assertions below can observe it.
  when(stepContext.stateInternals()).thenReturn(stateInternals);
  when(userStepContext.stateInternals()).thenReturn(stateInternals);

  DataflowExecutionContext<DataflowStepContext> executionContext =
      mock(DataflowExecutionContext.class);
  TestOperationContext operationContext = TestOperationContext.create(counters);
  when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);
  when(executionContext.getSideInputReader(any(), any(), any()))
      .thenReturn(NullSideInputReader.empty());

  ParDoFn parDoFn =
      factory.create(
          options,
          cloudObject,
          Collections.emptyList(),
          MAIN_OUTPUT,
          ImmutableMap.of(MAIN_OUTPUT, 0),
          executionContext,
          operationContext);

  Receiver rcvr = new OutputReceiver();
  parDoFn.startBundle(rcvr);

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
  IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
  Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
  StateNamespace firstWindowNamespace = StateNamespaces.window(windowCoder, firstWindow);
  StateNamespace secondWindowNamespace = StateNamespaces.window(windowCoder, secondWindow);
  StateTag<ValueState<String>> tag =
      StateTags.tagForSpec(TestStatefulDoFn.STATE_ID, StateSpecs.value(StringUtf8Coder.of()));

  // Set up non-empty state. We don't mock + verify calls to clear() but instead
  // check that state is actually empty. We mustn't care how it is accomplished.
  stateInternals.state(firstWindowNamespace, tag).write("first");
  stateInternals.state(secondWindowNamespace, tag).write("second");

  // Each cleanup timer is stamped one millisecond past the max timestamp of its window.
  Instant firstCleanupTime = firstWindow.maxTimestamp().plus(Duration.millis(1L));
  Instant secondCleanupTime = secondWindow.maxTimestamp().plus(Duration.millis(1L));

  // The user-facing context never reports a fired timer in this test.
  when(userStepContext.getNextFiredTimer(windowCoder)).thenReturn(null);
  // Report the first window's cleanup timer once, then nothing.
  when(stepContext.getNextFiredTimer(windowCoder))
      .thenReturn(
          TimerData.of(
              SimpleParDoFn.CLEANUP_TIMER_ID,
              firstWindowNamespace,
              firstCleanupTime,
              firstCleanupTime,
              TimeDomain.EVENT_TIME))
      .thenReturn(null);

  // This should fire the timer to clean up the first window
  parDoFn.processTimers();

  assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
  assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));

  // Report the second window's cleanup timer once, then nothing.
  when(stepContext.getNextFiredTimer(windowCoder))
      .thenReturn(
          TimerData.of(
              SimpleParDoFn.CLEANUP_TIMER_ID,
              secondWindowNamespace,
              secondCleanupTime,
              secondCleanupTime,
              TimeDomain.EVENT_TIME))
      .thenReturn(null);

  // And this should clean up the second window
  parDoFn.processTimers();

  assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
  assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class UserParDoFnFactoryTest method testFactorySimultaneousUse.
/**
 * Checks that two {@link ParDoFn}s created by the factory from the same {@code CloudObject},
 * and used at the same time, are backed by distinct {@code TestDoFn} instances that both end up
 * in the {@code FINISHED} state.
 */
@Test
public void testFactorySimultaneousUse() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  CounterSet counters = new CounterSet();
  TestDoFn initialFn = new TestDoFn(Collections.<TupleTag<String>>emptyList());
  CloudObject cloudObject = getCloudObject(initialFn);

  ParDoFn firstParDoFn =
      factory.create(
          options,
          cloudObject,
          null,
          MAIN_OUTPUT,
          ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
          BatchModeExecutionContext.forTesting(options, "testStage"),
          TestOperationContext.create(counters));

  // Created while the first ParDoFn is still unfinished, so the fn must not be reused.
  ParDoFn secondParDoFn =
      factory.create(
          options,
          cloudObject,
          null,
          MAIN_OUTPUT,
          ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
          BatchModeExecutionContext.forTesting(options, "testStage"),
          TestOperationContext.create(counters));

  Receiver sharedReceiver = new OutputReceiver();

  firstParDoFn.startBundle(sharedReceiver);
  firstParDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
  // Read the DoFn only after the first processElement call, so that reallyStartBundle has run.
  TestDoFn firstDoFn = (TestDoFn) ((SimpleParDoFn) firstParDoFn).getDoFnInfo().getDoFn();

  secondParDoFn.startBundle(sharedReceiver);
  secondParDoFn.processElement(WindowedValue.valueInGlobalWindow("spam"));
  // Same constraint: read the DoFn only after the first processElement call.
  TestDoFn secondDoFn = (TestDoFn) ((SimpleParDoFn) secondParDoFn).getDoFnInfo().getDoFn();

  firstParDoFn.finishBundle();
  secondParDoFn.finishBundle();

  assertThat(firstDoFn, not(theInstance(secondDoFn)));
  assertThat(firstDoFn.state, equalTo(TestDoFn.State.FINISHED));
  assertThat(secondDoFn.state, equalTo(TestDoFn.State.FINISHED));
}
Aggregations