Use of org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver in project beam by apache.
In the class UserParDoFnFactoryTest, the method testFactoryDoesNotReuseAfterAborted:
@Test
public void testFactoryDoesNotReuseAfterAborted() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  CounterSet counters = new CounterSet();
  TestDoFn initialFn = new TestDoFn(Collections.<TupleTag<String>>emptyList());
  CloudObject cloudObject = getCloudObject(initialFn);
  ParDoFn parDoFn =
      factory.create(
          options,
          cloudObject,
          null,
          MAIN_OUTPUT,
          ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
          BatchModeExecutionContext.forTesting(options, "testStage"),
          TestOperationContext.create(counters));
  Receiver rcvr = new OutputReceiver();
  parDoFn.startBundle(rcvr);
  parDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
  TestDoFn fn = (TestDoFn) ((SimpleParDoFn) parDoFn).getDoFnInfo().getDoFn();
  parDoFn.abort();
  assertThat(fn.state, equalTo(TestDoFn.State.TORN_DOWN));
  // The torn-down fn must not be reused; the factory should create a fresh instance
  ParDoFn secondParDoFn =
      factory.create(
          options,
          cloudObject.clone(),
          null,
          MAIN_OUTPUT,
          ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
          BatchModeExecutionContext.forTesting(options, "testStage"),
          TestOperationContext.create(counters));
  secondParDoFn.startBundle(rcvr);
  secondParDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
  TestDoFn secondFn = (TestDoFn) ((SimpleParDoFn) secondParDoFn).getDoFnInfo().getDoFn();
  assertThat(secondFn, not(theInstance(fn)));
  assertThat(fn.state, equalTo(TestDoFn.State.TORN_DOWN));
  assertThat(secondFn.state, equalTo(TestDoFn.State.PROCESSING));
}
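The TestDoFn helper is defined elsewhere in UserParDoFnFactoryTest and is not reproduced on this page. A minimal sketch consistent with the assertions above (the state values PROCESSING, FINISHED, and TORN_DOWN appear in the tests; the remaining states and method bodies are assumptions):

private static class TestDoFn extends DoFn<String, String> {
  // Lifecycle states recorded so tests can assert which DoFn methods ran.
  enum State { UNSTARTED, SET_UP, STARTED, PROCESSING, FINISHED, TORN_DOWN }

  State state = State.UNSTARTED;

  TestDoFn(List<TupleTag<String>> sideOutputTags) {}

  @Setup
  public void setup() {
    state = State.SET_UP;
  }

  @StartBundle
  public void startBundle() {
    state = State.STARTED;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    state = State.PROCESSING;
    c.output(c.element());
  }

  @FinishBundle
  public void finishBundle() {
    state = State.FINISHED;
  }

  @Teardown
  public void teardown() {
    state = State.TORN_DOWN;
  }
}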
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver in project beam by apache.
In the class UserParDoFnFactoryTest, the method testCleanupRegistered:
@Test
public void testCleanupRegistered() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  CounterSet counters = new CounterSet();
  DoFn<?, ?> initialFn = new TestStatefulDoFn();
  CloudObject cloudObject =
      getCloudObject(
          initialFn,
          WindowingStrategy.globalDefault().withWindowFn(FixedWindows.of(Duration.millis(10))));
  TimerInternals timerInternals = mock(TimerInternals.class);
  DataflowStepContext stepContext = mock(DataflowStepContext.class);
  when(stepContext.timerInternals()).thenReturn(timerInternals);
  DataflowExecutionContext<DataflowStepContext> executionContext = mock(DataflowExecutionContext.class);
  TestOperationContext operationContext = TestOperationContext.create(counters);
  when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);
  when(executionContext.getSideInputReader(any(), any(), any())).thenReturn(NullSideInputReader.empty());
  ParDoFn parDoFn =
      factory.create(
          options,
          cloudObject,
          Collections.emptyList(),
          MAIN_OUTPUT,
          ImmutableMap.of(MAIN_OUTPUT, 0),
          executionContext,
          operationContext);
  Receiver rcvr = new OutputReceiver();
  parDoFn.startBundle(rcvr);
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(10));
  parDoFn.processElement(WindowedValue.of("foo", new Instant(1), firstWindow, PaneInfo.NO_FIRING));
  verify(stepContext)
      .setStateCleanupTimer(
          SimpleParDoFn.CLEANUP_TIMER_ID,
          firstWindow,
          IntervalWindow.getCoder(),
          firstWindow.maxTimestamp().plus(Duration.millis(1L)),
          firstWindow.maxTimestamp().plus(Duration.millis(1L)));
}
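TestStatefulDoFn is likewise defined in the test class but not shown here. A plausible sketch, inferred from the STATE_ID constant and the ValueState<String> spec that testCleanupWorks (further down) builds its StateTag from; the input type and empty method body are assumptions:

private static class TestStatefulDoFn extends DoFn<KV<String, Integer>, String> {
  public static final String STATE_ID = "state-id";

  // A per-window value cell; the cleanup tests write to this state and then
  // verify that firing the cleanup timer empties it.
  @StateId(STATE_ID)
  private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());

  @ProcessElement
  public void processElement(ProcessContext c) {}
}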
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver in project beam by apache.
In the class UserParDoFnFactoryTest, the method testFactoryReuseInStep:
@Test
public void testFactoryReuseInStep() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  CounterSet counters = new CounterSet();
  TestDoFn initialFn = new TestDoFn(Collections.<TupleTag<String>>emptyList());
  CloudObject cloudObject = getCloudObject(initialFn);
  TestOperationContext operationContext = TestOperationContext.create(counters);
  ParDoFn parDoFn =
      factory.create(
          options,
          cloudObject,
          null,
          MAIN_OUTPUT,
          ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
          BatchModeExecutionContext.forTesting(options, "testStage"),
          operationContext);
  Receiver rcvr = new OutputReceiver();
  parDoFn.startBundle(rcvr);
  parDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
  TestDoFn fn = (TestDoFn) ((SimpleParDoFn) parDoFn).getDoFnInfo().getDoFn();
  assertThat(fn, not(theInstance(initialFn)));
  parDoFn.finishBundle();
  assertThat(fn.state, equalTo(TestDoFn.State.FINISHED));
  // The fn should be reused for the second call to create
  ParDoFn secondParDoFn =
      factory.create(
          options,
          cloudObject,
          null,
          MAIN_OUTPUT,
          ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
          BatchModeExecutionContext.forTesting(options, "testStage"),
          operationContext);
  // The fn should still be finished from the last call; it should not be set up again
  assertThat(fn.state, equalTo(TestDoFn.State.FINISHED));
  secondParDoFn.startBundle(rcvr);
  secondParDoFn.processElement(WindowedValue.valueInGlobalWindow("spam"));
  TestDoFn reobtainedFn = (TestDoFn) ((SimpleParDoFn) secondParDoFn).getDoFnInfo().getDoFn();
  secondParDoFn.finishBundle();
  assertThat(reobtainedFn.state, equalTo(TestDoFn.State.FINISHED));
  assertThat(fn, theInstance(reobtainedFn));
}
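The reuse this test verifies comes from the factory pooling DoFn instances per step via a DoFnInstanceManager: finishBundle() returns the instance to the pool, while abort() (as in testFactoryDoesNotReuseAfterAborted above) tears it down and discards it. A conceptual sketch of that lifecycle; the exact DoFnInstanceManagers signatures vary across Beam versions, so treat this as illustrative rather than the real API:

// Conceptual sketch only; cloningPool's exact signature is an assumption
// (newer Beam versions also take PipelineOptions).
void illustrateInstancePooling(DoFnInfo<?, ?> fnInfo) throws Exception {
  DoFnInstanceManager manager = DoFnInstanceManagers.cloningPool(fnInfo);
  DoFnInfo<?, ?> first = manager.get();  // deserialize a fresh instance, or take a pooled one
  manager.complete(first);               // finishBundle path: instance returns to the pool
  DoFnInfo<?, ?> second = manager.get(); // the same instance comes back, still FINISHED
  manager.abort(second);                 // abort path: instance is torn down, never pooled
}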
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver in project beam by apache.
In the class SimpleParDoFn, the method reallyStartBundle:
private void reallyStartBundle() throws Exception {
  checkState(fnRunner == null, "bundle already started (or not properly finished)");
  OutputManager outputManager =
      new OutputManager() {
        final Map<TupleTag<?>, OutputReceiver> undeclaredOutputs = new HashMap<>();

        @Nullable
        private Receiver getReceiverOrNull(TupleTag<?> tag) {
          Integer receiverIndex = outputTupleTagsToReceiverIndices.get(tag);
          if (receiverIndex != null) {
            return receivers[receiverIndex];
          } else {
            return undeclaredOutputs.get(tag);
          }
        }

        @Override
        public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
          outputsPerElementTracker.onOutput();
          Receiver receiver = getReceiverOrNull(tag);
          if (receiver == null) {
            // A new undeclared output.
            // TODO: plumb through the operationName, so that we can
            // name implicit outputs after it.
            String outputName = "implicit-" + tag.getId();
            // TODO: plumb through the counter prefix, so we can
            // make it available to the OutputReceiver class in case
            // it wants to use it in naming output counters. (It
            // doesn't today.)
            OutputReceiver undeclaredReceiver = new OutputReceiver();
            ElementCounter outputCounter =
                new DataflowOutputCounter(outputName, counterFactory, stepContext.getNameContext());
            undeclaredReceiver.addOutputCounter(outputCounter);
            undeclaredOutputs.put(tag, undeclaredReceiver);
            receiver = undeclaredReceiver;
          }
          try {
            receiver.process(output);
          } catch (RuntimeException | Error e) {
            // Rethrow unchecked exceptions and errors as-is so they propagate
            // unchanged via a chain of DoFn's.
            throw e;
          } catch (Exception e) {
            // Wrap checked exceptions in a RuntimeException for consistency
            // with other Receivers.
            throw new RuntimeException(e);
          }
        }
      };
  fnInfo = (DoFnInfo) doFnInstanceManager.get();
  fnSignature = DoFnSignatures.getSignature(fnInfo.getDoFn().getClass());
  fnRunner =
      runnerFactory.createRunner(
          fnInfo.getDoFn(),
          options,
          mainOutputTag,
          sideOutputTags,
          fnInfo.getSideInputViews(),
          sideInputReader,
          fnInfo.getInputCoder(),
          fnInfo.getOutputCoders(),
          fnInfo.getWindowingStrategy(),
          stepContext,
          userStepContext,
          outputManager,
          doFnSchemaInformation,
          sideInputMapping);
  fnRunner.startBundle();
}
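The Receiver contract that receiver.process(output) relies on is a single-method sink; in the Beam worker it is declared as void process(Object outputElem) throws Exception, which is what forces the checked-exception handling above. A minimal collecting implementation one might use in tests (this class is hypothetical, not part of Beam; assumes the usual java.util imports):

// Minimal test Receiver: records every element handed to process().
static class CollectingReceiver implements Receiver {
  final List<Object> received = new ArrayList<>();

  @Override
  public void process(Object outputElem) throws Exception {
    received.add(outputElem);
  }
}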
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver in project beam by apache.
In the class UserParDoFnFactoryTest, the method testCleanupWorks:
@Test
public void testCleanupWorks() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  CounterSet counters = new CounterSet();
  DoFn<?, ?> initialFn = new TestStatefulDoFn();
  CloudObject cloudObject =
      getCloudObject(initialFn, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));
  StateInternals stateInternals = InMemoryStateInternals.forKey("dummy");
  // The overarching step context that only ParDoFn gets
  DataflowStepContext stepContext = mock(DataflowStepContext.class);
  // The user step context that the DoFnRunner gets a handle on
  DataflowStepContext userStepContext = mock(DataflowStepContext.class);
  when(stepContext.namespacedToUser()).thenReturn(userStepContext);
  when(stepContext.stateInternals()).thenReturn(stateInternals);
  when(userStepContext.stateInternals()).thenReturn((StateInternals) stateInternals);
  DataflowExecutionContext<DataflowStepContext> executionContext = mock(DataflowExecutionContext.class);
  TestOperationContext operationContext = TestOperationContext.create(counters);
  when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);
  when(executionContext.getSideInputReader(any(), any(), any())).thenReturn(NullSideInputReader.empty());
  ParDoFn parDoFn =
      factory.create(
          options,
          cloudObject,
          Collections.emptyList(),
          MAIN_OUTPUT,
          ImmutableMap.of(MAIN_OUTPUT, 0),
          executionContext,
          operationContext);
  Receiver rcvr = new OutputReceiver();
  parDoFn.startBundle(rcvr);
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
  IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
  Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
  StateNamespace firstWindowNamespace = StateNamespaces.window(windowCoder, firstWindow);
  StateNamespace secondWindowNamespace = StateNamespaces.window(windowCoder, secondWindow);
  StateTag<ValueState<String>> tag =
      StateTags.tagForSpec(TestStatefulDoFn.STATE_ID, StateSpecs.value(StringUtf8Coder.of()));
  // Set up non-empty state. We don't mock + verify calls to clear() but instead
  // check that state is actually empty. We mustn't care how it is accomplished.
  stateInternals.state(firstWindowNamespace, tag).write("first");
  stateInternals.state(secondWindowNamespace, tag).write("second");
  when(userStepContext.getNextFiredTimer(windowCoder)).thenReturn(null);
  when(stepContext.getNextFiredTimer(windowCoder))
      .thenReturn(
          TimerData.of(
              SimpleParDoFn.CLEANUP_TIMER_ID,
              firstWindowNamespace,
              firstWindow.maxTimestamp().plus(Duration.millis(1L)),
              firstWindow.maxTimestamp().plus(Duration.millis(1L)),
              TimeDomain.EVENT_TIME))
      .thenReturn(null);
  // This should fire the timer to clean up the first window
  parDoFn.processTimers();
  assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
  assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
  when(stepContext.getNextFiredTimer((Coder) windowCoder))
      .thenReturn(
          TimerData.of(
              SimpleParDoFn.CLEANUP_TIMER_ID,
              secondWindowNamespace,
              secondWindow.maxTimestamp().plus(Duration.millis(1L)),
              secondWindow.maxTimestamp().plus(Duration.millis(1L)),
              TimeDomain.EVENT_TIME))
      .thenReturn(null);
  // And this should clean up the second window
  parDoFn.processTimers();
  assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
  assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
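Both cleanup timers here, and the one registered in testCleanupRegistered, fire one millisecond past the window's maximum timestamp, the earliest point at which the window's state can safely be dropped. A quick check of that arithmetic (IntervalWindow.maxTimestamp() is the window's end minus 1ms):

// For the window [0, 10): maxTimestamp() is 9ms, so the cleanup timer
// fires at 10ms, exactly when the window has expired.
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
Instant cleanupTime = window.maxTimestamp().plus(Duration.millis(1L));
assert cleanupTime.equals(new Instant(10));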