use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupOnFinalWatermark.
/**
 * Checks that keyed user state written through the operator's state internals is removed once a
 * watermark past {@code BoundedWindow.TIMESTAMP_MAX_VALUE} is processed, even though no event-time
 * timers were registered.
 */
@Test
public void testEnsureStateCleanupOnFinalWatermark() throws Exception {
  // Coders, tags and windowing used to build the operator under test.
  TupleTag<Integer> outputTag = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> managerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          VoidCoder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
  StringUtf8Coder stringKeyCoder = StringUtf8Coder.of();
  WindowingStrategy globalStrategy = WindowingStrategy.globalDefault();
  Coder<BoundedWindow> globalWindowCoder = globalStrategy.getWindowFn().windowCoder();
  KvCoder<String, Integer> elementCoder = KvCoder.of(stringKeyCoder, VarIntCoder.of());

  ExecutableStageDoFnOperator<Integer, Integer> stageOperator =
      getOperator(
          outputTag,
          Collections.emptyList(),
          managerFactory,
          globalStrategy,
          stringKeyCoder,
          WindowedValue.getFullCoder(elementCoder, globalWindowCoder));

  KeyedOneInputStreamOperatorTestHarness<
          ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>>
      harness =
          new KeyedOneInputStreamOperatorTestHarness(
              stageOperator,
              stageOperator.keySelector,
              new CoderTypeInformation<>(
                  FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));

  // The stage bundle factory hands out a mocked bundle with a single mocked "input" receiver.
  RemoteBundle mockBundle = Mockito.mock(RemoteBundle.class);
  when(mockBundle.getInputReceivers())
      .thenReturn(
          ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder()
              .put("input", Mockito.mock(FnDataReceiver.class))
              .build());
  when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any()))
      .thenReturn(mockBundle);

  harness.open();

  // Select the key under which the state below is written.
  KeyedStateBackend<ByteBuffer> backend = stageOperator.getKeyedStateBackend();
  ByteBuffer encodedKey = FlinkKeyUtils.encodeKey("key1", stringKeyCoder);
  backend.setCurrentKey(encodedKey);

  // Nothing is stored before the test writes its own state.
  assertThat(harness.numKeyedStateEntries(), is(0));

  // Write one bag entry in the global-window namespace; the value is a ByteString.
  StateNamespace globalNamespace = StateNamespaces.window(globalWindowCoder, GlobalWindow.INSTANCE);
  BagState<ByteString> userState =
      stageOperator.keyedStateInternals.state(
          globalNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
  userState.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));

  // Writing state registers no event-time timer, but does create exactly one state entry.
  assertThat(harness.numEventTimeTimers(), is(0));
  assertThat(harness.numKeyedStateEntries(), is(1));

  // A watermark one millisecond past the maximum timestamp must clear the state.
  long pastMaxTimestampMillis =
      BoundedWindow.TIMESTAMP_MAX_VALUE.plus(Duration.millis(1)).getMillis();
  harness.processWatermark(new Watermark(pastMaxTimestampMillis));
  assertThat(harness.numKeyedStateEntries(), is(0));
}
use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class UserParDoFnFactoryTest method testCleanupWorks.
/**
 * Verifies that firing the cleanup timer for a window clears the user state stored under that
 * window's namespace while leaving the state of other windows untouched.
 */
@Test
public void testCleanupWorks() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  CounterSet counterSet = new CounterSet();
  DoFn<?, ?> statefulFn = new TestStatefulDoFn();
  CloudObject fnSpec =
      getCloudObject(statefulFn, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));

  StateInternals sharedState = InMemoryStateInternals.forKey("dummy");

  // Two mocked step contexts: the outer one is what the ParDoFn sees, the inner one is what
  // namespacedToUser() returns. Both are backed by the same in-memory state.
  DataflowStepContext outerContext = mock(DataflowStepContext.class);
  DataflowStepContext userContext = mock(DataflowStepContext.class);
  when(outerContext.namespacedToUser()).thenReturn(userContext);
  when(outerContext.stateInternals()).thenReturn(sharedState);
  when(userContext.stateInternals()).thenReturn((StateInternals) sharedState);

  DataflowExecutionContext<DataflowStepContext> execContext =
      mock(DataflowExecutionContext.class);
  TestOperationContext opContext = TestOperationContext.create(counterSet);
  when(execContext.getStepContext(opContext)).thenReturn(outerContext);
  when(execContext.getSideInputReader(any(), any(), any()))
      .thenReturn(NullSideInputReader.empty());

  ParDoFn underTest =
      factory.create(
          pipelineOptions,
          fnSpec,
          Collections.emptyList(),
          MAIN_OUTPUT,
          ImmutableMap.of(MAIN_OUTPUT, 0),
          execContext,
          opContext);
  Receiver receiver = new OutputReceiver();
  underTest.startBundle(receiver);

  // Two adjacent interval windows and their state namespaces.
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
  IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
  Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
  StateNamespace firstNamespace = StateNamespaces.window(windowCoder, firstWindow);
  StateNamespace secondNamespace = StateNamespaces.window(windowCoder, secondWindow);
  StateTag<ValueState<String>> valueTag =
      StateTags.tagForSpec(TestStatefulDoFn.STATE_ID, StateSpecs.value(StringUtf8Coder.of()));

  // Populate real state for both windows. The test inspects the resulting state directly rather
  // than verifying clear() calls, so it does not depend on how the cleanup is implemented.
  sharedState.state(firstNamespace, valueTag).write("first");
  sharedState.state(secondNamespace, valueTag).write("second");

  // The user context never has a fired timer; the outer context yields the first window's
  // cleanup timer once and then nothing.
  Instant firstCleanupTime = firstWindow.maxTimestamp().plus(Duration.millis(1L));
  when(userContext.getNextFiredTimer(windowCoder)).thenReturn(null);
  when(outerContext.getNextFiredTimer(windowCoder))
      .thenReturn(
          TimerData.of(
              SimpleParDoFn.CLEANUP_TIMER_ID,
              firstNamespace,
              firstCleanupTime,
              firstCleanupTime,
              TimeDomain.EVENT_TIME))
      .thenReturn(null);

  // Processing timers now should wipe only the first window.
  underTest.processTimers();
  assertThat(sharedState.state(firstNamespace, valueTag).read(), nullValue());
  assertThat(sharedState.state(secondNamespace, valueTag).read(), equalTo("second"));

  // Re-stub the outer context to yield the second window's cleanup timer once.
  Instant secondCleanupTime = secondWindow.maxTimestamp().plus(Duration.millis(1L));
  when(outerContext.getNextFiredTimer((Coder) windowCoder))
      .thenReturn(
          TimerData.of(
              SimpleParDoFn.CLEANUP_TIMER_ID,
              secondNamespace,
              secondCleanupTime,
              secondCleanupTime,
              TimeDomain.EVENT_TIME))
      .thenReturn(null);

  // After the second round both windows must be empty.
  underTest.processTimers();
  assertThat(sharedState.state(firstNamespace, valueTag).read(), nullValue());
  assertThat(sharedState.state(secondNamespace, valueTag).read(), nullValue());
}
use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class WindmillTimerInternalsTest method testTimerDataToFromTimer.
/**
 * Round-trips {@code TimerData} through {@code timerDataToWindmillTimer} and {@code
 * windmillTimerToTimerData} for every combination of state family, namespace, time domain,
 * namespace prefix and timestamp, and expects to get an equal {@code TimerData} back.
 */
@Test
public void testTimerDataToFromTimer() {
  for (String family : TEST_STATE_FAMILIES) {
    for (KV<Coder<? extends BoundedWindow>, StateNamespace> entry : TEST_NAMESPACES_WITH_CODERS) {
      @Nullable Coder<? extends BoundedWindow> windowCoder = entry.getKey();
      StateNamespace ns = entry.getValue();
      for (TimeDomain domain : TimeDomain.values()) {
        for (WindmillNamespacePrefix nsPrefix : WindmillNamespacePrefix.values()) {
          for (Instant fireTime : TEST_TIMESTAMPS) {
            // Output timestamp one millisecond before the firing timestamp.
            Instant earlierOutputTime = fireTime.minus(Duration.millis(1));

            // Timers created without an explicit timer id.
            List<TimerData> withoutIds =
                ImmutableList.of(
                    TimerData.of(ns, fireTime, fireTime, domain),
                    TimerData.of(ns, fireTime, earlierOutputTime, domain));
            for (TimerData expected : withoutIds) {
              assertThat(
                  WindmillTimerInternals.windmillTimerToTimerData(
                      nsPrefix,
                      WindmillTimerInternals.timerDataToWindmillTimer(family, nsPrefix, expected),
                      windowCoder),
                  equalTo(expected));
            }

            // Timers with an id, with and without the "family" timer family id.
            for (String id : TEST_TIMER_IDS) {
              List<TimerData> withIds =
                  ImmutableList.of(
                      TimerData.of(id, ns, fireTime, fireTime, domain),
                      TimerData.of(id, "family", ns, fireTime, fireTime, domain),
                      TimerData.of(id, ns, fireTime, earlierOutputTime, domain),
                      TimerData.of(id, "family", ns, fireTime, earlierOutputTime, domain));
              for (TimerData expected : withIds) {
                assertThat(
                    WindmillTimerInternals.windmillTimerToTimerData(
                        nsPrefix,
                        WindmillTimerInternals.timerDataToWindmillTimer(
                            family, nsPrefix, expected),
                        windowCoder),
                    equalTo(expected));
              }
            }
          }
        }
      }
    }
  }
}
use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class PipelineTranslatorUtils method fireTimer.
/**
 * Builds a {@link Timer} value from the given timer data and hands it to the matching receiver.
 *
 * <p>The receiver is looked up in {@code timerReceivers} using the key obtained by decoding the
 * timer's family id via {@code TimerReceiverFactory.decodeTimerDataTimerId}.
 *
 * @param timer the fired timer; its namespace must be a {@code StateNamespaces.WindowNamespace}
 *     (enforced by a precondition check)
 * @param timerReceivers receivers keyed by the decoded timer family id
 * @param currentTimerKey the key placed into the emitted {@link Timer} value
 * @throws RuntimeException if the receiver fails while accepting the timer; the original
 *     exception is attached as the cause
 */
private static void fireTimer(
    TimerInternals.TimerData timer,
    Map<KV<String, String>, FnDataReceiver<Timer>> timerReceivers,
    Object currentTimerKey) {
  // Only window namespaces carry the window needed for the emitted timer.
  StateNamespace namespace = timer.getNamespace();
  Preconditions.checkArgument(namespace instanceof StateNamespaces.WindowNamespace);
  BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();

  Instant timestamp = timer.getTimestamp();
  Instant outputTimestamp = timer.getOutputTimestamp();
  Timer<?> timerValue =
      Timer.of(
          currentTimerKey,
          timer.getTimerId(),
          Collections.singletonList(window),
          timestamp,
          outputTimestamp,
          PaneInfo.NO_FIRING);

  // Resolve the receiver for this timer from its encoded family id.
  KV<String, String> transformAndTimerId =
      TimerReceiverFactory.decodeTimerDataTimerId(timer.getTimerFamilyId());
  FnDataReceiver<Timer> fnTimerReceiver = timerReceivers.get(transformAndTimerId);
  Preconditions.checkNotNull(
      fnTimerReceiver, "No FnDataReceiver found for %s", transformAndTimerId);

  try {
    fnTimerReceiver.accept(timerValue);
  } catch (Exception e) {
    // Chain the original exception so the root cause and its stack trace are not lost.
    throw new RuntimeException(
        String.format(Locale.ENGLISH, "Failed to process timer: %s", timerValue), e);
  }
}
use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class SamzaTimerInternalsFactoryTest method testNewTimersAreInsertedInOrder.
/**
 * Sets event-time timers, advances the input watermark in three steps, and checks after each step
 * that the ready timers come back in non-decreasing timestamp order and in the expected number.
 *
 * <p>The store is closed in a {@code finally} block so that a failing assertion does not leave it
 * open.
 */
@Test
public void testNewTimersAreInsertedInOrder() {
  final SamzaPipelineOptions pipelineOptions =
      PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  pipelineOptions.setEventTimerBufferSize(5);
  final KeyValueStore<ByteArray, StateValue<?>> store = createStore();
  try {
    final SamzaTimerInternalsFactory<String> timerInternalsFactory =
        createTimerInternalsFactory(null, "timer", pipelineOptions, store);
    final StateNamespace nameSpace = StateNamespaces.global();
    final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");

    // Set ten timers, "timer0" .. "timer9", with timestamps 0 .. 9.
    for (int i = 0; i < 10; i++) {
      timerInternals.setTimer(
          nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
    }

    // Watermark 1: two timers are expected to be ready, in timestamp order.
    Collection<KeyedTimerData<String>> readyTimers;
    timerInternalsFactory.setInputWatermark(new Instant(1));
    long lastTimestamp = 0;
    readyTimers = timerInternalsFactory.removeReadyTimers();
    for (KeyedTimerData<String> keyedTimerData : readyTimers) {
      final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
      assertTrue(lastTimestamp <= currentTimeStamp);
      lastTimestamp = currentTimeStamp;
    }
    assertEquals(2, readyTimers.size());

    // Set "timer0" .. "timer2" again with timestamps 0 .. 2; these are earlier than the timers
    // still pending and must still come out in timestamp order.
    for (int i = 0; i < 3; i++) {
      timerInternals.setTimer(
          nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
    }

    // Watermark 5: six timers are expected to be ready, in timestamp order, and four are
    // expected to remain in the event-time buffer afterwards.
    timerInternalsFactory.setInputWatermark(new Instant(5));
    lastTimestamp = 0;
    readyTimers = timerInternalsFactory.removeReadyTimers();
    for (KeyedTimerData<String> keyedTimerData : readyTimers) {
      final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
      assertTrue(lastTimestamp <= currentTimeStamp);
      lastTimestamp = currentTimeStamp;
    }
    assertEquals(6, readyTimers.size());
    assertEquals(4, timerInternalsFactory.getEventTimeBuffer().size());

    // Watermark 10: the remaining four timers are expected to be ready. lastTimestamp is
    // deliberately not reset, so they must also not precede the timers fired in the last round.
    timerInternalsFactory.setInputWatermark(new Instant(10));
    readyTimers = timerInternalsFactory.removeReadyTimers();
    for (KeyedTimerData<String> keyedTimerData : readyTimers) {
      final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
      assertTrue(lastTimestamp <= currentTimeStamp);
      lastTimestamp = currentTimeStamp;
    }
    assertEquals(4, readyTimers.size());
    assertEquals(0, timerInternalsFactory.getEventTimeBuffer().size());
  } finally {
    // Release the store even when an assertion above fails.
    store.close();
  }
}
Aggregations