Usage of org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness in the Apache Beam project.
From class DoFnOperatorTest, method testStateGCForStatefulFn.
/**
 * Checks that keyed state written by a stateful {@link DoFn} is removed once the watermark
 * reaches the window's max timestamp plus the allowed lateness plus the GC delay, and that the
 * user's event-time timer output is still emitted by that same watermark advance.
 */
@Test
public void testStateGCForStatefulFn() throws Exception {
    final String timerId = "boo";
    final String stateId = "dazzle";
    final int offset = 5000;
    final int timerOutput = 4093;

    WindowingStrategy<Object, IntervalWindow> windowingStrategy =
        WindowingStrategy.of(FixedWindows.of(new Duration(10)))
            .withAllowedLateness(Duration.ZERO);

    // Stateful DoFn under test: every element stores its key in state, sets an event-time
    // timer for the window's max timestamp and is re-emitted with its value shifted by `offset`.
    // When the timer fires, the stored key is emitted together with `timerOutput`.
    DoFn<KV<String, Integer>, KV<String, Integer>> statefulFn =
        new DoFn<KV<String, Integer>, KV<String, Integer>>() {
            @TimerId(timerId)
            private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

            @StateId(stateId)
            private final StateSpec<ValueState<String>> stateSpec =
                StateSpecs.value(StringUtf8Coder.of());

            @ProcessElement
            public void processElement(
                    ProcessContext context,
                    @TimerId(timerId) Timer timer,
                    @StateId(stateId) ValueState<String> state,
                    BoundedWindow window) {
                timer.set(window.maxTimestamp());
                KV<String, Integer> element = context.element();
                state.write(element.getKey());
                context.output(KV.of(element.getKey(), element.getValue() + offset));
            }

            @OnTimer(timerId)
            public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
                context.output(KV.of(state.read(), timerOutput));
            }
        };

    WindowedValue.FullWindowedValueCoder<KV<String, Integer>> inputCoder =
        WindowedValue.getFullCoder(
            KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
            windowingStrategy.getWindowFn().windowCoder());

    TupleTag<KV<String, Integer>> mainOutput = new TupleTag<>("main-output");

    DoFnOperator<KV<String, Integer>, KV<String, Integer>, WindowedValue<KV<String, Integer>>> operator =
        new DoFnOperator<>(
            statefulFn,
            "stepName",
            inputCoder,
            mainOutput,
            Collections.<TupleTag<?>>emptyList(),
            new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<String, Integer>>>(),
            windowingStrategy,
            new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
            Collections.<PCollectionView<?>>emptyList(), /* side inputs */
            PipelineOptionsFactory.as(FlinkPipelineOptions.class),
            StringUtf8Coder.of());

    // The harness keys incoming records by the key of the wrapped KV.
    KeySelector<WindowedValue<KV<String, Integer>>, String> keySelector =
        new KeySelector<WindowedValue<KV<String, Integer>>, String>() {
            @Override
            public String getKey(WindowedValue<KV<String, Integer>> kvWindowedValue) throws Exception {
                return kvWindowedValue.getValue().getKey();
            }
        };

    KeyedOneInputStreamOperatorTestHarness<String, WindowedValue<KV<String, Integer>>, WindowedValue<KV<String, Integer>>> harness =
        new KeyedOneInputStreamOperatorTestHarness<>(
            operator, keySelector, new CoderTypeInformation<>(StringUtf8Coder.of()));

    harness.open();
    harness.processWatermark(0);

    // nothing has been processed yet, so no keyed state may exist
    assertEquals(0, harness.numKeyedStateEntries());

    IntervalWindow window = new IntervalWindow(new Instant(0), Duration.millis(10));

    harness.processElement(
        new StreamRecord<>(
            WindowedValue.of(KV.of("key1", 5), new Instant(1), window, PaneInfo.NO_FIRING)));
    harness.processElement(
        new StreamRecord<>(
            WindowedValue.of(KV.of("key2", 7), new Instant(3), window, PaneInfo.NO_FIRING)));

    WindowedValue<KV<String, Integer>> shiftedKey1 =
        WindowedValue.of(KV.of("key1", 5 + offset), new Instant(1), window, PaneInfo.NO_FIRING);
    WindowedValue<KV<String, Integer>> shiftedKey2 =
        WindowedValue.of(KV.of("key2", 7 + offset), new Instant(3), window, PaneInfo.NO_FIRING);

    assertThat(
        this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(harness.getOutput()),
        contains(shiftedKey1, shiftedKey2));

    // one state entry per key
    assertEquals(2, harness.numKeyedStateEntries());

    harness.getOutput().clear();

    // this should trigger both the window.maxTimestamp() timer and the GC timer
    // this tests that the GC timer fires after the user timer
    long gcWatermark =
        window
            .maxTimestamp()
            .plus(windowingStrategy.getAllowedLateness())
            .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
            .getMillis();
    harness.processWatermark(gcWatermark);

    WindowedValue<KV<String, Integer>> timerFiredKey1 =
        WindowedValue.of(KV.of("key1", timerOutput), new Instant(9), window, PaneInfo.NO_FIRING);
    WindowedValue<KV<String, Integer>> timerFiredKey2 =
        WindowedValue.of(KV.of("key2", timerOutput), new Instant(9), window, PaneInfo.NO_FIRING);

    assertThat(
        this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(harness.getOutput()),
        contains(timerFiredKey1, timerFiredKey2));

    // ensure the state was garbage collected
    assertEquals(0, harness.numKeyedStateEntries());

    harness.close();
}
Usage of org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness in the Apache Flink project.
From class EvictingWindowOperatorTest, method testTimeEvictorEvictAfter.
/**
 * Tests an {@link EvictingWindowOperator} on global windows that uses {@code CountTrigger.of(2)}
 * and a two-second {@link TimeEvictor} created with the evict-after flag set to {@code true}.
 *
 * <p>The emitted per-key sums are compared (after sorting) against the expected records, and
 * the test finally checks that the window function's close hook ran exactly once.
 *
 * @throws Exception if the test harness or the operator fails
 */
@Test
public void testTimeEvictorEvictAfter() throws Exception {
    final int triggerCount = 2;
    final boolean evictAfter = true;

    // incremented by RichSumReducer so the test can verify close() was invoked
    AtomicInteger closeCalled = new AtomicInteger(0);

    TypeInformation<Tuple2<String, Integer>> inputType =
        TypeInfoParser.parse("Tuple2<String, Integer>");

    @SuppressWarnings({ "unchecked", "rawtypes" })
    TypeSerializer<StreamRecord<Tuple2<String, Integer>>> streamRecordSerializer =
        (TypeSerializer<StreamRecord<Tuple2<String, Integer>>>)
            new StreamElementSerializer(inputType.createSerializer(new ExecutionConfig()));

    ListStateDescriptor<StreamRecord<Tuple2<String, Integer>>> stateDesc =
        new ListStateDescriptor<>("window-contents", streamRecordSerializer);

    EvictingWindowOperator<String, Tuple2<String, Integer>, Tuple2<String, Integer>, GlobalWindow> windowOperator =
        new EvictingWindowOperator<>(
            GlobalWindows.create(),
            new GlobalWindow.Serializer(),
            new TupleKeySelector(),
            BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
            stateDesc,
            new InternalIterableWindowFunction<>(new RichSumReducer<GlobalWindow>(closeCalled)),
            CountTrigger.of(triggerCount),
            TimeEvictor.of(Time.seconds(2), evictAfter),
            0,
            null);

    OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Tuple2<String, Integer>> harness =
        new KeyedOneInputStreamOperatorTestHarness<>(
            windowOperator, new TupleKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);

    long startTime = 0L;
    ConcurrentLinkedQueue<Object> expected = new ConcurrentLinkedQueue<>();

    harness.open();

    // first batch of elements
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), startTime + 1000));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), startTime + 4000));

    harness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), startTime + 20));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), startTime));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), startTime + 999));

    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), startTime + 3500));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), startTime + 2001));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), startTime + 1001));

    expected.add(new StreamRecord<>(new Tuple2<>("key2", 2), Long.MAX_VALUE));
    expected.add(new StreamRecord<>(new Tuple2<>("key1", 2), Long.MAX_VALUE));
    expected.add(new StreamRecord<>(new Tuple2<>("key2", 3), Long.MAX_VALUE));

    TestHarnessUtil.assertOutputEqualsSorted(
        "Output was not correct.", expected, harness.getOutput(), new ResultSortComparator());

    // second batch: one more element per key
    harness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), startTime + 10999));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), startTime + 1002));

    expected.add(new StreamRecord<>(new Tuple2<>("key1", 4), Long.MAX_VALUE));
    expected.add(new StreamRecord<>(new Tuple2<>("key2", 5), Long.MAX_VALUE));

    TestHarnessUtil.assertOutputEqualsSorted(
        "Output was not correct.", expected, harness.getOutput(), new ResultSortComparator());

    harness.close();

    Assert.assertEquals("Close was not called.", 1, closeCalled.get());
}
Usage of org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness in the Apache Flink project.
From class AbstractStreamOperatorTest, method testStateAndTimerStateShufflingScalingUp.
/**
 * Verify that state and timers are checkpointed per key group and that they are correctly
 * assigned to operator subtasks when restoring.
 *
 * <p>A single-subtask operator registers an event-time timer, a processing-time timer and a
 * state value for each of two keys and is then snapshotted. The snapshot is restored into two
 * subtasks (parallelism 2); each subtask must fire only the timers of the key that belongs to
 * its key-group range.
 *
 * <p>Every harness is closed as soon as it is no longer needed so that the operators and their
 * state backends are released, matching the other harness-based tests.
 */
@Test
public void testStateAndTimerStateShufflingScalingUp() throws Exception {
    final int maxParallelism = 10;

    // first get two keys that will fall into different key-group ranges that go
    // to different operator subtasks when we restore

    // get two sub key-ranges so that we can restore two ranges separately
    KeyGroupRange subKeyGroupRange1 = new KeyGroupRange(0, (maxParallelism / 2) - 1);
    KeyGroupRange subKeyGroupRange2 =
        new KeyGroupRange(subKeyGroupRange1.getEndKeyGroup() + 1, maxParallelism - 1);

    // get two different keys, one per sub range
    int key1 = getKeyInKeyGroupRange(subKeyGroupRange1, maxParallelism);
    int key2 = getKeyInKeyGroupRange(subKeyGroupRange2, maxParallelism);

    TestOperator testOperator = new TestOperator();

    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            1, /* num subtasks */
            0 /* subtask index */);

    testHarness.open();

    testHarness.processWatermark(0L);
    testHarness.setProcessingTime(0L);

    testHarness.processElement(new Tuple2<>(key1, "SET_EVENT_TIME_TIMER:10"), 0);
    testHarness.processElement(new Tuple2<>(key2, "SET_EVENT_TIME_TIMER:20"), 0);

    testHarness.processElement(new Tuple2<>(key1, "SET_PROC_TIME_TIMER:10"), 0);
    testHarness.processElement(new Tuple2<>(key2, "SET_PROC_TIME_TIMER:20"), 0);

    testHarness.processElement(new Tuple2<>(key1, "SET_STATE:HELLO"), 0);
    testHarness.processElement(new Tuple2<>(key2, "SET_STATE:CIAO"), 0);

    // registering timers and state must not produce any output
    assertTrue(extractResult(testHarness).isEmpty());

    OperatorStateHandles snapshot = testHarness.snapshot(0, 0);

    // the snapshot is all we need from the original operator, release it
    testHarness.close();

    // now, restore in two operators, first operator 1
    TestOperator testOperator1 = new TestOperator();

    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness1 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator1,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            2, /* num subtasks */
            0 /* subtask index */);

    testHarness1.setup();
    testHarness1.initializeState(snapshot);
    testHarness1.open();

    testHarness1.processWatermark(10L);

    assertThat(extractResult(testHarness1), contains("ON_EVENT_TIME:HELLO"));

    assertTrue(extractResult(testHarness1).isEmpty());

    // this should not trigger anything, the trigger for WM=20 should sit in the
    // other operator subtask
    testHarness1.processWatermark(20L);

    assertTrue(extractResult(testHarness1).isEmpty());

    testHarness1.setProcessingTime(10L);

    assertThat(extractResult(testHarness1), contains("ON_PROC_TIME:HELLO"));

    assertTrue(extractResult(testHarness1).isEmpty());

    // this should not trigger anything, the trigger for TIME=20 should sit in the
    // other operator subtask
    testHarness1.setProcessingTime(20L);

    assertTrue(extractResult(testHarness1).isEmpty());

    testHarness1.close();

    // now, for the second operator
    TestOperator testOperator2 = new TestOperator();

    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness2 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator2,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            2, /* num subtasks */
            1 /* subtask index */);

    testHarness2.setup();
    testHarness2.initializeState(snapshot);
    testHarness2.open();

    testHarness2.processWatermark(10L);

    // nothing should happen because this timer is in the other subtask
    assertTrue(extractResult(testHarness2).isEmpty());

    testHarness2.processWatermark(20L);

    assertThat(extractResult(testHarness2), contains("ON_EVENT_TIME:CIAO"));

    testHarness2.setProcessingTime(10L);

    // nothing should happen because this timer is in the other subtask
    assertTrue(extractResult(testHarness2).isEmpty());

    testHarness2.setProcessingTime(20L);

    assertThat(extractResult(testHarness2), contains("ON_PROC_TIME:CIAO"));

    assertTrue(extractResult(testHarness2).isEmpty());

    testHarness2.close();
}
Usage of org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness in the Apache Flink project.
From class AbstractStreamOperatorTest, method testStateAndTimerStateShufflingScalingDown.
/**
 * Verify that the state and timers of two parallel operator subtasks can be combined and
 * restored into a single subtask when scaling down.
 *
 * <p>Two subtasks (parallelism 2) each register an event-time timer, a processing-time timer
 * and a state value for one key. Their snapshots are repackaged into one state handle and
 * restored into a single subtask, which must then fire the timers of both keys.
 *
 * <p>Every harness is closed as soon as it is no longer needed so that the operators and their
 * state backends are released, matching the other harness-based tests.
 */
@Test
public void testStateAndTimerStateShufflingScalingDown() throws Exception {
    final int maxParallelism = 10;

    // first get two keys that will fall into different key-group ranges that go
    // to different operator subtasks when we restore

    // get two sub key-ranges so that we can restore two ranges separately
    KeyGroupRange subKeyGroupRange1 = new KeyGroupRange(0, (maxParallelism / 2) - 1);
    KeyGroupRange subKeyGroupRange2 =
        new KeyGroupRange(subKeyGroupRange1.getEndKeyGroup() + 1, maxParallelism - 1);

    // get two different keys, one per sub range
    int key1 = getKeyInKeyGroupRange(subKeyGroupRange1, maxParallelism);
    int key2 = getKeyInKeyGroupRange(subKeyGroupRange2, maxParallelism);

    TestOperator testOperator1 = new TestOperator();

    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness1 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator1,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            2, /* num subtasks */
            0 /* subtask index */);

    testHarness1.setup();
    testHarness1.open();

    testHarness1.processWatermark(0L);
    testHarness1.setProcessingTime(0L);

    TestOperator testOperator2 = new TestOperator();

    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness2 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator2,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            2, /* num subtasks */
            1 /* subtask index */);

    testHarness2.setup();
    testHarness2.open();

    testHarness2.processWatermark(0L);
    testHarness2.setProcessingTime(0L);

    // register some state with both instances and scale down to parallelism 1
    testHarness1.processElement(new Tuple2<>(key1, "SET_EVENT_TIME_TIMER:30"), 0);
    testHarness1.processElement(new Tuple2<>(key1, "SET_PROC_TIME_TIMER:30"), 0);
    testHarness1.processElement(new Tuple2<>(key1, "SET_STATE:HELLO"), 0);

    testHarness2.processElement(new Tuple2<>(key2, "SET_EVENT_TIME_TIMER:40"), 0);
    testHarness2.processElement(new Tuple2<>(key2, "SET_PROC_TIME_TIMER:40"), 0);
    testHarness2.processElement(new Tuple2<>(key2, "SET_STATE:CIAO"), 0);

    // take a snapshot from each one of the "parallel" instances of the operator
    // and combine them into one so that we can scale down
    OperatorStateHandles repackagedState =
        AbstractStreamOperatorTestHarness.repackageState(
            testHarness1.snapshot(0, 0),
            testHarness2.snapshot(0, 0));

    // the combined snapshot is all we need from the two source operators, release them
    testHarness1.close();
    testHarness2.close();

    // now, for the third operator that scales down from parallelism of 2 to 1
    TestOperator testOperator3 = new TestOperator();

    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness3 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator3,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            1, /* num subtasks */
            0 /* subtask index */);

    testHarness3.setup();
    testHarness3.initializeState(repackagedState);
    testHarness3.open();

    testHarness3.processWatermark(30L);
    assertThat(extractResult(testHarness3), contains("ON_EVENT_TIME:HELLO"));
    assertTrue(extractResult(testHarness3).isEmpty());

    testHarness3.processWatermark(40L);
    assertThat(extractResult(testHarness3), contains("ON_EVENT_TIME:CIAO"));
    assertTrue(extractResult(testHarness3).isEmpty());

    testHarness3.setProcessingTime(30L);
    assertThat(extractResult(testHarness3), contains("ON_PROC_TIME:HELLO"));
    assertTrue(extractResult(testHarness3).isEmpty());

    testHarness3.setProcessingTime(40L);
    assertThat(extractResult(testHarness3), contains("ON_PROC_TIME:CIAO"));
    assertTrue(extractResult(testHarness3).isEmpty());

    testHarness3.close();
}
Usage of org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness in the Apache Flink project.
From class StreamGroupedFoldTest, method testGroupedFold.
/**
 * Runs a {@link StreamGroupedFold} with initial value {@code "100"} over integers keyed by
 * their string form and checks that the emitted records and the forwarded watermark match the
 * expected sequence exactly, in order.
 */
@Test
public void testGroupedFold() throws Exception {
    // key every integer by its decimal string representation
    KeySelector<Integer, String> byStringValue = new KeySelector<Integer, String>() {
        @Override
        public String getKey(Integer value) {
            return value.toString();
        }
    };

    StreamGroupedFold<Integer, String, String> foldOperator =
        new StreamGroupedFold<>(new MyFolder(), "100");
    foldOperator.setOutputType(BasicTypeInfo.STRING_TYPE_INFO, new ExecutionConfig());

    OneInputStreamOperatorTestHarness<Integer, String> harness =
        new KeyedOneInputStreamOperatorTestHarness<>(
            foldOperator, byStringValue, BasicTypeInfo.STRING_TYPE_INFO);

    long baseTime = 0L;
    ConcurrentLinkedQueue<Object> expected = new ConcurrentLinkedQueue<>();

    harness.open();

    // key "1": two elements followed by a watermark
    harness.processElement(new StreamRecord<>(1, baseTime + 1));
    expected.add(new StreamRecord<>("1001", baseTime + 1));

    harness.processElement(new StreamRecord<>(1, baseTime + 2));
    expected.add(new StreamRecord<>("10011", baseTime + 2));

    harness.processWatermark(new Watermark(baseTime + 2));
    expected.add(new Watermark(baseTime + 2));

    // key "2": two elements
    harness.processElement(new StreamRecord<>(2, baseTime + 3));
    expected.add(new StreamRecord<>("1002", baseTime + 3));

    harness.processElement(new StreamRecord<>(2, baseTime + 4));
    expected.add(new StreamRecord<>("10022", baseTime + 4));

    // key "3": a single element
    harness.processElement(new StreamRecord<>(3, baseTime + 5));
    expected.add(new StreamRecord<>("1003", baseTime + 5));

    TestHarnessUtil.assertOutputEquals("Output was not correct.", expected, harness.getOutput());
}
Aggregations