use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class AbstractStreamOperatorTest method testStateAndTimerStateShufflingScalingUp.
/**
 * Verify that state and timers are checkpointed per key group and that they are correctly
 * assigned to operator subtasks when restoring with a higher parallelism (one subtask is
 * snapshotted, two subtasks are restored from that snapshot).
 *
 * <p>Each harness is closed as soon as it is no longer needed so that the operator it
 * wraps is shut down instead of being left open for the rest of the test run.
 */
@Test
public void testStateAndTimerStateShufflingScalingUp() throws Exception {
    final int maxParallelism = 10;

    // first get two keys that will fall into different key-group ranges that go
    // to different operator subtasks when we restore

    // get two sub key-ranges so that we can restore two ranges separately
    KeyGroupRange subKeyGroupRange1 = new KeyGroupRange(0, (maxParallelism / 2) - 1);
    KeyGroupRange subKeyGroupRange2 = new KeyGroupRange(subKeyGroupRange1.getEndKeyGroup() + 1, maxParallelism - 1);

    // get two different keys, one per sub range
    int key1 = getKeyInKeyGroupRange(subKeyGroupRange1, maxParallelism);
    int key2 = getKeyInKeyGroupRange(subKeyGroupRange2, maxParallelism);

    // single subtask that owns both keys; this is the instance we snapshot
    TestOperator testOperator = new TestOperator();
    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            1, /* num subtasks */
            0 /* subtask index */);

    testHarness.open();

    testHarness.processWatermark(0L);
    testHarness.setProcessingTime(0L);

    testHarness.processElement(new Tuple2<>(key1, "SET_EVENT_TIME_TIMER:10"), 0);
    testHarness.processElement(new Tuple2<>(key2, "SET_EVENT_TIME_TIMER:20"), 0);

    testHarness.processElement(new Tuple2<>(key1, "SET_PROC_TIME_TIMER:10"), 0);
    testHarness.processElement(new Tuple2<>(key2, "SET_PROC_TIME_TIMER:20"), 0);

    testHarness.processElement(new Tuple2<>(key1, "SET_STATE:HELLO"), 0);
    testHarness.processElement(new Tuple2<>(key2, "SET_STATE:CIAO"), 0);

    // registering timers and state must not emit anything by itself
    assertTrue(extractResult(testHarness).isEmpty());

    OperatorStateHandles snapshot = testHarness.snapshot(0, 0);

    // the snapshot has been taken, the original instance is no longer needed
    testHarness.close();

    // now, restore in two operators, first operator 1
    TestOperator testOperator1 = new TestOperator();
    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness1 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator1,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            2, /* num subtasks */
            0 /* subtask index */);

    testHarness1.setup();
    testHarness1.initializeState(snapshot);
    testHarness1.open();

    testHarness1.processWatermark(10L);

    assertThat(extractResult(testHarness1), contains("ON_EVENT_TIME:HELLO"));
    assertTrue(extractResult(testHarness1).isEmpty());

    // this should not trigger anything, the trigger for WM=20 should sit in the
    // other operator subtask
    testHarness1.processWatermark(20L);

    assertTrue(extractResult(testHarness1).isEmpty());

    testHarness1.setProcessingTime(10L);

    assertThat(extractResult(testHarness1), contains("ON_PROC_TIME:HELLO"));
    assertTrue(extractResult(testHarness1).isEmpty());

    // this should not trigger anything, the trigger for TIME=20 should sit in the
    // other operator subtask
    testHarness1.setProcessingTime(20L);

    assertTrue(extractResult(testHarness1).isEmpty());

    testHarness1.close();

    // now, for the second operator
    TestOperator testOperator2 = new TestOperator();
    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness2 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator2,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            2, /* num subtasks */
            1 /* subtask index */);

    testHarness2.setup();
    testHarness2.initializeState(snapshot);
    testHarness2.open();

    testHarness2.processWatermark(10L);

    // nothing should happen because this timer is in the other subtask
    assertTrue(extractResult(testHarness2).isEmpty());

    testHarness2.processWatermark(20L);

    assertThat(extractResult(testHarness2), contains("ON_EVENT_TIME:CIAO"));

    testHarness2.setProcessingTime(10L);

    // nothing should happen because this timer is in the other subtask
    assertTrue(extractResult(testHarness2).isEmpty());

    testHarness2.setProcessingTime(20L);

    assertThat(extractResult(testHarness2), contains("ON_PROC_TIME:CIAO"));
    assertTrue(extractResult(testHarness2).isEmpty());

    testHarness2.close();
}
use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class AbstractStreamOperatorTest method testStateAndTimerStateShufflingScalingDown.
/**
 * Verify that state and timers snapshotted by two parallel operator subtasks can be
 * repackaged into a single snapshot and restored into one subtask (scaling down from a
 * parallelism of 2 to 1), and that the restored instance then fires the timers of both keys.
 *
 * <p>Each harness is closed as soon as it is no longer needed so that the operator it
 * wraps is shut down instead of being left open for the rest of the test run.
 */
@Test
public void testStateAndTimerStateShufflingScalingDown() throws Exception {
    final int maxParallelism = 10;

    // first get two keys that will fall into different key-group ranges that go
    // to different operator subtasks when we restore

    // get two sub key-ranges so that we can restore two ranges separately
    KeyGroupRange subKeyGroupRange1 = new KeyGroupRange(0, (maxParallelism / 2) - 1);
    KeyGroupRange subKeyGroupRange2 = new KeyGroupRange(subKeyGroupRange1.getEndKeyGroup() + 1, maxParallelism - 1);

    // get two different keys, one per sub range
    int key1 = getKeyInKeyGroupRange(subKeyGroupRange1, maxParallelism);
    int key2 = getKeyInKeyGroupRange(subKeyGroupRange2, maxParallelism);

    // first of the two "parallel" instances (subtask index 0)
    TestOperator testOperator1 = new TestOperator();
    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness1 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator1,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            2, /* num subtasks */
            0 /* subtask index */);

    testHarness1.setup();
    testHarness1.open();

    testHarness1.processWatermark(0L);
    testHarness1.setProcessingTime(0L);

    // second of the two "parallel" instances (subtask index 1)
    TestOperator testOperator2 = new TestOperator();
    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness2 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator2,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            2, /* num subtasks */
            1 /* subtask index */);

    testHarness2.setup();
    testHarness2.open();

    testHarness2.processWatermark(0L);
    testHarness2.setProcessingTime(0L);

    // register some state with both instances and scale down to parallelism 1
    testHarness1.processElement(new Tuple2<>(key1, "SET_EVENT_TIME_TIMER:30"), 0);
    testHarness1.processElement(new Tuple2<>(key1, "SET_PROC_TIME_TIMER:30"), 0);
    testHarness1.processElement(new Tuple2<>(key1, "SET_STATE:HELLO"), 0);

    testHarness2.processElement(new Tuple2<>(key2, "SET_EVENT_TIME_TIMER:40"), 0);
    testHarness2.processElement(new Tuple2<>(key2, "SET_PROC_TIME_TIMER:40"), 0);
    testHarness2.processElement(new Tuple2<>(key2, "SET_STATE:CIAO"), 0);

    // take a snapshot from each one of the "parallel" instances of the operator
    // and combine them into one so that we can scale down
    OperatorStateHandles snapshot1 = testHarness1.snapshot(0, 0);
    OperatorStateHandles snapshot2 = testHarness2.snapshot(0, 0);
    OperatorStateHandles repackagedState = AbstractStreamOperatorTestHarness.repackageState(snapshot1, snapshot2);

    // both snapshots have been taken, the original instances are no longer needed
    testHarness1.close();
    testHarness2.close();

    // now, for the third operator that scales down from parallelism of 2 to 1
    TestOperator testOperator3 = new TestOperator();
    KeyedOneInputStreamOperatorTestHarness<Integer, Tuple2<Integer, String>, String> testHarness3 =
        new KeyedOneInputStreamOperatorTestHarness<>(
            testOperator3,
            new TestKeySelector(),
            BasicTypeInfo.INT_TYPE_INFO,
            maxParallelism,
            1, /* num subtasks */
            0 /* subtask index */);

    testHarness3.setup();
    testHarness3.initializeState(repackagedState);
    testHarness3.open();

    // the single restored instance must now own the timers and state of both keys
    testHarness3.processWatermark(30L);
    assertThat(extractResult(testHarness3), contains("ON_EVENT_TIME:HELLO"));
    assertTrue(extractResult(testHarness3).isEmpty());

    testHarness3.processWatermark(40L);
    assertThat(extractResult(testHarness3), contains("ON_EVENT_TIME:CIAO"));
    assertTrue(extractResult(testHarness3).isEmpty());

    testHarness3.setProcessingTime(30L);
    assertThat(extractResult(testHarness3), contains("ON_PROC_TIME:HELLO"));
    assertTrue(extractResult(testHarness3).isEmpty());

    testHarness3.setProcessingTime(40L);
    assertThat(extractResult(testHarness3), contains("ON_PROC_TIME:CIAO"));
    assertTrue(extractResult(testHarness3).isEmpty());

    testHarness3.close();
}
use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class KeyedCoProcessOperatorTest method testSnapshotAndRestore.
/**
 * Feeds one element into each input of a {@link KeyedCoProcessOperator}, snapshots it,
 * and restores the snapshot into a brand-new operator and harness. After advancing the
 * processing time to 5 and both watermarks to 6, the restored instance is expected to emit
 * "PROC:1777" (timestamp 5), "EVENT:1777" (timestamp 6) and the watermark 6, in that order.
 *
 * <p>NOTE(review): the emitted records presumably stem from timers registered by
 * {@code BothTriggeringProcessFunction}, which is defined outside this method - confirm there.
 */
@Test
public void testSnapshotAndRestore() throws Exception {
    // ---- first incarnation: process the input and take the snapshot ----
    KeyedCoProcessOperator<String, Integer, String, String> originalOperator =
        new KeyedCoProcessOperator<>(new BothTriggeringProcessFunction());

    TwoInputStreamOperatorTestHarness<Integer, String, String> originalHarness =
        new KeyedTwoInputStreamOperatorTestHarness<>(
            originalOperator,
            new IntToStringKeySelector<>(),
            new IdentityKeySelector<String>(),
            BasicTypeInfo.STRING_TYPE_INFO);

    originalHarness.setup();
    originalHarness.open();

    originalHarness.processElement1(new StreamRecord<>(5, 12L));
    originalHarness.processElement2(new StreamRecord<>("5", 12L));

    OperatorStateHandles checkpoint = originalHarness.snapshot(0, 0);
    originalHarness.close();

    // ---- second incarnation: start from scratch using only the snapshot ----
    KeyedCoProcessOperator<String, Integer, String, String> restoredOperator =
        new KeyedCoProcessOperator<>(new BothTriggeringProcessFunction());

    TwoInputStreamOperatorTestHarness<Integer, String, String> restoredHarness =
        new KeyedTwoInputStreamOperatorTestHarness<>(
            restoredOperator,
            new IntToStringKeySelector<>(),
            new IdentityKeySelector<String>(),
            BasicTypeInfo.STRING_TYPE_INFO);

    restoredHarness.setup();
    restoredHarness.initializeState(checkpoint);
    restoredHarness.open();

    restoredHarness.setProcessingTime(5);
    restoredHarness.processWatermark1(new Watermark(6));
    restoredHarness.processWatermark2(new Watermark(6));

    // ---- verify what the restored instance produced ----
    ConcurrentLinkedQueue<Object> expected = new ConcurrentLinkedQueue<>();
    expected.add(new StreamRecord<>("PROC:1777", 5L));
    expected.add(new StreamRecord<>("EVENT:1777", 6L));
    expected.add(new Watermark(6));

    TestHarnessUtil.assertOutputEquals("Output was not correct.", expected, restoredHarness.getOutput());

    restoredHarness.close();
}
use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class AbstractStreamOperatorTestHarness method repackageState.
/**
 * Merges several {@link OperatorStateHandles}, each obtained via {@link #snapshot(long, long)}
 * from a separate {@link AbstractStreamOperatorTestHarness} instance (one per subtask), into
 * one {@link OperatorStateHandles}. The merged result can then be used to restore a test with
 * a different parallelism, i.e. to scale either up or down.
 *
 * <p>
 * Pass the merged state to {@link #initializeState(OperatorStateHandles)} of a new harness
 * instance. Note that for a parallelism greater than one the harness has to be created with
 * the constructor {@link #AbstractStreamOperatorTestHarness(StreamOperator, int, int, int)}.
 *
 * <p>
 * <b>NOTE: </b> every element of {@code handles} is assumed to originate from a single task of a
 * single operator (i.e. chain length of one).
 *
 * <p>
 * A usage example can be found in
 * {@link AbstractStreamOperatorTest#testStateAndTimerStateShufflingScalingDown()}.
 *
 * @param handles the partial states to merge.
 * @return {@code null} when no partial state is given, the sole element itself when exactly one
 *         is given, and otherwise a new instance holding the concatenation of all partial states.
 */
public static OperatorStateHandles repackageState(OperatorStateHandles... handles) throws Exception {
    final int partCount = handles.length;

    // trivial cases: nothing to merge
    if (partCount == 0) {
        return null;
    }
    if (partCount == 1) {
        return handles[0];
    }

    List<OperatorStateHandle> allManagedOperatorState = new ArrayList<>(partCount);
    List<OperatorStateHandle> allRawOperatorState = new ArrayList<>(partCount);
    List<KeyGroupsStateHandle> allManagedKeyedState = new ArrayList<>(partCount);
    List<KeyGroupsStateHandle> allRawKeyedState = new ArrayList<>(partCount);

    for (int i = 0; i < partCount; i++) {
        OperatorStateHandles part = handles[i];

        // any of the four state collections may be absent (null) and is then skipped
        Collection<OperatorStateHandle> managedOperatorPart = part.getManagedOperatorState();
        if (managedOperatorPart != null) {
            allManagedOperatorState.addAll(managedOperatorPart);
        }

        Collection<OperatorStateHandle> rawOperatorPart = part.getRawOperatorState();
        if (rawOperatorPart != null) {
            allRawOperatorState.addAll(rawOperatorPart);
        }

        Collection<KeyGroupsStateHandle> managedKeyedPart = part.getManagedKeyedState();
        if (managedKeyedPart != null) {
            allManagedKeyedState.addAll(managedKeyedPart);
        }

        Collection<KeyGroupsStateHandle> rawKeyedPart = part.getRawKeyedState();
        if (rawKeyedPart != null) {
            allRawKeyedState.addAll(rawKeyedPart);
        }
    }

    // NOTE(review): the leading 0 and null are presumably the chain index and the legacy
    // (non-partitionable) state - confirm against the OperatorStateHandles constructor.
    return new OperatorStateHandles(
        0,
        null,
        allManagedKeyedState,
        allRawKeyedState,
        allManagedOperatorState,
        allRawOperatorState);
}
use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class WindowOperatorTest method testSessionWindowsWithProcessFunction.
/**
 * Runs a {@link WindowOperator} configured with event-time session windows (gap of 3 seconds)
 * and a process window function over out-of-order input for two keys, snapshotting and
 * restoring the operator in the middle of the input. The complete output is then compared,
 * in sorted form, against the expected session results and watermarks.
 */
@Test
@SuppressWarnings("unchecked")
public void testSessionWindowsWithProcessFunction() throws Exception {
    closeCalled.set(0);

    final int sessionGapSeconds = 3;

    TypeInformation<Tuple2<String, Integer>> elementType = TypeInfoParser.parse("Tuple2<String, Integer>");

    ListStateDescriptor<Tuple2<String, Integer>> windowContents =
        new ListStateDescriptor<>("window-contents", elementType.createSerializer(new ExecutionConfig()));

    WindowOperator<String, Tuple2<String, Integer>, Iterable<Tuple2<String, Integer>>, Tuple3<String, Long, Long>, TimeWindow> windowOperator =
        new WindowOperator<>(
            EventTimeSessionWindows.withGap(Time.seconds(sessionGapSeconds)),
            new TimeWindow.Serializer(),
            new TupleKeySelector(),
            BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
            windowContents,
            new InternalIterableProcessWindowFunction<>(new SessionProcessWindowFunction()),
            EventTimeTrigger.create(),
            0,
            null);

    OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Tuple3<String, Long, Long>> harness =
        new KeyedOneInputStreamOperatorTestHarness<>(windowOperator, new TupleKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);

    harness.open();

    // first batch: elements arrive out of order with respect to their timestamps
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 0));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 2), 1000));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 3), 2500));

    harness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 10));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key1", 2), 1000));

    // snapshot the operator, shut it down and bring the same harness back up from the snapshot
    OperatorStateHandles checkpoint = harness.snapshot(0L, 0L);
    harness.close();
    harness.setup();
    harness.initializeState(checkpoint);
    harness.open();

    // second batch: completes the first sessions and starts a later session for key2
    harness.processElement(new StreamRecord<>(new Tuple2<>("key1", 3), 2500));

    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 4), 5501));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 5), 6000));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 5), 6000));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 6), 6050));

    harness.processWatermark(new Watermark(12000));

    // third batch: one more session for key2, closed by the final watermark
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 10), 15000));
    harness.processElement(new StreamRecord<>(new Tuple2<>("key2", 20), 15000));

    harness.processWatermark(new Watermark(17999));

    // expected results, listed in the order in which the watermarks release them
    ConcurrentLinkedQueue<Object> expected = new ConcurrentLinkedQueue<>();

    expected.add(new StreamRecord<>(new Tuple3<>("key1-6", 10L, 5500L), 5499));
    expected.add(new StreamRecord<>(new Tuple3<>("key2-6", 0L, 5500L), 5499));
    expected.add(new StreamRecord<>(new Tuple3<>("key2-20", 5501L, 9050L), 9049));
    expected.add(new Watermark(12000));

    expected.add(new StreamRecord<>(new Tuple3<>("key2-30", 15000L, 18000L), 17999));
    expected.add(new Watermark(17999));

    TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expected, harness.getOutput(), new Tuple3ResultSortComparator());

    harness.close();
}
Aggregations