Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class StateBackendTestBase, method testReducingState.
@Test
@SuppressWarnings("unchecked")
public void testReducingState() throws Exception {
    CheckpointStreamFactory streamFactory = createStreamFactory();
    SharedStateRegistry sharedStateRegistry = new SharedStateRegistryImpl();
    ReducingStateDescriptor<String> kvId =
            new ReducingStateDescriptor<>("id", new AppendingReduce(), String.class);
    TypeSerializer<Integer> keySerializer = IntSerializer.INSTANCE;
    TypeSerializer<VoidNamespace> namespaceSerializer = VoidNamespaceSerializer.INSTANCE;
    CheckpointableKeyedStateBackend<Integer> backend = createKeyedBackend(IntSerializer.INSTANCE);
    try {
        ReducingState<String> state =
                backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
        @SuppressWarnings("unchecked")
        InternalKvState<Integer, VoidNamespace, String> kvState =
                (InternalKvState<Integer, VoidNamespace, String>) state;
        // this is only available after the backend initialized the serializer
        TypeSerializer<String> valueSerializer = kvId.getSerializer();
        // some modifications to the state
        backend.setCurrentKey(1);
        assertNull(state.get());
        assertNull(getSerializedValue(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        state.add("1");
        backend.setCurrentKey(2);
        assertNull(state.get());
        assertNull(getSerializedValue(kvState, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        state.add("2");
        backend.setCurrentKey(1);
        assertEquals("1", state.get());
        assertEquals("1", getSerializedValue(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        // draw a snapshot
        KeyedStateHandle snapshot1 = runSnapshot(
                backend.snapshot(682375462378L, 2, streamFactory, CheckpointOptions.forCheckpointWithDefaultLocation()),
                sharedStateRegistry);
        // make some more modifications
        backend.setCurrentKey(1);
        state.add("u1");
        backend.setCurrentKey(2);
        state.add("u2");
        backend.setCurrentKey(3);
        state.add("u3");
        // draw another snapshot
        KeyedStateHandle snapshot2 = runSnapshot(
                backend.snapshot(682375462379L, 4, streamFactory, CheckpointOptions.forCheckpointWithDefaultLocation()),
                sharedStateRegistry);
        // validate the original state
        backend.setCurrentKey(1);
        assertEquals("1,u1", state.get());
        assertEquals("1,u1", getSerializedValue(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        backend.setCurrentKey(2);
        assertEquals("2,u2", state.get());
        assertEquals("2,u2", getSerializedValue(kvState, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        backend.setCurrentKey(3);
        assertEquals("u3", state.get());
        assertEquals("u3", getSerializedValue(kvState, 3, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        backend.dispose();
        // restore the first snapshot and validate it
        backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot1);
        snapshot1.discardState();
        ReducingState<String> restored1 =
                backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
        @SuppressWarnings("unchecked")
        InternalKvState<Integer, VoidNamespace, String> restoredKvState1 =
                (InternalKvState<Integer, VoidNamespace, String>) restored1;
        backend.setCurrentKey(1);
        assertEquals("1", restored1.get());
        assertEquals("1", getSerializedValue(restoredKvState1, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        backend.setCurrentKey(2);
        assertEquals("2", restored1.get());
        assertEquals("2", getSerializedValue(restoredKvState1, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        backend.dispose();
        // restore the second snapshot and validate it
        backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot2);
        snapshot2.discardState();
        ReducingState<String> restored2 =
                backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
        @SuppressWarnings("unchecked")
        InternalKvState<Integer, VoidNamespace, String> restoredKvState2 =
                (InternalKvState<Integer, VoidNamespace, String>) restored2;
        backend.setCurrentKey(1);
        assertEquals("1,u1", restored2.get());
        assertEquals("1,u1", getSerializedValue(restoredKvState2, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        backend.setCurrentKey(2);
        assertEquals("2,u2", restored2.get());
        assertEquals("2,u2", getSerializedValue(restoredKvState2, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        backend.setCurrentKey(3);
        assertEquals("u3", restored2.get());
        assertEquals("u3", getSerializedValue(restoredKvState2, 3, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
    } finally {
        IOUtils.closeQuietly(backend);
        backend.dispose();
    }
}
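The expected values above ("1,u1", "2,u2") show that the AppendingReduce helper, defined elsewhere in StateBackendTestBase, concatenates the accumulated value and the new value with a comma. A plausible reconstruction of that helper, hedged since its actual definition is not part of this snippet:

private static class AppendingReduce implements ReduceFunction<String> {
    @Override
    public String reduce(String value1, String value2) throws Exception {
        // matches the expected outputs above: "1" then "u1" -> "1,u1"
        return value1 + "," + value2;
    }
}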
Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class StateBackendTestBase, method testConcurrentMapIfQueryable.
/**
 * Tests that {@link AbstractHeapState} instances respect the queryable flag and create
 * concurrent variants for internal state structures.
 */
@SuppressWarnings("unchecked")
protected void testConcurrentMapIfQueryable() throws Exception {
    final int numberOfKeyGroups = 1;
    final CheckpointableKeyedStateBackend<Integer> backend =
            createKeyedBackend(IntSerializer.INSTANCE, numberOfKeyGroups, new KeyGroupRange(0, 0), new DummyEnvironment());
    try {
        {
            // ValueState
            ValueStateDescriptor<Integer> desc = new ValueStateDescriptor<>("value-state", Integer.class, -1);
            desc.setQueryable("my-query");
            ValueState<Integer> state =
                    backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, desc);
            InternalKvState<Integer, VoidNamespace, Integer> kvState =
                    (InternalKvState<Integer, VoidNamespace, Integer>) state;
            assertTrue(kvState instanceof AbstractHeapState);
            kvState.setCurrentNamespace(VoidNamespace.INSTANCE);
            backend.setCurrentKey(1);
            state.update(121818273);
            assertNotNull("State not set", ((AbstractHeapState<?, ?, ?>) kvState).getStateTable());
        }
        {
            // ListState
            ListStateDescriptor<Integer> desc = new ListStateDescriptor<>("list-state", Integer.class);
            desc.setQueryable("my-query");
            ListState<Integer> state =
                    backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, desc);
            InternalKvState<Integer, VoidNamespace, Integer> kvState =
                    (InternalKvState<Integer, VoidNamespace, Integer>) state;
            assertTrue(kvState instanceof AbstractHeapState);
            kvState.setCurrentNamespace(VoidNamespace.INSTANCE);
            backend.setCurrentKey(1);
            state.add(121818273);
            assertNotNull("State not set", ((AbstractHeapState<?, ?, ?>) kvState).getStateTable());
        }
        {
            // ReducingState
            ReducingStateDescriptor<Integer> desc = new ReducingStateDescriptor<>(
                    "reducing-state",
                    new ReduceFunction<Integer>() {
                        @Override
                        public Integer reduce(Integer value1, Integer value2) throws Exception {
                            return value1 + value2;
                        }
                    },
                    Integer.class);
            desc.setQueryable("my-query");
            ReducingState<Integer> state =
                    backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, desc);
            InternalKvState<Integer, VoidNamespace, Integer> kvState =
                    (InternalKvState<Integer, VoidNamespace, Integer>) state;
            assertTrue(kvState instanceof AbstractHeapState);
            kvState.setCurrentNamespace(VoidNamespace.INSTANCE);
            backend.setCurrentKey(1);
            state.add(121818273);
            assertNotNull("State not set", ((AbstractHeapState<?, ?, ?>) kvState).getStateTable());
        }
        {
            // MapState
            MapStateDescriptor<Integer, String> desc =
                    new MapStateDescriptor<>("map-state", Integer.class, String.class);
            desc.setQueryable("my-query");
            MapState<Integer, String> state =
                    backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, desc);
            InternalKvState<Integer, VoidNamespace, Map<Integer, String>> kvState =
                    (InternalKvState<Integer, VoidNamespace, Map<Integer, String>>) state;
            assertTrue(kvState instanceof AbstractHeapState);
            kvState.setCurrentNamespace(VoidNamespace.INSTANCE);
            backend.setCurrentKey(1);
            state.put(121818273, "121818273");
            int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(1, numberOfKeyGroups);
            StateTable stateTable = ((AbstractHeapState) kvState).getStateTable();
            assertNotNull("State not set", stateTable.get(keyGroupIndex));
        }
    } finally {
        IOUtils.closeQuietly(backend);
        backend.dispose();
    }
}
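Once a descriptor has been flagged with setQueryable, the state can also be read from outside the job. The following client-side sketch is illustrative only and not part of the test; it assumes Flink's queryable-state client API, with the proxy address and jobId as placeholders:

// Hypothetical client lookup of the "my-query" ReducingState registered above.
QueryableStateClient client = new QueryableStateClient("localhost", 9069); // placeholder proxy host/port
ReducingStateDescriptor<Integer> queryDesc =
        new ReducingStateDescriptor<>("reducing-state", (a, b) -> a + b, Integer.class);
CompletableFuture<ReducingState<Integer>> resultFuture =
        client.getKvState(jobId, "my-query", 1, BasicTypeInfo.INT_TYPE_INFO, queryDesc); // jobId: the running job's JobID (assumed available)
Integer reduced = resultFuture.get().get(); // blocks; the current reduced value for key 1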
Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class AllWindowedStream, method reduce.
/**
 * Applies the given window function to each window. The window function is called for each
 * evaluation of the window; since this is a non-keyed (all-windowed) stream, the operator
 * runs with parallelism one. The output of the window function is interpreted as a regular
 * non-windowed stream.
 *
 * <p>Arriving data is incrementally aggregated using the given reducer.
 *
 * @param reduceFunction The reduce function that is used for incremental aggregation.
 * @param function The window function.
 * @param resultType Type information for the result type of the window function.
 * @return The data stream that is the result of applying the window function to the window.
 */
@PublicEvolving
public <R> SingleOutputStreamOperator<R> reduce(
        ReduceFunction<T> reduceFunction, AllWindowFunction<T, R, W> function, TypeInformation<R> resultType) {
    if (reduceFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("ReduceFunction of reduce can not be a RichFunction.");
    }
    // clean the closures
    function = input.getExecutionEnvironment().clean(function);
    reduceFunction = input.getExecutionEnvironment().clean(reduceFunction);
    String callLocation = Utils.getCallLocationName();
    String udfName = "AllWindowedStream." + callLocation;
    String opName = windowAssigner.getClass().getSimpleName();
    String opDescription;
    KeySelector<T, Byte> keySel = input.getKeySelector();
    OneInputStreamOperator<T, R> operator;
    if (evictor != null) {
        @SuppressWarnings({"unchecked", "rawtypes"})
        TypeSerializer<StreamRecord<T>> streamRecordSerializer =
                (TypeSerializer<StreamRecord<T>>)
                        new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> stateDesc =
                new ListStateDescriptor<>("window-contents", streamRecordSerializer);
        opDescription =
                "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
        operator =
                new EvictingWindowOperator<>(
                        windowAssigner,
                        windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                        keySel,
                        input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
                        stateDesc,
                        new InternalIterableAllWindowFunction<>(
                                new ReduceApplyAllWindowFunction<>(reduceFunction, function)),
                        trigger,
                        evictor,
                        allowedLateness,
                        lateDataOutputTag);
    } else {
        ReducingStateDescriptor<T> stateDesc =
                new ReducingStateDescriptor<>(
                        "window-contents",
                        reduceFunction,
                        input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        opDescription = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
        operator =
                new WindowOperator<>(
                        windowAssigner,
                        windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                        keySel,
                        input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
                        stateDesc,
                        new InternalSingleValueAllWindowFunction<>(function),
                        trigger,
                        allowedLateness,
                        lateDataOutputTag);
    }
    return input.transform(opName, resultType, operator).setDescription(opDescription).forceNonParallel();
}
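A minimal user-facing sketch of this method (the environment, stream, and values are illustrative, not from the source above): the lambda performs the incremental pre-aggregation, and the AllWindowFunction then sees exactly one pre-reduced element per window:

DataStream<Integer> input = env.fromElements(1, 2, 3, 4, 5); // illustrative source
input
        .windowAll(TumblingEventTimeWindows.of(Time.seconds(5)))
        .reduce(
                (a, b) -> a + b, // incremental aggregation; must not be a RichFunction
                new AllWindowFunction<Integer, Tuple2<Long, Integer>, TimeWindow>() {
                    @Override
                    public void apply(TimeWindow window, Iterable<Integer> values,
                            Collector<Tuple2<Long, Integer>> out) {
                        // the iterable holds exactly one element: the pre-reduced result
                        out.collect(Tuple2.of(window.getEnd(), values.iterator().next()));
                    }
                },
                TypeInformation.of(new TypeHint<Tuple2<Long, Integer>>() {}));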
Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class WindowOperatorTest, method testSideOutputDueToLatenessSliding.
@Test
public void testSideOutputDueToLatenessSliding() throws Exception {
    final int windowSize = 3;
    final int windowSlide = 1;
    final long lateness = 0;
    ReducingStateDescriptor<Tuple2<String, Integer>> stateDesc =
            new ReducingStateDescriptor<>(
                    "window-contents", new SumReducer(), STRING_INT_TUPLE.createSerializer(new ExecutionConfig()));
    WindowOperator<String, Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>, TimeWindow> operator =
            new WindowOperator<>(
                    SlidingEventTimeWindows.of(Time.of(windowSize, TimeUnit.SECONDS), Time.of(windowSlide, TimeUnit.SECONDS)),
                    new TimeWindow.Serializer(),
                    new TupleKeySelector(),
                    BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
                    stateDesc,
                    new InternalSingleValueWindowFunction<>(
                            new PassThroughWindowFunction<String, TimeWindow, Tuple2<String, Integer>>()),
                    EventTimeTrigger.create(),
                    lateness,
                    lateOutputTag);
    OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Tuple2<String, Integer>> testHarness =
            createTestHarness(operator);
    testHarness.open();
    ConcurrentLinkedQueue<Object> expected = new ConcurrentLinkedQueue<>();
    ConcurrentLinkedQueue<Object> sideExpected = new ConcurrentLinkedQueue<>();
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1000));
    testHarness.processWatermark(new Watermark(1999));
    expected.add(new StreamRecord<>(new Tuple2<>("key2", 1), 1999));
    expected.add(new Watermark(1999));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 2000));
    testHarness.processWatermark(new Watermark(3000));
    expected.add(new StreamRecord<>(new Tuple2<>("key2", 2), 2999));
    expected.add(new Watermark(3000));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 3001));
    // Lateness is 0, the window size is 3 seconds and the slide is 1 second, so the
    // following two elements (at timestamp 2400) are assigned to the windows ending at
    // 2999, 3999, and 4999. The window ending at 2999 is dropped because it is already
    // late (the watermark has advanced to 3000), but the other two are kept.
testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 2400));
testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 2400));
testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 3001));
testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 3900));
testHarness.processWatermark(new Watermark(6000));
expected.add(new StreamRecord<>(new Tuple2<>("key2", 5), 3999));
expected.add(new StreamRecord<>(new Tuple2<>("key1", 2), 3999));
expected.add(new StreamRecord<>(new Tuple2<>("key2", 4), 4999));
expected.add(new StreamRecord<>(new Tuple2<>("key1", 2), 4999));
expected.add(new StreamRecord<>(new Tuple2<>("key2", 1), 5999));
expected.add(new StreamRecord<>(new Tuple2<>("key1", 2), 5999));
expected.add(new Watermark(6000));
// sideoutput element due to lateness
testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 3001));
sideExpected.add(new StreamRecord<>(new Tuple2<>("key1", 1), 3001));
testHarness.processWatermark(new Watermark(25000));
expected.add(new Watermark(25000));
TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expected, testHarness.getOutput(), new Tuple2ResultSortComparator());
TestHarnessUtil.assertOutputEqualsSorted("SideOutput was not correct.", sideExpected, (Iterable) testHarness.getSideOutput(lateOutputTag), new Tuple2ResultSortComparator());
testHarness.close();
}
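This test and the next rely on SumReducer and TupleKeySelector helpers defined elsewhere in WindowOperatorTest. A plausible reconstruction, inferred from the expected output (counts are summed per String key):

private static class SumReducer implements ReduceFunction<Tuple2<String, Integer>> {
    @Override
    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) {
        // keep the key, sum the counts
        return new Tuple2<>(value2.f0, value1.f1 + value2.f1);
    }
}

private static class TupleKeySelector implements KeySelector<Tuple2<String, Integer>, String> {
    @Override
    public String getKey(Tuple2<String, Integer> value) {
        return value.f0; // key by the String field
    }
}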
Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class WindowOperatorTest, method testCountTrigger.
@Test
@SuppressWarnings("unchecked")
public void testCountTrigger() throws Exception {
    closeCalled.set(0);
    final int windowSize = 4;
    ReducingStateDescriptor<Tuple2<String, Integer>> stateDesc =
            new ReducingStateDescriptor<>(
                    "window-contents", new SumReducer(), STRING_INT_TUPLE.createSerializer(new ExecutionConfig()));
    WindowOperator<String, Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>, GlobalWindow> operator =
            new WindowOperator<>(
                    GlobalWindows.create(),
                    new GlobalWindow.Serializer(),
                    new TupleKeySelector(),
                    BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
                    stateDesc,
                    new InternalSingleValueWindowFunction<>(
                            new PassThroughWindowFunction<String, GlobalWindow, Tuple2<String, Integer>>()),
                    PurgingTrigger.of(CountTrigger.of(windowSize)),
                    0,
                    null);
    OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Tuple2<String, Integer>> testHarness =
            createTestHarness(operator);
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    testHarness.open();
    // The global window actually ignores these timestamps...
    // add elements out of order
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 3000));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 3999));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 20));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 0));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 999));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1998));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1999));
    // do a snapshot, close, and restore again
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0L);
    testHarness.close();
    ConcurrentLinkedQueue<Object> outputBeforeClose = testHarness.getOutput();
    stateDesc =
            new ReducingStateDescriptor<>(
                    "window-contents", new SumReducer(), STRING_INT_TUPLE.createSerializer(new ExecutionConfig()));
    operator =
            new WindowOperator<>(
                    GlobalWindows.create(),
                    new GlobalWindow.Serializer(),
                    new TupleKeySelector(),
                    BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
                    stateDesc,
                    new InternalSingleValueWindowFunction<>(
                            new PassThroughWindowFunction<String, GlobalWindow, Tuple2<String, Integer>>()),
                    PurgingTrigger.of(CountTrigger.of(windowSize)),
                    0,
                    null);
    testHarness = createTestHarness(operator);
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1000));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key2", 4), Long.MAX_VALUE));
    TestHarnessUtil.assertOutputEqualsSorted(
            "Output was not correct.",
            expectedOutput,
            Iterables.concat(outputBeforeClose, testHarness.getOutput()),
            new Tuple2ResultSortComparator());
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 10999));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1000));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1000));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1000));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key1", 4), Long.MAX_VALUE));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key2", 4), Long.MAX_VALUE));
    TestHarnessUtil.assertOutputEqualsSorted(
            "Output was not correct.",
            expectedOutput,
            Iterables.concat(outputBeforeClose, testHarness.getOutput()),
            new Tuple2ResultSortComparator());
    testHarness.close();
}
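At the DataStream API level, the operator wiring exercised above corresponds to the following pattern. This is a hedged sketch with an assumed keyedStream variable, not code from the test:

// Fire and purge every four elements per key; the global window ignores timestamps.
keyedStream
        .window(GlobalWindows.create())
        .trigger(PurgingTrigger.of(CountTrigger.of(4)))
        .reduce(new SumReducer());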