use of org.apache.flink.api.common.state.FoldingStateDescriptor in project flink by apache.
the class StateBackendTestBase method testFoldingState.
@Test
@SuppressWarnings("unchecked,rawtypes")
public void testFoldingState() {
try {
CheckpointStreamFactory streamFactory = createStreamFactory();
AbstractKeyedStateBackend<Integer> backend = createKeyedBackend(IntSerializer.INSTANCE);
FoldingStateDescriptor<Integer, String> kvId = new FoldingStateDescriptor<>("id", "Fold-Initial:", new AppendingFold(), String.class);
kvId.initializeSerializerUnlessSet(new ExecutionConfig());
TypeSerializer<Integer> keySerializer = IntSerializer.INSTANCE;
TypeSerializer<VoidNamespace> namespaceSerializer = VoidNamespaceSerializer.INSTANCE;
TypeSerializer<String> valueSerializer = kvId.getSerializer();
FoldingState<Integer, String> state = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
@SuppressWarnings("unchecked") InternalKvState<VoidNamespace> kvState = (InternalKvState<VoidNamespace>) state;
// some modifications to the state
backend.setCurrentKey(1);
assertEquals(null, state.get());
assertEquals(null, getSerializedValue(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
state.add(1);
backend.setCurrentKey(2);
assertEquals(null, state.get());
assertEquals(null, getSerializedValue(kvState, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
state.add(2);
backend.setCurrentKey(1);
assertEquals("Fold-Initial:,1", state.get());
assertEquals("Fold-Initial:,1", getSerializedValue(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
// draw a snapshot
KeyGroupsStateHandle snapshot1 = FutureUtil.runIfNotDoneAndGet(backend.snapshot(682375462378L, 2, streamFactory, CheckpointOptions.forFullCheckpoint()));
// make some more modifications
backend.setCurrentKey(1);
state.clear();
state.add(101);
backend.setCurrentKey(2);
state.add(102);
backend.setCurrentKey(3);
state.add(103);
// draw another snapshot
KeyGroupsStateHandle snapshot2 = FutureUtil.runIfNotDoneAndGet(backend.snapshot(682375462379L, 4, streamFactory, CheckpointOptions.forFullCheckpoint()));
// validate the original state
backend.setCurrentKey(1);
assertEquals("Fold-Initial:,101", state.get());
assertEquals("Fold-Initial:,101", getSerializedValue(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
backend.setCurrentKey(2);
assertEquals("Fold-Initial:,2,102", state.get());
assertEquals("Fold-Initial:,2,102", getSerializedValue(kvState, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
backend.setCurrentKey(3);
assertEquals("Fold-Initial:,103", state.get());
assertEquals("Fold-Initial:,103", getSerializedValue(kvState, 3, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
backend.dispose();
// restore the first snapshot and validate it
backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot1);
snapshot1.discardState();
FoldingState<Integer, String> restored1 = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
@SuppressWarnings("unchecked") InternalKvState<VoidNamespace> restoredKvState1 = (InternalKvState<VoidNamespace>) restored1;
backend.setCurrentKey(1);
assertEquals("Fold-Initial:,1", restored1.get());
assertEquals("Fold-Initial:,1", getSerializedValue(restoredKvState1, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
backend.setCurrentKey(2);
assertEquals("Fold-Initial:,2", restored1.get());
assertEquals("Fold-Initial:,2", getSerializedValue(restoredKvState1, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
backend.dispose();
// restore the second snapshot and validate it
backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot2);
snapshot1.discardState();
@SuppressWarnings("unchecked") FoldingState<Integer, String> restored2 = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
@SuppressWarnings("unchecked") InternalKvState<VoidNamespace> restoredKvState2 = (InternalKvState<VoidNamespace>) restored2;
backend.setCurrentKey(1);
assertEquals("Fold-Initial:,101", restored2.get());
assertEquals("Fold-Initial:,101", getSerializedValue(restoredKvState2, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
backend.setCurrentKey(2);
assertEquals("Fold-Initial:,2,102", restored2.get());
assertEquals("Fold-Initial:,2,102", getSerializedValue(restoredKvState2, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
backend.setCurrentKey(3);
assertEquals("Fold-Initial:,103", restored2.get());
assertEquals("Fold-Initial:,103", getSerializedValue(restoredKvState2, 3, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
backend.dispose();
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.api.common.state.FoldingStateDescriptor in project flink by apache.
the class WindowedStream method apply.
/**
* Applies the given window function to each window. The window function is called for each
* evaluation of the window for each key individually. The output of the window function is
* interpreted as a regular non-windowed stream.
*
* <p>
* Arriving data is incrementally aggregated using the given fold function.
*
* @param initialValue The initial value of the fold.
* @param foldFunction The fold function that is used for incremental aggregation.
* @param function The window function.
* @param resultType Type information for the result type of the window function
* @return The data stream that is the result of applying the window function to the window.
*
* @deprecated Use {@link #fold(Object, FoldFunction, WindowFunction, TypeInformation, TypeInformation)} instead.
*/
@Deprecated
public <R> SingleOutputStreamOperator<R> apply(R initialValue, FoldFunction<T, R> foldFunction, WindowFunction<R, R, K, W> function, TypeInformation<R> resultType) {
if (foldFunction instanceof RichFunction) {
throw new UnsupportedOperationException("FoldFunction of apply can not be a RichFunction.");
}
if (windowAssigner instanceof MergingWindowAssigner) {
throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
}
//clean the closures
function = input.getExecutionEnvironment().clean(function);
foldFunction = input.getExecutionEnvironment().clean(foldFunction);
String callLocation = Utils.getCallLocationName();
String udfName = "WindowedStream." + callLocation;
String opName;
KeySelector<T, K> keySel = input.getKeySelector();
OneInputStreamOperator<T, R> operator;
if (evictor != null) {
@SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableWindowFunction<>(new FoldApplyWindowFunction<>(initialValue, foldFunction, function, resultType)), trigger, evictor, allowedLateness, lateDataOutputTag);
} else {
FoldingStateDescriptor<T, R> stateDesc = new FoldingStateDescriptor<>("window-contents", initialValue, foldFunction, resultType.createSerializer(getExecutionEnvironment().getConfig()));
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueWindowFunction<>(function), trigger, allowedLateness, lateDataOutputTag);
}
return input.transform(opName, resultType, operator);
}
use of org.apache.flink.api.common.state.FoldingStateDescriptor in project flink by apache.
the class WindowedStream method fold.
/**
* Applies the given window function to each window. The window function is called for each
* evaluation of the window for each key individually. The output of the window function is
* interpreted as a regular non-windowed stream.
*
* <p>Arriving data is incrementally aggregated using the given fold function.
*
* @param initialValue the initial value to be passed to the first invocation of the fold function
* @param foldFunction The fold function.
* @param foldResultType The result type of the fold function.
* @param windowFunction The process window function.
* @param windowResultType The process window function result type.
* @return The data stream that is the result of applying the fold function to the window.
*/
@Internal
public <R, ACC> SingleOutputStreamOperator<R> fold(ACC initialValue, FoldFunction<T, ACC> foldFunction, ProcessWindowFunction<ACC, R, K, W> windowFunction, TypeInformation<ACC> foldResultType, TypeInformation<R> windowResultType) {
if (foldFunction instanceof RichFunction) {
throw new UnsupportedOperationException("FoldFunction can not be a RichFunction.");
}
if (windowAssigner instanceof MergingWindowAssigner) {
throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
}
//clean the closures
windowFunction = input.getExecutionEnvironment().clean(windowFunction);
foldFunction = input.getExecutionEnvironment().clean(foldFunction);
String callLocation = Utils.getCallLocationName();
String udfName = "WindowedStream." + callLocation;
String opName;
KeySelector<T, K> keySel = input.getKeySelector();
OneInputStreamOperator<T, R> operator;
if (evictor != null) {
@SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableProcessWindowFunction<>(new FoldApplyProcessWindowFunction<>(initialValue, foldFunction, windowFunction, foldResultType)), trigger, evictor, allowedLateness, lateDataOutputTag);
} else {
FoldingStateDescriptor<T, ACC> stateDesc = new FoldingStateDescriptor<>("window-contents", initialValue, foldFunction, foldResultType.createSerializer(getExecutionEnvironment().getConfig()));
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueProcessWindowFunction<>(windowFunction), trigger, allowedLateness, lateDataOutputTag);
}
return input.transform(opName, windowResultType, operator);
}
use of org.apache.flink.api.common.state.FoldingStateDescriptor in project flink by apache.
the class AllWindowedStream method fold.
/**
* Applies the given window function to each window. The window function is called for each
* evaluation of the window for each key individually. The output of the window function is
* interpreted as a regular non-windowed stream.
*
* <p>
* Arriving data is incrementally aggregated using the given fold function.
*
* @param initialValue The initial value of the fold.
* @param foldFunction The fold function that is used for incremental aggregation.
* @param function The window function.
* @param foldAccumulatorType Type information for the result type of the fold function
* @param resultType Type information for the result type of the window function
* @return The data stream that is the result of applying the window function to the window.
*/
@PublicEvolving
public <ACC, R> SingleOutputStreamOperator<R> fold(ACC initialValue, FoldFunction<T, ACC> foldFunction, AllWindowFunction<ACC, R, W> function, TypeInformation<ACC> foldAccumulatorType, TypeInformation<R> resultType) {
if (foldFunction instanceof RichFunction) {
throw new UnsupportedOperationException("FoldFunction of fold can not be a RichFunction.");
}
if (windowAssigner instanceof MergingWindowAssigner) {
throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
}
//clean the closures
function = input.getExecutionEnvironment().clean(function);
foldFunction = input.getExecutionEnvironment().clean(foldFunction);
String callLocation = Utils.getCallLocationName();
String udfName = "AllWindowedStream." + callLocation;
String opName;
KeySelector<T, Byte> keySel = input.getKeySelector();
OneInputStreamOperator<T, R> operator;
if (evictor != null) {
@SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableAllWindowFunction<>(new FoldApplyAllWindowFunction<>(initialValue, foldFunction, function, foldAccumulatorType)), trigger, evictor, allowedLateness, lateDataOutputTag);
} else {
FoldingStateDescriptor<T, ACC> stateDesc = new FoldingStateDescriptor<>("window-contents", initialValue, foldFunction, foldAccumulatorType.createSerializer(getExecutionEnvironment().getConfig()));
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueAllWindowFunction<>(function), trigger, allowedLateness, lateDataOutputTag);
}
return input.transform(opName, resultType, operator).forceNonParallel();
}
use of org.apache.flink.api.common.state.FoldingStateDescriptor in project flink by apache.
the class WindowedStream method fold.
/**
* Applies the given window function to each window. The window function is called for each
* evaluation of the window for each key individually. The output of the window function is
* interpreted as a regular non-windowed stream.
*
* <p>
* Arriving data is incrementally aggregated using the given fold function.
*
* @param initialValue The initial value of the fold.
* @param foldFunction The fold function that is used for incremental aggregation.
* @param function The window function.
* @param foldAccumulatorType Type information for the result type of the fold function
* @param resultType Type information for the result type of the window function
* @return The data stream that is the result of applying the window function to the window.
*/
@PublicEvolving
public <ACC, R> SingleOutputStreamOperator<R> fold(ACC initialValue, FoldFunction<T, ACC> foldFunction, WindowFunction<ACC, R, K, W> function, TypeInformation<ACC> foldAccumulatorType, TypeInformation<R> resultType) {
if (foldFunction instanceof RichFunction) {
throw new UnsupportedOperationException("FoldFunction of fold can not be a RichFunction.");
}
if (windowAssigner instanceof MergingWindowAssigner) {
throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
}
if (windowAssigner instanceof BaseAlignedWindowAssigner) {
throw new UnsupportedOperationException("Fold cannot be used with a " + windowAssigner.getClass().getSimpleName() + " assigner.");
}
//clean the closures
function = input.getExecutionEnvironment().clean(function);
foldFunction = input.getExecutionEnvironment().clean(foldFunction);
String callLocation = Utils.getCallLocationName();
String udfName = "WindowedStream." + callLocation;
String opName;
KeySelector<T, K> keySel = input.getKeySelector();
OneInputStreamOperator<T, R> operator;
if (evictor != null) {
@SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableWindowFunction<>(new FoldApplyWindowFunction<>(initialValue, foldFunction, function, foldAccumulatorType)), trigger, evictor, allowedLateness, lateDataOutputTag);
} else {
FoldingStateDescriptor<T, ACC> stateDesc = new FoldingStateDescriptor<>("window-contents", initialValue, foldFunction, foldAccumulatorType.createSerializer(getExecutionEnvironment().getConfig()));
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueWindowFunction<>(function), trigger, allowedLateness, lateDataOutputTag);
}
return input.transform(opName, resultType, operator);
}
Aggregations