use of org.apache.flink.api.common.state.MapState in project flink by apache.
the class SimpleStateRequestHandler method getMapState.
private MapState<ByteArrayWrapper, byte[]> getMapState(BeamFnApi.StateRequest request) throws Exception {
BeamFnApi.StateKey.MultimapSideInput mapUserState = request.getStateKey().getMultimapSideInput();
byte[] data = Base64.getDecoder().decode(mapUserState.getSideInputId());
FlinkFnApi.StateDescriptor stateDescriptor = FlinkFnApi.StateDescriptor.parseFrom(data);
String stateName = PYTHON_STATE_PREFIX + stateDescriptor.getStateName();
StateDescriptor cachedStateDescriptor = stateDescriptorCache.get(stateName);
MapStateDescriptor<ByteArrayWrapper, byte[]> mapStateDescriptor;
if (cachedStateDescriptor instanceof MapStateDescriptor) {
mapStateDescriptor = (MapStateDescriptor<ByteArrayWrapper, byte[]>) cachedStateDescriptor;
} else if (cachedStateDescriptor == null) {
mapStateDescriptor = new MapStateDescriptor<>(stateName, ByteArrayWrapperSerializer.INSTANCE, valueSerializer);
if (stateDescriptor.hasStateTtlConfig()) {
FlinkFnApi.StateDescriptor.StateTTLConfig stateTtlConfigProto = stateDescriptor.getStateTtlConfig();
StateTtlConfig stateTtlConfig = ProtoUtils.parseStateTtlConfigFromProto(stateTtlConfigProto);
mapStateDescriptor.enableTimeToLive(stateTtlConfig);
}
stateDescriptorCache.put(stateName, mapStateDescriptor);
} else {
throw new RuntimeException(String.format("State name corrupt detected: " + "'%s' is used both as MAP state and '%s' state at the same time.", stateName, cachedStateDescriptor.getType()));
}
byte[] windowBytes = mapUserState.getWindow().toByteArray();
if (windowBytes.length != 0) {
bais.setBuffer(windowBytes, 0, windowBytes.length);
Object namespace = namespaceSerializer.deserialize(baisWrapper);
return (MapState<ByteArrayWrapper, byte[]>) keyedStateBackend.getPartitionedState(namespace, namespaceSerializer, mapStateDescriptor);
} else {
return (MapState<ByteArrayWrapper, byte[]>) keyedStateBackend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, mapStateDescriptor);
}
}
use of org.apache.flink.api.common.state.MapState in project flink by apache.
the class SortingBoundedInputITCase method testBatchExecutionWithTimersOneInput.
@Test
public void testBatchExecutionWithTimersOneInput() {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// set parallelism to 1 to have consistent order of results
env.setParallelism(1);
Configuration config = new Configuration();
config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
env.configure(config, this.getClass().getClassLoader());
WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP).withTimestampAssigner((r, previousTimestamp) -> r.f1);
SingleOutputStreamOperator<Tuple2<Integer, Integer>> elements = env.fromElements(Tuple2.of(1, 3), Tuple2.of(1, 1), Tuple2.of(2, 1), Tuple2.of(1, 4), // late element
Tuple2.of(2, 3), // late element
Tuple2.of(1, 2), Tuple2.of(1, 13), Tuple2.of(1, 11), Tuple2.of(2, 14), // late element
Tuple2.of(1, 11)).assignTimestampsAndWatermarks(watermarkStrategy);
OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements.map(element -> element.f0).keyBy(element -> element).process(new KeyedProcessFunction<Integer, Integer, Tuple3<Long, Integer, Integer>>() {
private MapState<Long, Integer> countState;
private ValueState<Long> previousTimestampState;
@Override
public void open(Configuration parameters) {
countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
}
@Override
public void processElement(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
Long elementTimestamp = ctx.timestamp();
long nextTen = ((elementTimestamp + 10) / 10) * 10;
ctx.timerService().registerEventTimeTimer(nextTen);
if (elementTimestamp < ctx.timerService().currentWatermark()) {
ctx.output(lateElements, value);
} else {
Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
previousTimestampState.update(elementTimestamp);
Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
countState.put(nextTen, currentCount + 1);
}
}
@Override
public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
countState.remove(timestamp);
// this would go in infinite loop if we did not quiesce the
// timer service.
ctx.timerService().registerEventTimeTimer(timestamp + 1);
}
});
DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
assertTrue(lateRecordsCollected.isEmpty());
assertThat(sumsCollected, equalTo(Arrays.asList(Tuple3.of(10L, 1, 4), Tuple3.of(20L, 1, 3), Tuple3.of(10L, 2, 2), Tuple3.of(20L, 2, 1))));
}
use of org.apache.flink.api.common.state.MapState in project flink by apache.
the class SortingBoundedInputITCase method testBatchExecutionWithTimersTwoInput.
@Test
public void testBatchExecutionWithTimersTwoInput() {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// set parallelism to 1 to have consistent order of results
env.setParallelism(1);
Configuration config = new Configuration();
config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
env.configure(config, this.getClass().getClassLoader());
WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP).withTimestampAssigner((r, previousTimestamp) -> r.f1);
SingleOutputStreamOperator<Integer> elements1 = env.fromElements(Tuple2.of(1, 3), Tuple2.of(1, 1), Tuple2.of(2, 1), Tuple2.of(1, 4), // late element
Tuple2.of(2, 3), // late element
Tuple2.of(1, 2), Tuple2.of(1, 13), Tuple2.of(1, 11), Tuple2.of(2, 14), // late element
Tuple2.of(1, 11)).assignTimestampsAndWatermarks(watermarkStrategy).map(element -> element.f0);
SingleOutputStreamOperator<Integer> elements2 = env.fromElements(Tuple2.of(1, 3), Tuple2.of(1, 1), Tuple2.of(2, 1), Tuple2.of(1, 4), // late element
Tuple2.of(2, 3), // late element
Tuple2.of(1, 2), Tuple2.of(1, 13), Tuple2.of(1, 11), Tuple2.of(2, 14), // late element
Tuple2.of(1, 11)).assignTimestampsAndWatermarks(watermarkStrategy).map(element -> element.f0);
OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements1.connect(elements2).keyBy(element -> element, element -> element).process(new KeyedCoProcessFunction<Integer, Integer, Integer, Tuple3<Long, Integer, Integer>>() {
private MapState<Long, Integer> countState;
private ValueState<Long> previousTimestampState;
@Override
public void open(Configuration parameters) {
countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
}
@Override
public void processElement1(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
processElement(value, ctx);
}
@Override
public void processElement2(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
processElement(value, ctx);
}
private void processElement(Integer value, Context ctx) throws Exception {
Long elementTimestamp = ctx.timestamp();
long nextTen = ((elementTimestamp + 10) / 10) * 10;
ctx.timerService().registerEventTimeTimer(nextTen);
if (elementTimestamp < ctx.timerService().currentWatermark()) {
ctx.output(lateElements, value);
} else {
Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
previousTimestampState.update(elementTimestamp);
Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
countState.put(nextTen, currentCount + 1);
}
}
@Override
public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
countState.remove(timestamp);
// this would go in infinite loop if we did not quiesce the
// timer service.
ctx.timerService().registerEventTimeTimer(timestamp + 1);
}
});
DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
assertTrue(lateRecordsCollected.isEmpty());
assertThat(sumsCollected, equalTo(Arrays.asList(Tuple3.of(10L, 1, 8), Tuple3.of(20L, 1, 6), Tuple3.of(10L, 2, 4), Tuple3.of(20L, 2, 2))));
}
use of org.apache.flink.api.common.state.MapState in project flink by apache.
the class AbstractKeyedStateBackend method getOrCreateKeyedState.
/**
* @see KeyedStateBackend
*/
@Override
public <N, S extends State, V> S getOrCreateKeyedState(final TypeSerializer<N> namespaceSerializer, StateDescriptor<S, V> stateDescriptor) throws Exception {
checkNotNull(namespaceSerializer, "Namespace serializer");
if (keySerializer == null) {
throw new UnsupportedOperationException("State key serializer has not been configured in the config. " + "This operation cannot use partitioned state.");
}
if (!stateDescriptor.isSerializerInitialized()) {
throw new IllegalStateException("The serializer of the descriptor has not been initialized!");
}
InternalKvState<?> existing = keyValueStatesByName.get(stateDescriptor.getName());
if (existing != null) {
@SuppressWarnings("unchecked") S typedState = (S) existing;
return typedState;
}
// create a new blank key/value state
S state = stateDescriptor.bind(new StateBinder() {
@Override
public <T> ValueState<T> createValueState(ValueStateDescriptor<T> stateDesc) throws Exception {
return AbstractKeyedStateBackend.this.createValueState(namespaceSerializer, stateDesc);
}
@Override
public <T> ListState<T> createListState(ListStateDescriptor<T> stateDesc) throws Exception {
return AbstractKeyedStateBackend.this.createListState(namespaceSerializer, stateDesc);
}
@Override
public <T> ReducingState<T> createReducingState(ReducingStateDescriptor<T> stateDesc) throws Exception {
return AbstractKeyedStateBackend.this.createReducingState(namespaceSerializer, stateDesc);
}
@Override
public <T, ACC, R> AggregatingState<T, R> createAggregatingState(AggregatingStateDescriptor<T, ACC, R> stateDesc) throws Exception {
return AbstractKeyedStateBackend.this.createAggregatingState(namespaceSerializer, stateDesc);
}
@Override
public <T, ACC> FoldingState<T, ACC> createFoldingState(FoldingStateDescriptor<T, ACC> stateDesc) throws Exception {
return AbstractKeyedStateBackend.this.createFoldingState(namespaceSerializer, stateDesc);
}
@Override
public <UK, UV> MapState<UK, UV> createMapState(MapStateDescriptor<UK, UV> stateDesc) throws Exception {
return AbstractKeyedStateBackend.this.createMapState(namespaceSerializer, stateDesc);
}
});
@SuppressWarnings("unchecked") InternalKvState<N> kvState = (InternalKvState<N>) state;
keyValueStatesByName.put(stateDescriptor.getName(), kvState);
// Publish queryable state
if (stateDescriptor.isQueryable()) {
if (kvStateRegistry == null) {
throw new IllegalStateException("State backend has not been initialized for job.");
}
String name = stateDescriptor.getQueryableStateName();
kvStateRegistry.registerKvState(keyGroupRange, name, kvState);
}
return state;
}
use of org.apache.flink.api.common.state.MapState in project flink by apache.
the class AbstractQueryableStateTestBase method testMapState.
/**
* Tests simple map state queryable state instance. Each source emits (subtaskIndex,
* 0)..(subtaskIndex, numElements) tuples, which are then queried. The map state instance sums
* the values up. The test succeeds after each subtask index is queried with result n*(n+1)/2.
*/
@Test
public void testMapState() throws Exception {
final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
final long numElements = 1024L;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(maxParallelism);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
final MapStateDescriptor<Integer, Tuple2<Integer, Long>> mapStateDescriptor = new MapStateDescriptor<>("timon", BasicTypeInfo.INT_TYPE_INFO, source.getType());
mapStateDescriptor.setQueryable("timon-queryable");
source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
private static final long serialVersionUID = 8470749712274833552L;
@Override
public Integer getKey(Tuple2<Integer, Long> value) {
return value.f0;
}
}).process(new ProcessFunction<Tuple2<Integer, Long>, Object>() {
private static final long serialVersionUID = -805125545438296619L;
private transient MapState<Integer, Tuple2<Integer, Long>> mapState;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
mapState = getRuntimeContext().getMapState(mapStateDescriptor);
}
@Override
public void processElement(Tuple2<Integer, Long> value, Context ctx, Collector<Object> out) throws Exception {
Tuple2<Integer, Long> v = mapState.get(value.f0);
if (v == null) {
v = new Tuple2<>(value.f0, 0L);
}
mapState.put(value.f0, new Tuple2<>(v.f0, v.f1 + value.f1));
}
});
try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {
final JobID jobId = autoCancellableJob.getJobId();
final JobGraph jobGraph = autoCancellableJob.getJobGraph();
clusterClient.submitJob(jobGraph).get();
final long expected = numElements * (numElements + 1L) / 2L;
for (int key = 0; key < maxParallelism; key++) {
boolean success = false;
while (deadline.hasTimeLeft() && !success) {
CompletableFuture<MapState<Integer, Tuple2<Integer, Long>>> future = getKvState(deadline, client, jobId, "timon-queryable", key, BasicTypeInfo.INT_TYPE_INFO, mapStateDescriptor, false, executor);
Tuple2<Integer, Long> value = future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS).get(key);
if (value != null && value.f0 != null && expected == value.f1) {
assertEquals("Key mismatch", key, value.f0.intValue());
success = true;
} else {
// Retry
Thread.sleep(RETRY_TIMEOUT);
}
}
assertTrue("Did not succeed query", success);
}
}
}
Aggregations