Use of org.apache.flink.api.common.state.ReducingState in project flink by apache.
From the class AbstractKeyedStateBackend, method getOrCreateKeyedState.
/**
* @see KeyedStateBackend
*/
@Override
public <N, S extends State, V> S getOrCreateKeyedState(
        final TypeSerializer<N> namespaceSerializer,
        StateDescriptor<S, V> stateDescriptor) throws Exception {

    checkNotNull(namespaceSerializer, "Namespace serializer");

    if (keySerializer == null) {
        throw new UnsupportedOperationException(
                "State key serializer has not been configured in the config. "
                        + "This operation cannot use partitioned state.");
    }

    if (!stateDescriptor.isSerializerInitialized()) {
        throw new IllegalStateException("The serializer of the descriptor has not been initialized!");
    }

    InternalKvState<?> existing = keyValueStatesByName.get(stateDescriptor.getName());
    if (existing != null) {
        @SuppressWarnings("unchecked")
        S typedState = (S) existing;
        return typedState;
    }

    // create a new blank key/value state
    S state = stateDescriptor.bind(new StateBinder() {

        @Override
        public <T> ValueState<T> createValueState(ValueStateDescriptor<T> stateDesc) throws Exception {
            return AbstractKeyedStateBackend.this.createValueState(namespaceSerializer, stateDesc);
        }

        @Override
        public <T> ListState<T> createListState(ListStateDescriptor<T> stateDesc) throws Exception {
            return AbstractKeyedStateBackend.this.createListState(namespaceSerializer, stateDesc);
        }

        @Override
        public <T> ReducingState<T> createReducingState(ReducingStateDescriptor<T> stateDesc) throws Exception {
            return AbstractKeyedStateBackend.this.createReducingState(namespaceSerializer, stateDesc);
        }

        @Override
        public <T, ACC, R> AggregatingState<T, R> createAggregatingState(AggregatingStateDescriptor<T, ACC, R> stateDesc) throws Exception {
            return AbstractKeyedStateBackend.this.createAggregatingState(namespaceSerializer, stateDesc);
        }

        @Override
        public <T, ACC> FoldingState<T, ACC> createFoldingState(FoldingStateDescriptor<T, ACC> stateDesc) throws Exception {
            return AbstractKeyedStateBackend.this.createFoldingState(namespaceSerializer, stateDesc);
        }

        @Override
        public <UK, UV> MapState<UK, UV> createMapState(MapStateDescriptor<UK, UV> stateDesc) throws Exception {
            return AbstractKeyedStateBackend.this.createMapState(namespaceSerializer, stateDesc);
        }
    });

    @SuppressWarnings("unchecked")
    InternalKvState<N> kvState = (InternalKvState<N>) state;
    keyValueStatesByName.put(stateDescriptor.getName(), kvState);

    // Publish queryable state
    if (stateDescriptor.isQueryable()) {
        if (kvStateRegistry == null) {
            throw new IllegalStateException("State backend has not been initialized for job.");
        }
        String name = stateDescriptor.getQueryableStateName();
        kvStateRegistry.registerKvState(keyGroupRange, name, kvState);
    }

    return state;
}
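The StateBinder above is the hook through which every keyed state type, including ReducingState, is created. For orientation, the following is a minimal sketch of how user code typically ends up on this path, namely through the RuntimeContext of a rich function applied on a keyed stream. It is not taken from the Flink sources; the class name RunningSum and the state name "running-sum" are illustrative only, and imports are omitted to match the excerpts on this page.

public class RunningSum extends RichFlatMapFunction<Long, Long> {

    // Handle to keyed ReducingState; scoped to the currently processed key.
    private transient ReducingState<Long> sum;

    @Override
    public void open(Configuration parameters) {
        ReducingStateDescriptor<Long> descriptor = new ReducingStateDescriptor<>(
                "running-sum",      // state name, unique within the operator
                (a, b) -> a + b,    // ReduceFunction that merges the stored and the new value
                Long.class);
        sum = getRuntimeContext().getReducingState(descriptor);
    }

    @Override
    public void flatMap(Long value, Collector<Long> out) throws Exception {
        sum.add(value);             // combined with the stored value by the ReduceFunction
        out.collect(sum.get());     // current reduced value for the active key
    }
}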
Use of org.apache.flink.api.common.state.ReducingState in project flink by apache.
From the class AbstractQueryableStateTestBase, method testDuplicateRegistrationFailsJob.
/**
* Tests that duplicate query registrations fail the job at the JobManager.
*/
@Test(timeout = 60_000)
public void testDuplicateRegistrationFailsJob() throws Exception {
    final int numKeys = 256;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));

    // Reducing state
    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState =
            new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());

    final String queryName = "duplicate-me";

    final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
            source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

                private static final long serialVersionUID = -4126824763829132959L;

                @Override
                public Integer getKey(Tuple2<Integer, Long> value) {
                    return value.f0;
                }
            }).asQueryableState(queryName, reducingState);

    final QueryableStateStream<Integer, Tuple2<Integer, Long>> duplicate =
            source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

                private static final long serialVersionUID = -6265024000462809436L;

                @Override
                public Integer getKey(Tuple2<Integer, Long> value) {
                    return value.f0;
                }
            }).asQueryableState(queryName);

    // Submit the job graph
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    clusterClient.submitJob(jobGraph)
            .thenCompose(clusterClient::requestJobResult)
            .thenApply(JobResult::getSerializedThrowable)
            .thenAccept(serializedThrowable -> {
                assertTrue(serializedThrowable.isPresent());
                final Throwable t = serializedThrowable.get().deserializeError(getClass().getClassLoader());
                final String failureCause = ExceptionUtils.stringifyException(t);
                assertThat(failureCause, containsString(
                        "KvState with name '" + queryName + "' has already been registered by another operator"));
            })
            .get();
}
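The SumReduce function passed to the ReducingStateDescriptor above is a helper defined elsewhere in AbstractQueryableStateTestBase and is not shown on this page. From the way it is applied to Tuple2<Integer, Long> records, it presumably keeps the key and sums the count field, roughly like the following sketch; the exact implementation is an assumption.

private static class SumReduce implements ReduceFunction<Tuple2<Integer, Long>> {
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<Integer, Long> reduce(Tuple2<Integer, Long> value1, Tuple2<Integer, Long> value2) {
        // Keep the key (f0) and accumulate the count (f1).
        value1.f1 += value2.f1;
        return value1;
    }
}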
Use of org.apache.flink.api.common.state.ReducingState in project flink by apache.
From the class AbstractQueryableStateTestBase, method testQueryableState.
/**
* Runs a simple topology producing random (key, 1) pairs at the sources (where number of keys
* is fixed in the range 0...numKeys). The records are keyed and a reducing queryable state
* instance is created, which sums up the records.
*
* <p>After submitting the job in detached mode, the QueryableStateClient is used to query the
* counts of each key in rounds until all keys have non-zero counts.
*/
@Test
public void testQueryableState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final int numKeys = 256;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));

    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState =
            new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());

    final String queryName = "hakuna-matata";

    source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = 7143749578983540352L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState(queryName, reducingState);

    try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {

        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();

        clusterClient.submitJob(jobGraph).get();

        final AtomicLongArray counts = new AtomicLongArray(numKeys);
        final List<CompletableFuture<ReducingState<Tuple2<Integer, Long>>>> futures = new ArrayList<>(numKeys);

        boolean allNonZero = false;
        while (!allNonZero && deadline.hasTimeLeft()) {
            allNonZero = true;
            futures.clear();

            for (int i = 0; i < numKeys; i++) {
                final int key = i;

                if (counts.get(key) > 0L) {
                    // Skip this one
                    continue;
                } else {
                    allNonZero = false;
                }

                CompletableFuture<ReducingState<Tuple2<Integer, Long>>> result = getKvState(
                        deadline, client, jobId, queryName, key,
                        BasicTypeInfo.INT_TYPE_INFO, reducingState, false, executor);

                result.thenAccept(response -> {
                    try {
                        Tuple2<Integer, Long> res = response.get();
                        counts.set(key, res.f1);
                        assertEquals("Key mismatch", key, res.f0.intValue());
                    } catch (Exception e) {
                        Assert.fail(e.getMessage());
                    }
                });

                futures.add(result);
            }

            // wait for all the futures to complete
            CompletableFuture
                    .allOf(futures.toArray(new CompletableFuture<?>[futures.size()]))
                    .get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        }

        assertTrue("Not all keys are non-zero", allNonZero);

        // All should be non-zero
        for (int i = 0; i < numKeys; i++) {
            long count = counts.get(i);
            assertTrue("Count at position " + i + " is " + count, count > 0);
        }
    }
}
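The getKvState call in the loop above is a retry helper provided by the test base, not a Flink API. Outside the test harness, a queryable ReducingState is read through QueryableStateClient. The sketch below is illustrative rather than part of the test: it assumes the queryable state proxy listens on localhost at the default port 9069, and it reuses jobId and reducingState from the test above.

QueryableStateClient client = new QueryableStateClient("localhost", 9069);

CompletableFuture<ReducingState<Tuple2<Integer, Long>>> resultFuture =
        client.getKvState(jobId, "hakuna-matata", 42, BasicTypeInfo.INT_TYPE_INFO, reducingState);

// Blocking here only to keep the sketch short; the test above chains callbacks instead.
Tuple2<Integer, Long> sumForKey42 = resultFuture.get().get();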
Use of org.apache.flink.api.common.state.ReducingState in project flink by apache.
From the class AbstractQueryableStateTestBase, method testReducingState.
/**
* Tests a simple reducing queryable state instance. Each source emits (subtaskIndex,
* 0)..(subtaskIndex, numElements) tuples, which are then queried. The reducing state instance
* sums these up. The test succeeds after each subtask index is queried with result n*(n+1)/2.
*/
@Test
public void testReducingState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState =
            new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());

    source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = 8470749712274833552L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState("jungle", reducingState);

    try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {

        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();

        clusterClient.submitJob(jobGraph).get();

        final long expected = numElements * (numElements + 1L) / 2L;
        for (int key = 0; key < maxParallelism; key++) {
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                CompletableFuture<ReducingState<Tuple2<Integer, Long>>> future = getKvState(
                        deadline, client, jobId, "jungle", key,
                        BasicTypeInfo.INT_TYPE_INFO, reducingState, false, executor);

                Tuple2<Integer, Long> value =
                        future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS).get();

                assertEquals("Key mismatch", key, value.f0.intValue());
                if (expected == value.f1) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(RETRY_TIMEOUT);
                }
            }

            assertTrue("Did not succeed query", success);
        }
    }
}
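The expected value computed in the loop above is just the closed form of the series each key receives: the source emits the values 0 through numElements for its subtask index, so the fully reduced count is numElements * (numElements + 1) / 2. With numElements = 1024 that is 1024 * 1025 / 2 = 524,800, which is the value every query must eventually return before the test moves on to the next key.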
Use of org.apache.flink.api.common.state.ReducingState in project flink by apache.
From the class StateBackendTestBase, method testConcurrentMapIfQueryable.
/**
* Tests that {@link AbstractHeapState} instances respect the queryable flag and create
* concurrent variants for internal state structures.
*/
@SuppressWarnings("unchecked")
protected void testConcurrentMapIfQueryable() throws Exception {
    final int numberOfKeyGroups = 1;
    final CheckpointableKeyedStateBackend<Integer> backend = createKeyedBackend(
            IntSerializer.INSTANCE, numberOfKeyGroups, new KeyGroupRange(0, 0), new DummyEnvironment());

    try {
        {
            // ValueState
            ValueStateDescriptor<Integer> desc = new ValueStateDescriptor<>("value-state", Integer.class, -1);
            desc.setQueryable("my-query");

            ValueState<Integer> state = backend.getPartitionedState(
                    VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, desc);

            InternalKvState<Integer, VoidNamespace, Integer> kvState = (InternalKvState<Integer, VoidNamespace, Integer>) state;
            assertTrue(kvState instanceof AbstractHeapState);

            kvState.setCurrentNamespace(VoidNamespace.INSTANCE);
            backend.setCurrentKey(1);
            state.update(121818273);

            assertNotNull("State not set", ((AbstractHeapState<?, ?, ?>) kvState).getStateTable());
        }

        {
            // ListState
            ListStateDescriptor<Integer> desc = new ListStateDescriptor<>("list-state", Integer.class);
            desc.setQueryable("my-query");

            ListState<Integer> state = backend.getPartitionedState(
                    VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, desc);

            InternalKvState<Integer, VoidNamespace, Integer> kvState = (InternalKvState<Integer, VoidNamespace, Integer>) state;
            assertTrue(kvState instanceof AbstractHeapState);

            kvState.setCurrentNamespace(VoidNamespace.INSTANCE);
            backend.setCurrentKey(1);
            state.add(121818273);

            assertNotNull("State not set", ((AbstractHeapState<?, ?, ?>) kvState).getStateTable());
        }

        {
            // ReducingState
            ReducingStateDescriptor<Integer> desc = new ReducingStateDescriptor<>("reducing-state",
                    new ReduceFunction<Integer>() {

                        @Override
                        public Integer reduce(Integer value1, Integer value2) throws Exception {
                            return value1 + value2;
                        }
                    }, Integer.class);
            desc.setQueryable("my-query");

            ReducingState<Integer> state = backend.getPartitionedState(
                    VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, desc);

            InternalKvState<Integer, VoidNamespace, Integer> kvState = (InternalKvState<Integer, VoidNamespace, Integer>) state;
            assertTrue(kvState instanceof AbstractHeapState);

            kvState.setCurrentNamespace(VoidNamespace.INSTANCE);
            backend.setCurrentKey(1);
            state.add(121818273);

            assertNotNull("State not set", ((AbstractHeapState<?, ?, ?>) kvState).getStateTable());
        }

        {
            // MapState
            MapStateDescriptor<Integer, String> desc = new MapStateDescriptor<>("map-state", Integer.class, String.class);
            desc.setQueryable("my-query");

            MapState<Integer, String> state = backend.getPartitionedState(
                    VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, desc);

            InternalKvState<Integer, VoidNamespace, Map<Integer, String>> kvState = (InternalKvState<Integer, VoidNamespace, Map<Integer, String>>) state;
            assertTrue(kvState instanceof AbstractHeapState);

            kvState.setCurrentNamespace(VoidNamespace.INSTANCE);
            backend.setCurrentKey(1);
            state.put(121818273, "121818273");

            int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(1, numberOfKeyGroups);
            StateTable stateTable = ((AbstractHeapState) kvState).getStateTable();
            assertNotNull("State not set", stateTable.get(keyGroupIndex));
        }
    } finally {
        IOUtils.closeQuietly(backend);
        backend.dispose();
    }
}
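The setQueryable("my-query") calls above set the descriptor flag that getOrCreateKeyedState (the first example on this page) checks before registering the state with the KvStateRegistry. The following is a minimal sketch of that descriptor-side contract, using a lambda as the reduce function; it is illustrative and not part of the test.

ReducingStateDescriptor<Integer> desc =
        new ReducingStateDescriptor<>("reducing-state", (a, b) -> a + b, Integer.class);

// Not queryable until a queryable state name is assigned.
assertFalse(desc.isQueryable());

desc.setQueryable("my-query");

assertTrue(desc.isQueryable());
assertEquals("my-query", desc.getQueryableStateName());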