use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.
the class AbstractQueryableStateITCase method testFoldingState.
/**
* Tests simple folding state queryable state instance. Each source emits
* (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
* queried. The folding state sums these up and maps them to Strings. The
* test succeeds after each subtask index is queried with result n*(n+1)/2
* (as a String).
*/
@Test
public void testFoldingState() throws Exception {
// Config
final Deadline deadline = TEST_TIMEOUT.fromNow();
final int numElements = 1024;
final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
JobID jobId = null;
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(NUM_SLOTS);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
// Folding state
FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState = new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);
QueryableStateStream<Integer, String> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
@Override
public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
return value.f0;
}
}).asQueryableState("pumba", foldingState);
// Submit the job graph
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
jobId = jobGraph.getJobID();
cluster.submitJobDetached(jobGraph);
// Now query
String expected = Integer.toString(numElements * (numElements + 1) / 2);
for (int key = 0; key < NUM_SLOTS; key++) {
final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
boolean success = false;
while (deadline.hasTimeLeft() && !success) {
Future<byte[]> future = getKvStateWithRetries(client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, false);
byte[] serializedValue = Await.result(future, deadline.timeLeft());
String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
if (expected.equals(value)) {
success = true;
} else {
// Retry
Thread.sleep(50);
}
}
assertTrue("Did not succeed query", success);
}
} finally {
// Free cluster resources
if (jobId != null) {
Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
Await.ready(cancellation, deadline.timeLeft());
}
client.shutDown();
}
}
use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.
the class AbstractQueryableStateITCase method testReducingState.
/**
* Tests simple reducing state queryable state instance. Each source emits
* (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
* queried. The reducing state instance sums these up. The test succeeds
* after each subtask index is queried with result n*(n+1)/2.
*/
@Test
public void testReducingState() throws Exception {
// Config
final Deadline deadline = TEST_TIMEOUT.fromNow();
final int numElements = 1024;
final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
JobID jobId = null;
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(NUM_SLOTS);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
// Reducing state
ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());
QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
@Override
public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
return value.f0;
}
}).asQueryableState("jungle", reducingState);
// Submit the job graph
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
jobId = jobGraph.getJobID();
cluster.submitJobDetached(jobGraph);
// Wait until job is running
// Now query
long expected = numElements * (numElements + 1) / 2;
executeValueQuery(deadline, client, jobId, queryableState, expected);
} finally {
// Free cluster resources
if (jobId != null) {
Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
Await.ready(cancellation, deadline.timeLeft());
}
client.shutDown();
}
}
use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.
the class AbstractQueryableStateITCase method testQueryNonStartedJobState.
/**
* Similar tests as {@link #testValueState()} but before submitting the
* job, we already issue one request which fails.
*/
@Test
public void testQueryNonStartedJobState() throws Exception {
// Config
final Deadline deadline = TEST_TIMEOUT.fromNow();
final int numElements = 1024;
final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
JobID jobId = null;
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(NUM_SLOTS);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
// Value state
ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), null);
QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
@Override
public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
return value.f0;
}
}).asQueryableState("hakuna", valueState);
// Submit the job graph
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
jobId = jobGraph.getJobID();
// Now query
long expected = numElements;
// query once
client.getKvState(jobId, queryableState.getQueryableStateName(), 0, KvStateRequestSerializer.serializeKeyAndNamespace(0, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE));
cluster.submitJobDetached(jobGraph);
executeValueQuery(deadline, client, jobId, queryableState, expected);
} finally {
// Free cluster resources
if (jobId != null) {
Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
Await.ready(cancellation, deadline.timeLeft());
}
client.shutDown();
}
}
use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.
the class AbstractQueryableStateITCase method testValueStateDefault.
/**
* Tests simple value state queryable state instance with a default value
* set. Each source emits (subtaskIndex, 0)..(subtaskIndex, numElements)
* tuples, the key is mapped to 1 but key 0 is queried which should throw
* a {@link UnknownKeyOrNamespace} exception.
*
* @throws UnknownKeyOrNamespace thrown due querying a non-existent key
*/
@Test(expected = UnknownKeyOrNamespace.class)
public void testValueStateDefault() throws Exception, UnknownKeyOrNamespace {
// Config
final Deadline deadline = TEST_TIMEOUT.fromNow();
final int numElements = 1024;
final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
JobID jobId = null;
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(NUM_SLOTS);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
// Value state
ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), Tuple2.of(0, 1337l));
// only expose key "1"
QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
@Override
public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
return 1;
}
}).asQueryableState("hakuna", valueState);
// Submit the job graph
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
jobId = jobGraph.getJobID();
cluster.submitJobDetached(jobGraph);
// Now query
int key = 0;
final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
Future<byte[]> future = getKvStateWithRetries(client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, true);
Await.result(future, deadline.timeLeft());
} finally {
// Free cluster resources
if (jobId != null) {
Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
Await.ready(cancellation, deadline.timeLeft());
}
client.shutDown();
}
}
use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.
the class AbstractQueryableStateITCase method testQueryableState.
/**
* Runs a simple topology producing random (key, 1) pairs at the sources (where
* number of keys is in fixed in range 0...numKeys). The records are keyed and
* a reducing queryable state instance is created, which sums up the records.
*
* After submitting the job in detached mode, the QueryableStateCLient is used
* to query the counts of each key in rounds until all keys have non-zero counts.
*/
@Test
@SuppressWarnings("unchecked")
public void testQueryableState() throws Exception {
// Config
final Deadline deadline = TEST_TIMEOUT.fromNow();
final int numKeys = 256;
final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
JobID jobId = null;
try {
//
// Test program
//
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(NUM_SLOTS);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
// Reducing state
ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
final String queryName = "hakuna-matata";
final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
@Override
public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
return value.f0;
}
}).asQueryableState(queryName, reducingState);
// Submit the job graph
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
cluster.submitJobDetached(jobGraph);
//
// Start querying
//
jobId = jobGraph.getJobID();
final AtomicLongArray counts = new AtomicLongArray(numKeys);
boolean allNonZero = false;
while (!allNonZero && deadline.hasTimeLeft()) {
allNonZero = true;
final List<Future<byte[]>> futures = new ArrayList<>(numKeys);
for (int i = 0; i < numKeys; i++) {
final int key = i;
if (counts.get(key) > 0) {
// Skip this one
continue;
} else {
allNonZero = false;
}
final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
Future<byte[]> serializedResult = getKvStateWithRetries(client, jobId, queryName, key, serializedKey, QUERY_RETRY_DELAY, false);
serializedResult.onSuccess(new OnSuccess<byte[]>() {
@Override
public void onSuccess(byte[] result) throws Throwable {
Tuple2<Integer, Long> value = KvStateRequestSerializer.deserializeValue(result, queryableState.getValueSerializer());
counts.set(key, value.f1);
assertEquals("Key mismatch", key, value.f0.intValue());
}
}, TEST_ACTOR_SYSTEM.dispatcher());
futures.add(serializedResult);
}
Future<Iterable<byte[]>> futureSequence = Futures.sequence(futures, TEST_ACTOR_SYSTEM.dispatcher());
Await.ready(futureSequence, deadline.timeLeft());
}
assertTrue("Not all keys are non-zero", allNonZero);
// All should be non-zero
for (int i = 0; i < numKeys; i++) {
long count = counts.get(i);
assertTrue("Count at position " + i + " is " + count, count > 0);
}
} finally {
// Free cluster resources
if (jobId != null) {
Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
Await.ready(cancellation, deadline.timeLeft());
}
client.shutDown();
}
}
Aggregations