use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class AbstractQueryableStateTestBase method testDuplicateRegistrationFailsJob.
/**
* Tests that duplicate query registrations fail the job at the JobManager.
*/
@Test(timeout = 60_000)
public void testDuplicateRegistrationFailsJob() throws Exception {
final int numKeys = 256;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(maxParallelism);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
// Reducing state
ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
final String queryName = "duplicate-me";
final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
private static final long serialVersionUID = -4126824763829132959L;
@Override
public Integer getKey(Tuple2<Integer, Long> value) {
return value.f0;
}
}).asQueryableState(queryName, reducingState);
final QueryableStateStream<Integer, Tuple2<Integer, Long>> duplicate = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
private static final long serialVersionUID = -6265024000462809436L;
@Override
public Integer getKey(Tuple2<Integer, Long> value) {
return value.f0;
}
}).asQueryableState(queryName);
// Submit the job graph
final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
clusterClient.submitJob(jobGraph).thenCompose(clusterClient::requestJobResult).thenApply(JobResult::getSerializedThrowable).thenAccept(serializedThrowable -> {
assertTrue(serializedThrowable.isPresent());
final Throwable t = serializedThrowable.get().deserializeError(getClass().getClassLoader());
final String failureCause = ExceptionUtils.stringifyException(t);
assertThat(failureCause, containsString("KvState with name '" + queryName + "' has already been registered by another operator"));
}).get();
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class AbstractQueryableStateTestBase method testWrongJobIdAndWrongQueryableStateName.
/**
* Tests that the correct exception is thrown if the query contains a wrong jobId or wrong
* queryable state name.
*/
@Test
@Ignore
public void testWrongJobIdAndWrongQueryableStateName() throws Exception {
final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
final long numElements = 1024L;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(maxParallelism);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType());
source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
private static final long serialVersionUID = 7662520075515707428L;
@Override
public Integer getKey(Tuple2<Integer, Long> value) {
return value.f0;
}
}).asQueryableState("hakuna", valueState);
try (AutoCancellableJob closableJobGraph = new AutoCancellableJob(deadline, clusterClient, env)) {
clusterClient.submitJob(closableJobGraph.getJobGraph()).get();
CompletableFuture<JobStatus> jobStatusFuture = clusterClient.getJobStatus(closableJobGraph.getJobId());
while (deadline.hasTimeLeft() && !jobStatusFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS).equals(JobStatus.RUNNING)) {
Thread.sleep(50);
jobStatusFuture = clusterClient.getJobStatus(closableJobGraph.getJobId());
}
assertEquals(JobStatus.RUNNING, jobStatusFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));
final JobID wrongJobId = new JobID();
CompletableFuture<ValueState<Tuple2<Integer, Long>>> unknownJobFuture = client.getKvState(// this is the wrong job id
wrongJobId, "hakuna", 0, BasicTypeInfo.INT_TYPE_INFO, valueState);
try {
unknownJobFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
// by now the request must have failed.
fail();
} catch (ExecutionException e) {
Assert.assertTrue("GOT: " + e.getCause().getMessage(), e.getCause() instanceof RuntimeException);
Assert.assertTrue("GOT: " + e.getCause().getMessage(), e.getCause().getMessage().contains("FlinkJobNotFoundException: Could not find Flink job (" + wrongJobId + ")"));
} catch (Exception f) {
fail("Unexpected type of exception: " + f.getMessage());
}
CompletableFuture<ValueState<Tuple2<Integer, Long>>> unknownQSName = client.getKvState(closableJobGraph.getJobId(), // this is the wrong name.
"wrong-hakuna", 0, BasicTypeInfo.INT_TYPE_INFO, valueState);
try {
unknownQSName.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
// by now the request must have failed.
fail();
} catch (ExecutionException e) {
Assert.assertTrue("GOT: " + e.getCause().getMessage(), e.getCause() instanceof RuntimeException);
Assert.assertTrue("GOT: " + e.getCause().getMessage(), e.getCause().getMessage().contains("UnknownKvStateLocation: No KvStateLocation found for KvState instance with name 'wrong-hakuna'."));
} catch (Exception f) {
fail("Unexpected type of exception: " + f.getMessage());
}
}
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class AbstractQueryableStateTestBase method testQueryableState.
/**
* Runs a simple topology producing random (key, 1) pairs at the sources (where number of keys
* is in fixed in range 0...numKeys). The records are keyed and a reducing queryable state
* instance is created, which sums up the records.
*
* <p>After submitting the job in detached mode, the QueryableStateCLient is used to query the
* counts of each key in rounds until all keys have non-zero counts.
*/
@Test
public void testQueryableState() throws Exception {
final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
final int numKeys = 256;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(maxParallelism);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
final String queryName = "hakuna-matata";
source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
private static final long serialVersionUID = 7143749578983540352L;
@Override
public Integer getKey(Tuple2<Integer, Long> value) {
return value.f0;
}
}).asQueryableState(queryName, reducingState);
try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {
final JobID jobId = autoCancellableJob.getJobId();
final JobGraph jobGraph = autoCancellableJob.getJobGraph();
clusterClient.submitJob(jobGraph).get();
final AtomicLongArray counts = new AtomicLongArray(numKeys);
final List<CompletableFuture<ReducingState<Tuple2<Integer, Long>>>> futures = new ArrayList<>(numKeys);
boolean allNonZero = false;
while (!allNonZero && deadline.hasTimeLeft()) {
allNonZero = true;
futures.clear();
for (int i = 0; i < numKeys; i++) {
final int key = i;
if (counts.get(key) > 0L) {
// Skip this one
continue;
} else {
allNonZero = false;
}
CompletableFuture<ReducingState<Tuple2<Integer, Long>>> result = getKvState(deadline, client, jobId, queryName, key, BasicTypeInfo.INT_TYPE_INFO, reducingState, false, executor);
result.thenAccept(response -> {
try {
Tuple2<Integer, Long> res = response.get();
counts.set(key, res.f1);
assertEquals("Key mismatch", key, res.f0.intValue());
} catch (Exception e) {
Assert.fail(e.getMessage());
}
});
futures.add(result);
}
// wait for all the futures to complete
CompletableFuture.allOf(futures.toArray(new CompletableFuture<?>[futures.size()])).get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
}
assertTrue("Not all keys are non-zero", allNonZero);
// All should be non-zero
for (int i = 0; i < numKeys; i++) {
long count = counts.get(i);
assertTrue("Count at position " + i + " is " + count, count > 0);
}
}
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class AbstractQueryableStateTestBase method testValueStateShortcut.
/**
* Tests simple value state queryable state instance. Each source emits (subtaskIndex,
* 0)..(subtaskIndex, numElements) tuples, which are then queried. The tests succeeds after each
* subtask index is queried with value numElements (the latest element updated the state).
*
* <p>This is the same as the simple value state test, but uses the API shortcut.
*/
@Test
public void testValueStateShortcut() throws Exception {
final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
final long numElements = 1024L;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(maxParallelism);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
// Value state shortcut
final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
private static final long serialVersionUID = 9168901838808830068L;
@Override
public Integer getKey(Tuple2<Integer, Long> value) {
return value.f0;
}
}).asQueryableState("matata");
@SuppressWarnings("unchecked") final ValueStateDescriptor<Tuple2<Integer, Long>> stateDesc = (ValueStateDescriptor<Tuple2<Integer, Long>>) queryableState.getStateDescriptor();
try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {
final JobID jobId = autoCancellableJob.getJobId();
final JobGraph jobGraph = autoCancellableJob.getJobGraph();
clusterClient.submitJob(jobGraph).get();
executeValueQuery(deadline, client, jobId, "matata", stateDesc, numElements);
}
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class AbstractQueryableStateTestBase method testMapState.
/**
* Tests simple map state queryable state instance. Each source emits (subtaskIndex,
* 0)..(subtaskIndex, numElements) tuples, which are then queried. The map state instance sums
* the values up. The test succeeds after each subtask index is queried with result n*(n+1)/2.
*/
@Test
public void testMapState() throws Exception {
final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
final long numElements = 1024L;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(maxParallelism);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
final MapStateDescriptor<Integer, Tuple2<Integer, Long>> mapStateDescriptor = new MapStateDescriptor<>("timon", BasicTypeInfo.INT_TYPE_INFO, source.getType());
mapStateDescriptor.setQueryable("timon-queryable");
source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
private static final long serialVersionUID = 8470749712274833552L;
@Override
public Integer getKey(Tuple2<Integer, Long> value) {
return value.f0;
}
}).process(new ProcessFunction<Tuple2<Integer, Long>, Object>() {
private static final long serialVersionUID = -805125545438296619L;
private transient MapState<Integer, Tuple2<Integer, Long>> mapState;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
mapState = getRuntimeContext().getMapState(mapStateDescriptor);
}
@Override
public void processElement(Tuple2<Integer, Long> value, Context ctx, Collector<Object> out) throws Exception {
Tuple2<Integer, Long> v = mapState.get(value.f0);
if (v == null) {
v = new Tuple2<>(value.f0, 0L);
}
mapState.put(value.f0, new Tuple2<>(v.f0, v.f1 + value.f1));
}
});
try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {
final JobID jobId = autoCancellableJob.getJobId();
final JobGraph jobGraph = autoCancellableJob.getJobGraph();
clusterClient.submitJob(jobGraph).get();
final long expected = numElements * (numElements + 1L) / 2L;
for (int key = 0; key < maxParallelism; key++) {
boolean success = false;
while (deadline.hasTimeLeft() && !success) {
CompletableFuture<MapState<Integer, Tuple2<Integer, Long>>> future = getKvState(deadline, client, jobId, "timon-queryable", key, BasicTypeInfo.INT_TYPE_INFO, mapStateDescriptor, false, executor);
Tuple2<Integer, Long> value = future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS).get(key);
if (value != null && value.f0 != null && expected == value.f1) {
assertEquals("Key mismatch", key, value.f0.intValue());
success = true;
} else {
// Retry
Thread.sleep(RETRY_TIMEOUT);
}
}
assertTrue("Did not succeed query", success);
}
}
}
Aggregations