Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class AbstractQueryableStateITCase, method testQueryableState.
/**
 * Runs a simple topology producing random (key, 1) pairs at the sources (where
 * the number of keys is fixed in the range 0...numKeys). The records are keyed and
 * a reducing queryable state instance is created, which sums up the records.
 *
 * After submitting the job in detached mode, the QueryableStateClient is used
 * to query the counts of each key in rounds until all keys have non-zero counts.
 */
@Test
@SuppressWarnings("unchecked")
public void testQueryableState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numKeys = 256;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        //
        // Test program
        //
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>(
                "any-name", new SumReduce(), source.getType());
        final String queryName = "hakuna-matata";
        final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
                source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                    @Override
                    public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                        return value.f0;
                    }
                }).asQueryableState(queryName, reducingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        cluster.submitJobDetached(jobGraph);
        //
        // Start querying
        //
        jobId = jobGraph.getJobID();
        final AtomicLongArray counts = new AtomicLongArray(numKeys);
        boolean allNonZero = false;
        while (!allNonZero && deadline.hasTimeLeft()) {
            allNonZero = true;
            final List<Future<byte[]>> futures = new ArrayList<>(numKeys);
            for (int i = 0; i < numKeys; i++) {
                final int key = i;
                if (counts.get(key) > 0) {
                    // Skip this one
                    continue;
                } else {
                    allNonZero = false;
                }
                final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
                        key, queryableState.getKeySerializer(),
                        VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
                Future<byte[]> serializedResult = getKvStateWithRetries(
                        client, jobId, queryName, key, serializedKey, QUERY_RETRY_DELAY, false);
                serializedResult.onSuccess(new OnSuccess<byte[]>() {
                    @Override
                    public void onSuccess(byte[] result) throws Throwable {
                        Tuple2<Integer, Long> value = KvStateRequestSerializer.deserializeValue(
                                result, queryableState.getValueSerializer());
                        counts.set(key, value.f1);
                        assertEquals("Key mismatch", key, value.f0.intValue());
                    }
                }, TEST_ACTOR_SYSTEM.dispatcher());
                futures.add(serializedResult);
            }
            Future<Iterable<byte[]>> futureSequence = Futures.sequence(futures, TEST_ACTOR_SYSTEM.dispatcher());
            Await.ready(futureSequence, deadline.timeLeft());
        }
        assertTrue("Not all keys are non-zero", allNonZero);
        // All should be non-zero
        for (int i = 0; i < numKeys; i++) {
            long count = counts.get(i);
            assertTrue("Count at position " + i + " is " + count, count > 0);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class AbstractQueryableStateITCase, method testDuplicateRegistrationFailsJob.
/**
* Tests that duplicate query registrations fail the job at the JobManager.
*/
@Test
public void testDuplicateRegistrationFailsJob() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numKeys = 256;
    JobID jobId = null;
    try {
        //
        // Test program
        //
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>(
                "any-name", new SumReduce(), source.getType());
        final String queryName = "duplicate-me";
        final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
                source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                    @Override
                    public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                        return value.f0;
                    }
                }).asQueryableState(queryName, reducingState);
        final QueryableStateStream<Integer, Tuple2<Integer, Long>> duplicate =
                source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                    @Override
                    public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                        return value.f0;
                    }
                }).asQueryableState(queryName);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        Future<JobStatusIs> failedFuture = cluster
                .getLeaderGateway(deadline.timeLeft())
                .ask(new NotifyWhenJobStatus(jobId, JobStatus.FAILED), deadline.timeLeft())
                .mapTo(ClassTag$.MODULE$.<JobStatusIs>apply(JobStatusIs.class));
        cluster.submitJobDetached(jobGraph);
        JobStatusIs jobStatus = Await.result(failedFuture, deadline.timeLeft());
        assertEquals(JobStatus.FAILED, jobStatus.state());
        // Get the job and check the cause
        JobFound jobFound = Await.result(
                cluster.getLeaderGateway(deadline.timeLeft())
                        .ask(new JobManagerMessages.RequestJob(jobId), deadline.timeLeft())
                        .mapTo(ClassTag$.MODULE$.<JobFound>apply(JobFound.class)),
                deadline.timeLeft());
        String failureCause = jobFound.executionGraph().getFailureCauseAsString();
        assertTrue("Not instance of SuppressRestartsException",
                failureCause.startsWith("org.apache.flink.runtime.execution.SuppressRestartsException"));
        int causedByIndex = failureCause.indexOf("Caused by: ");
        String subFailureCause = failureCause.substring(causedByIndex + "Caused by: ".length());
        assertTrue("Not caused by IllegalStateException",
                subFailureCause.startsWith("java.lang.IllegalStateException"));
        assertTrue("Exception does not contain registration name",
                subFailureCause.contains(queryName));
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
    }
}
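Note: TestKeyRangeSource, used by both tests above, is likewise defined elsewhere in the test class. Based on the description in the first Javadoc (random (key, 1) pairs with keys in the fixed range 0...numKeys), a minimal sketch might look as follows; the real class also deals with checkpointing and throttling details omitted here:

// Hypothetical sketch of TestKeyRangeSource: emits random (key, 1L) pairs
// with keys drawn uniformly from [0, numKeys). Assumes java.util.Random and
// org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction.
private static class TestKeyRangeSource extends RichParallelSourceFunction<Tuple2<Integer, Long>> {
    private static final long serialVersionUID = 1L;

    private final int numKeys;
    private volatile boolean running = true;

    TestKeyRangeSource(int numKeys) {
        this.numKeys = numKeys;
    }

    @Override
    public void run(SourceContext<Tuple2<Integer, Long>> ctx) throws Exception {
        final Random random = new Random();
        while (running) {
            // Emit under the checkpoint lock so records and checkpoints don't interleave.
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(new Tuple2<>(random.nextInt(numKeys), 1L));
            }
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}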
Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class WindowOperatorMigrationTest, method testRestoreReducingEventTimeWindowsFromFlink11.
@Test
@SuppressWarnings("unchecked")
public void testRestoreReducingEventTimeWindowsFromFlink11() throws Exception {
    final int WINDOW_SIZE = 3;
    TypeInformation<Tuple2<String, Integer>> inputType = TypeInfoParser.parse("Tuple2<String, Integer>");
    ReducingStateDescriptor<Tuple2<String, Integer>> stateDesc = new ReducingStateDescriptor<>(
            "window-contents", new SumReducer(), inputType.createSerializer(new ExecutionConfig()));
    WindowOperator<String, Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>, TimeWindow> operator =
            new WindowOperator<>(
                    TumblingEventTimeWindows.of(Time.of(WINDOW_SIZE, TimeUnit.SECONDS)),
                    new TimeWindow.Serializer(),
                    new TupleKeySelector(),
                    BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
                    stateDesc,
                    new InternalSingleValueWindowFunction<>(new PassThroughWindowFunction<String, TimeWindow, Tuple2<String, Integer>>()),
                    EventTimeTrigger.create(),
                    0,
                    null);
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    /*
    operator.setInputType(TypeInfoParser.<Tuple2<String, Integer>>parse("Tuple2<String, Integer>"), new ExecutionConfig());
    OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Tuple2<String, Integer>> testHarness =
            new OneInputStreamOperatorTestHarness<>(operator);
    testHarness.configureForKeyedStream(new TupleKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
    testHarness.setup();
    testHarness.open();
    // add elements out-of-order
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 3999));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 3000));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 20));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 0));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 999));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1998));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1999));
    testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1000));
    testHarness.processWatermark(new Watermark(999));
    expectedOutput.add(new Watermark(999));
    TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput(), new Tuple2ResultSortComparator());
    testHarness.processWatermark(new Watermark(1999));
    expectedOutput.add(new Watermark(1999));
    TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput(), new Tuple2ResultSortComparator());
    // do snapshot and save to file
    StreamTaskState snapshot = testHarness.snapshot(0L, 0L);
    testHarness.snaphotToFile(snapshot, "src/test/resources/win-op-migration-test-reduce-event-time-flink1.1-snapshot");
    testHarness.close();
    */
    OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Tuple2<String, Integer>> testHarness =
            new KeyedOneInputStreamOperatorTestHarness<>(operator, new TupleKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
    testHarness.setup();
    testHarness.initializeStateFromLegacyCheckpoint(getResourceFilename("win-op-migration-test-reduce-event-time-flink1.1-snapshot"));
    testHarness.open();
    testHarness.processWatermark(new Watermark(2999));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key1", 3), 2999));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key2", 3), 2999));
    expectedOutput.add(new Watermark(2999));
    testHarness.processWatermark(new Watermark(3999));
    expectedOutput.add(new Watermark(3999));
    testHarness.processWatermark(new Watermark(4999));
    expectedOutput.add(new Watermark(4999));
    testHarness.processWatermark(new Watermark(5999));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key2", 2), 5999));
    expectedOutput.add(new Watermark(5999));
    TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput(), new Tuple2ResultSortComparator());
    testHarness.close();
}
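Note: SumReducer here is a helper defined elsewhere in WindowOperatorMigrationTest. Judging by the expected window outputs above (each window emits the per-key count of its 1-valued elements), it plausibly sums the Integer field, along these lines; the actual class may differ in detail:

// Hypothetical sketch of SumReducer for Tuple2<String, Integer>: keeps the
// key (f0) and adds the values (f1).
private static class SumReducer implements ReduceFunction<Tuple2<String, Integer>> {
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) {
        return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
    }
}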
Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class AbstractQueryableStateTestBase, method testDuplicateRegistrationFailsJob.
/**
* Tests that duplicate query registrations fail the job at the JobManager.
*/
@Test(timeout = 60_000)
public void testDuplicateRegistrationFailsJob() throws Exception {
    final int numKeys = 256;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
    // Reducing state
    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>(
            "any-name", new SumReduce(), source.getType());
    final String queryName = "duplicate-me";
    final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
            source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                private static final long serialVersionUID = -4126824763829132959L;

                @Override
                public Integer getKey(Tuple2<Integer, Long> value) {
                    return value.f0;
                }
            }).asQueryableState(queryName, reducingState);
    final QueryableStateStream<Integer, Tuple2<Integer, Long>> duplicate =
            source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                private static final long serialVersionUID = -6265024000462809436L;

                @Override
                public Integer getKey(Tuple2<Integer, Long> value) {
                    return value.f0;
                }
            }).asQueryableState(queryName);
    // Submit the job graph
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    clusterClient.submitJob(jobGraph)
            .thenCompose(clusterClient::requestJobResult)
            .thenApply(JobResult::getSerializedThrowable)
            .thenAccept(serializedThrowable -> {
                assertTrue(serializedThrowable.isPresent());
                final Throwable t = serializedThrowable.get().deserializeError(getClass().getClassLoader());
                final String failureCause = ExceptionUtils.stringifyException(t);
                assertThat(failureCause, containsString(
                        "KvState with name '" + queryName + "' has already been registered by another operator"));
            })
            .get();
}
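A design note: the assertion stringifies the whole cause chain and matches on a substring. An alternative, assuming Flink's org.apache.flink.util.ExceptionUtils is available in this version, would be to search the cause chain directly; a sketch, reusing the deserialized throwable t from the lambda above:

// Sketch of a more targeted check using ExceptionUtils.findThrowableWithMessage,
// which walks the cause chain for a throwable whose message contains the text.
assertTrue(ExceptionUtils.findThrowableWithMessage(
        t, "KvState with name '" + queryName + "' has already been registered").isPresent());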
Use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.
The class AbstractQueryableStateTestBase, method testQueryableState.
/**
 * Runs a simple topology producing random (key, 1) pairs at the sources (where the number
 * of keys is fixed in the range 0...numKeys). The records are keyed and a reducing queryable
 * state instance is created, which sums up the records.
 *
 * <p>After submitting the job in detached mode, the QueryableStateClient is used to query the
 * counts of each key in rounds until all keys have non-zero counts.
 */
@Test
public void testQueryableState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final int numKeys = 256;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>(
            "any-name", new SumReduce(), source.getType());
    final String queryName = "hakuna-matata";
    source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
        private static final long serialVersionUID = 7143749578983540352L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState(queryName, reducingState);
    try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        clusterClient.submitJob(jobGraph).get();
        final AtomicLongArray counts = new AtomicLongArray(numKeys);
        final List<CompletableFuture<ReducingState<Tuple2<Integer, Long>>>> futures = new ArrayList<>(numKeys);
        boolean allNonZero = false;
        while (!allNonZero && deadline.hasTimeLeft()) {
            allNonZero = true;
            futures.clear();
            for (int i = 0; i < numKeys; i++) {
                final int key = i;
                if (counts.get(key) > 0L) {
                    // Skip this one
                    continue;
                } else {
                    allNonZero = false;
                }
                CompletableFuture<ReducingState<Tuple2<Integer, Long>>> result = getKvState(
                        deadline, client, jobId, queryName, key, BasicTypeInfo.INT_TYPE_INFO,
                        reducingState, false, executor);
                result.thenAccept(response -> {
                    try {
                        Tuple2<Integer, Long> res = response.get();
                        counts.set(key, res.f1);
                        assertEquals("Key mismatch", key, res.f0.intValue());
                    } catch (Exception e) {
                        Assert.fail(e.getMessage());
                    }
                });
                futures.add(result);
            }
            // wait for all the futures to complete
            CompletableFuture.allOf(futures.toArray(new CompletableFuture<?>[futures.size()]))
                    .get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        }
        assertTrue("Not all keys are non-zero", allNonZero);
        // All should be non-zero
        for (int i = 0; i < numKeys; i++) {
            long count = counts.get(i);
            assertTrue("Count at position " + i + " is " + count, count > 0);
        }
    }
}
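Note: getKvState(...) above is a retrying helper defined in the test base class and not shown in this snippet. At its core it presumably wraps the public QueryableStateClient API; a minimal sketch of the direct call, where the proxy host and port are assumptions and the retry logic is omitted:

// Hypothetical direct query against the queryable state proxy. The
// QueryableStateClient(String, int) constructor and getKvState signature are
// public Flink API; "localhost" and 9069 (the default proxy port) are assumptions.
QueryableStateClient client = new QueryableStateClient("localhost", 9069);

CompletableFuture<ReducingState<Tuple2<Integer, Long>>> future = client.getKvState(
        jobId,                        // JobID of the running job
        "hakuna-matata",              // name passed to asQueryableState(...)
        key,                          // the key to look up
        BasicTypeInfo.INT_TYPE_INFO,  // key type information
        reducingState);               // same ReducingStateDescriptor used in the job

Tuple2<Integer, Long> sum = future.get().get(); // outer get(): future; inner get(): state value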