Use of org.apache.flink.api.common.state.ListState in project flink by apache.
The class FlinkKafkaConsumerBaseTest, method checkFilterRestoredPartitionsWithDisovered.
private void checkFilterRestoredPartitionsWithDisovered(
        List<String> restoredKafkaTopics,
        List<String> initKafkaTopics,
        List<String> expectedSubscribedPartitions,
        Boolean disableFiltering) throws Exception {
    final AbstractPartitionDiscoverer discoverer =
            new TestPartitionDiscoverer(
                    new KafkaTopicsDescriptor(initKafkaTopics, null),
                    0,
                    1,
                    TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(initKafkaTopics),
                    TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(
                            initKafkaTopics.stream()
                                    .map(topic -> new KafkaTopicPartition(topic, 0))
                                    .collect(Collectors.toList())));
    final FlinkKafkaConsumerBase<String> consumer =
            new DummyFlinkKafkaConsumer<>(initKafkaTopics, discoverer);
    if (disableFiltering) {
        consumer.disableFilterRestoredPartitionsWithSubscribedTopics();
    }
    // seed the restored union list state with one partition (at a dummy offset) per restored topic
    final TestingListState<Tuple2<KafkaTopicPartition, Long>> listState = new TestingListState<>();
    for (int i = 0; i < restoredKafkaTopics.size(); i++) {
        listState.add(new Tuple2<>(new KafkaTopicPartition(restoredKafkaTopics.get(i), 0), 12345L));
    }
    setupConsumer(consumer, true, listState, true, 0, 1);
    // after restore, only the expected topics should remain among the subscribed partitions
    Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets =
            consumer.getSubscribedPartitionsToStartOffsets();
    assertEquals(
            new HashSet<>(expectedSubscribedPartitions),
            subscribedPartitionsToStartOffsets.keySet().stream()
                    .map(partition -> partition.getTopic())
                    .collect(Collectors.toSet()));
}
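A hypothetical test built on this helper could look like the sketch below; the topic names and the expected result are illustrative, not taken from the Flink test suite, and the snippet assumes java.util.Arrays and java.util.Collections are imported. With filtering enabled (last argument false), partitions of topics that are no longer subscribed are dropped on restore.

@Test
public void testFilterRestoredPartitionsSketch() throws Exception {
    // restored state contains two topics, but only "kept-topic" is still subscribed,
    // so only its partition should survive the restore
    checkFilterRestoredPartitionsWithDisovered(
            Arrays.asList("removed-topic", "kept-topic"), // topics found in restored state
            Collections.singletonList("kept-topic"),      // topics currently subscribed
            Collections.singletonList("kept-topic"),      // topics expected after filtering
            false);                                       // keep filtering enabled
}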
Use of org.apache.flink.api.common.state.ListState in project flink by apache.
The class SimpleStateRequestHandler, method getListState.
private ListState<byte[]> getListState(BeamFnApi.StateRequest request) throws Exception {
    BeamFnApi.StateKey.BagUserState bagUserState = request.getStateKey().getBagUserState();
    // the serialized FlinkFnApi.StateDescriptor is carried Base64-encoded in the user state id
    byte[] data = Base64.getDecoder().decode(bagUserState.getUserStateId());
    FlinkFnApi.StateDescriptor stateDescriptor = FlinkFnApi.StateDescriptor.parseFrom(data);
    String stateName = PYTHON_STATE_PREFIX + stateDescriptor.getStateName();
    ListStateDescriptor<byte[]> listStateDescriptor;
    StateDescriptor cachedStateDescriptor = stateDescriptorCache.get(stateName);
    if (cachedStateDescriptor instanceof ListStateDescriptor) {
        // reuse the descriptor created for an earlier request with the same state name
        listStateDescriptor = (ListStateDescriptor<byte[]>) cachedStateDescriptor;
    } else if (cachedStateDescriptor == null) {
        listStateDescriptor = new ListStateDescriptor<>(stateName, valueSerializer);
        if (stateDescriptor.hasStateTtlConfig()) {
            FlinkFnApi.StateDescriptor.StateTTLConfig stateTtlConfigProto =
                    stateDescriptor.getStateTtlConfig();
            StateTtlConfig stateTtlConfig =
                    ProtoUtils.parseStateTtlConfigFromProto(stateTtlConfigProto);
            listStateDescriptor.enableTimeToLive(stateTtlConfig);
        }
        stateDescriptorCache.put(stateName, listStateDescriptor);
    } else {
        // the same name was previously registered for a different state type
        throw new RuntimeException(
                String.format(
                        "State name corrupt detected: "
                                + "'%s' is used both as LIST state and '%s' state at the same time.",
                        stateName, cachedStateDescriptor.getType()));
    }
    byte[] windowBytes = bagUserState.getWindow().toByteArray();
    if (windowBytes.length != 0) {
        // windowed state: deserialize the window and use it as the state namespace
        bais.setBuffer(windowBytes, 0, windowBytes.length);
        Object namespace = namespaceSerializer.deserialize(baisWrapper);
        return (ListState<byte[]>)
                keyedStateBackend.getPartitionedState(
                        namespace, namespaceSerializer, listStateDescriptor);
    } else {
        return (ListState<byte[]>)
                keyedStateBackend.getPartitionedState(
                        VoidNamespace.INSTANCE,
                        VoidNamespaceSerializer.INSTANCE,
                        listStateDescriptor);
    }
}
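For comparison, a minimal user-facing sketch of the building blocks the handler reconstructs from the serialized FlinkFnApi descriptor: a ListStateDescriptor with a time-to-live configuration. The state name and the one-hour TTL are illustrative, and the snippet assumes the usual imports from org.apache.flink.api.common.state plus org.apache.flink.api.common.time.Time.

ListStateDescriptor<byte[]> ttlDescriptor = new ListStateDescriptor<>("buffer", byte[].class);
ttlDescriptor.enableTimeToLive(
        StateTtlConfig.newBuilder(Time.hours(1))
                // refresh the TTL whenever the entry is created or written
                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                // never hand out entries whose TTL has expired
                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                .build());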
Use of org.apache.flink.api.common.state.ListState in project flink by apache.
The class AbstractQueryableStateTestBase, method testListState.
/**
 * Tests a simple queryable list state instance. Each source emits (subtaskIndex,
 * 0)..(subtaskIndex, numElements) tuples, which are then queried. The list state instance adds
 * the values to the list. The test succeeds once each subtask index has been queried and the list
 * contains the correct number of distinct elements.
 */
@Test
public void testListState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source =
            env.addSource(new TestAscendingValueSource(numElements));
    final ListStateDescriptor<Long> listStateDescriptor =
            new ListStateDescriptor<Long>("list", BasicTypeInfo.LONG_TYPE_INFO);
    listStateDescriptor.setQueryable("list-queryable");
    source.keyBy(
                    new KeySelector<Tuple2<Integer, Long>, Integer>() {
                        private static final long serialVersionUID = 8470749712274833552L;

                        @Override
                        public Integer getKey(Tuple2<Integer, Long> value) {
                            return value.f0;
                        }
                    })
            .process(
                    new ProcessFunction<Tuple2<Integer, Long>, Object>() {
                        private static final long serialVersionUID = -805125545438296619L;

                        private transient ListState<Long> listState;

                        @Override
                        public void open(Configuration parameters) throws Exception {
                            super.open(parameters);
                            listState = getRuntimeContext().getListState(listStateDescriptor);
                        }

                        @Override
                        public void processElement(
                                Tuple2<Integer, Long> value, Context ctx, Collector<Object> out)
                                throws Exception {
                            listState.add(value.f1);
                        }
                    });
    try (AutoCancellableJob autoCancellableJob =
            new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        clusterClient.submitJob(jobGraph).get();
        final Map<Integer, Set<Long>> results = new HashMap<>();
        for (int key = 0; key < maxParallelism; key++) {
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                final CompletableFuture<ListState<Long>> future =
                        getKvState(
                                deadline,
                                client,
                                jobId,
                                "list-queryable",
                                key,
                                BasicTypeInfo.INT_TYPE_INFO,
                                listStateDescriptor,
                                false,
                                executor);
                Iterable<Long> value =
                        future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS).get();
                Set<Long> res = new HashSet<>();
                for (Long v : value) {
                    res.add(v);
                }
                // the source starts at 0, so +1
                if (res.size() == numElements + 1L) {
                    success = true;
                    results.put(key, res);
                } else {
                    // Retry
                    Thread.sleep(RETRY_TIMEOUT);
                }
            }
            assertTrue("Did not succeed query", success);
        }
        for (int key = 0; key < maxParallelism; key++) {
            Set<Long> values = results.get(key);
            for (long i = 0L; i <= numElements; i++) {
                assertTrue(values.contains(i));
            }
        }
    }
}
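For orientation, a hedged sketch of the client side that getKvState(...) wraps in this test base: Flink's QueryableStateClient querying the "list-queryable" state registered above. The host, port, jobId and key values are placeholders, and the snippet assumes the queryable-state proxy is reachable on its default port.

QueryableStateClient client = new QueryableStateClient("localhost", 9069);
CompletableFuture<ListState<Long>> resultFuture =
        client.getKvState(
                jobId,                          // JobID of the running job
                "list-queryable",               // name registered via setQueryable(...)
                0,                              // the key to query
                BasicTypeInfo.INT_TYPE_INFO,    // key type
                listStateDescriptor);           // same descriptor as on the job side
// the returned ListState is a read-only, query-side view of the state
for (Long v : resultFuture.get().get()) {
    System.out.println(v);
}
client.shutdownAndWait();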
Use of org.apache.flink.api.common.state.ListState in project flink by apache.
The class StreamingRuntimeContextTest, method createListPlainMockOp.
@SuppressWarnings("unchecked")
private static AbstractStreamOperator<?> createListPlainMockOp() throws Exception {
    AbstractStreamOperator<?> operatorMock = mock(AbstractStreamOperator.class);
    ExecutionConfig config = new ExecutionConfig();
    KeyedStateBackend keyedStateBackend = mock(KeyedStateBackend.class);
    DefaultKeyedStateStore keyedStateStore = new DefaultKeyedStateStore(keyedStateBackend, config);
    when(operatorMock.getExecutionConfig()).thenReturn(config);
    doAnswer(
                    new Answer<ListState<String>>() {
                        @Override
                        public ListState<String> answer(InvocationOnMock invocationOnMock) throws Throwable {
                            ListStateDescriptor<String> descr =
                                    (ListStateDescriptor<String>) invocationOnMock.getArguments()[2];
                            // back the mocked call with real list state from a fresh heap state backend
                            AbstractKeyedStateBackend<Integer> backend =
                                    new MemoryStateBackend()
                                            .createKeyedStateBackend(
                                                    new DummyEnvironment("test_task", 1, 0),
                                                    new JobID(),
                                                    "test_op",
                                                    IntSerializer.INSTANCE,
                                                    1,
                                                    new KeyGroupRange(0, 0),
                                                    new KvStateRegistry().createTaskRegistry(new JobID(), new JobVertexID()),
                                                    TtlTimeProvider.DEFAULT,
                                                    new UnregisteredMetricsGroup(),
                                                    Collections.emptyList(),
                                                    new CloseableRegistry());
                            backend.setCurrentKey(0);
                            return backend.getPartitionedState(
                                    VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, descr);
                        }
                    })
            .when(keyedStateBackend)
            .getPartitionedState(Matchers.any(), any(TypeSerializer.class), any(ListStateDescriptor.class));
    when(operatorMock.getKeyedStateStore()).thenReturn(keyedStateStore);
    when(operatorMock.getOperatorID()).thenReturn(new OperatorID());
    return operatorMock;
}
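For context, a minimal hypothetical user function whose open() takes exactly the code path the mock above intercepts: getListState(...) on the runtime context is ultimately served by KeyedStateBackend#getPartitionedState. The class and state names are illustrative.

public static class BufferingFunction extends RichFlatMapFunction<String, String> {
    private transient ListState<String> buffer;

    @Override
    public void open(Configuration parameters) {
        // this is the call the mocked keyed state backend answers in the test above
        buffer = getRuntimeContext()
                .getListState(new ListStateDescriptor<>("buffer", String.class));
    }

    @Override
    public void flatMap(String value, Collector<String> out) throws Exception {
        buffer.add(value);
        out.collect(value);
    }
}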
Use of org.apache.flink.api.common.state.ListState in project flink by apache.
The class SavepointITCase, method testTriggerSavepointAndResumeWithNoClaim.
@Test
@Ignore("Disabling this test because it regularly fails on AZP. See FLINK-25427.")
public void testTriggerSavepointAndResumeWithNoClaim() throws Exception {
    final int numTaskManagers = 2;
    final int numSlotsPerTaskManager = 2;
    final int parallelism = numTaskManagers * numSlotsPerTaskManager;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
    env.getCheckpointConfig()
            .enableExternalizedCheckpoints(
                    CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    env.getCheckpointConfig().setCheckpointStorage(folder.newFolder().toURI());
    env.setParallelism(parallelism);
    final SharedReference<CountDownLatch> counter = sharedObjects.add(new CountDownLatch(10_000));
    env.fromSequence(1, Long.MAX_VALUE)
            .keyBy(i -> i % parallelism)
            .process(
                    new KeyedProcessFunction<Long, Long, Long>() {
                        private ListState<Long> last;

                        @Override
                        public void open(Configuration parameters) {
                            // we use list state here to create sst files of a significant size
                            // if sst files do not reach certain thresholds they are not stored
                            // in files, but as a byte stream in checkpoints metadata
                            last =
                                    getRuntimeContext()
                                            .getListState(
                                                    new ListStateDescriptor<>(
                                                            "last", BasicTypeInfo.LONG_TYPE_INFO));
                        }

                        @Override
                        public void processElement(
                                Long value,
                                KeyedProcessFunction<Long, Long, Long>.Context ctx,
                                Collector<Long> out)
                                throws Exception {
                            last.add(value);
                            out.collect(value);
                        }
                    })
            .addSink(
                    new SinkFunction<Long>() {
                        @Override
                        public void invoke(Long value) {
                            counter.consumeSync(CountDownLatch::countDown);
                        }
                    })
            .setParallelism(1);
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setNumberTaskManagers(numTaskManagers)
                            .setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
                            .build());
    cluster.before();
    try {
        final JobID jobID1 = new JobID();
        jobGraph.setJobID(jobID1);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID1, false);
        // wait for some records to be processed before taking the checkpoint
        counter.get().await();
        final String firstCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID1).get();
        cluster.getClusterClient().cancel(jobID1).get();
        jobGraph.setSavepointRestoreSettings(
                SavepointRestoreSettings.forPath(firstCheckpoint, false, RestoreMode.NO_CLAIM));
        final JobID jobID2 = new JobID();
        jobGraph.setJobID(jobID2);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID2, false);
        String secondCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID2).get();
        cluster.getClusterClient().cancel(jobID2).get();
        // delete the checkpoint we restored from
        FileUtils.deleteDirectory(Paths.get(new URI(firstCheckpoint)).getParent().toFile());
        // we should be able to restore from the second checkpoint even though it has been built
        // on top of the first checkpoint
        jobGraph.setSavepointRestoreSettings(
                SavepointRestoreSettings.forPath(secondCheckpoint, false, RestoreMode.NO_CLAIM));
        final JobID jobID3 = new JobID();
        jobGraph.setJobID(jobID3);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID3, false);
    } finally {
        cluster.after();
    }
}
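For reference, a short sketch of the restore settings used above next to the alternative ownership mode; the checkpoint path is a placeholder.

String externalPath = "file:///tmp/checkpoints/chk-42";
// NO_CLAIM (used in the test): Flink does not take ownership of the restored checkpoint,
// and the first completed checkpoint of the new job is a full one, so the restored
// checkpoint can later be deleted, as the test does.
SavepointRestoreSettings noClaim =
        SavepointRestoreSettings.forPath(externalPath, false, RestoreMode.NO_CLAIM);
// CLAIM: Flink takes ownership of the checkpoint and may delete it once it is subsumed.
SavepointRestoreSettings claim =
        SavepointRestoreSettings.forPath(externalPath, false, RestoreMode.CLAIM);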