Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
The class FlinkKafkaConsumerBaseTest, method testRescaling.
/**
* Tests whether the Kafka consumer behaves correctly when scaling the parallelism up/down,
* which means that operator state is being reshuffled.
*
* <p>This also verifies that a restoring source is always impervious to changes in the list of
* topics fetched from Kafka.
*/
@SuppressWarnings("unchecked")
private void testRescaling(final int initialParallelism, final int numPartitions, final int restoredParallelism, final int restoredNumPartitions) throws Exception {
Preconditions.checkArgument(restoredNumPartitions >= numPartitions, "invalid test case for Kafka repartitioning; Kafka only allows increasing partitions.");
List<KafkaTopicPartition> mockFetchedPartitionsOnStartup = new ArrayList<>();
for (int i = 0; i < numPartitions; i++) {
mockFetchedPartitionsOnStartup.add(new KafkaTopicPartition("test-topic", i));
}
DummyFlinkKafkaConsumer<String>[] consumers = new DummyFlinkKafkaConsumer[initialParallelism];
AbstractStreamOperatorTestHarness<String>[] testHarnesses = new AbstractStreamOperatorTestHarness[initialParallelism];
List<String> testTopics = Collections.singletonList("test-topic");
for (int i = 0; i < initialParallelism; i++) {
TestPartitionDiscoverer partitionDiscoverer = new TestPartitionDiscoverer(
        new KafkaTopicsDescriptor(testTopics, null),
        i,
        initialParallelism,
        TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(testTopics),
        TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(mockFetchedPartitionsOnStartup));
consumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer);
testHarnesses[i] = createTestHarness(consumers[i], initialParallelism, i);
// initializeState() is always called; initializeEmptyState() signals that we are not restoring
testHarnesses[i].initializeEmptyState();
testHarnesses[i].open();
}
Map<KafkaTopicPartition, Long> globalSubscribedPartitions = new HashMap<>();
for (int i = 0; i < initialParallelism; i++) {
Map<KafkaTopicPartition, Long> subscribedPartitions = consumers[i].getSubscribedPartitionsToStartOffsets();
// make sure that no one else is subscribed to these partitions
for (KafkaTopicPartition partition : subscribedPartitions.keySet()) {
assertThat(globalSubscribedPartitions, not(hasKey(partition)));
}
globalSubscribedPartitions.putAll(subscribedPartitions);
}
assertThat(globalSubscribedPartitions.values(), hasSize(numPartitions));
assertThat(mockFetchedPartitionsOnStartup, everyItem(isIn(globalSubscribedPartitions.keySet())));
OperatorSubtaskState[] state = new OperatorSubtaskState[initialParallelism];
for (int i = 0; i < initialParallelism; i++) {
state[i] = testHarnesses[i].snapshot(0, 0);
}
OperatorSubtaskState mergedState = AbstractStreamOperatorTestHarness.repackageState(state);
// -----------------------------------------------------------------------------------------
// restore
List<KafkaTopicPartition> mockFetchedPartitionsAfterRestore = new ArrayList<>();
for (int i = 0; i < restoredNumPartitions; i++) {
mockFetchedPartitionsAfterRestore.add(new KafkaTopicPartition("test-topic", i));
}
DummyFlinkKafkaConsumer<String>[] restoredConsumers = new DummyFlinkKafkaConsumer[restoredParallelism];
AbstractStreamOperatorTestHarness<String>[] restoredTestHarnesses = new AbstractStreamOperatorTestHarness[restoredParallelism];
for (int i = 0; i < restoredParallelism; i++) {
OperatorSubtaskState initState = AbstractStreamOperatorTestHarness.repartitionOperatorState(mergedState, maxParallelism, initialParallelism, restoredParallelism, i);
TestPartitionDiscoverer partitionDiscoverer = new TestPartitionDiscoverer(
        new KafkaTopicsDescriptor(testTopics, null),
        i,
        restoredParallelism,
        TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(testTopics),
        TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(mockFetchedPartitionsAfterRestore));
restoredConsumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer);
restoredTestHarnesses[i] = createTestHarness(restoredConsumers[i], restoredParallelism, i);
// initializeState() is always called; this time we restore the repartitioned state
restoredTestHarnesses[i].initializeState(initState);
restoredTestHarnesses[i].open();
}
Map<KafkaTopicPartition, Long> restoredGlobalSubscribedPartitions = new HashMap<>();
for (int i = 0; i < restoredParallelism; i++) {
Map<KafkaTopicPartition, Long> subscribedPartitions = restoredConsumers[i].getSubscribedPartitionsToStartOffsets();
// make sure that no one else is subscribed to these partitions
for (KafkaTopicPartition partition : subscribedPartitions.keySet()) {
assertThat(restoredGlobalSubscribedPartitions, not(hasKey(partition)));
}
restoredGlobalSubscribedPartitions.putAll(subscribedPartitions);
}
assertThat(restoredGlobalSubscribedPartitions.values(), hasSize(restoredNumPartitions));
assertThat(mockFetchedPartitionsOnStartup, everyItem(isIn(restoredGlobalSubscribedPartitions.keySet())));
}
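The createTestHarness helper called above is not part of this excerpt. A minimal sketch of what it presumably does, assuming a class-level maxParallelism constant and the same AbstractStreamOperatorTestHarness constructor that appears in the migration test below, is:
// Hypothetical sketch: wraps the consumer in a StreamSource and builds a per-subtask harness.
// maxParallelism is assumed to be a constant of the test class.
private static <T> AbstractStreamOperatorTestHarness<T> createTestHarness(
        SourceFunction<T> source, int numSubtasks, int subtaskIndex) throws Exception {
    AbstractStreamOperatorTestHarness<T> testHarness =
            new AbstractStreamOperatorTestHarness<>(
                    new StreamSource<>(source), maxParallelism, numSubtasks, subtaskIndex);
    testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    return testHarness;
}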
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
The class FlinkKafkaConsumerBaseMigrationTest, method writeSnapshot.
private void writeSnapshot(String path, HashMap<KafkaTopicPartition, Long> state) throws Exception {
final OneShotLatch latch = new OneShotLatch();
final AbstractFetcher<String, ?> fetcher = mock(AbstractFetcher.class);
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
latch.trigger();
return null;
}
}).when(fetcher).runFetchLoop();
when(fetcher.snapshotCurrentState()).thenReturn(state);
final List<KafkaTopicPartition> partitions = new ArrayList<>(PARTITION_STATE.keySet());
final DummyFlinkKafkaConsumer<String> consumerFunction = new DummyFlinkKafkaConsumer<>(fetcher, TOPICS, partitions, FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED);
StreamSource<String, DummyFlinkKafkaConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
testHarness.setup();
testHarness.open();
final Throwable[] error = new Throwable[1];
// run the source asynchronously
Thread runner = new Thread() {
@Override
public void run() {
try {
consumerFunction.run(new DummySourceContext() {
@Override
public void collect(String element) {
}
});
} catch (Throwable t) {
t.printStackTrace();
error[0] = t;
}
}
};
runner.start();
if (!latch.isTriggered()) {
latch.await();
}
final OperatorSubtaskState snapshot;
synchronized (testHarness.getCheckpointLock()) {
snapshot = testHarness.snapshot(0L, 0L);
}
OperatorSnapshotUtil.writeStateHandle(snapshot, path);
consumerOperator.close();
runner.join();
}
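The restore side of this migration test is not shown in this excerpt. A minimal sketch of how the written snapshot would be read back, assuming OperatorSnapshotUtil.readStateHandle as the counterpart of writeStateHandle and a hypothetical restoredConsumerFunction, is:
// Hypothetical sketch of the restore side: read the serialized snapshot back and use it to
// initialize a fresh harness around a newly created consumer before opening it.
OperatorSubtaskState restoredState = OperatorSnapshotUtil.readStateHandle(path);
StreamSource<String, DummyFlinkKafkaConsumer<String>> restoredOperator =
        new StreamSource<>(restoredConsumerFunction); // restoredConsumerFunction: a fresh consumer instance
AbstractStreamOperatorTestHarness<String> restoreHarness =
        new AbstractStreamOperatorTestHarness<>(restoredOperator, 1, 1, 0);
restoreHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
restoreHarness.setup();
restoreHarness.initializeState(restoredState);
restoreHarness.open();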
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
The class FlinkKafkaProducerITCase, method testScaleUpAfterScalingDown.
/**
 * Each instance of FlinkKafkaProducer uses its own pool of transactional ids. After a restore
 * from a checkpoint, the transactional ids are redistributed across the subtasks. When scaling
 * down, the surplus transactional ids are dropped; when scaling up, new ones are generated for
 * the new subtasks. This test makes sure that a sequence of scaling down and then up again
 * works correctly. In particular, it checks that the ids newly generated while scaling up do
 * not overlap with ids that were in use before scaling down. For example, we start with 4 ids
 * at parallelism 4: [1], [2], [3], [4], one per subtask. We scale down to parallelism 2:
 * [1, 2], [3, 4], so the first subtask gets ids 1 and 2, the second gets ids 3 and 4, and the
 * surplus ids are dropped from the pools. We then scale up to parallelism 3: [1 or 2],
 * [3 or 4], [???]. The new subtask has to generate new id(s), but it cannot use ids that are
 * potentially still in use, so it has to generate ones that are greater than 4.
 */
@Test
public void testScaleUpAfterScalingDown() throws Exception {
String topic = "scale-up-after-scaling-down";
final int parallelism1 = 4;
final int parallelism2 = 2;
final int parallelism3 = 3;
final int maxParallelism = Math.max(parallelism1, Math.max(parallelism2, parallelism3));
OperatorSubtaskState operatorSubtaskState = repartitionAndExecute(
        topic,
        OperatorSubtaskState.builder().build(),
        parallelism1,
        parallelism1,
        maxParallelism,
        IntStream.range(0, parallelism1).boxed().iterator());
operatorSubtaskState = repartitionAndExecute(
        topic,
        operatorSubtaskState,
        parallelism1,
        parallelism2,
        maxParallelism,
        IntStream.range(parallelism1, parallelism1 + parallelism2).boxed().iterator());
operatorSubtaskState = repartitionAndExecute(
        topic,
        operatorSubtaskState,
        parallelism2,
        parallelism3,
        maxParallelism,
        IntStream.range(parallelism1 + parallelism2, parallelism1 + parallelism2 + parallelism3).boxed().iterator());
// After each of the previous repartitionAndExecute calls, we are left with some lingering
// transactions that would not allow us to read all committed messages from the topic. Thus we
// initialize the operators from the OperatorSubtaskState once more, but without any new data.
// This should terminate all ongoing transactions.
repartitionAndExecute(topic, operatorSubtaskState, parallelism3, 1, maxParallelism, Collections.emptyIterator());
assertExactlyOnceForTopic(createProperties(), topic, IntStream.range(0, parallelism1 + parallelism2 + parallelism3).boxed().collect(Collectors.toList()));
deleteTestTopic(topic);
checkProducerLeak();
}
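The repartitionAndExecute helper is not shown in this excerpt. A minimal sketch of the repartitioning step it presumably performs, reusing AbstractStreamOperatorTestHarness.repartitionOperatorState as in the rescaling test above (previousState, oldParallelism, and newParallelism are placeholder names), is:
// Hypothetical sketch: for each subtask of the new parallelism, carve out its share of the
// previous run's state and restore a producer harness with it.
for (int subtaskIndex = 0; subtaskIndex < newParallelism; subtaskIndex++) {
    OperatorSubtaskState subtaskState =
            AbstractStreamOperatorTestHarness.repartitionOperatorState(
                    previousState, maxParallelism, oldParallelism, newParallelism, subtaskIndex);
    OneInputStreamOperatorTestHarness<Integer, Object> harness = createTestHarness(topic);
    harness.setup();
    harness.initializeState(subtaskState);
    harness.open();
    // ... process the next batch of elements, snapshot, and collect the new state ...
}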
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
The class FlinkKafkaProducerITCase, method testFlinkKafkaProducerFailBeforeNotify.
/**
 * This test hangs when run in the IDE and is therefore ignored.
 */
@Test
@Ignore
public void testFlinkKafkaProducerFailBeforeNotify() throws Exception {
String topic = "flink-kafka-producer-fail-before-notify";
OneInputStreamOperatorTestHarness<Integer, Object> testHarness = createTestHarness(topic);
testHarness.setup();
testHarness.open();
testHarness.processElement(42, 0);
testHarness.snapshot(0, 1);
testHarness.processElement(43, 2);
OperatorSubtaskState snapshot = testHarness.snapshot(1, 3);
int leaderId = kafkaServer.getLeaderToShutDown(topic);
failBroker(leaderId);
try {
testHarness.processElement(44, 4);
testHarness.snapshot(2, 5);
fail();
} catch (Exception ex) {
// expected
}
try {
testHarness.close();
} catch (Exception ex) {
// expected: closing the harness may fail because the leader broker was shut down
}
kafkaServer.restartBroker(leaderId);
testHarness = createTestHarness(topic);
testHarness.setup();
testHarness.initializeState(snapshot);
testHarness.close();
assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 43));
deleteTestTopic(topic);
checkProducerLeak();
}
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
The class FlinkKafkaProducerITCase, method testRestoreUsingDifferentTransactionalIdPrefix.
@Test
public void testRestoreUsingDifferentTransactionalIdPrefix() throws Exception {
String topic = "testCustomizeTransactionalIdPrefix";
Properties properties = createProperties();
final String transactionalIdPrefix1 = "my-prefix1";
FlinkKafkaProducer<Integer> kafkaProducer1 = new FlinkKafkaProducer<>(topic, integerKeyedSerializationSchema, properties, FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
kafkaProducer1.setTransactionalIdPrefix(transactionalIdPrefix1);
OperatorSubtaskState snapshot;
try (OneInputStreamOperatorTestHarness<Integer, Object> testHarness1 = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(kafkaProducer1), IntSerializer.INSTANCE)) {
testHarness1.setup();
testHarness1.open();
testHarness1.processElement(42, 0);
snapshot = testHarness1.snapshot(0, 1);
testHarness1.processElement(43, 2);
}
final String transactionalIdPrefix2 = "my-prefix2";
FlinkKafkaProducer<Integer> kafkaProducer2 = new FlinkKafkaProducer<>(topic, integerKeyedSerializationSchema, properties, FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
kafkaProducer2.setTransactionalIdPrefix(transactionalIdPrefix2);
try (OneInputStreamOperatorTestHarness<Integer, Object> testHarness2 = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(kafkaProducer2), IntSerializer.INSTANCE)) {
testHarness2.setup();
// restore from the previous snapshot, transactions with records 43 should be aborted
testHarness2.initializeState(snapshot);
testHarness2.open();
testHarness2.processElement(44, 3);
testHarness2.snapshot(1, 4);
testHarness2.processElement(45, 5);
testHarness2.notifyOfCompletedCheckpoint(1);
testHarness2.processElement(46, 6);
}
assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 44));
checkProducerLeak();
}
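assertExactlyOnceForTopic is a helper from the Kafka test base and its implementation is not shown here. Records 43, 45, and 46 were never committed, so the check has to read only committed data. A conceptual sketch, assuming a plain KafkaConsumer with read_committed isolation and standard integer deserialization (which may differ from the byte format actually produced by integerKeyedSerializationSchema), could look like:
// Conceptual sketch, not the actual helper. Assumes imports of
// org.apache.kafka.clients.consumer.KafkaConsumer, ConsumerRecords,
// org.apache.kafka.common.serialization.IntegerDeserializer, and java.time.Duration.
Properties consumerProps = createProperties();
consumerProps.put("isolation.level", "read_committed"); // hide records of aborted/open transactions
consumerProps.put("group.id", "exactly-once-check");
consumerProps.put("auto.offset.reset", "earliest");
consumerProps.put("key.deserializer", IntegerDeserializer.class.getName());
consumerProps.put("value.deserializer", IntegerDeserializer.class.getName());
try (KafkaConsumer<Integer, Integer> consumer = new KafkaConsumer<>(consumerProps)) {
    consumer.subscribe(Collections.singletonList(topic));
    List<Integer> read = new ArrayList<>();
    ConsumerRecords<Integer, Integer> records;
    do {
        records = consumer.poll(Duration.ofSeconds(5));
        records.forEach(r -> read.add(r.value()));
    } while (!records.isEmpty());
    assertEquals(Arrays.asList(42, 44), read); // exactly the committed elements, exactly once
}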