Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
From the class KafkaConsumerPartitionAssignmentTest, method testGrowingPartitionsRemainsStable.
@Test
public void testGrowingPartitionsRemainsStable() {
    try {
        final int[] newPartitionIDs = { 4, 52, 17, 1, 2, 3, 89, 42, 31, 127, 14 };
        List<KafkaTopicPartition> newPartitions = new ArrayList<>();
        for (int p : newPartitionIDs) {
            KafkaTopicPartition part = new KafkaTopicPartition("test-topic", p);
            newPartitions.add(part);
        }
        List<KafkaTopicPartition> initialPartitions = newPartitions.subList(0, 7);
        final Set<KafkaTopicPartition> allNewPartitions = new HashSet<>(newPartitions);
        final Set<KafkaTopicPartition> allInitialPartitions = new HashSet<>(initialPartitions);
        final int numConsumers = 3;
        final int minInitialPartitionsPerConsumer = initialPartitions.size() / numConsumers;
        final int maxInitialPartitionsPerConsumer = initialPartitions.size() / numConsumers + 1;
        final int minNewPartitionsPerConsumer = newPartitions.size() / numConsumers;
        final int maxNewPartitionsPerConsumer = newPartitions.size() / numConsumers + 1;
        Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets1 = new HashMap<>();
        Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets2 = new HashMap<>();
        Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets3 = new HashMap<>();
        FlinkKafkaConsumerBase.initializeSubscribedPartitionsToStartOffsets(
            subscribedPartitionsToStartOffsets1, initialPartitions, 0, numConsumers, StartupMode.GROUP_OFFSETS, null);
        FlinkKafkaConsumerBase.initializeSubscribedPartitionsToStartOffsets(
            subscribedPartitionsToStartOffsets2, initialPartitions, 1, numConsumers, StartupMode.GROUP_OFFSETS, null);
        FlinkKafkaConsumerBase.initializeSubscribedPartitionsToStartOffsets(
            subscribedPartitionsToStartOffsets3, initialPartitions, 2, numConsumers, StartupMode.GROUP_OFFSETS, null);
        List<KafkaTopicPartition> subscribedPartitions1 = new ArrayList<>(subscribedPartitionsToStartOffsets1.keySet());
        List<KafkaTopicPartition> subscribedPartitions2 = new ArrayList<>(subscribedPartitionsToStartOffsets2.keySet());
        List<KafkaTopicPartition> subscribedPartitions3 = new ArrayList<>(subscribedPartitionsToStartOffsets3.keySet());
        assertTrue(subscribedPartitions1.size() >= minInitialPartitionsPerConsumer);
        assertTrue(subscribedPartitions1.size() <= maxInitialPartitionsPerConsumer);
        assertTrue(subscribedPartitions2.size() >= minInitialPartitionsPerConsumer);
        assertTrue(subscribedPartitions2.size() <= maxInitialPartitionsPerConsumer);
        assertTrue(subscribedPartitions3.size() >= minInitialPartitionsPerConsumer);
        assertTrue(subscribedPartitions3.size() <= maxInitialPartitionsPerConsumer);
        for (KafkaTopicPartition p : subscribedPartitions1) {
            // check that the element was actually contained
            assertTrue(allInitialPartitions.remove(p));
        }
        for (KafkaTopicPartition p : subscribedPartitions2) {
            // check that the element was actually contained
            assertTrue(allInitialPartitions.remove(p));
        }
        for (KafkaTopicPartition p : subscribedPartitions3) {
            // check that the element was actually contained
            assertTrue(allInitialPartitions.remove(p));
        }
        // all partitions must have been assigned
        assertTrue(allInitialPartitions.isEmpty());
        // grow the set of partitions and distribute anew
        subscribedPartitionsToStartOffsets1 = new HashMap<>();
        subscribedPartitionsToStartOffsets2 = new HashMap<>();
        subscribedPartitionsToStartOffsets3 = new HashMap<>();
        FlinkKafkaConsumerBase.initializeSubscribedPartitionsToStartOffsets(
            subscribedPartitionsToStartOffsets1, newPartitions, 0, numConsumers, StartupMode.GROUP_OFFSETS, null);
        FlinkKafkaConsumerBase.initializeSubscribedPartitionsToStartOffsets(
            subscribedPartitionsToStartOffsets2, newPartitions, 1, numConsumers, StartupMode.GROUP_OFFSETS, null);
        FlinkKafkaConsumerBase.initializeSubscribedPartitionsToStartOffsets(
            subscribedPartitionsToStartOffsets3, newPartitions, 2, numConsumers, StartupMode.GROUP_OFFSETS, null);
        List<KafkaTopicPartition> subscribedPartitions1New = new ArrayList<>(subscribedPartitionsToStartOffsets1.keySet());
        List<KafkaTopicPartition> subscribedPartitions2New = new ArrayList<>(subscribedPartitionsToStartOffsets2.keySet());
        List<KafkaTopicPartition> subscribedPartitions3New = new ArrayList<>(subscribedPartitionsToStartOffsets3.keySet());
        // new partitions must include all old partitions
        assertTrue(subscribedPartitions1New.size() > subscribedPartitions1.size());
        assertTrue(subscribedPartitions2New.size() > subscribedPartitions2.size());
        assertTrue(subscribedPartitions3New.size() > subscribedPartitions3.size());
        assertTrue(subscribedPartitions1New.containsAll(subscribedPartitions1));
        assertTrue(subscribedPartitions2New.containsAll(subscribedPartitions2));
        assertTrue(subscribedPartitions3New.containsAll(subscribedPartitions3));
        assertTrue(subscribedPartitions1New.size() >= minNewPartitionsPerConsumer);
        assertTrue(subscribedPartitions1New.size() <= maxNewPartitionsPerConsumer);
        assertTrue(subscribedPartitions2New.size() >= minNewPartitionsPerConsumer);
        assertTrue(subscribedPartitions2New.size() <= maxNewPartitionsPerConsumer);
        assertTrue(subscribedPartitions3New.size() >= minNewPartitionsPerConsumer);
        assertTrue(subscribedPartitions3New.size() <= maxNewPartitionsPerConsumer);
        for (KafkaTopicPartition p : subscribedPartitions1New) {
            // check that the element was actually contained
            assertTrue(allNewPartitions.remove(p));
        }
        for (KafkaTopicPartition p : subscribedPartitions2New) {
            // check that the element was actually contained
            assertTrue(allNewPartitions.remove(p));
        }
        for (KafkaTopicPartition p : subscribedPartitions3New) {
            // check that the element was actually contained
            assertTrue(allNewPartitions.remove(p));
        }
        // all partitions must have been assigned
        assertTrue(allNewPartitions.isEmpty());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
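The stability asserted above relies on the assignment being a pure function of a partition's position in the partition list and the subtask index, with new partitions only ever appended to the list. The following is a minimal sketch of such an index-modulo distribution; the class and method names (ModuloAssignmentSketch, assignPartitions) are hypothetical and this is not the actual FlinkKafkaConsumerBase code, only an illustration of the idea the test depends on.
import java.util.ArrayList;
import java.util.List;

// Hypothetical illustration (not the actual Flink implementation): a subtask takes
// every partition whose index modulo the subtask count equals its own index.
// If new partitions are only appended, the indices of existing partitions never
// change, so a subtask's old assignments remain stable as the list grows.
public class ModuloAssignmentSketch {
    static <T> List<T> assignPartitions(List<T> allPartitions, int subtaskIndex, int numSubtasks) {
        List<T> assigned = new ArrayList<>();
        for (int i = 0; i < allPartitions.size(); i++) {
            if (i % numSubtasks == subtaskIndex) {
                assigned.add(allPartitions.get(i));
            }
        }
        return assigned;
    }
}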
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
From the class KafkaConsumerPartitionAssignmentTest, method testPartitionsEqualConsumers.
@Test
public void testPartitionsEqualConsumers() {
    try {
        List<KafkaTopicPartition> inPartitions = Arrays.asList(
            new KafkaTopicPartition("test-topic", 4),
            new KafkaTopicPartition("test-topic", 52),
            new KafkaTopicPartition("test-topic", 17),
            new KafkaTopicPartition("test-topic", 1));
        for (int i = 0; i < inPartitions.size(); i++) {
            Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets = new HashMap<>();
            FlinkKafkaConsumerBase.initializeSubscribedPartitionsToStartOffsets(
                subscribedPartitionsToStartOffsets, inPartitions, i, inPartitions.size(), StartupMode.GROUP_OFFSETS, null);
            List<KafkaTopicPartition> subscribedPartitions = new ArrayList<>(subscribedPartitionsToStartOffsets.keySet());
            assertEquals(1, subscribedPartitions.size());
            assertTrue(contains(inPartitions, subscribedPartitions.get(0).getPartition()));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
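The contains(inPartitions, partitionId) helper used in the last assertion is defined elsewhere in the test class and not shown here. A plausible sketch of its shape, offered only as an assumption about its behavior:
// Assumed shape of the helper used above: checks whether any KafkaTopicPartition
// in the list carries the given partition id.
private static boolean contains(List<KafkaTopicPartition> inPartitions, int partition) {
    for (KafkaTopicPartition ktp : inPartitions) {
        if (ktp.getPartition() == partition) {
            return true;
        }
    }
    return false;
}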
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
From the class FlinkKafkaConsumerBaseTest, method checkFilterRestoredPartitionsWithDisovered.
private void checkFilterRestoredPartitionsWithDisovered(
        List<String> restoredKafkaTopics,
        List<String> initKafkaTopics,
        List<String> expectedSubscribedPartitions,
        Boolean disableFiltering) throws Exception {
    final AbstractPartitionDiscoverer discoverer = new TestPartitionDiscoverer(
        new KafkaTopicsDescriptor(initKafkaTopics, null),
        0,
        1,
        TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(initKafkaTopics),
        TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(
            initKafkaTopics.stream().map(topic -> new KafkaTopicPartition(topic, 0)).collect(Collectors.toList())));
    final FlinkKafkaConsumerBase<String> consumer = new DummyFlinkKafkaConsumer<>(initKafkaTopics, discoverer);
    if (disableFiltering) {
        consumer.disableFilterRestoredPartitionsWithSubscribedTopics();
    }
    final TestingListState<Tuple2<KafkaTopicPartition, Long>> listState = new TestingListState<>();
    for (int i = 0; i < restoredKafkaTopics.size(); i++) {
        listState.add(new Tuple2<>(new KafkaTopicPartition(restoredKafkaTopics.get(i), 0), 12345L));
    }
    setupConsumer(consumer, true, listState, true, 0, 1);
    Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets = consumer.getSubscribedPartitionsToStartOffsets();
    assertEquals(
        new HashSet<>(expectedSubscribedPartitions),
        subscribedPartitionsToStartOffsets.keySet().stream().map(partition -> partition.getTopic()).collect(Collectors.toSet()));
}
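A helper like this is typically driven by small test methods that pass different combinations of restored and currently subscribed topics. The following caller is illustrative only: the topic names and the test method name are assumptions, not taken from the Flink sources.
// Illustrative caller (names are hypothetical): restored state for "removed-topic"
// should be dropped when filtering stays enabled, because the consumer no longer
// subscribes to that topic.
@Test
public void testFilterRestoredPartitionsDropsUnsubscribedTopics() throws Exception {
    checkFilterRestoredPartitionsWithDisovered(
        Arrays.asList("kept-topic", "removed-topic"),   // topics present in restored state
        Collections.singletonList("kept-topic"),        // topics the consumer subscribes to now
        Collections.singletonList("kept-topic"),        // expected subscribed topics after filtering
        false);                                         // filtering not disabled
}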
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
From the class FlinkKafkaConsumerBaseTest, method testRescaling.
/**
* Tests whether the Kafka consumer behaves correctly when scaling the parallelism up/down,
* which means that operator state is being reshuffled.
*
* <p>This also verifies that a restoring source is always impervious to changes in the list of
* topics fetched from Kafka.
*/
@SuppressWarnings("unchecked")
private void testRescaling(
        final int initialParallelism,
        final int numPartitions,
        final int restoredParallelism,
        final int restoredNumPartitions) throws Exception {
    Preconditions.checkArgument(
        restoredNumPartitions >= numPartitions,
        "invalid test case for Kafka repartitioning; Kafka only allows increasing partitions.");
    List<KafkaTopicPartition> mockFetchedPartitionsOnStartup = new ArrayList<>();
    for (int i = 0; i < numPartitions; i++) {
        mockFetchedPartitionsOnStartup.add(new KafkaTopicPartition("test-topic", i));
    }
    DummyFlinkKafkaConsumer<String>[] consumers = new DummyFlinkKafkaConsumer[initialParallelism];
    AbstractStreamOperatorTestHarness<String>[] testHarnesses = new AbstractStreamOperatorTestHarness[initialParallelism];
    List<String> testTopics = Collections.singletonList("test-topic");
    for (int i = 0; i < initialParallelism; i++) {
        TestPartitionDiscoverer partitionDiscoverer = new TestPartitionDiscoverer(
            new KafkaTopicsDescriptor(testTopics, null),
            i,
            initialParallelism,
            TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(testTopics),
            TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(mockFetchedPartitionsOnStartup));
        consumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer);
        testHarnesses[i] = createTestHarness(consumers[i], initialParallelism, i);
        // initializeState() is always called, null signals that we didn't restore
        testHarnesses[i].initializeEmptyState();
        testHarnesses[i].open();
    }
    Map<KafkaTopicPartition, Long> globalSubscribedPartitions = new HashMap<>();
    for (int i = 0; i < initialParallelism; i++) {
        Map<KafkaTopicPartition, Long> subscribedPartitions = consumers[i].getSubscribedPartitionsToStartOffsets();
        // make sure that no one else is subscribed to these partitions
        for (KafkaTopicPartition partition : subscribedPartitions.keySet()) {
            assertThat(globalSubscribedPartitions, not(hasKey(partition)));
        }
        globalSubscribedPartitions.putAll(subscribedPartitions);
    }
    assertThat(globalSubscribedPartitions.values(), hasSize(numPartitions));
    assertThat(mockFetchedPartitionsOnStartup, everyItem(isIn(globalSubscribedPartitions.keySet())));
    OperatorSubtaskState[] state = new OperatorSubtaskState[initialParallelism];
    for (int i = 0; i < initialParallelism; i++) {
        state[i] = testHarnesses[i].snapshot(0, 0);
    }
    OperatorSubtaskState mergedState = AbstractStreamOperatorTestHarness.repackageState(state);
    // -----------------------------------------------------------------------------------------
    // restore
    List<KafkaTopicPartition> mockFetchedPartitionsAfterRestore = new ArrayList<>();
    for (int i = 0; i < restoredNumPartitions; i++) {
        mockFetchedPartitionsAfterRestore.add(new KafkaTopicPartition("test-topic", i));
    }
    DummyFlinkKafkaConsumer<String>[] restoredConsumers = new DummyFlinkKafkaConsumer[restoredParallelism];
    AbstractStreamOperatorTestHarness<String>[] restoredTestHarnesses = new AbstractStreamOperatorTestHarness[restoredParallelism];
    for (int i = 0; i < restoredParallelism; i++) {
        OperatorSubtaskState initState = AbstractStreamOperatorTestHarness.repartitionOperatorState(
            mergedState, maxParallelism, initialParallelism, restoredParallelism, i);
        TestPartitionDiscoverer partitionDiscoverer = new TestPartitionDiscoverer(
            new KafkaTopicsDescriptor(testTopics, null),
            i,
            restoredParallelism,
            TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(testTopics),
            TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(mockFetchedPartitionsAfterRestore));
        restoredConsumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer);
        restoredTestHarnesses[i] = createTestHarness(restoredConsumers[i], restoredParallelism, i);
        // initializeState() is always called, null signals that we didn't restore
        restoredTestHarnesses[i].initializeState(initState);
        restoredTestHarnesses[i].open();
    }
    Map<KafkaTopicPartition, Long> restoredGlobalSubscribedPartitions = new HashMap<>();
    for (int i = 0; i < restoredParallelism; i++) {
        Map<KafkaTopicPartition, Long> subscribedPartitions = restoredConsumers[i].getSubscribedPartitionsToStartOffsets();
        // make sure that no one else is subscribed to these partitions
        for (KafkaTopicPartition partition : subscribedPartitions.keySet()) {
            assertThat(restoredGlobalSubscribedPartitions, not(hasKey(partition)));
        }
        restoredGlobalSubscribedPartitions.putAll(subscribedPartitions);
    }
    assertThat(restoredGlobalSubscribedPartitions.values(), hasSize(restoredNumPartitions));
    assertThat(mockFetchedPartitionsOnStartup, everyItem(isIn(restoredGlobalSubscribedPartitions.keySet())));
}
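A private driver like this is usually exercised by small @Test methods that pick concrete scale-up or scale-down scenarios. One illustrative invocation follows; the method name and parameter values are assumptions for the sketch, not taken verbatim from the Flink test class.
// Hypothetical scale-up scenario: 2 subtasks reading 4 partitions on the initial run,
// then restored with 4 subtasks after the topic has grown to 8 partitions.
@Test
public void testRescaleUpWithNewPartitions() throws Exception {
    testRescaling(2, 4, 4, 8);
}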
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
From the class FlinkKafkaConsumerBaseTest, method testExplicitStateSerializerCompatibility.
/**
* Before using an explicit TypeSerializer for the partition state the {@link
* FlinkKafkaConsumerBase} was creating a serializer using a {@link TypeHint}. Here, we verify
* that the two methods create compatible serializers.
*/
@Test
public void testExplicitStateSerializerCompatibility() throws Exception {
    ExecutionConfig executionConfig = new ExecutionConfig();
    Tuple2<KafkaTopicPartition, Long> tuple = new Tuple2<>(new KafkaTopicPartition("dummy", 0), 42L);
    // This is how the KafkaConsumerBase used to create the TypeSerializer
    TypeInformation<Tuple2<KafkaTopicPartition, Long>> originalTypeHintTypeInfo =
        new TypeHint<Tuple2<KafkaTopicPartition, Long>>() {}.getTypeInfo();
    TypeSerializer<Tuple2<KafkaTopicPartition, Long>> serializerFromTypeHint =
        originalTypeHintTypeInfo.createSerializer(executionConfig);
    byte[] bytes = InstantiationUtil.serializeToByteArray(serializerFromTypeHint, tuple);
    // Directly use the Consumer to create the TypeSerializer (using the new method)
    TupleSerializer<Tuple2<KafkaTopicPartition, Long>> kafkaConsumerSerializer =
        FlinkKafkaConsumerBase.createStateSerializer(executionConfig);
    Tuple2<KafkaTopicPartition, Long> actualTuple =
        InstantiationUtil.deserializeFromByteArray(kafkaConsumerSerializer, bytes);
    Assert.assertEquals(
        "Explicit Serializer is not compatible with previous method of creating Serializer using TypeHint.",
        tuple,
        actualTuple);
}
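A complementary check that is not part of the original test class would be the reverse round trip: bytes written with the explicit serializer should be readable with the TypeHint-based one. A minimal sketch under that assumption, reusing only the APIs already shown above (the test method name is hypothetical):
// Hypothetical complementary test (not in the Flink sources): serialize with the
// explicit serializer, deserialize with the TypeHint-based serializer, and expect
// the original tuple back.
@Test
public void testExplicitStateSerializerReverseCompatibility() throws Exception {
    ExecutionConfig executionConfig = new ExecutionConfig();
    Tuple2<KafkaTopicPartition, Long> tuple = new Tuple2<>(new KafkaTopicPartition("dummy", 0), 42L);
    TupleSerializer<Tuple2<KafkaTopicPartition, Long>> explicitSerializer =
        FlinkKafkaConsumerBase.createStateSerializer(executionConfig);
    byte[] bytes = InstantiationUtil.serializeToByteArray(explicitSerializer, tuple);
    TypeSerializer<Tuple2<KafkaTopicPartition, Long>> typeHintSerializer =
        new TypeHint<Tuple2<KafkaTopicPartition, Long>>() {}.getTypeInfo().createSerializer(executionConfig);
    Tuple2<KafkaTopicPartition, Long> actualTuple =
        InstantiationUtil.deserializeFromByteArray(typeHintSerializer, bytes);
    Assert.assertEquals(tuple, actualTuple);
}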