Search in sources:

Example 1 with KafkaTopicsDescriptor

use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor in project flink by apache.

From the class FlinkKafkaConsumerBaseTest, method checkFilterRestoredPartitionsWithDisovered (the misspelling "Disovered" is in the actual method name).

/**
 * Restores a consumer from state containing {@code restoredKafkaTopics}, subscribes it to
 * {@code initKafkaTopics}, and asserts that exactly {@code expectedSubscribedPartitions}
 * topics end up subscribed. When {@code disableFiltering} is true, restored partitions are
 * kept even if their topic is no longer in the subscribed set.
 */
private void checkFilterRestoredPartitionsWithDisovered(List<String> restoredKafkaTopics, List<String> initKafkaTopics, List<String> expectedSubscribedPartitions, Boolean disableFiltering) throws Exception {
    // Discoverer that reports exactly one partition (index 0) per initial topic.
    final AbstractPartitionDiscoverer discoverer = new TestPartitionDiscoverer(new KafkaTopicsDescriptor(initKafkaTopics, null), 0, 1, TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(initKafkaTopics), TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(initKafkaTopics.stream().map(topic -> new KafkaTopicPartition(topic, 0)).collect(Collectors.toList())));
    final FlinkKafkaConsumerBase<String> consumer = new DummyFlinkKafkaConsumer<>(initKafkaTopics, discoverer);
    if (disableFiltering) {
        consumer.disableFilterRestoredPartitionsWithSubscribedTopics();
    }
    // Fake restored state: one partition (index 0) per restored topic, all at a dummy offset.
    final TestingListState<Tuple2<KafkaTopicPartition, Long>> listState = new TestingListState<>();
    for (String restoredTopic : restoredKafkaTopics) {
        listState.add(new Tuple2<>(new KafkaTopicPartition(restoredTopic, 0), 12345L));
    }
    setupConsumer(consumer, true, listState, true, 0, 1);
    // Compare only topic names: partition indices are fixed at 0 throughout this test.
    final Set<String> actualSubscribedTopics = consumer.getSubscribedPartitionsToStartOffsets().keySet().stream().map(KafkaTopicPartition::getTopic).collect(Collectors.toSet());
    assertEquals(new HashSet<>(expectedSubscribedPartitions), actualSubscribedTopics);
}
Also used : BroadcastState(org.apache.flink.api.common.state.BroadcastState) Arrays(java.util.Arrays) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) ThrowingRunnable(org.apache.flink.util.function.ThrowingRunnable) Tuple2(org.apache.flink.api.java.tuple.Tuple2) IsIn.isIn(org.hamcrest.collection.IsIn.isIn) ExceptionUtils(org.apache.flink.util.ExceptionUtils) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) Mockito.doThrow(org.mockito.Mockito.doThrow) InstantiationUtil(org.apache.flink.util.InstantiationUtil) Matchers.everyItem(org.hamcrest.Matchers.everyItem) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) KeyedStateStore(org.apache.flink.api.common.state.KeyedStateStore) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Assert.fail(org.junit.Assert.fail) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) IsNot.not(org.hamcrest.core.IsNot.not) IsMapContaining.hasKey(org.hamcrest.collection.IsMapContaining.hasKey) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KafkaDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper) FunctionInitializationContext(org.apache.flink.runtime.state.FunctionInitializationContext) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) Collection(java.util.Collection) AbstractFetcher(org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher) KafkaTopicsDescriptor(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor) Set(java.util.Set) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) 
OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) Preconditions(org.apache.flink.util.Preconditions) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) TestSourceContext(org.apache.flink.streaming.connectors.kafka.testutils.TestSourceContext) MetricGroup(org.apache.flink.metrics.MetricGroup) KafkaCommitCallback(org.apache.flink.streaming.connectors.kafka.internals.KafkaCommitCallback) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) Assert.assertFalse(org.junit.Assert.assertFalse) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Matchers.is(org.hamcrest.Matchers.is) TestPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.testutils.TestPartitionDiscoverer) KeyedDeserializationSchema(org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema) SupplierWithException(org.apache.flink.util.function.SupplierWithException) Mockito.mock(org.mockito.Mockito.mock) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) AbstractPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer) ArrayList(java.util.ArrayList) AssignerWithPeriodicWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks) MockStreamingRuntimeContext(org.apache.flink.streaming.util.MockStreamingRuntimeContext) HashSet(java.util.HashSet) OptionalLong(java.util.OptionalLong) OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) CheckedThread(org.apache.flink.core.testutils.CheckedThread) 
SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) OffsetCommitMode(org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode) Matchers.hasSize(org.hamcrest.Matchers.hasSize) Nonnull(javax.annotation.Nonnull) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) TimeCharacteristic(org.apache.flink.streaming.api.TimeCharacteristic) ProcessingTimeService(org.apache.flink.streaming.runtime.tasks.ProcessingTimeService) Configuration(org.apache.flink.configuration.Configuration) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) StateSnapshotContextSynchronousImpl(org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl) Assert.assertNull(org.junit.Assert.assertNull) MockDeserializationSchema(org.apache.flink.streaming.util.MockDeserializationSchema) Assert(org.junit.Assert) ArrayDeque(java.util.ArrayDeque) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) AbstractPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) TestPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.testutils.TestPartitionDiscoverer) Tuple2(org.apache.flink.api.java.tuple.Tuple2) OptionalLong(java.util.OptionalLong) KafkaTopicsDescriptor(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor)

Example 2 with KafkaTopicsDescriptor

use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor in project flink by apache.

From the class FlinkKafkaConsumerBaseTest, method testRescaling.

/**
 * Tests whether the Kafka consumer behaves correctly when scaling the parallelism up/down,
 * which means that operator state is being reshuffled.
 *
 * <p>This also verifies that a restoring source is always impervious to changes in the list of
 * topics fetched from Kafka.
 *
 * @param initialParallelism number of consumer subtasks before the rescale
 * @param numPartitions number of Kafka partitions visible to the initial discoverers
 * @param restoredParallelism number of consumer subtasks after restore
 * @param restoredNumPartitions number of Kafka partitions visible after restore; must be
 *     {@code >= numPartitions} since Kafka only ever grows partition counts
 */
@SuppressWarnings("unchecked")
private void testRescaling(final int initialParallelism, final int numPartitions, final int restoredParallelism, final int restoredNumPartitions) throws Exception {
    Preconditions.checkArgument(restoredNumPartitions >= numPartitions, "invalid test case for Kafka repartitioning; Kafka only allows increasing partitions.");
    // Partitions the initial discoverers will "find": test-topic-0 .. test-topic-(numPartitions-1).
    List<KafkaTopicPartition> mockFetchedPartitionsOnStartup = new ArrayList<>();
    for (int i = 0; i < numPartitions; i++) {
        mockFetchedPartitionsOnStartup.add(new KafkaTopicPartition("test-topic", i));
    }
    DummyFlinkKafkaConsumer<String>[] consumers = new DummyFlinkKafkaConsumer[initialParallelism];
    AbstractStreamOperatorTestHarness<String>[] testHarnesses = new AbstractStreamOperatorTestHarness[initialParallelism];
    List<String> testTopics = Collections.singletonList("test-topic");
    // Stand up one consumer + test harness per initial subtask; each discoverer knows its
    // own subtask index so partitions are spread across the parallel instances.
    for (int i = 0; i < initialParallelism; i++) {
        TestPartitionDiscoverer partitionDiscoverer = new TestPartitionDiscoverer(new KafkaTopicsDescriptor(testTopics, null), i, initialParallelism, TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(testTopics), TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(mockFetchedPartitionsOnStartup));
        consumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer);
        testHarnesses[i] = createTestHarness(consumers[i], initialParallelism, i);
        // initializeState() is always called; initializeEmptyState() signals a fresh start
        // with no restored state.
        testHarnesses[i].initializeEmptyState();
        testHarnesses[i].open();
    }
    // Verify the initial assignment: every partition is owned by exactly one subtask.
    Map<KafkaTopicPartition, Long> globalSubscribedPartitions = new HashMap<>();
    for (int i = 0; i < initialParallelism; i++) {
        Map<KafkaTopicPartition, Long> subscribedPartitions = consumers[i].getSubscribedPartitionsToStartOffsets();
        // make sure that no one else is subscribed to these partitions
        for (KafkaTopicPartition partition : subscribedPartitions.keySet()) {
            assertThat(globalSubscribedPartitions, not(hasKey(partition)));
        }
        globalSubscribedPartitions.putAll(subscribedPartitions);
    }
    assertThat(globalSubscribedPartitions.values(), hasSize(numPartitions));
    assertThat(mockFetchedPartitionsOnStartup, everyItem(isIn(globalSubscribedPartitions.keySet())));
    // Snapshot every subtask and merge the pieces into one state, mimicking a savepoint.
    OperatorSubtaskState[] state = new OperatorSubtaskState[initialParallelism];
    for (int i = 0; i < initialParallelism; i++) {
        state[i] = testHarnesses[i].snapshot(0, 0);
    }
    OperatorSubtaskState mergedState = AbstractStreamOperatorTestHarness.repackageState(state);
    // -----------------------------------------------------------------------------------------
    // restore
    // Partitions visible after restore may be a superset of the originals (Kafka can only add).
    List<KafkaTopicPartition> mockFetchedPartitionsAfterRestore = new ArrayList<>();
    for (int i = 0; i < restoredNumPartitions; i++) {
        mockFetchedPartitionsAfterRestore.add(new KafkaTopicPartition("test-topic", i));
    }
    DummyFlinkKafkaConsumer<String>[] restoredConsumers = new DummyFlinkKafkaConsumer[restoredParallelism];
    AbstractStreamOperatorTestHarness<String>[] restoredTestHarnesses = new AbstractStreamOperatorTestHarness[restoredParallelism];
    for (int i = 0; i < restoredParallelism; i++) {
        // maxParallelism is declared elsewhere in this test class (outside this excerpt).
        OperatorSubtaskState initState = AbstractStreamOperatorTestHarness.repartitionOperatorState(mergedState, maxParallelism, initialParallelism, restoredParallelism, i);
        TestPartitionDiscoverer partitionDiscoverer = new TestPartitionDiscoverer(new KafkaTopicsDescriptor(testTopics, null), i, restoredParallelism, TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(testTopics), TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(mockFetchedPartitionsAfterRestore));
        restoredConsumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer);
        restoredTestHarnesses[i] = createTestHarness(restoredConsumers[i], restoredParallelism, i);
        // Restore each subtask from its repartitioned share of the merged snapshot.
        restoredTestHarnesses[i].initializeState(initState);
        restoredTestHarnesses[i].open();
    }
    // Verify the post-restore assignment: again, each partition owned by exactly one subtask.
    Map<KafkaTopicPartition, Long> restoredGlobalSubscribedPartitions = new HashMap<>();
    for (int i = 0; i < restoredParallelism; i++) {
        Map<KafkaTopicPartition, Long> subscribedPartitions = restoredConsumers[i].getSubscribedPartitionsToStartOffsets();
        // make sure that no one else is subscribed to these partitions
        for (KafkaTopicPartition partition : subscribedPartitions.keySet()) {
            assertThat(restoredGlobalSubscribedPartitions, not(hasKey(partition)));
        }
        restoredGlobalSubscribedPartitions.putAll(subscribedPartitions);
    }
    assertThat(restoredGlobalSubscribedPartitions.values(), hasSize(restoredNumPartitions));
    // The originally discovered partitions must all still be subscribed after restore:
    // restored state takes precedence over whatever the fresh discoverers report.
    assertThat(mockFetchedPartitionsOnStartup, everyItem(isIn(restoredGlobalSubscribedPartitions.keySet())));
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) TestPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.testutils.TestPartitionDiscoverer) OptionalLong(java.util.OptionalLong) KafkaTopicsDescriptor(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor)

Aggregations

ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 OptionalLong (java.util.OptionalLong)2 TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)2 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)2 KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition)2 KafkaTopicsDescriptor (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor)2 TestPartitionDiscoverer (org.apache.flink.streaming.connectors.kafka.testutils.TestPartitionDiscoverer)2 AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)2 Serializable (java.io.Serializable)1 ArrayDeque (java.util.ArrayDeque)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 Collectors (java.util.stream.Collectors)1