Example 6 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

The class FlinkKafkaConsumer08, method getPartitionsForTopic.

// ------------------------------------------------------------------------
//  Kafka / ZooKeeper communication utilities
// ------------------------------------------------------------------------
/**
 * Sends a request to Kafka to get the partitions for the given topics.
 *
 * @param topics The names of the topics to query.
 * @param properties The properties for the Kafka consumer used to query the partitions for the topics.
 */
public static List<KafkaTopicPartitionLeader> getPartitionsForTopic(List<String> topics, Properties properties) {
    String seedBrokersConfString = properties.getProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
    final int numRetries = getInt(properties, GET_PARTITIONS_RETRIES_KEY, DEFAULT_GET_PARTITIONS_RETRIES);
    checkNotNull(seedBrokersConfString, "Configuration property %s not set", ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
    String[] seedBrokers = seedBrokersConfString.split(",");
    List<KafkaTopicPartitionLeader> partitions = new ArrayList<>();
    final String clientId = "flink-kafka-consumer-partition-lookup";
    final int soTimeout = getInt(properties, "socket.timeout.ms", 30000);
    final int bufferSize = getInt(properties, "socket.receive.buffer.bytes", 65536);
    Random rnd = new Random();
    retryLoop: for (int retry = 0; retry < numRetries; retry++) {
        // we pick a seed broker randomly to avoid overloading the first broker with all the requests when the
        // parallel source instances start. Still, we try all available brokers.
        int index = rnd.nextInt(seedBrokers.length);
        brokersLoop: for (int arrIdx = 0; arrIdx < seedBrokers.length; arrIdx++) {
            String seedBroker = seedBrokers[index];
            LOG.info("Trying to get topic metadata from broker {} in try {}/{}", seedBroker, retry, numRetries);
            if (++index == seedBrokers.length) {
                index = 0;
            }
            URL brokerUrl = NetUtils.getCorrectHostnamePort(seedBroker);
            SimpleConsumer consumer = null;
            try {
                consumer = new SimpleConsumer(brokerUrl.getHost(), brokerUrl.getPort(), soTimeout, bufferSize, clientId);
                TopicMetadataRequest req = new TopicMetadataRequest(topics);
                kafka.javaapi.TopicMetadataResponse resp = consumer.send(req);
                List<TopicMetadata> metaData = resp.topicsMetadata();
                // clear in case we have an incomplete list from previous tries
                partitions.clear();
                for (TopicMetadata item : metaData) {
                    if (item.errorCode() != ErrorMapping.NoError()) {
                        // warn and try more brokers
                        LOG.warn("Error while getting metadata from broker " + seedBroker + " to find partitions " + "for " + topics.toString() + ". Error: " + ErrorMapping.exceptionFor(item.errorCode()).getMessage());
                        continue brokersLoop;
                    }
                    if (!topics.contains(item.topic())) {
                        LOG.warn("Received metadata from topic " + item.topic() + " even though it was not requested. Skipping ...");
                        continue brokersLoop;
                    }
                    for (PartitionMetadata part : item.partitionsMetadata()) {
                        Node leader = brokerToNode(part.leader());
                        KafkaTopicPartition ktp = new KafkaTopicPartition(item.topic(), part.partitionId());
                        KafkaTopicPartitionLeader pInfo = new KafkaTopicPartitionLeader(ktp, leader);
                        partitions.add(pInfo);
                    }
                }
                // leave the loop through the brokers
                break retryLoop;
            } catch (Exception e) {
                // validate seed brokers in case of a ClosedChannelException
                validateSeedBrokers(seedBrokers, e);
                LOG.warn("Error communicating with broker {} to find partitions for {}. {} Message: {}", seedBroker, topics, e.getClass().getName(), e.getMessage());
                LOG.debug("Detailed trace", e);
                // we sleep a bit. Retrying immediately doesn't make sense in cases where Kafka is reorganizing the leader metadata
                try {
                    Thread.sleep(500);
                } catch (InterruptedException e1) {
                    // interrupted during the back-off; simply sleep shorter and retry
                }
            } finally {
                if (consumer != null) {
                    consumer.close();
                }
            }
        } // brokers loop
    } // retries loop
    return partitions;
}
Also used : TopicMetadataRequest(kafka.javaapi.TopicMetadataRequest) Node(org.apache.kafka.common.Node) ArrayList(java.util.ArrayList) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) URL(java.net.URL) ClosedChannelException(java.nio.channels.ClosedChannelException) UnknownHostException(java.net.UnknownHostException) TopicMetadata(kafka.javaapi.TopicMetadata) Random(java.util.Random) KafkaTopicPartitionLeader(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionLeader) PartitionMetadata(kafka.javaapi.PartitionMetadata) SimpleConsumer(kafka.javaapi.consumer.SimpleConsumer)
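
For orientation, here is a minimal sketch of how this partition lookup might be invoked before constructing a consumer. This is illustrative code, not code from the Flink repository: the broker addresses and topic name are placeholders, the property keys mirror the ones read above, and the accessors getTopicPartition() and getLeader() on KafkaTopicPartitionLeader are assumed (imports as in the "Also used" list).

Properties props = new Properties();
// the seed brokers the lookup will try in random order
props.setProperty("bootstrap.servers", "broker1:9092,broker2:9092");
// optional tuning knobs read by getPartitionsForTopic
props.setProperty("socket.timeout.ms", "30000");
props.setProperty("socket.receive.buffer.bytes", "65536");

List<KafkaTopicPartitionLeader> partitions =
        FlinkKafkaConsumer08.getPartitionsForTopic(
                Collections.singletonList("my-topic"), props);

for (KafkaTopicPartitionLeader p : partitions) {
    // each entry pairs a (topic, partition) with its current leader node
    System.out.println(p.getTopicPartition() + " -> " + p.getLeader());
}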

Example 7 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

The class Kafka09FetcherTest, method testCommitDoesNotBlock.

@Test
public void testCommitDoesNotBlock() throws Exception {
    // test data
    final KafkaTopicPartition testPartition = new KafkaTopicPartition("test", 42);
    final Map<KafkaTopicPartition, Long> testCommitData = new HashMap<>();
    testCommitData.put(testPartition, 11L);
    // to synchronize when the consumer is in its blocking method
    final OneShotLatch sync = new OneShotLatch();
    // ----- the mock consumer with blocking poll calls ----
    final MultiShotLatch blockerLatch = new MultiShotLatch();
    KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
    when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {

        @Override
        public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) throws InterruptedException {
            sync.trigger();
            blockerLatch.await();
            return ConsumerRecords.empty();
        }
    });
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            blockerLatch.trigger();
            return null;
        }
    }).when(mockConsumer).wakeup();
    // make sure the fetcher creates the mock consumer
    whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);
    // ----- create the test fetcher -----
    @SuppressWarnings("unchecked") SourceContext<String> sourceContext = mock(SourceContext.class);
    Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets = Collections.singletonMap(new KafkaTopicPartition("test", 42), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
    KeyedDeserializationSchema<String> schema = new KeyedDeserializationSchemaWrapper<>(new SimpleStringSchema());
    final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(
            sourceContext,
            partitionsWithInitialOffsets,
            null, /* periodic watermark extractor */
            null, /* punctuated watermark extractor */
            new TestProcessingTimeService(),
            10, /* watermark interval */
            this.getClass().getClassLoader(),
            "task_name",
            new UnregisteredMetricsGroup(),
            schema,
            new Properties(),
            0L,
            false);
    // ----- run the fetcher -----
    final AtomicReference<Throwable> error = new AtomicReference<>();
    final Thread fetcherRunner = new Thread("fetcher runner") {

        @Override
        public void run() {
            try {
                fetcher.runFetchLoop();
            } catch (Throwable t) {
                error.set(t);
            }
        }
    };
    fetcherRunner.start();
    // wait until the fetcher has reached the method of interest
    sync.await();
    // ----- trigger the offset commit -----
    final AtomicReference<Throwable> commitError = new AtomicReference<>();
    final Thread committer = new Thread("committer runner") {

        @Override
        public void run() {
            try {
                fetcher.commitInternalOffsetsToKafka(testCommitData);
            } catch (Throwable t) {
                commitError.set(t);
            }
        }
    };
    committer.start();
    // ----- ensure that the committer finishes in time  -----
    committer.join(30000);
    assertFalse("The committer did not finish in time", committer.isAlive());
    // ----- test done, wait till the fetcher is done for a clean shutdown -----
    fetcher.cancel();
    fetcherRunner.join();
    // check that there were no errors in the fetcher
    final Throwable fetcherError = error.get();
    if (fetcherError != null && !(fetcherError instanceof Handover.ClosedException)) {
        throw new Exception("Exception in the fetcher", fetcherError);
    }
    final Throwable committerError = commitError.get();
    if (committerError != null) {
        throw new Exception("Exception in the committer", committerError);
    }
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) HashMap(java.util.HashMap) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) Properties(java.util.Properties) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) KeyedDeserializationSchemaWrapper(org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper) Handover(org.apache.flink.streaming.connectors.kafka.internal.Handover) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) AtomicReference(java.util.concurrent.atomic.AtomicReference) KafkaConsumerThread(org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread) Kafka09Fetcher(org.apache.flink.streaming.connectors.kafka.internal.Kafka09Fetcher) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Mockito.anyLong(org.mockito.Mockito.anyLong) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)
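
The map handed to commitInternalOffsetsToKafka holds, per KafkaTopicPartition, the offset of the last record Flink has processed. Kafka's committed offsets, by contrast, denote the next record to read (the same convention the subtract-by-one comment in Example 10 compensates for), so the fetcher is expected to add one when translating to Kafka's types. A hedged sketch of that translation, not the literal Kafka09Fetcher code:

Map<TopicPartition, OffsetAndMetadata> kafkaOffsets = new HashMap<>();
for (Map.Entry<KafkaTopicPartition, Long> entry : testCommitData.entrySet()) {
    KafkaTopicPartition ktp = entry.getKey();
    // Flink stores the last processed offset; Kafka expects the next offset to read
    kafkaOffsets.put(
            new TopicPartition(ktp.getTopic(), ktp.getPartition()),
            new OffsetAndMetadata(entry.getValue() + 1));
}
// kafkaOffsets now maps test/42 to offset 12 and could be passed to
// KafkaConsumer#commitAsync(offsets, callback)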

Example 8 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

The class Kafka09FetcherTest, method testCancellationWhenEmitBlocks.

@Test
public void testCancellationWhenEmitBlocks() throws Exception {
    // ----- some test data -----
    final String topic = "test-topic";
    final int partition = 3;
    final byte[] payload = new byte[] { 1, 2, 3, 4 };
    final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(new ConsumerRecord<byte[], byte[]>(topic, partition, 15, payload, payload), new ConsumerRecord<byte[], byte[]>(topic, partition, 16, payload, payload), new ConsumerRecord<byte[], byte[]>(topic, partition, 17, payload, payload));
    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> data = new HashMap<>();
    data.put(new TopicPartition(topic, partition), records);
    final ConsumerRecords<byte[], byte[]> consumerRecords = new ConsumerRecords<>(data);
    // ----- the test consumer -----
    final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
    when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {

        @Override
        public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
            return consumerRecords;
        }
    });
    whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);
    // ----- build a fetcher -----
    BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
    Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets = Collections.singletonMap(new KafkaTopicPartition(topic, partition), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
    KeyedDeserializationSchema<String> schema = new KeyedDeserializationSchemaWrapper<>(new SimpleStringSchema());
    final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(
            sourceContext,
            partitionsWithInitialOffsets,
            null, /* periodic watermark extractor */
            null, /* punctuated watermark extractor */
            new TestProcessingTimeService(),
            10, /* watermark interval */
            this.getClass().getClassLoader(),
            "task_name",
            new UnregisteredMetricsGroup(),
            schema,
            new Properties(),
            0L,
            false);
    // ----- run the fetcher -----
    final AtomicReference<Throwable> error = new AtomicReference<>();
    final Thread fetcherRunner = new Thread("fetcher runner") {

        @Override
        public void run() {
            try {
                fetcher.runFetchLoop();
            } catch (Throwable t) {
                error.set(t);
            }
        }
    };
    fetcherRunner.start();
    // wait until the thread started to emit records to the source context
    sourceContext.waitTillHasBlocker();
    // now we try to cancel the fetcher, including the interruption usually done on the task thread
    // once it has finished, there must be no more thread blocked on the source context
    fetcher.cancel();
    fetcherRunner.interrupt();
    fetcherRunner.join();
    assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) HashMap(java.util.HashMap) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) Properties(java.util.Properties) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) KeyedDeserializationSchemaWrapper(org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper) List(java.util.List) AtomicReference(java.util.concurrent.atomic.AtomicReference) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) KafkaConsumerThread(org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread) Kafka09Fetcher(org.apache.flink.streaming.connectors.kafka.internal.Kafka09Fetcher) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Mockito.anyLong(org.mockito.Mockito.anyLong) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)
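
BlockingSourceContext is a helper defined inside Kafka09FetcherTest and not shown here. A simplified, hypothetical sketch of the mechanics this test relies on (the real class implements the full SourceContext interface, whose other methods are omitted):

// Hypothetical, simplified: blocks every collect() call until the thread is
// interrupted, and lets the test observe whether a caller is still parked.
static final class BlockingCollector<T> {
    private final OneShotLatch hasBlocker = new OneShotLatch();
    private final AtomicBoolean blocking = new AtomicBoolean(false);

    public void collect(T element) {
        blocking.set(true);
        hasBlocker.trigger();
        try {
            Thread.sleep(Long.MAX_VALUE); // park until interrupt()
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        } finally {
            blocking.set(false);
        }
    }

    public void waitTillHasBlocker() throws InterruptedException {
        hasBlocker.await();
    }

    public boolean isStillBlocking() {
        return blocking.get();
    }
}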

Example 9 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

The class FlinkKafkaConsumerBase, method setSubscribedPartitions.

@VisibleForTesting
void setSubscribedPartitions(List<KafkaTopicPartition> allSubscribedPartitions) {
    checkNotNull(allSubscribedPartitions);
    this.subscribedPartitionsToStartOffsets = new HashMap<>();
    for (KafkaTopicPartition partition : allSubscribedPartitions) {
        this.subscribedPartitionsToStartOffsets.put(partition, null);
    }
}
Also used : KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting)

Example 10 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

The class FlinkKafkaConsumerBase, method initializeSubscribedPartitionsToStartOffsets.

// ------------------------------------------------------------------------
//  Utilities
// ------------------------------------------------------------------------
/**
 * Initializes {@link FlinkKafkaConsumerBase#subscribedPartitionsToStartOffsets} with appropriate
 * values. The method decides which partitions this consumer instance should subscribe to, and
 * sets the initial offset each subscribed partition should start from, based on the configured
 * startup mode.
 *
 * @param subscribedPartitionsToStartOffsets the partition-to-start-offset map to initialize
 * @param kafkaTopicPartitions the complete list of all Kafka partitions
 * @param indexOfThisSubtask the index of this consumer instance
 * @param numParallelSubtasks total number of parallel consumer instances
 * @param startupMode the configured startup mode for the consumer
 * @param specificStartupOffsets specific partition offsets to start from
 *                               (only relevant if startupMode is {@link StartupMode#SPECIFIC_OFFSETS})
 *
 * Note: This method is also exposed for testing.
 */
protected static void initializeSubscribedPartitionsToStartOffsets(Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets, List<KafkaTopicPartition> kafkaTopicPartitions, int indexOfThisSubtask, int numParallelSubtasks, StartupMode startupMode, Map<KafkaTopicPartition, Long> specificStartupOffsets) {
    for (int i = 0; i < kafkaTopicPartitions.size(); i++) {
        if (i % numParallelSubtasks == indexOfThisSubtask) {
            if (startupMode != StartupMode.SPECIFIC_OFFSETS) {
                subscribedPartitionsToStartOffsets.put(kafkaTopicPartitions.get(i), startupMode.getStateSentinel());
            } else {
                if (specificStartupOffsets == null) {
                    throw new IllegalArgumentException("Startup mode for the consumer set to " + StartupMode.SPECIFIC_OFFSETS + ", but no specific offsets were specified");
                }
                KafkaTopicPartition partition = kafkaTopicPartitions.get(i);
                Long specificOffset = specificStartupOffsets.get(partition);
                if (specificOffset != null) {
                    // since a specified offset represents the next record to read, we
                    // subtract one from it so that the consumer's initial state is correct
                    subscribedPartitionsToStartOffsets.put(partition, specificOffset - 1);
                } else {
                    subscribedPartitionsToStartOffsets.put(partition, KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
                }
            }
        }
    }
}
Also used : KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition)
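
The check i % numParallelSubtasks == indexOfThisSubtask distributes partitions round-robin: with 5 partitions and 2 subtasks, subtask 0 takes the partitions at indices 0, 2, 4 and subtask 1 takes indices 1, 3. A minimal sketch exercising the method under that assumption (the topic name is a placeholder):

List<KafkaTopicPartition> allPartitions = new ArrayList<>();
for (int p = 0; p < 5; p++) {
    allPartitions.add(new KafkaTopicPartition("test-topic", p));
}

Map<KafkaTopicPartition, Long> subscribed = new HashMap<>();
// subtask 1 of 2, starting from the earliest offset; no specific offsets needed
initializeSubscribedPartitionsToStartOffsets(
        subscribed, allPartitions, 1, 2, StartupMode.EARLIEST, null);

// subscribed now contains partitions 1 and 3 of "test-topic", each mapped
// to the EARLIEST startup-mode sentinel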

Aggregations

KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition): 35 usages
HashMap (java.util.HashMap): 26 usages
Test (org.junit.Test): 18 usages
ArrayList (java.util.ArrayList): 14 usages
Map (java.util.Map): 8 usages
Properties (java.util.Properties): 8 usages
AtomicReference (java.util.concurrent.atomic.AtomicReference): 7 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 7 usages
UnregisteredMetricsGroup (org.apache.flink.metrics.groups.UnregisteredMetricsGroup): 7 usages
TestProcessingTimeService (org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService): 7 usages
TopicPartition (org.apache.kafka.common.TopicPartition): 7 usages
KafkaConsumerThread (org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread): 6 usages
AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness): 6 usages
KeyedDeserializationSchemaWrapper (org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper): 6 usages
SimpleStringSchema (org.apache.flink.streaming.util.serialization.SimpleStringSchema): 6 usages
ConsumerRecords (org.apache.kafka.clients.consumer.ConsumerRecords): 6 usages
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 6 usages
OptionalLong (java.util.OptionalLong): 5 usages
StreamSource (org.apache.flink.streaming.api.operators.StreamSource): 5 usages
Mockito.anyLong (org.mockito.Mockito.anyLong): 5 usages