use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class FlinkKafkaConsumer08 method getPartitionsForTopic.
// ------------------------------------------------------------------------
// Kafka / ZooKeeper communication utilities
// ------------------------------------------------------------------------
/**
 * Sends a topic metadata request to Kafka to find the partitions (and their leaders) of the given topics.
 *
 * <p>For every retry, a random seed broker is picked as the starting point and all seed brokers are
 * tried in turn. The first broker whose response contains error-free metadata for only the requested
 * topics determines the result. A response that fails validation part-way through is discarded as a
 * whole, so the returned list is either complete (as seen by one broker) or empty.
 *
 * <p>If the calling thread is interrupted while backing off between attempts, the interrupt status is
 * restored before the lookup continues, so that callers can still observe the interruption.
 *
 * @param topics The names of the topics.
 * @param properties The properties for the Kafka Consumer that is used to query the partitions for the topics.
 *                   Must contain {@link ConsumerConfig#BOOTSTRAP_SERVERS_CONFIG}.
 * @return The partitions of all requested topics together with their leaders, or an empty list if no
 *         broker returned a complete, error-free response within the configured number of retries.
 */
public static List<KafkaTopicPartitionLeader> getPartitionsForTopic(List<String> topics, Properties properties) {
    String seedBrokersConfString = properties.getProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
    final int numRetries = getInt(properties, GET_PARTITIONS_RETRIES_KEY, DEFAULT_GET_PARTITIONS_RETRIES);

    checkNotNull(seedBrokersConfString, "Configuration property %s not set", ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
    String[] seedBrokers = seedBrokersConfString.split(",");

    // only ever filled with the complete result of one successful broker response
    List<KafkaTopicPartitionLeader> partitions = new ArrayList<>();

    final String clientId = "flink-kafka-consumer-partition-lookup";
    final int soTimeout = getInt(properties, "socket.timeout.ms", 30000);
    final int bufferSize = getInt(properties, "socket.receive.buffer.bytes", 65536);

    Random rnd = new Random();
    retryLoop: for (int retry = 0; retry < numRetries; retry++) {
        // we pick a seed broker randomly to avoid overloading the first broker with all the requests when the
        // parallel source instances start. Still, we try all available brokers.
        int index = rnd.nextInt(seedBrokers.length);
        brokersLoop: for (int arrIdx = 0; arrIdx < seedBrokers.length; arrIdx++) {
            String seedBroker = seedBrokers[index];
            LOG.info("Trying to get topic metadata from broker {} in try {}/{}", seedBroker, retry, numRetries);
            // advance (with wrap-around) to the broker that the next iteration will contact
            if (++index == seedBrokers.length) {
                index = 0;
            }

            URL brokerUrl = NetUtils.getCorrectHostnamePort(seedBroker);
            SimpleConsumer consumer = null;
            try {
                consumer = new SimpleConsumer(brokerUrl.getHost(), brokerUrl.getPort(), soTimeout, bufferSize, clientId);

                TopicMetadataRequest req = new TopicMetadataRequest(topics);
                kafka.javaapi.TopicMetadataResponse resp = consumer.send(req);
                List<TopicMetadata> metaData = resp.topicsMetadata();

                // Collect into a per-attempt list. If validation fails for a later topic, the entries
                // gathered so far are dropped with this list instead of leaking into the result.
                List<KafkaTopicPartitionLeader> attemptPartitions = new ArrayList<>();
                for (TopicMetadata item : metaData) {
                    if (item.errorCode() != ErrorMapping.NoError()) {
                        // warn and try more brokers
                        LOG.warn("Error while getting metadata from broker {} to find partitions for {}. Error: {}",
                                seedBroker, topics, ErrorMapping.exceptionFor(item.errorCode()).getMessage());
                        continue brokersLoop;
                    }
                    if (!topics.contains(item.topic())) {
                        LOG.warn("Received metadata from topic {} even though it was not requested. Skipping ...", item.topic());
                        continue brokersLoop;
                    }
                    for (PartitionMetadata part : item.partitionsMetadata()) {
                        Node leader = brokerToNode(part.leader());
                        KafkaTopicPartition ktp = new KafkaTopicPartition(item.topic(), part.partitionId());
                        attemptPartitions.add(new KafkaTopicPartitionLeader(ktp, leader));
                    }
                }

                // the whole response was valid: publish it and leave both loops
                partitions.addAll(attemptPartitions);
                break retryLoop;
            } catch (Exception e) {
                // validates seed brokers in case of a ClosedChannelException
                validateSeedBrokers(seedBrokers, e);
                LOG.warn("Error communicating with broker {} to find partitions for {}. {} Message: {}", seedBroker, topics, e.getClass().getName(), e.getMessage());
                LOG.debug("Detailed trace", e);
                // we sleep a bit. Retrying immediately doesn't make sense in cases where Kafka is reorganizing the leader metadata
                try {
                    Thread.sleep(500);
                } catch (InterruptedException interrupted) {
                    // sleep shorter, but keep the interrupt visible to the caller instead of swallowing it
                    Thread.currentThread().interrupt();
                }
            } finally {
                if (consumer != null) {
                    consumer.close();
                }
            }
        }
        // brokers loop
    }
    // retries loop
    return partitions;
}
use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class Kafka09FetcherTest method testCommitDoesNotBlock.
/**
 * Verifies that committing offsets through the fetcher returns in bounded time even while the
 * consumer thread is parked inside a blocking {@code poll()} call.
 */
@Test
public void testCommitDoesNotBlock() throws Exception {
    // offsets the committer thread is going to hand to the fetcher
    final KafkaTopicPartition committedPartition = new KafkaTopicPartition("test", 42);
    final Map<KafkaTopicPartition, Long> offsetsToCommit = new HashMap<>();
    offsetsToCommit.put(committedPartition, 11L);

    // fires as soon as the consumer has entered its blocking poll()
    final OneShotLatch pollEntered = new OneShotLatch();
    // holds poll() back until wakeup() is invoked on the mock
    final MultiShotLatch pollGate = new MultiShotLatch();

    // ----- mock consumer: poll() blocks, wakeup() releases it -----
    KafkaConsumer<?, ?> consumerMock = mock(KafkaConsumer.class);
    when(consumerMock.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
        @Override
        public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) throws InterruptedException {
            pollEntered.trigger();
            pollGate.await();
            return ConsumerRecords.empty();
        }
    });
    doAnswer(new Answer<Void>() {
        @Override
        public Void answer(InvocationOnMock invocation) {
            pollGate.trigger();
            return null;
        }
    }).when(consumerMock).wakeup();

    // every KafkaConsumer instantiated from here on is replaced by the mock
    whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(consumerMock);

    // ----- fetcher under test -----
    @SuppressWarnings("unchecked")
    SourceContext<String> contextMock = mock(SourceContext.class);
    Map<KafkaTopicPartition, Long> initialOffsets = Collections.singletonMap(
            new KafkaTopicPartition("test", 42),
            KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
    KeyedDeserializationSchema<String> deserializer =
            new KeyedDeserializationSchemaWrapper<>(new SimpleStringSchema());

    final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(
            contextMock,
            initialOffsets,
            null, /* periodic watermark extractor */
            null, /* punctuated watermark extractor */
            new TestProcessingTimeService(),
            10, /* watermark interval */
            this.getClass().getClassLoader(),
            "task_name",
            new UnregisteredMetricsGroup(),
            deserializer,
            new Properties(),
            0L,
            false);

    // ----- drive the fetch loop on its own thread -----
    final AtomicReference<Throwable> fetchFailure = new AtomicReference<>();
    final Thread fetchThread = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                fetcher.runFetchLoop();
            } catch (Throwable t) {
                fetchFailure.set(t);
            }
        }
    }, "fetcher runner");
    fetchThread.start();

    // do not go on before the consumer sits in the blocking poll()
    pollEntered.await();

    // ----- commit offsets from a second thread -----
    final AtomicReference<Throwable> commitFailure = new AtomicReference<>();
    final Thread commitThread = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                fetcher.commitInternalOffsetsToKafka(offsetsToCommit);
            } catch (Throwable t) {
                commitFailure.set(t);
            }
        }
    }, "committer runner");
    commitThread.start();

    // ----- the commit has to complete within the time limit -----
    commitThread.join(30000);
    assertFalse("The committer did not finish in time", commitThread.isAlive());

    // ----- shut the fetcher down cleanly before evaluating the outcome -----
    fetcher.cancel();
    fetchThread.join();

    // a ClosedException from the handover is tolerated after cancellation, anything else fails the test
    final Throwable fetcherProblem = fetchFailure.get();
    final boolean fetcherOk = fetcherProblem == null || fetcherProblem instanceof Handover.ClosedException;
    if (!fetcherOk) {
        throw new Exception("Exception in the fetcher", fetcherProblem);
    }

    final Throwable committerProblem = commitFailure.get();
    if (committerProblem != null) {
        throw new Exception("Exception in the committer", committerProblem);
    }
}
use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class Kafka09FetcherTest method testCancellationWhenEmitBlocks.
/**
 * Verifies that cancelling the fetcher (plus interrupting its thread) releases a fetch loop that is
 * stuck emitting records into a blocking source context.
 */
@Test
public void testCancellationWhenEmitBlocks() throws Exception {
    // ----- records that every poll() of the mock consumer returns -----
    final String topic = "test-topic";
    final int partition = 3;
    final byte[] payload = new byte[] { 1, 2, 3, 4 };

    final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
            new ConsumerRecord<byte[], byte[]>(topic, partition, 15, payload, payload),
            new ConsumerRecord<byte[], byte[]>(topic, partition, 16, payload, payload),
            new ConsumerRecord<byte[], byte[]>(topic, partition, 17, payload, payload));

    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> recordsByPartition = new HashMap<>();
    recordsByPartition.put(new TopicPartition(topic, partition), records);
    final ConsumerRecords<byte[], byte[]> polledBatch = new ConsumerRecords<>(recordsByPartition);

    // ----- mock consumer that hands out the same batch on each poll -----
    final KafkaConsumer<?, ?> consumerMock = mock(KafkaConsumer.class);
    when(consumerMock.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
        @Override
        public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
            return polledBatch;
        }
    });
    whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(consumerMock);

    // ----- fetcher under test, emitting into a blocking source context -----
    BlockingSourceContext<String> blockingContext = new BlockingSourceContext<>();
    Map<KafkaTopicPartition, Long> initialOffsets = Collections.singletonMap(
            new KafkaTopicPartition(topic, partition),
            KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
    KeyedDeserializationSchema<String> deserializer =
            new KeyedDeserializationSchemaWrapper<>(new SimpleStringSchema());

    final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(
            blockingContext,
            initialOffsets,
            null, /* periodic watermark extractor */
            null, /* punctuated watermark extractor */
            new TestProcessingTimeService(),
            10, /* watermark interval */
            this.getClass().getClassLoader(),
            "task_name",
            new UnregisteredMetricsGroup(),
            deserializer,
            new Properties(),
            0L,
            false);

    // ----- drive the fetch loop on its own thread -----
    final AtomicReference<Throwable> fetchFailure = new AtomicReference<>();
    final Thread fetchThread = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                fetcher.runFetchLoop();
            } catch (Throwable t) {
                fetchFailure.set(t);
            }
        }
    }, "fetcher runner");
    fetchThread.start();

    // hold on until a thread is actually blocked inside the source context
    blockingContext.waitTillHasBlocker();

    // cancel the fetcher and interrupt its thread, as would normally happen on the task thread;
    // afterwards nothing may remain blocked on the source context
    fetcher.cancel();
    fetchThread.interrupt();
    fetchThread.join();

    assertFalse("fetcher threads did not properly finish", blockingContext.isStillBlocking());
}
use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class FlinkKafkaConsumerBase method setSubscribedPartitions.
/**
 * Replaces the subscribed partitions of this consumer with the given ones. Every partition is
 * registered with a {@code null} start offset.
 *
 * @param allSubscribedPartitions the partitions to subscribe to; must not be null
 */
@VisibleForTesting
void setSubscribedPartitions(List<KafkaTopicPartition> allSubscribedPartitions) {
    checkNotNull(allSubscribedPartitions);

    // drop whatever was subscribed before and start from an empty map
    subscribedPartitionsToStartOffsets = new HashMap<>();
    for (final KafkaTopicPartition subscribed : allSubscribedPartitions) {
        subscribedPartitionsToStartOffsets.put(subscribed, null);
    }
}
use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class FlinkKafkaConsumerBase method initializeSubscribedPartitionsToStartOffsets.
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
/**
 * Fills the given map with the partitions this consumer instance is responsible for, together with
 * the offset state each of them starts from.
 *
 * <p>A partition at list position {@code i} is assigned to this subtask when
 * {@code i % numParallelSubtasks == indexOfThisSubtask}. For every startup mode other than
 * {@link StartupMode#SPECIFIC_OFFSETS}, the assigned partition is stored with the mode's state
 * sentinel. For {@link StartupMode#SPECIFIC_OFFSETS}, the partition is stored with its specified
 * offset minus one, or with {@link KafkaTopicPartitionStateSentinel#GROUP_OFFSET} if no offset was
 * specified for it.
 *
 * @param subscribedPartitionsToStartOffsets the map to initialize
 * @param kafkaTopicPartitions the complete list of all Kafka partitions
 * @param indexOfThisSubtask the index of this consumer instance
 * @param numParallelSubtasks total number of parallel consumer instances
 * @param startupMode the configured startup mode for the consumer
 * @param specificStartupOffsets specific partition offsets to start from
 *                               (only relevant if startupMode is {@link StartupMode#SPECIFIC_OFFSETS})
 * @throws IllegalArgumentException if the startup mode is {@link StartupMode#SPECIFIC_OFFSETS}, a
 *                                  partition is assigned to this subtask, and no specific offsets were given
 *
 * Note: This method is also exposed for testing.
 */
protected static void initializeSubscribedPartitionsToStartOffsets(Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets, List<KafkaTopicPartition> kafkaTopicPartitions, int indexOfThisSubtask, int numParallelSubtasks, StartupMode startupMode, Map<KafkaTopicPartition, Long> specificStartupOffsets) {
    for (int pos = 0; pos < kafkaTopicPartitions.size(); pos++) {
        // partitions that belong to other subtasks are none of our business
        if (pos % numParallelSubtasks != indexOfThisSubtask) {
            continue;
        }

        if (startupMode == StartupMode.SPECIFIC_OFFSETS) {
            if (specificStartupOffsets == null) {
                throw new IllegalArgumentException("Startup mode for the consumer set to " + StartupMode.SPECIFIC_OFFSETS + ", but no specific offsets were specified");
            }

            final KafkaTopicPartition assigned = kafkaTopicPartitions.get(pos);
            final Long requestedOffset = specificStartupOffsets.get(assigned);
            if (requestedOffset == null) {
                // nothing was specified for this partition
                subscribedPartitionsToStartOffsets.put(assigned, KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
            } else {
                // a specified offset denotes the next record to read, whereas the stored state
                // refers to the record before it, hence the decrement
                subscribedPartitionsToStartOffsets.put(assigned, requestedOffset - 1);
            }
        } else {
            // all other modes start from the sentinel value that represents the mode
            subscribedPartitionsToStartOffsets.put(kafkaTopicPartitions.get(pos), startupMode.getStateSentinel());
        }
    }
}
Aggregations