
Example 6 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project flink by apache.

The class SimpleConsumerThread, method requestAndSetEarliestOrLatestOffsetsFromKafka.

/**
 * For a set of partitions, if a partition is set with the special offsets {@link OffsetRequest#EarliestTime()}
 * or {@link OffsetRequest#LatestTime()}, replace them with actual offsets requested via a Kafka consumer.
 *
 * @param consumer The consumer connected to the lead broker
 * @param partitions The list of partitions we need offsets for
 */
private static void requestAndSetEarliestOrLatestOffsetsFromKafka(SimpleConsumer consumer, List<KafkaTopicPartitionState<TopicAndPartition>> partitions) throws Exception {
    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<>();
    for (KafkaTopicPartitionState<TopicAndPartition> part : partitions) {
        if (part.getOffset() == OffsetRequest.EarliestTime() || part.getOffset() == OffsetRequest.LatestTime()) {
            requestInfo.put(part.getKafkaPartitionHandle(), new PartitionOffsetRequestInfo(part.getOffset(), 1));
        }
    }
    requestAndSetOffsetsFromKafka(consumer, partitions, requestInfo);
}
Also used : HashMap(java.util.HashMap) PartitionOffsetRequestInfo(kafka.api.PartitionOffsetRequestInfo) TopicAndPartition(kafka.common.TopicAndPartition)
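
The requestAndSetOffsetsFromKafka helper that this method delegates to is not part of this listing. Below is a minimal sketch of what it plausibly does, not the actual Flink implementation; it assumes SimpleConsumer exposes clientId(), host(), and port(), and that KafkaTopicPartitionState exposes getTopic(), getPartition(), and setOffset() as seen in the other examples on this page.

private static void requestAndSetOffsetsFromKafka(
        SimpleConsumer consumer,
        List<KafkaTopicPartitionState<TopicAndPartition>> partitions,
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo) throws IOException {
    // build and issue the actual offset request for all collected partitions
    kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
            requestInfo, kafka.api.OffsetRequest.CurrentVersion(), consumer.clientId());
    OffsetResponse response = consumer.getOffsetsBefore(request);
    if (response.hasError()) {
        throw new IOException("Error while requesting offsets from broker "
                + consumer.host() + ":" + consumer.port());
    }
    for (KafkaTopicPartitionState<TopicAndPartition> part : partitions) {
        // offsets come back only for partitions that were in the request map
        if (requestInfo.containsKey(part.getKafkaPartitionHandle())) {
            long nextOffsetToFetch = response.offsets(part.getTopic(), part.getPartition())[0];
            // the returned offset is that of the next record to fetch; the fetcher
            // state tracks the last processed record, hence the -1
            part.setOffset(nextOffsetToFetch - 1);
        }
    }
}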

Example 7 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project flink by apache.

The class Kafka08Fetcher, method findLeaderForPartitions.

/**
 * Find the leaders for a set of partitions.
 *
 * At a high level, the method does the following:
 *   - Get a list of FetchPartitions (usually only a few partitions).
 *   - Get the list of topics from the FetchPartitions list and request the partitions for those topics
 *     (Kafka doesn't support getting leaders for a set of partitions).
 *   - Build a Map<Leader, List<FetchPartition>> that contains only the requested partitions.
 *
 * @param partitionsToAssign the list of partitions to find leaders for
 * @param kafkaProperties the Kafka client properties used for the metadata request
 * @return a map from each leader broker to the partitions it leads
 */
private static Map<Node, List<KafkaTopicPartitionState<TopicAndPartition>>> findLeaderForPartitions(List<KafkaTopicPartitionState<TopicAndPartition>> partitionsToAssign, Properties kafkaProperties) throws Exception {
    if (partitionsToAssign.isEmpty()) {
        throw new IllegalArgumentException("Leader request for empty partitions list");
    }
    LOG.info("Refreshing leader information for partitions {}", partitionsToAssign);
    // this request is based on the topic names
    PartitionInfoFetcher infoFetcher = new PartitionInfoFetcher(getTopics(partitionsToAssign), kafkaProperties);
    infoFetcher.start();
    // NOTE: The Kafka client apparently locks itself up sometimes
    // when it is interrupted, so we run it only in a separate thread.
    // Since it sometimes refuses to shut down, we resort to the admittedly harsh
    // means of killing the thread after a timeout.
    KillerWatchDog watchDog = new KillerWatchDog(infoFetcher, 60000);
    watchDog.start();
    // this list contains ALL partitions of the requested topics
    List<KafkaTopicPartitionLeader> topicPartitionWithLeaderList = infoFetcher.getPartitions();
    // copy list to track unassigned partitions
    List<KafkaTopicPartitionState<TopicAndPartition>> unassignedPartitions = new ArrayList<>(partitionsToAssign);
    // final mapping from leader -> list(fetchPartition)
    Map<Node, List<KafkaTopicPartitionState<TopicAndPartition>>> leaderToPartitions = new HashMap<>();
    for (KafkaTopicPartitionLeader partitionLeader : topicPartitionWithLeaderList) {
        if (unassignedPartitions.size() == 0) {
            // we are done: all partitions are assigned
            break;
        }
        Iterator<KafkaTopicPartitionState<TopicAndPartition>> unassignedPartitionsIterator = unassignedPartitions.iterator();
        while (unassignedPartitionsIterator.hasNext()) {
            KafkaTopicPartitionState<TopicAndPartition> unassignedPartition = unassignedPartitionsIterator.next();
            if (unassignedPartition.getKafkaTopicPartition().equals(partitionLeader.getTopicPartition())) {
                // we found the leader for one of the fetch partitions
                Node leader = partitionLeader.getLeader();
                List<KafkaTopicPartitionState<TopicAndPartition>> partitionsOfLeader = leaderToPartitions.get(leader);
                if (partitionsOfLeader == null) {
                    partitionsOfLeader = new ArrayList<>();
                    leaderToPartitions.put(leader, partitionsOfLeader);
                }
                partitionsOfLeader.add(unassignedPartition);
                // partition has been assigned
                unassignedPartitionsIterator.remove();
                break;
            }
        }
    }
    if (unassignedPartitions.size() > 0) {
        throw new RuntimeException("Unable to find a leader for partitions: " + unassignedPartitions);
    }
    LOG.debug("Partitions with assigned leaders {}", leaderToPartitions);
    return leaderToPartitions;
}
Also used : HashMap(java.util.HashMap) Node(org.apache.kafka.common.Node) ArrayList(java.util.ArrayList) List(java.util.List) TopicAndPartition(kafka.common.TopicAndPartition)
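
The KillerWatchDog referenced above is not shown in this listing. Below is a minimal sketch of the pattern the comment describes (join with a deadline, then forcibly stop a stuck thread), assuming the watchdog needs only the target thread and a timeout in milliseconds; this is an illustration, not the actual Flink class.

private static class KillerWatchDog extends Thread {

    private final Thread toKill;
    private final long timeout;

    KillerWatchDog(Thread toKill, long timeout) {
        super("KillerWatchDog");
        setDaemon(true);
        this.toKill = toKill;
        this.timeout = timeout;
    }

    @SuppressWarnings("deprecation")
    @Override
    public void run() {
        final long deadline = System.currentTimeMillis() + timeout;
        long now;
        // wait for the watched thread, but never past the deadline
        while (toKill.isAlive() && (now = System.currentTimeMillis()) < deadline) {
            try {
                toKill.join(deadline - now);
            } catch (InterruptedException e) {
                // ignore and keep waiting until the deadline
            }
        }
        // harsh last resort, as the comment above concedes: Thread.stop() is
        // deprecated and unsafe in general, but the client refuses to shut down
        if (toKill.isAlive()) {
            toKill.stop();
        }
    }
}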

Example 8 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project cdap by caskdata.

The class KafkaConsumer, method fetchOffsetBefore.

/**
   * Fetches the latest offset before the given time.
   * @param timeMillis the timestamp (in milliseconds) to fetch the offset before; may be one of the special
   *                   values kafka.api.OffsetRequest.EarliestTime() or kafka.api.OffsetRequest.LatestTime()
   * @return Kafka message offset
   */
public long fetchOffsetBefore(long timeMillis) {
    TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = Maps.newHashMap();
    requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(timeMillis, 1));
    OffsetRequest request = new OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
    SimpleConsumer consumer = getConsumer();
    OffsetResponse response = consumer.getOffsetsBefore(request);
    if (response.hasError()) {
        // Try once more
        closeConsumer();
        consumer = getConsumer();
        response = consumer.getOffsetsBefore(request);
        if (response.hasError()) {
            closeConsumer();
            throw new RuntimeException(String.format("Error fetching offset data from broker %s:%d for topic %s, partition %d. Error code: %d", consumer.host(), consumer.port(), topic, partition, response.errorCode(topic, partition)));
        }
    }
    long[] offsets = response.offsets(topic, partition);
    if (offsets.length > 0) {
        return offsets[0];
    }
    // No offsets were returned. If we asked for a specific time, fall back to the earliest offset.
    if (timeMillis != kafka.api.OffsetRequest.EarliestTime()) {
        return fetchOffsetBefore(kafka.api.OffsetRequest.EarliestTime());
    }
    // Even the earliest offset is unavailable; give up.
    closeConsumer();
    throw new RuntimeException(String.format("Got zero offsets in offset response for time %s from broker %s:%d for topic %s, partition %d", timeMillis, consumer.host(), consumer.port(), topic, partition));
}
Also used : OffsetResponse(kafka.javaapi.OffsetResponse) PartitionOffsetRequestInfo(kafka.api.PartitionOffsetRequestInfo) TopicAndPartition(kafka.common.TopicAndPartition) SimpleConsumer(kafka.javaapi.consumer.SimpleConsumer) OffsetRequest(kafka.javaapi.OffsetRequest)
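
A hypothetical call site for illustration, assuming a configured KafkaConsumer instance named kafkaConsumer; the special constants from kafka.api.OffsetRequest select the earliest and latest offsets instead of a wall-clock time.

// offset of the oldest message still retained for the partition
long earliest = kafkaConsumer.fetchOffsetBefore(kafka.api.OffsetRequest.EarliestTime());

// offset at the current tail of the partition (the next offset to be written)
long latest = kafkaConsumer.fetchOffsetBefore(kafka.api.OffsetRequest.LatestTime());

// latest offset written before a specific point in time
long beforeNoon = kafkaConsumer.fetchOffsetBefore(1483272000000L); // 2017-01-01 12:00:00 UTC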

Example 9 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project flink by apache.

The class SimpleConsumerThread, method requestAndSetSpecificTimeOffsetsFromKafka.

// ------------------------------------------------------------------------
//  Kafka Request Utils
// ------------------------------------------------------------------------
/**
 * Request offsets before a specific time for a set of partitions, via a Kafka consumer.
 *
 * @param consumer The consumer connected to the lead broker
 * @param partitions The list of partitions we need offsets for
 * @param whichTime The type of time we are requesting; -1 (latest) and -2 (earliest) are special constants (see OffsetRequest)
 */
private static void requestAndSetSpecificTimeOffsetsFromKafka(SimpleConsumer consumer, List<KafkaTopicPartitionState<TopicAndPartition>> partitions, long whichTime) throws IOException {
    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<>();
    for (KafkaTopicPartitionState<TopicAndPartition> part : partitions) {
        requestInfo.put(part.getKafkaPartitionHandle(), new PartitionOffsetRequestInfo(whichTime, 1));
    }
    requestAndSetOffsetsFromKafka(consumer, partitions, requestInfo);
}
Also used : HashMap(java.util.HashMap) PartitionOffsetRequestInfo(kafka.api.PartitionOffsetRequestInfo) TopicAndPartition(kafka.common.TopicAndPartition)
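
For illustration, hypothetical call sites (consumer and partitions as in the surrounding class); because whichTime applies uniformly, a single call resets every given partition to the earliest or latest available offset.

// reset all partitions handled by this thread to the earliest available offsets
requestAndSetSpecificTimeOffsetsFromKafka(consumer, partitions, OffsetRequest.EarliestTime());

// or jump all of them to the latest offsets instead
requestAndSetSpecificTimeOffsetsFromKafka(consumer, partitions, OffsetRequest.LatestTime());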

Example 10 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project flink by apache.

The class Kafka08Fetcher, method runFetchLoop.

// ------------------------------------------------------------------------
//  Main Work Loop
// ------------------------------------------------------------------------
@Override
public void runFetchLoop() throws Exception {
    // the map from broker to the thread that is connected to that broker
    final Map<Node, SimpleConsumerThread<T>> brokerToThread = new HashMap<>();
    // this holds the possible exceptions from the concurrent broker connection threads
    final ExceptionProxy errorHandler = new ExceptionProxy(Thread.currentThread());
    // the offset handler handles the communication with ZooKeeper, to commit externally visible offsets
    final ZookeeperOffsetHandler zookeeperOffsetHandler = new ZookeeperOffsetHandler(kafkaConfig);
    this.zookeeperOffsetHandler = zookeeperOffsetHandler;
    PeriodicOffsetCommitter periodicCommitter = null;
    try {
        // partition offsets may still be set to sentinel values at this point;
        // replace those with actual offsets, according to what the sentinel values represent
        for (KafkaTopicPartitionState<TopicAndPartition> partition : subscribedPartitionStates()) {
            if (partition.getOffset() == KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET) {
                // this will be replaced by an actual offset in SimpleConsumerThread
                partition.setOffset(OffsetRequest.EarliestTime());
            } else if (partition.getOffset() == KafkaTopicPartitionStateSentinel.LATEST_OFFSET) {
                // this will be replaced by an actual offset in SimpleConsumerThread
                partition.setOffset(OffsetRequest.LatestTime());
            } else if (partition.getOffset() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
                Long committedOffset = zookeeperOffsetHandler.getCommittedOffset(partition.getKafkaTopicPartition());
                if (committedOffset != null) {
                    // the committed offset in ZK represents the next record to process,
                    // so we subtract it by 1 to correctly represent internal state
                    partition.setOffset(committedOffset - 1);
                } else {
                    // if we can't find an offset for a partition in ZK when using GROUP_OFFSETS,
                    // we default to "auto.offset.reset" like the Kafka high-level consumer
                    LOG.warn("No group offset can be found for partition {} in Zookeeper;" + " resetting starting offset to 'auto.offset.reset'", partition);
                    partition.setOffset(invalidOffsetBehavior);
                }
            } else {
                // the partition already has a specific start offset and is ready to be consumed
            }
        }
        // start the periodic offset committer thread, if necessary
        if (autoCommitInterval > 0) {
            LOG.info("Starting periodic offset committer, with commit interval of {}ms", autoCommitInterval);
            periodicCommitter = new PeriodicOffsetCommitter(zookeeperOffsetHandler, subscribedPartitionStates(), errorHandler, autoCommitInterval);
            periodicCommitter.setName("Periodic Kafka partition offset committer");
            periodicCommitter.setDaemon(true);
            periodicCommitter.start();
        }
        // register offset metrics
        if (useMetrics) {
            final MetricGroup kafkaMetricGroup = runtimeContext.getMetricGroup().addGroup("KafkaConsumer");
            addOffsetStateGauge(kafkaMetricGroup);
        }
        // Main loop polling elements from the unassignedPartitions queue to the threads
        while (running) {
            // re-throw any exception from the concurrent fetcher threads
            errorHandler.checkAndThrowException();
            // wait for max 5 seconds trying to get partitions to assign
            // if threads shut down, this poll returns earlier, because the threads inject the
            // special marker into the queue
            List<KafkaTopicPartitionState<TopicAndPartition>> partitionsToAssign = unassignedPartitionsQueue.getBatchBlocking(5000);
            partitionsToAssign.remove(MARKER);
            if (!partitionsToAssign.isEmpty()) {
                LOG.info("Assigning {} partitions to broker threads", partitionsToAssign.size());
                Map<Node, List<KafkaTopicPartitionState<TopicAndPartition>>> partitionsWithLeaders = findLeaderForPartitions(partitionsToAssign, kafkaConfig);
                // assign the partitions to the leaders (maybe start the threads)
                for (Map.Entry<Node, List<KafkaTopicPartitionState<TopicAndPartition>>> partitionsWithLeader : partitionsWithLeaders.entrySet()) {
                    final Node leader = partitionsWithLeader.getKey();
                    final List<KafkaTopicPartitionState<TopicAndPartition>> partitions = partitionsWithLeader.getValue();
                    SimpleConsumerThread<T> brokerThread = brokerToThread.get(leader);
                    if (!running) {
                        break;
                    }
                    if (brokerThread == null || !brokerThread.getNewPartitionsQueue().isOpen()) {
                        // start new thread
                        brokerThread = createAndStartSimpleConsumerThread(partitions, leader, errorHandler);
                        brokerToThread.put(leader, brokerThread);
                    } else {
                        // put elements into queue of thread
                        ClosableBlockingQueue<KafkaTopicPartitionState<TopicAndPartition>> newPartitionsQueue = brokerThread.getNewPartitionsQueue();
                        for (KafkaTopicPartitionState<TopicAndPartition> fp : partitions) {
                            if (!newPartitionsQueue.addIfOpen(fp)) {
                                // we were unable to add the partition to the broker's queue
                                // the broker has closed in the meantime (the thread will shut down)
                                // create a new thread for connecting to this broker
                                List<KafkaTopicPartitionState<TopicAndPartition>> seedPartitions = new ArrayList<>();
                                seedPartitions.add(fp);
                                brokerThread = createAndStartSimpleConsumerThread(seedPartitions, leader, errorHandler);
                                brokerToThread.put(leader, brokerThread);
                                // update queue for the subsequent partitions
                                newPartitionsQueue = brokerThread.getNewPartitionsQueue();
                            }
                        }
                    }
                }
            } else {
                // there were no partitions to assign. Check if any broker threads shut down.
                // we get into this section of the code, if either the poll timed out, or the
                // blocking poll was woken up by the marker element
                Iterator<SimpleConsumerThread<T>> bttIterator = brokerToThread.values().iterator();
                while (bttIterator.hasNext()) {
                    SimpleConsumerThread<T> thread = bttIterator.next();
                    if (!thread.getNewPartitionsQueue().isOpen()) {
                        LOG.info("Removing stopped consumer thread {}", thread.getName());
                        bttIterator.remove();
                    }
                }
            }
            if (brokerToThread.size() == 0 && unassignedPartitionsQueue.isEmpty()) {
                if (unassignedPartitionsQueue.close()) {
                    LOG.info("All consumer threads are finished, there are no more unassigned partitions. Stopping fetcher");
                    break;
                }
            // we end up here if somebody added something to the queue in the meantime --> continue to poll queue again
            }
        }
    } catch (InterruptedException e) {
        // this may be thrown because an exception on one of the concurrent fetcher threads
        // woke this thread up. make sure we throw the root exception instead in that case
        errorHandler.checkAndThrowException();
        // no other root exception, throw the interrupted exception
        throw e;
    } finally {
        this.running = false;
        this.zookeeperOffsetHandler = null;
        // if we run a periodic committer thread, shut that down
        if (periodicCommitter != null) {
            periodicCommitter.shutdown();
        }
        // clear the interruption flag
        // this allows the joining on consumer threads (on best effort) to happen in
        // case the initial interrupt already occurred
        Thread.interrupted();
        // make sure that in any case (completion, abort, error), all spawned threads are stopped
        try {
            int runningThreads;
            do {
                // check whether threads are alive and cancel them
                runningThreads = 0;
                Iterator<SimpleConsumerThread<T>> threads = brokerToThread.values().iterator();
                while (threads.hasNext()) {
                    SimpleConsumerThread<?> t = threads.next();
                    if (t.isAlive()) {
                        t.cancel();
                        runningThreads++;
                    } else {
                        threads.remove();
                    }
                }
                // wait for the threads to finish, before issuing a cancel call again
                if (runningThreads > 0) {
                    for (SimpleConsumerThread<?> t : brokerToThread.values()) {
                        t.join(500 / runningThreads + 1);
                    }
                }
            } while (runningThreads > 0);
        } catch (InterruptedException ignored) {
            // waiting for the thread shutdown apparently got interrupted
            // restore interrupted state and continue
            Thread.currentThread().interrupt();
        } catch (Throwable t) {
            // we catch all here to preserve the original exception
            LOG.error("Exception while shutting down consumer threads", t);
        }
        try {
            zookeeperOffsetHandler.close();
        } catch (Throwable t) {
            // we catch all here to preserve the original exception
            LOG.error("Exception while shutting down ZookeeperOffsetHandler", t);
        }
    }
}
Also used : HashMap(java.util.HashMap) Node(org.apache.kafka.common.Node) MetricGroup(org.apache.flink.metrics.MetricGroup) ArrayList(java.util.ArrayList) TopicAndPartition(kafka.common.TopicAndPartition) List(java.util.List) Map(java.util.Map)
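
The PeriodicOffsetCommitter started in the try block is not included in this listing. Below is a minimal sketch of the pattern, not the actual Flink class; it assumes ZookeeperOffsetHandler exposes a prepareAndCommitOffsets(Map) method, that ExceptionProxy exposes reportError(Throwable), and that partition states can be snapshotted via getKafkaTopicPartition() and getOffset() as used above.

private static class PeriodicOffsetCommitter extends Thread {

    private final ZookeeperOffsetHandler offsetHandler;
    private final List<KafkaTopicPartitionState<TopicAndPartition>> partitionStates;
    private final ExceptionProxy errorHandler;
    private final long commitInterval;

    private volatile boolean running = true;

    PeriodicOffsetCommitter(
            ZookeeperOffsetHandler offsetHandler,
            List<KafkaTopicPartitionState<TopicAndPartition>> partitionStates,
            ExceptionProxy errorHandler,
            long commitInterval) {
        this.offsetHandler = offsetHandler;
        this.partitionStates = partitionStates;
        this.errorHandler = errorHandler;
        this.commitInterval = commitInterval;
    }

    @Override
    public void run() {
        try {
            while (running) {
                Thread.sleep(commitInterval);
                // snapshot the current "last processed" offsets for all partitions
                Map<KafkaTopicPartition, Long> offsetsToCommit = new HashMap<>(partitionStates.size());
                for (KafkaTopicPartitionState<TopicAndPartition> state : partitionStates) {
                    offsetsToCommit.put(state.getKafkaTopicPartition(), state.getOffset());
                }
                offsetHandler.prepareAndCommitOffsets(offsetsToCommit);
            }
        } catch (InterruptedException e) {
            // shutdown() interrupts the sleep; only report unexpected interrupts
            if (running) {
                errorHandler.reportError(e);
            }
        } catch (Throwable t) {
            errorHandler.reportError(t);
        }
    }

    void shutdown() {
        running = false;
        interrupt();
    }
}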

Aggregations

TopicAndPartition (kafka.common.TopicAndPartition): 16 usages
PartitionOffsetRequestInfo (kafka.api.PartitionOffsetRequestInfo): 11 usages
HashMap (java.util.HashMap): 9 usages
OffsetRequest (kafka.javaapi.OffsetRequest): 8 usages
OffsetResponse (kafka.javaapi.OffsetResponse): 7 usages
IOException (java.io.IOException): 4 usages
ArrayList (java.util.ArrayList): 4 usages
SimpleConsumer (kafka.javaapi.consumer.SimpleConsumer): 4 usages
List (java.util.List): 3 usages
Map (java.util.Map): 3 usages
OffsetRequest (kafka.api.OffsetRequest): 2 usages
Node (org.apache.kafka.common.Node): 2 usages
PrestoException (com.facebook.presto.spi.PrestoException): 1 usage
ImmutableMap (com.google.common.collect.ImmutableMap): 1 usage
SyncFailedException (java.io.SyncFailedException): 1 usage
ByteBuffer (java.nio.ByteBuffer): 1 usage
ClosedByInterruptException (java.nio.channels.ClosedByInterruptException): 1 usage
ClosedChannelException (java.nio.channels.ClosedChannelException): 1 usage
AccessDeniedException (java.nio.file.AccessDeniedException): 1 usage
SortedMap (java.util.SortedMap): 1 usage