
Example 1 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project pinot by linkedin.

The class SimpleConsumerWrapper, method fetchPartitionOffset.

/**
   * Fetches the numeric Kafka offset for this partition for a symbolic name ("largest" or "smallest").
   *
   * @param requestedOffset Either "largest" or "smallest"
   * @param timeoutMillis Timeout in milliseconds
   * @return The fetched offset, or -1 if {@code requestedOffset} is "testDummy"
   * @throws java.util.concurrent.TimeoutException If the operation could not be completed within {@code timeoutMillis} milliseconds
   */
public synchronized long fetchPartitionOffset(String requestedOffset, int timeoutMillis) throws java.util.concurrent.TimeoutException {
    Preconditions.checkNotNull(requestedOffset);
    final long offsetRequestTime;
    if (requestedOffset.equalsIgnoreCase("largest")) {
        offsetRequestTime = kafka.api.OffsetRequest.LatestTime();
    } else if (requestedOffset.equalsIgnoreCase("smallest")) {
        offsetRequestTime = kafka.api.OffsetRequest.EarliestTime();
    } else if (requestedOffset.equalsIgnoreCase("testDummy")) {
        return -1L;
    } else {
        throw new IllegalArgumentException("Unknown initial offset value " + requestedOffset);
    }
    int kafkaErrorCount = 0;
    final int MAX_KAFKA_ERROR_COUNT = 10;
    final long endTime = System.currentTimeMillis() + timeoutMillis;
    while (System.currentTimeMillis() < endTime) {
        // Try to get into a state where we're connected to Kafka
        while (_currentState.getStateValue() != ConsumerState.CONNECTED_TO_PARTITION_LEADER && System.currentTimeMillis() < endTime) {
            _currentState.process();
        }
        if (_currentState.getStateValue() != ConsumerState.CONNECTED_TO_PARTITION_LEADER && endTime <= System.currentTimeMillis()) {
            throw new TimeoutException();
        }
        // Send the offset request to Kafka
        OffsetRequest request = new OffsetRequest(Collections.singletonMap(new TopicAndPartition(_topic, _partition), new PartitionOffsetRequestInfo(offsetRequestTime, 1)), kafka.api.OffsetRequest.CurrentVersion(), _clientId);
        OffsetResponse offsetResponse;
        try {
            offsetResponse = _simpleConsumer.getOffsetsBefore(request);
        } catch (Exception e) {
            _currentState.handleConsumerException(e);
            continue;
        }
        final short errorCode = offsetResponse.errorCode(_topic, _partition);
        if (errorCode == Errors.NONE.code()) {
            long offset = offsetResponse.offsets(_topic, _partition)[0];
            if (offset == 0L) {
                LOGGER.warn("Fetched offset of 0 for topic {} and partition {}, is this a newly created topic?", _topic, _partition);
            }
            return offset;
        } else if (errorCode == Errors.LEADER_NOT_AVAILABLE.code()) {
            // If there is no leader, it'll take some time for a new leader to be elected, wait 100 ms before retrying
            Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        } else {
            // Retry after a short delay
            kafkaErrorCount++;
            if (MAX_KAFKA_ERROR_COUNT < kafkaErrorCount) {
                throw exceptionForKafkaErrorCode(errorCode);
            }
            Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        }
    }
    throw new TimeoutException();
}
Also used: OffsetResponse (kafka.javaapi.OffsetResponse), PartitionOffsetRequestInfo (kafka.api.PartitionOffsetRequestInfo), TopicAndPartition (kafka.common.TopicAndPartition), TimeoutException (org.apache.kafka.common.errors.TimeoutException), IOException (java.io.IOException), OffsetRequest (kafka.javaapi.OffsetRequest)
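The helper exceptionForKafkaErrorCode called at the end of the retry loop is not part of this snippet. A minimal hypothetical sketch of such a helper, assuming the org.apache.kafka.common.protocol.Errors enum from kafka-clients, could look like this:

// Hypothetical helper, not taken from the pinot source: translate a legacy
// Kafka error code into an exception via the kafka-clients Errors enum.
private static RuntimeException exceptionForKafkaErrorCode(short errorCode) {
    final Errors error = Errors.forCode(errorCode);
    if (error == Errors.NONE || error.exception() == null) {
        // no mapped exception for this code; fall back to a generic message
        return new RuntimeException("Unexpected Kafka error code " + errorCode);
    }
    return new RuntimeException(error.exception());
}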

Example 2 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project storm by apache.

The class KafkaUtils, method getOffset.

public static long getOffset(SimpleConsumer consumer, String topic, int partition, long startOffsetTime) {
    TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
    requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(startOffsetTime, 1));
    OffsetRequest request = new OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), consumer.clientId());
    long[] offsets = consumer.getOffsetsBefore(request).offsets(topic, partition);
    if (offsets.length > 0) {
        return offsets[0];
    } else {
        return NO_OFFSET;
    }
}
Also used: HashMap (java.util.HashMap), PartitionOffsetRequestInfo (kafka.api.PartitionOffsetRequestInfo), TopicAndPartition (kafka.common.TopicAndPartition), OffsetRequest (kafka.javaapi.OffsetRequest)
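A hypothetical caller might resolve both ends of a partition by passing the symbolic times from kafka.api.OffsetRequest as startOffsetTime. The broker address, topic name, partition, and client id below are placeholders, not values from the storm source:

// Hypothetical usage sketch; "broker-1", "events" and partition 0 are made up.
SimpleConsumer consumer = new SimpleConsumer("broker-1", 9092, 100000, 64 * 1024, "offset-lookup");
try {
    long earliest = KafkaUtils.getOffset(consumer, "events", 0, kafka.api.OffsetRequest.EarliestTime());
    long latest = KafkaUtils.getOffset(consumer, "events", 0, kafka.api.OffsetRequest.LatestTime());
    System.out.println("partition 0 offset range: [" + earliest + ", " + latest + ")");
} finally {
    consumer.close();
}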

Example 3 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project druid by druid-io.

The class KafkaSimpleConsumer, method getOffset.

private long getOffset(boolean earliest) throws InterruptedException {
    TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
    requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(earliest ? kafka.api.OffsetRequest.EarliestTime() : kafka.api.OffsetRequest.LatestTime(), 1));
    OffsetRequest request = new OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientId);
    OffsetResponse response = null;
    try {
        response = consumer.getOffsetsBefore(request);
    } catch (Exception e) {
        ensureNotInterrupted(e);
        log.error(e, "caught exception in getOffsetsBefore [%s] - [%s]", topic, partitionId);
        return -1;
    }
    if (response.hasError()) {
        log.error("error fetching data Offset from the Broker [%s]. reason: [%s]", leaderBroker.host(), response.errorCode(topic, partitionId));
        return -1;
    }
    long[] offsets = response.offsets(topic, partitionId);
    return earliest ? offsets[0] : offsets[offsets.length - 1];
}
Also used: OffsetResponse (kafka.javaapi.OffsetResponse), HashMap (java.util.HashMap), PartitionOffsetRequestInfo (kafka.api.PartitionOffsetRequestInfo), TopicAndPartition (kafka.common.TopicAndPartition), OffsetRequest (kafka.javaapi.OffsetRequest)
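For comparison, the modern consumer API (org.apache.kafka.clients.consumer.KafkaConsumer, available since kafka-clients 0.10.1) replaces this manual OffsetRequest plumbing with beginningOffsets and endOffsets. A minimal sketch of a hypothetical helper, assuming imports for java.util.Properties, java.util.Collections, org.apache.kafka.clients.consumer.ConsumerConfig and KafkaConsumer, org.apache.kafka.common.TopicPartition, and org.apache.kafka.common.serialization.ByteArrayDeserializer:

// Hypothetical helper using the new consumer API; not part of the druid source.
static long[] earliestAndLatest(String bootstrapServers, String topic, int partition) {
    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
        TopicPartition tp = new TopicPartition(topic, partition);
        long earliest = consumer.beginningOffsets(Collections.singleton(tp)).get(tp);
        long latest = consumer.endOffsets(Collections.singleton(tp)).get(tp);
        // earliest == latest means the partition is currently empty
        return new long[] { earliest, latest };
    }
}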

Example 4 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project flink by apache.

The class SimpleConsumerThread, method run.

// ------------------------------------------------------------------------
//  main work loop
// ------------------------------------------------------------------------
@Override
public void run() {
    LOG.info("Starting to fetch from {}", this.partitions);
    // set up the config values
    final String clientId = "flink-kafka-consumer-legacy-" + broker.id();
    try {
        // create the Kafka consumer that we actually use for fetching
        consumer = new SimpleConsumer(broker.host(), broker.port(), soTimeout, bufferSize, clientId);
        // replace earliest or latest starting offsets with actual offset values fetched from Kafka
        requestAndSetEarliestOrLatestOffsetsFromKafka(consumer, partitions);
        LOG.info("Starting to consume {} partitions with consumer thread {}", partitions.size(), getName());
        // Now, the actual work starts :-)
        int offsetOutOfRangeCount = 0;
        int reconnects = 0;
        while (running) {
            // ----------------------------------- partitions list maintenance ----------------------------
            // check queue for new partitions to read from:
            List<KafkaTopicPartitionState<TopicAndPartition>> newPartitions = newPartitionsQueue.pollBatch();
            if (newPartitions != null) {
                // found some new partitions for this thread's broker
                // the new partitions should already be assigned a starting offset
                checkAllPartitionsHaveDefinedStartingOffsets(newPartitions);
                // if the new partitions are to start from earliest or latest offsets,
                // we need to replace them with actual values from Kafka
                requestAndSetEarliestOrLatestOffsetsFromKafka(consumer, newPartitions);
                // add the new partitions (and check they are not already in there)
                for (KafkaTopicPartitionState<TopicAndPartition> newPartition : newPartitions) {
                    if (partitions.contains(newPartition)) {
                        throw new IllegalStateException("Adding partition " + newPartition + " to subscribed partitions even though it is already subscribed");
                    }
                    partitions.add(newPartition);
                }
                LOG.info("Adding {} new partitions to consumer thread {}", newPartitions.size(), getName());
                LOG.debug("Partitions list: {}", newPartitions);
            }
            if (partitions.size() == 0) {
                if (newPartitionsQueue.close()) {
                    // close succeeded. Closing thread
                    running = false;
                    LOG.info("Consumer thread {} does not have any partitions assigned anymore. Stopping thread.", getName());
                    // add the wake-up marker into the queue to make the main thread
                    // immediately wake up and terminate faster
                    unassignedPartitions.add(MARKER);
                    break;
                } else {
                    // go to top of loop again and get the new partitions
                    continue;
                }
            }
            // ----------------------------------- request / response with kafka ----------------------------
            FetchRequestBuilder frb = new FetchRequestBuilder();
            frb.clientId(clientId);
            frb.maxWait(maxWait);
            frb.minBytes(minBytes);
            for (KafkaTopicPartitionState<?> partition : partitions) {
                frb.addFetch(
                    partition.getKafkaTopicPartition().getTopic(),
                    partition.getKafkaTopicPartition().getPartition(),
                    // request the record after the last consumed offset
                    partition.getOffset() + 1,
                    fetchSize);
            }
            kafka.api.FetchRequest fetchRequest = frb.build();
            LOG.debug("Issuing fetch request {}", fetchRequest);
            FetchResponse fetchResponse;
            try {
                fetchResponse = consumer.fetch(fetchRequest);
            } catch (Throwable cce) {
                //noinspection ConstantConditions
                if (cce instanceof ClosedChannelException) {
                    LOG.warn("Fetch failed because of ClosedChannelException.");
                    LOG.debug("Full exception", cce);
                    // retry a few times, then return ALL partitions for new leader lookup
                    if (++reconnects >= reconnectLimit) {
                        LOG.warn("Unable to reach broker after {} retries. Returning all current partitions", reconnectLimit);
                        for (KafkaTopicPartitionState<TopicAndPartition> fp : this.partitions) {
                            unassignedPartitions.add(fp);
                        }
                        this.partitions.clear();
                        // jump to top of loop: will close thread or subscribe to new partitions
                        continue;
                    }
                    try {
                        consumer.close();
                    } catch (Throwable t) {
                        LOG.warn("Error while closing consumer connection", t);
                    }
                    // delay & retry
                    Thread.sleep(100);
                    consumer = new SimpleConsumer(broker.host(), broker.port(), soTimeout, bufferSize, clientId);
                    // retry
                    continue;
                } else {
                    throw cce;
                }
            }
            reconnects = 0;
            if (fetchResponse == null) {
                throw new IOException("Fetch from Kafka failed (request returned null)");
            }
            if (fetchResponse.hasError()) {
                String exception = "";
                List<KafkaTopicPartitionState<TopicAndPartition>> partitionsToGetOffsetsFor = new ArrayList<>();
                // iterate over partitions to get individual error codes
                Iterator<KafkaTopicPartitionState<TopicAndPartition>> partitionsIterator = partitions.iterator();
                boolean partitionsRemoved = false;
                while (partitionsIterator.hasNext()) {
                    final KafkaTopicPartitionState<TopicAndPartition> fp = partitionsIterator.next();
                    short code = fetchResponse.errorCode(fp.getTopic(), fp.getPartition());
                    if (code == ErrorMapping.OffsetOutOfRangeCode()) {
                        // we were asked to read from an out-of-range-offset (maybe set wrong in Zookeeper)
                        // Kafka's high level consumer is resetting the offset according to 'auto.offset.reset'
                        partitionsToGetOffsetsFor.add(fp);
                    } else if (code == ErrorMapping.NotLeaderForPartitionCode() || code == ErrorMapping.LeaderNotAvailableCode() || code == ErrorMapping.BrokerNotAvailableCode() || code == ErrorMapping.UnknownCode()) {
                        // the broker we are connected to is not the leader for the partition.
                        LOG.warn("{} is not the leader of {}. Reassigning leader for partition", broker, fp);
                        LOG.debug("Error code = {}", code);
                        unassignedPartitions.add(fp);
                        // unsubscribe the partition ourselves
                        partitionsIterator.remove();
                        partitionsRemoved = true;
                    } else if (code != ErrorMapping.NoError()) {
                        exception += "\nException for " + fp.getTopic() + ":" + fp.getPartition() + ": " + StringUtils.stringifyException(ErrorMapping.exceptionFor(code));
                    }
                }
                if (partitionsToGetOffsetsFor.size() > 0) {
                    // safeguard against an infinite loop.
                    if (offsetOutOfRangeCount++ > 3) {
                        throw new RuntimeException("Found invalid offsets more than three times in partitions " + partitionsToGetOffsetsFor + " Exceptions: " + exception);
                    }
                    // get valid offsets for these partitions and try again.
                    LOG.warn("The following partitions had an invalid offset: {}", partitionsToGetOffsetsFor);
                    requestAndSetSpecificTimeOffsetsFromKafka(consumer, partitionsToGetOffsetsFor, invalidOffsetBehavior);
                    LOG.warn("The new partition offsets are {}", partitionsToGetOffsetsFor);
                    // jump back to create a new fetch request. The offset has not been touched.
                    continue;
                } else if (partitionsRemoved) {
                    // create new fetch request
                    continue;
                } else {
                    // partitions failed on an error
                    throw new IOException("Error while fetching from broker '" + broker + "': " + exception);
                }
            } else {
                // successful fetch, reset offsetOutOfRangeCount.
                offsetOutOfRangeCount = 0;
            }
            // ----------------------------------- process fetch response ----------------------------
            int messagesInFetch = 0;
            int deletedMessages = 0;
            Iterator<KafkaTopicPartitionState<TopicAndPartition>> partitionsIterator = partitions.iterator();
            partitionsLoop: while (partitionsIterator.hasNext()) {
                final KafkaTopicPartitionState<TopicAndPartition> currentPartition = partitionsIterator.next();
                final ByteBufferMessageSet messageSet = fetchResponse.messageSet(currentPartition.getTopic(), currentPartition.getPartition());
                for (MessageAndOffset msg : messageSet) {
                    if (running) {
                        messagesInFetch++;
                        final ByteBuffer payload = msg.message().payload();
                        final long offset = msg.offset();
                        if (offset <= currentPartition.getOffset()) {
                            // we have seen this message already
                            LOG.info("Skipping message with offset " + msg.offset() + " because we have seen messages until (including) " + currentPartition.getOffset() + " from topic/partition " + currentPartition.getTopic() + '/' + currentPartition.getPartition() + " already");
                            continue;
                        }
                        // If the message value is null, this represents a delete command for the message key.
                        // Log this and pass it on to the client who might want to also receive delete messages.
                        byte[] valueBytes;
                        if (payload == null) {
                            deletedMessages++;
                            valueBytes = null;
                        } else {
                            valueBytes = new byte[payload.remaining()];
                            payload.get(valueBytes);
                        }
                        // put key into byte array
                        byte[] keyBytes = null;
                        int keySize = msg.message().keySize();
                        if (keySize >= 0) {
                            // message().hasKey() does the same check; reading keySize directly saves one int deserialization
                            ByteBuffer keyPayload = msg.message().key();
                            keyBytes = new byte[keySize];
                            keyPayload.get(keyBytes);
                        }
                        final T value = deserializer.deserialize(keyBytes, valueBytes, currentPartition.getTopic(), currentPartition.getPartition(), offset);
                        if (deserializer.isEndOfStream(value)) {
                            // remove partition from subscribed partitions.
                            partitionsIterator.remove();
                            continue partitionsLoop;
                        }
                        owner.emitRecord(value, currentPartition, offset);
                    } else {
                        // no longer running
                        return;
                    }
                }
            }
            LOG.debug("This fetch contained {} messages ({} deleted messages)", messagesInFetch, deletedMessages);
        }
        if (!newPartitionsQueue.close()) {
            throw new Exception("Bug: Cleanly leaving fetcher thread without having a closed queue.");
        }
    } catch (Throwable t) {
        // report to the fetcher's error handler
        errorHandler.reportError(t);
    } finally {
        if (consumer != null) {
            // closing the consumer should not fail the program
            try {
                consumer.close();
            } catch (Throwable t) {
                LOG.error("Error while closing the Kafka simple consumer", t);
            }
        }
    }
}
Also used: ArrayList (java.util.ArrayList), MessageAndOffset (kafka.message.MessageAndOffset), ByteBufferMessageSet (kafka.javaapi.message.ByteBufferMessageSet), FetchRequestBuilder (kafka.api.FetchRequestBuilder), TopicAndPartition (kafka.common.TopicAndPartition), ClosedChannelException (java.nio.channels.ClosedChannelException), FetchResponse (kafka.javaapi.FetchResponse), IOException (java.io.IOException), ByteBuffer (java.nio.ByteBuffer), SimpleConsumer (kafka.javaapi.consumer.SimpleConsumer)
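Stripped of Flink's partition bookkeeping and leader-failover handling, the core fetch-and-decode pattern in the loop above can be sketched on its own. This is a hypothetical standalone method; the broker address, topic, partition, starting offset, and sizes are placeholders, and the classes are the same kafka.api / kafka.javaapi / kafka.message ones listed above:

// Hypothetical sketch, not the Flink implementation.
static void fetchOnce() throws IOException {
    SimpleConsumer consumer = new SimpleConsumer("broker-1", 9092, 30000, 64 * 1024, "fetch-sketch");
    try {
        kafka.api.FetchRequest fetchRequest = new FetchRequestBuilder()
            .clientId("fetch-sketch")
            .maxWait(1000)
            .minBytes(1)
            .addFetch("events", 0, 0L, 1024 * 1024)
            .build();
        FetchResponse fetchResponse = consumer.fetch(fetchRequest);
        if (fetchResponse.hasError()) {
            throw new IOException("Fetch failed with error code " + fetchResponse.errorCode("events", 0));
        }
        for (MessageAndOffset msg : fetchResponse.messageSet("events", 0)) {
            ByteBuffer payload = msg.message().payload();
            // a null payload is a delete (tombstone) for the message key
            byte[] value = null;
            if (payload != null) {
                value = new byte[payload.remaining()];
                payload.get(value);
            }
            System.out.println(msg.offset() + ": " + (value == null ? "tombstone" : value.length + " bytes"));
        }
    } finally {
        consumer.close();
    }
}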

Example 5 with TopicAndPartition

Use of kafka.common.TopicAndPartition in project flink by apache.

The class SimpleConsumerThread, method requestAndSetEarliestOrLatestOffsetsFromKafka.

/**
	 * For a set of partitions, if a partition is set with the special offsets {@link OffsetRequest#EarliestTime()}
	 * or {@link OffsetRequest#LatestTime()}, replace them with actual offsets requested via a Kafka consumer.
	 *
	 * @param consumer The consumer connected to the leader broker
	 * @param partitions The list of partitions we need offsets for
	 */
private static void requestAndSetEarliestOrLatestOffsetsFromKafka(SimpleConsumer consumer, List<KafkaTopicPartitionState<TopicAndPartition>> partitions) throws Exception {
    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<>();
    for (KafkaTopicPartitionState<TopicAndPartition> part : partitions) {
        if (part.getOffset() == OffsetRequest.EarliestTime() || part.getOffset() == OffsetRequest.LatestTime()) {
            requestInfo.put(part.getKafkaPartitionHandle(), new PartitionOffsetRequestInfo(part.getOffset(), 1));
        }
    }
    requestAndSetOffsetsFromKafka(consumer, partitions, requestInfo);
}
Also used: HashMap (java.util.HashMap), PartitionOffsetRequestInfo (kafka.api.PartitionOffsetRequestInfo), TopicAndPartition (kafka.common.TopicAndPartition)
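The companion helper requestAndSetOffsetsFromKafka is referenced but not shown. A hypothetical reconstruction of what it might do, assuming KafkaTopicPartitionState exposes a setOffset method and following the convention of the fetch loop above, which requests getOffset() + 1 (so the resolved offset is stored minus one):

// Hypothetical reconstruction, not the actual Flink implementation.
private static void requestAndSetOffsetsFromKafka(
        SimpleConsumer consumer,
        List<KafkaTopicPartitionState<TopicAndPartition>> partitions,
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo) throws IOException {
    kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
            requestInfo, kafka.api.OffsetRequest.CurrentVersion(), consumer.clientId());
    kafka.javaapi.OffsetResponse response = consumer.getOffsetsBefore(request);
    if (response.hasError()) {
        throw new IOException("Error while resolving offsets for " + requestInfo.keySet());
    }
    for (KafkaTopicPartitionState<TopicAndPartition> part : partitions) {
        if (requestInfo.containsKey(part.getKafkaPartitionHandle())) {
            long[] offsets = response.offsets(part.getTopic(), part.getPartition());
            if (offsets.length > 0) {
                // the fetch loop requests getOffset() + 1, so store the resolved offset minus one
                part.setOffset(offsets[0] - 1);
            }
        }
    }
}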

Aggregations

TopicAndPartition (kafka.common.TopicAndPartition): 20 usages
PartitionOffsetRequestInfo (kafka.api.PartitionOffsetRequestInfo): 14 usages
HashMap (java.util.HashMap): 12 usages
OffsetResponse (kafka.javaapi.OffsetResponse): 10 usages
OffsetRequest (kafka.javaapi.OffsetRequest): 9 usages
IOException (java.io.IOException): 4 usages
ArrayList (java.util.ArrayList): 4 usages
SimpleConsumer (kafka.javaapi.consumer.SimpleConsumer): 4 usages
List (java.util.List): 3 usages
Map (java.util.Map): 3 usages
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2 usages
OffsetRequest (kafka.api.OffsetRequest): 2 usages
Node (org.apache.kafka.common.Node): 2 usages
PrestoException (com.facebook.presto.spi.PrestoException): 1 usage
ImmutableMap (com.google.common.collect.ImmutableMap): 1 usage
SyncFailedException (java.io.SyncFailedException): 1 usage
ByteBuffer (java.nio.ByteBuffer): 1 usage
ClosedByInterruptException (java.nio.channels.ClosedByInterruptException): 1 usage
ClosedChannelException (java.nio.channels.ClosedChannelException): 1 usage
AccessDeniedException (java.nio.file.AccessDeniedException): 1 usage