Search in sources :

Example 16 with ByteBufferMessageSet

use of kafka.javaapi.message.ByteBufferMessageSet in project cdap by caskdata.

the class KafkaOffsetResolver method findStartOffset.

/**
   *
   * Performs a linear search to find the next offset of the message with the smallest offset and a log
   * event time equal to {@code targetTime}. Stops searching when the current message has a log event time
   * later than {@code maxTime} or an offset larger than {@code maxOffset}.
   *
   * @return the next offset of the message with the smallest offset and a log event time equal to targetTime;
   *         or the next offset of the message with the largest offset and a timestamp smaller than
   *         (targetTime - EVENT_DELAY_MILLIS), if no message has a log event time equal to targetTime;
   *         or startOffset, if no event has a log event time smaller than (targetTime - EVENT_DELAY_MILLIS)
   * @throws OffsetOutOfRangeException if the given offset is out of range.
   * @throws NotLeaderForPartitionException if the broker that the consumer is talking to is not the leader
   *                                        for the given topic and partition.
   * @throws UnknownTopicOrPartitionException if the topic or partition is not known by the Kafka server
   * @throws UnknownServerException if the Kafka server responded with an error.
   */
private long findStartOffset(SimpleConsumer consumer, int partition, long targetTime) throws KafkaException {
    String topic = config.getTopic();
    long minTime = targetTime - config.getEventDelayMillis();
    // The latest event time we will accept before abandoning the search
    long maxTime = targetTime + config.getEventDelayMillis();
    // The offset to start the search from
    long offset = KafkaUtil.getOffsetByTimestamp(consumer, topic, partition, minTime);
    long closestOffset = offset;
    boolean done = false;
    while (!done) {
        ByteBufferMessageSet messageSet = KafkaUtil.fetchMessages(consumer, topic, partition, config.getKafkaFetchBufferSize(), offset);
        done = true;
        for (MessageAndOffset messageAndOffset : messageSet) {
            done = false;
            offset = messageAndOffset.nextOffset();
            try {
                long timestamp = serializer.decodeEventTimestamp(messageAndOffset.message().payload());
                if (timestamp == targetTime) {
                    LOG.debug("Matching offset found in {}:{} at {} for timestamp {}", topic, partition, messageAndOffset.offset(), targetTime);
                    return offset;
                }
                if (timestamp < minTime) {
                    closestOffset = offset;
                }
                if (timestamp > maxTime) {
                    done = true;
                    break;
                }
            } catch (IOException e) {
                // This shouldn't happen. In case it happens (e.g. someone published some garbage), just skip the message.
                LOG.trace("Failed to decode logging event time {}:{} at offset {}. Skipping it.", topic, partition, messageAndOffset.offset(), e);
            }
        }
    }
    LOG.debug("Failed to find a log event with timestamp {} in {}:{}. The largest offset with an event timestamp smaller than {} (target event time minus event delay {}) is {}", targetTime, topic, partition, minTime, config.getEventDelayMillis(), closestOffset);
    return closestOffset;
}
Also used : MessageAndOffset(kafka.message.MessageAndOffset) IOException(java.io.IOException) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet)
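For context, the KafkaUtil.fetchMessages call above wraps the plain SimpleConsumer fetch API. Below is a minimal, self-contained sketch of the same fetch-and-scan loop written directly against kafka.javaapi; the decodeTimestamp helper, the client id, and the fetch size are hypothetical stand-ins for the serializer and config used in the CDAP code, and fetch-response error handling is omitted for brevity.

import java.nio.ByteBuffer;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;

public class TimestampScan {

    // Hypothetical decoder; the CDAP code delegates this to its log event serializer.
    static long decodeTimestamp(ByteBuffer payload) {
        return payload.getLong(0);
    }

    // Scans forward from startOffset and returns the offset of the first message whose
    // event time is >= targetTime, or the log-end offset if no such message exists.
    static long scanForTimestamp(SimpleConsumer consumer, String topic, int partition, long startOffset, long targetTime) {
        long offset = startOffset;
        while (true) {
            FetchRequest request = new FetchRequestBuilder().clientId("timestamp-scan").addFetch(topic, partition, offset, 1024 * 1024).build();
            FetchResponse response = consumer.fetch(request);
            ByteBufferMessageSet messages = response.messageSet(topic, partition);
            boolean empty = true;
            for (MessageAndOffset messageAndOffset : messages) {
                empty = false;
                offset = messageAndOffset.nextOffset();
                if (decodeTimestamp(messageAndOffset.message().payload()) >= targetTime) {
                    return messageAndOffset.offset();
                }
            }
            if (empty) {
                // An empty message set means the end of the log has been reached.
                return offset;
            }
        }
    }
}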

Example 17 with ByteBufferMessageSet

use of kafka.javaapi.message.ByteBufferMessageSet in project storm by apache.

the class KafkaUtils method fetchMessages.

public static ByteBufferMessageSet fetchMessages(KafkaConfig config, SimpleConsumer consumer, Partition partition, long offset) throws TopicOffsetOutOfRangeException, FailedFetchException, RuntimeException {
    ByteBufferMessageSet msgs = null;
    String topic = partition.topic;
    int partitionId = partition.partition;
    FetchRequestBuilder builder = new FetchRequestBuilder();
    FetchRequest fetchRequest = builder.addFetch(topic, partitionId, offset, config.fetchSizeBytes).clientId(config.clientId).maxWait(config.fetchMaxWait).minBytes(config.minFetchByte).build();
    FetchResponse fetchResponse;
    try {
        fetchResponse = consumer.fetch(fetchRequest);
    } catch (Exception e) {
        if (e instanceof ConnectException || e instanceof SocketTimeoutException || e instanceof IOException || e instanceof UnresolvedAddressException) {
            LOG.warn("Network error when fetching messages:", e);
            throw new FailedFetchException(e);
        } else {
            throw new RuntimeException(e);
        }
    }
    if (fetchResponse.hasError()) {
        KafkaError error = KafkaError.getError(fetchResponse.errorCode(topic, partitionId));
        if (error.equals(KafkaError.OFFSET_OUT_OF_RANGE) && config.useStartOffsetTimeIfOffsetOutOfRange) {
            String msg = partition + " Got fetch request with offset out of range: [" + offset + "]";
            LOG.warn(msg);
            throw new TopicOffsetOutOfRangeException(msg);
        } else {
            String message = "Error fetching data from [" + partition + "] for topic [" + topic + "]: [" + error + "]";
            LOG.error(message);
            throw new FailedFetchException(message);
        }
    } else {
        msgs = fetchResponse.messageSet(topic, partitionId);
    }
    LOG.debug("Messages fetched. [config = {}], [consumer = {}], [partition = {}], [offset = {}], [msgs = {}]", config, consumer, partition, offset, msgs);
    return msgs;
}
Also used : FetchResponse(kafka.javaapi.FetchResponse) IOException(java.io.IOException) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet) SocketTimeoutException(java.net.SocketTimeoutException) ConnectException(java.net.ConnectException) UnresolvedAddressException(java.nio.channels.UnresolvedAddressException) FetchRequestBuilder(kafka.api.FetchRequestBuilder) FetchRequest(kafka.api.FetchRequest)
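The usual calling pattern pairs fetchMessages with an offset reset, which is exactly what the PartitionManager example below does. A minimal sketch of that pattern, assuming kafkaConfig, consumer, and partition have already been constructed by the spout:

// Hypothetical caller; kafkaConfig, consumer, and partition come from spout setup.
long offset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition, kafkaConfig);
try {
    ByteBufferMessageSet msgs = KafkaUtils.fetchMessages(kafkaConfig, consumer, partition, offset);
    for (MessageAndOffset msg : msgs) {
        // Process msg.message().payload() here, then advance past this message.
        offset = msg.nextOffset();
    }
} catch (TopicOffsetOutOfRangeException e) {
    // The requested offset has been deleted; fall back to the earliest available one.
    offset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition, kafka.api.OffsetRequest.EarliestTime());
}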

Example 18 with ByteBufferMessageSet

use of kafka.javaapi.message.ByteBufferMessageSet in project storm by apache.

the class PartitionManager method fill.

private void fill() {
    long start = System.currentTimeMillis();
    Long offset;
    // Are there failed tuples? If so, fetch those first.
    offset = this._failedMsgRetryManager.nextFailedMessageToRetry();
    final boolean processingNewTuples = (offset == null);
    if (processingNewTuples) {
        offset = _emittedToOffset;
    }
    ByteBufferMessageSet msgs = null;
    try {
        msgs = KafkaUtils.fetchMessages(_spoutConfig, _consumer, _partition, offset);
    } catch (TopicOffsetOutOfRangeException e) {
        offset = KafkaUtils.getOffset(_consumer, _partition.topic, _partition.partition, kafka.api.OffsetRequest.EarliestTime());
        //Fix for [STORM-643]: remove outdated failed offsets
        if (!processingNewTuples) {
            // In the EarliestTime case it is better to discard all failed offsets
            // that are earlier than the actual EarliestTime offset, since they are
            // no longer available on the broker. This also saves the corresponding
            // broker API calls.
            Set<Long> omitted = this._failedMsgRetryManager.clearOffsetsBefore(offset);
            // Omitted messages have not been acked and may be lost
            if (null != omitted) {
                _lostMessageCount.incrBy(omitted.size());
            }
            LOG.warn("Removing the failed offsets for {} that are out of range: {}", _partition, omitted);
        }
        if (offset > _emittedToOffset) {
            _lostMessageCount.incrBy(offset - _emittedToOffset);
            _emittedToOffset = offset;
            LOG.warn("{} Using new offset: {}", _partition, _emittedToOffset);
        }
        return;
    }
    long millis = System.currentTimeMillis() - start;
    _fetchAPILatencyMax.update(millis);
    _fetchAPILatencyMean.update(millis);
    _fetchAPICallCount.incr();
    if (msgs != null) {
        int numMessages = 0;
        for (MessageAndOffset msg : msgs) {
            final Long cur_offset = msg.offset();
            if (cur_offset < offset) {
                // Skip any old offsets.
                continue;
            }
            if (processingNewTuples || this._failedMsgRetryManager.shouldReEmitMsg(cur_offset)) {
                numMessages += 1;
                if (!_pending.containsKey(cur_offset)) {
                    _pending.put(cur_offset, System.currentTimeMillis());
                }
                _waitingToEmit.add(msg);
                _emittedToOffset = Math.max(msg.nextOffset(), _emittedToOffset);
                if (_failedMsgRetryManager.shouldReEmitMsg(cur_offset)) {
                    this._failedMsgRetryManager.retryStarted(cur_offset);
                }
            }
        }
        _fetchAPIMessageCount.incrBy(numMessages);
    }
}
Also used : ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet) MessageAndOffset(kafka.message.MessageAndOffset)
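The _failedMsgRetryManager calls above come from storm-kafka's FailedMsgRetryManager abstraction. As an illustration of the contract fill() relies on, here is a deliberately simplified, in-memory sketch covering only the four methods used above; the real ExponentialBackoffMsgRetryManager adds backoff scheduling and retry limits.

import java.util.Set;
import java.util.TreeSet;

// Simplified sketch; not the actual storm-kafka implementation.
class SimpleRetryTracker {
    private final TreeSet<Long> failed = new TreeSet<>();

    // Offset of the oldest failed message, or null if none are waiting.
    Long nextFailedMessageToRetry() {
        return failed.isEmpty() ? null : failed.first();
    }

    // Drop failed offsets below the given offset; the broker no longer has them.
    Set<Long> clearOffsetsBefore(Long offset) {
        Set<Long> omitted = new TreeSet<>(failed.headSet(offset));
        failed.removeAll(omitted);
        return omitted;
    }

    boolean shouldReEmitMsg(Long offset) {
        return failed.contains(offset);
    }

    void retryStarted(Long offset) {
        failed.remove(offset);
    }
}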

Example 19 with ByteBufferMessageSet

use of kafka.javaapi.message.ByteBufferMessageSet in project storm by apache.

the class TridentKafkaEmitter method reEmitPartitionBatch.

/**
     * Re-emits the batch described by the provided metadata.
     *
     * @param attempt the transaction attempt being replayed
     * @param collector the collector to emit tuples to
     * @param partition the Kafka partition to re-read the batch from
     * @param meta the metadata describing the batch (instanceId, offset, nextOffset)
     */
private void reEmitPartitionBatch(TransactionAttempt attempt, TridentCollector collector, Partition partition, Map meta) {
    LOG.info("re-emitting batch, attempt " + attempt);
    String instanceId = (String) meta.get("instanceId");
    if (!_config.ignoreZkOffsets || instanceId.equals(_topologyInstanceId)) {
        SimpleConsumer consumer = _connections.register(partition);
        long offset = (Long) meta.get("offset");
        long nextOffset = (Long) meta.get("nextOffset");
        ByteBufferMessageSet msgs = null;
        msgs = fetchMessages(consumer, partition, offset);
        if (msgs != null) {
            for (MessageAndOffset msg : msgs) {
                if (offset == nextOffset) {
                    break;
                }
                if (offset > nextOffset) {
                    throw new RuntimeException("Error when re-emitting batch: overshot the end offset");
                }
                emit(collector, msg.message(), partition, msg.offset(), attempt);
                offset = msg.nextOffset();
            }
        }
    }
}
Also used : MessageAndOffset(kafka.message.MessageAndOffset) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet) SimpleConsumer(kafka.javaapi.consumer.SimpleConsumer)
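The method trusts the meta map handed back by Trident to describe exactly one batch. A hypothetical construction of such a map is shown here with illustrative values; doEmitNewPartitionBatch in the next example builds the real one.

// Illustrative only: the keys match what reEmitPartitionBatch reads above.
Map<String, Object> meta = new HashMap<>();
meta.put("instanceId", topologyInstanceId); // ties the batch to a topology run
meta.put("offset", 1000L);                  // first offset of the batch (inclusive)
meta.put("nextOffset", 1050L);              // offset after the last message (exclusive)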

Example 20 with ByteBufferMessageSet

use of kafka.javaapi.message.ByteBufferMessageSet in project storm by apache.

the class TridentKafkaEmitter method doEmitNewPartitionBatch.

private Map doEmitNewPartitionBatch(SimpleConsumer consumer, Partition partition, TridentCollector collector, Map lastMeta, TransactionAttempt attempt) {
    LOG.debug("Emitting new partition batch - [transaction = {}], [lastMeta = {}]", attempt, lastMeta);
    long offset;
    if (lastMeta != null) {
        String lastInstanceId = null;
        Map lastTopoMeta = (Map) lastMeta.get("topology");
        if (lastTopoMeta != null) {
            lastInstanceId = (String) lastTopoMeta.get("id");
        }
        if (_config.ignoreZkOffsets && !_topologyInstanceId.equals(lastInstanceId)) {
            offset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition, _config.startOffsetTime);
        } else {
            offset = (Long) lastMeta.get("nextOffset");
        }
    } else {
        offset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition, _config);
    }
    LOG.debug("[transaction = {}], [OFFSET = {}]", attempt, offset);
    ByteBufferMessageSet msgs = null;
    try {
        msgs = fetchMessages(consumer, partition, offset);
    } catch (TopicOffsetOutOfRangeException e) {
        long newOffset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition, kafka.api.OffsetRequest.EarliestTime());
        LOG.warn("OffsetOutOfRange: Updating offset from offset = " + offset + " to offset = " + newOffset);
        offset = newOffset;
        msgs = KafkaUtils.fetchMessages(_config, consumer, partition, offset);
    }
    long endoffset = offset;
    for (MessageAndOffset msg : msgs) {
        emit(collector, msg.message(), partition, msg.offset(), attempt);
        endoffset = msg.nextOffset();
    }
    Map newMeta = new HashMap();
    newMeta.put("offset", offset);
    newMeta.put("nextOffset", endoffset);
    newMeta.put("instanceId", _topologyInstanceId);
    newMeta.put("partition", partition.partition);
    newMeta.put("broker", ImmutableMap.of("host", partition.host.host, "port", partition.host.port));
    newMeta.put("topic", partition.topic);
    newMeta.put("topology", ImmutableMap.of("name", _topologyName, "id", _topologyInstanceId));
    LOG.debug("[transaction = {}], [newMeta = {}]", attempt, newMeta);
    return newMeta;
}
Also used : HashMap(java.util.HashMap) TopicOffsetOutOfRangeException(org.apache.storm.kafka.TopicOffsetOutOfRangeException) MessageAndOffset(kafka.message.MessageAndOffset) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)
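For reference, a newMeta map produced by this method has the following shape (all values are illustrative, not taken from a real run):

{
  "offset": 1000,
  "nextOffset": 1050,
  "instanceId": "a1b2c3d4",
  "partition": 0,
  "broker": {"host": "kafka-1", "port": 9092},
  "topic": "events",
  "topology": {"name": "my-topology", "id": "a1b2c3d4"}
}

Trident persists this map and hands it back as lastMeta on the next call, which is how the nextOffset of one batch becomes the starting offset of the next.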

Aggregations

ByteBufferMessageSet (kafka.javaapi.message.ByteBufferMessageSet): 34
MessageAndOffset (kafka.message.MessageAndOffset): 24
ArrayList (java.util.ArrayList): 11
List (java.util.List): 10
Test (org.junit.Test): 10
IOException (java.io.IOException): 6
FetchRequest (kafka.api.FetchRequest): 5
Message (kafka.message.Message): 5
ByteBuffer (java.nio.ByteBuffer): 4
FetchRequestBuilder (kafka.api.FetchRequestBuilder): 4
FetchResponse (kafka.javaapi.FetchResponse): 4
SimpleConsumer (kafka.javaapi.consumer.SimpleConsumer): 4
ConnectException (java.net.ConnectException): 3
SocketTimeoutException (java.net.SocketTimeoutException): 3
UnresolvedAddressException (java.nio.channels.UnresolvedAddressException): 3
Future (java.util.concurrent.Future): 2
Callback (org.apache.kafka.clients.producer.Callback): 2
ProducerRecord (org.apache.kafka.clients.producer.ProducerRecord): 2
SchemeAsMultiScheme (org.apache.storm.spout.SchemeAsMultiScheme): 2
Tuple (org.apache.storm.tuple.Tuple): 2