Search in sources :

Example 16 with MessageAndOffset

use of kafka.message.MessageAndOffset in project cdap by caskdata.

the class KafkaOffsetResolver method findStartOffset.

/**
 * Scans the partition linearly for the first message whose log event time equals {@code targetTime}.
 * The scan starts at the offset returned by {@code KafkaUtil.getOffsetByTimestamp} for
 * (targetTime - event delay) and ends when a fetch returns no messages or when a message with log
 * event time later than (targetTime + event delay) is encountered.
 *
 * @param consumer the consumer used to query offsets and fetch messages
 * @param partition the partition of the configured topic to search
 * @param targetTime the log event time to look for
 * @return the next offset of the first message found with log event time equal to targetTime,
 *         or the next offset of the last scanned message with log event time smaller than
 *         (targetTime - event delay) if no message matches exactly,
 *         or the initial search offset if no such message was scanned either
 * @throws OffsetOutOfRangeException if the given offset is out of range.
 * @throws NotLeaderForPartitionException if the broker that the consumer is talking to is not the leader
 *                                        for the given topic and partition.
 * @throws UnknownTopicOrPartitionException if the topic or partition is not known by the Kafka server
 * @throws UnknownServerException if the Kafka server responded with error.
 */
private long findStartOffset(SimpleConsumer consumer, int partition, long targetTime) throws KafkaException {
    String topic = config.getTopic();
    // Lower bound: messages older than this only serve as the fallback position
    long earliestTime = targetTime - config.getEventDelayMillis();
    // Upper bound: once an event later than this shows up, the search is abandoned
    long latestTime = targetTime + config.getEventDelayMillis();
    // Position of the next fetch; advanced past every message that gets scanned
    long nextFetchOffset = KafkaUtil.getOffsetByTimestamp(consumer, topic, partition, earliestTime);
    long fallbackOffset = nextFetchOffset;

    scan:
    while (true) {
        ByteBufferMessageSet batch = KafkaUtil.fetchMessages(consumer, topic, partition, config.getKafkaFetchBufferSize(), nextFetchOffset);
        boolean batchEmpty = true;
        for (MessageAndOffset entry : batch) {
            batchEmpty = false;
            nextFetchOffset = entry.nextOffset();

            long eventTime;
            try {
                eventTime = serializer.decodeEventTimestamp(entry.message().payload());
            } catch (IOException e) {
                // This shouldn't happen. If it does (e.g. someone published garbage), move on to the next message.
                LOG.trace("Fail to decode logging event time {}:{} at offset {}. Skipping it.", topic, partition, entry.offset(), e);
                continue;
            }

            if (eventTime == targetTime) {
                LOG.debug("Matching offset found in {}:{} at {} for timestamp {}", topic, partition, entry.offset(), targetTime);
                return nextFetchOffset;
            }
            if (eventTime < earliestTime) {
                fallbackOffset = nextFetchOffset;
            }
            if (eventTime > latestTime) {
                break scan;
            }
        }
        // A fetch that yields nothing means there is nothing further to scan
        if (batchEmpty) {
            break;
        }
    }
    LOG.debug("Fail to find a log event with timestamp {} in {}:{}. " + "The largest offset with event timestamp smaller than {} (target event time minus event delay {}) is {}", targetTime, topic, partition, earliestTime, config.getEventDelayMillis(), fallbackOffset);
    return fallbackOffset;
}
Also used : MessageAndOffset(kafka.message.MessageAndOffset) IOException(java.io.IOException) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet)

Example 17 with MessageAndOffset

use of kafka.message.MessageAndOffset in project cdap by caskdata.

the class KafkaOffsetResolver method getEventTimeByOffset.

/**
 * Fetches the message located at {@code requestOffset} and decodes its log event time.
 *
 * @param consumer the consumer used to fetch the message
 * @param partition the partition of the configured topic to read from
 * @param requestOffset the offset of the message to inspect
 * @return the log event time of the message with {@code requestOffset}
 * @throws NotFoundException if the fetch returns no message at the given offset, or the message
 *                           cannot be decoded as a log event
 * @throws OffsetOutOfRangeException if the given offset is out of range.
 * @throws NotLeaderForPartitionException if the broker that the consumer is talking to is not the leader
 *                                        for the given topic and partition.
 * @throws UnknownTopicOrPartitionException if the topic or partition is not known by the Kafka server
 * @throws UnknownServerException if the Kafka server responded with error.
 */
private long getEventTimeByOffset(SimpleConsumer consumer, int partition, long requestOffset) throws NotFoundException {
    String topic = config.getTopic();
    Iterator<MessageAndOffset> fetched =
        KafkaUtil.fetchMessages(consumer, topic, partition, SINGLE_MESSAGE_MAX_SIZE, requestOffset).iterator();

    if (fetched.hasNext()) {
        MessageAndOffset first = fetched.next();
        try {
            return serializer.decodeEventTimestamp(first.message().payload());
        } catch (IOException e) {
            // An undecodable message is treated as not found: either way it is not the event being looked for
            throw new NotFoundException("Invalid log event found in " + topic + ":" + partition + " at offset " + requestOffset);
        }
    }
    throw new NotFoundException("No message found in " + topic + ":" + partition + " at offset " + requestOffset);
}
Also used : NotFoundException(co.cask.cdap.common.NotFoundException) MessageAndOffset(kafka.message.MessageAndOffset) IOException(java.io.IOException) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet)

Example 18 with MessageAndOffset

use of kafka.message.MessageAndOffset in project cdap by caskdata.

the class KafkaLogProcessorPipeline method fetchAll.

/**
 * Submits one fetch task per configured partition to the fetch executor and records the resulting
 * future in the given map, keyed by partition.
 *
 * @param offsets the offset to start fetching from, for each partition
 * @param fetchFutures the map to populate with one pending fetch per partition
 * @return the same {@code fetchFutures} map that was passed in
 */
private <T extends Map<Integer, Future<Iterable<MessageAndOffset>>>> T fetchAll(Int2LongMap offsets, T fetchFutures) {
    for (final int partitionId : config.getPartitions()) {
        final long startOffset = offsets.get(partitionId);
        Callable<Iterable<MessageAndOffset>> fetchTask = new Callable<Iterable<MessageAndOffset>>() {
            @Override
            public Iterable<MessageAndOffset> call() throws Exception {
                return fetchMessages(partitionId, startOffset);
            }
        };
        Future<Iterable<MessageAndOffset>> pendingFetch = fetchExecutor.submit(fetchTask);
        fetchFutures.put(partitionId, pendingFetch);
    }
    return fetchFutures;
}
Also used : MessageAndOffset(kafka.message.MessageAndOffset) Checkpoint(co.cask.cdap.logging.meta.Checkpoint) Callable(java.util.concurrent.Callable)

Example 19 with MessageAndOffset

use of kafka.message.MessageAndOffset in project jstorm by alibaba.

the class PartitionConsumer method emit.

/**
 * Emits buffered Kafka messages as tuples, refilling the buffer first when it is empty.
 * Messages are polled until the buffer runs dry or until {@code config.batchSendCount} messages
 * have been polled and the last one produced tuples. Messages for which no tuples are generated
 * are acked immediately.
 *
 * @param collector the collector that receives the generated tuples
 * @return {@code EMIT_END} when the buffer is empty after this call, {@code EMIT_MORE} otherwise
 */
public EmitState emit(SpoutOutputCollector collector) {
    if (emittingMessages.isEmpty()) {
        fillMessages();
    }
    int count = 0;
    while (true) {
        MessageAndOffset toEmitMsg = emittingMessages.pollFirst();
        if (toEmitMsg == null) {
            return EmitState.EMIT_END;
        }
        count++;
        Iterable<List<Object>> tups = generateTuples(toEmitMsg.message());
        if (tups != null) {
            for (List<Object> tuple : tups) {
                // Copying the payload and decoding it to a String is only worth doing when the
                // message will actually be logged; skip that work when debug logging is off.
                if (LOG.isDebugEnabled()) {
                    LOG.debug("emit message {}", new String(Utils.toByteArray(toEmitMsg.message().payload())));
                }
                collector.emit(tuple, new KafkaMessageId(partition, toEmitMsg.offset()));
            }
            // The batch limit is only checked after a message that produced tuples
            if (count >= config.batchSendCount) {
                break;
            }
        } else {
            // Nothing to emit for this message, so ack it right away
            ack(toEmitMsg.offset());
        }
    }
    if (emittingMessages.isEmpty()) {
        return EmitState.EMIT_END;
    } else {
        return EmitState.EMIT_MORE;
    }
}
Also used : List(java.util.List) LinkedList(java.util.LinkedList) MessageAndOffset(kafka.message.MessageAndOffset)

Example 20 with MessageAndOffset

use of kafka.message.MessageAndOffset in project storm by apache.

the class PartitionManager method fill.

/**
 * Fetches one batch of messages from Kafka and queues them in {@code _waitingToEmit}.
 * Failed offsets reported by the retry manager take priority; otherwise fetching continues from
 * {@code _emittedToOffset}. If the requested offset is out of range, the method repositions to the
 * earliest available offset, accounts for the lost messages, and returns without queuing anything.
 */
private void fill() {
    // Start of the fetch, used for the latency metrics below
    long start = System.currentTimeMillis();
    Long offset;
    // Are there failed tuples? If so, fetch those first.
    offset = this._failedMsgRetryManager.nextFailedMessageToRetry();
    // A null offset from the retry manager means there is nothing to retry
    final boolean processingNewTuples = (offset == null);
    if (processingNewTuples) {
        offset = _emittedToOffset;
    }
    ByteBufferMessageSet msgs = null;
    try {
        msgs = KafkaUtils.fetchMessages(_spoutConfig, _consumer, _partition, offset);
    } catch (TopicOffsetOutOfRangeException e) {
        // The requested offset is not available; fall back to the earliest offset for the partition
        offset = KafkaUtils.getOffset(_consumer, _partition.topic, _partition.partition, kafka.api.OffsetRequest.EarliestTime());
        // Fix for bug [STORM-643]: remove outdated failed offsets
        if (!processingNewTuples) {
            // For the case of EarliestTime it would be better to discard
            // all the failed offsets, that are earlier than actual EarliestTime
            // offset, since they are anyway not there.
            // These calls to broker API will be then saved.
            Set<Long> omitted = this._failedMsgRetryManager.clearOffsetsBefore(offset);
            // Omitted messages have not been acked and may be lost
            if (null != omitted) {
                _lostMessageCount.incrBy(omitted.size());
            }
            LOG.warn("Removing the failed offsets for {} that are out of range: {}", _partition, omitted);
        }
        // Everything between the old position and the earliest offset is counted as lost
        if (offset > _emittedToOffset) {
            _lostMessageCount.incrBy(offset - _emittedToOffset);
            _emittedToOffset = offset;
            LOG.warn("{} Using new offset: {}", _partition, _emittedToOffset);
        }
        // Nothing was fetched; note that the fetch metrics below are not updated on this path
        return;
    }
    long millis = System.currentTimeMillis() - start;
    _fetchAPILatencyMax.update(millis);
    _fetchAPILatencyMean.update(millis);
    _fetchAPICallCount.incr();
    if (msgs != null) {
        // Number of messages from this fetch that were actually queued for emission
        int numMessages = 0;
        for (MessageAndOffset msg : msgs) {
            final Long cur_offset = msg.offset();
            if (cur_offset < offset) {
                // Skip any old offsets.
                continue;
            }
            // When retrying, only queue the messages the retry manager wants re-emitted
            if (processingNewTuples || this._failedMsgRetryManager.shouldReEmitMsg(cur_offset)) {
                numMessages += 1;
                // Record the time the offset first became pending; an existing entry is left untouched
                if (!_pending.containsKey(cur_offset)) {
                    _pending.put(cur_offset, System.currentTimeMillis());
                }
                _waitingToEmit.add(msg);
                // Never move the emitted position backwards, even when re-fetching older failed offsets
                _emittedToOffset = Math.max(msg.nextOffset(), _emittedToOffset);
                // Tell the retry manager that the retry of this offset is now in progress
                if (_failedMsgRetryManager.shouldReEmitMsg(cur_offset)) {
                    this._failedMsgRetryManager.retryStarted(cur_offset);
                }
            }
        }
        _fetchAPIMessageCount.incrBy(numMessages);
    }
}
Also used : ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet) MessageAndOffset(kafka.message.MessageAndOffset) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet)

Aggregations

MessageAndOffset (kafka.message.MessageAndOffset)42 ByteBufferMessageSet (kafka.javaapi.message.ByteBufferMessageSet)25 ArrayList (java.util.ArrayList)14 List (java.util.List)13 IOException (java.io.IOException)9 ByteBuffer (java.nio.ByteBuffer)9 Test (org.junit.Test)8 Message (kafka.message.Message)7 FetchRequest (kafka.api.FetchRequest)6 FetchRequestBuilder (kafka.api.FetchRequestBuilder)6 FetchResponse (kafka.javaapi.FetchResponse)6 SimpleConsumer (kafka.javaapi.consumer.SimpleConsumer)6 Checkpoint (co.cask.cdap.logging.meta.Checkpoint)3 HashMap (java.util.HashMap)3 LinkedList (java.util.LinkedList)3 Map (java.util.Map)3 PartitionMetadata (kafka.javaapi.PartitionMetadata)2 SchemeAsMultiScheme (org.apache.storm.spout.SchemeAsMultiScheme)2 ILoggingEvent (ch.qos.logback.classic.spi.ILoggingEvent)1 NotFoundException (co.cask.cdap.common.NotFoundException)1