Use of kafka.message.MessageAndOffset in project cdap by caskdata.
The class KafkaOffsetResolver, method findStartOffset.
/**
 * Performs a linear search to find the offset immediately after the message with the smallest offset
 * whose log event time equals {@code targetTime}. Stops searching when the current message has a log
 * event time later than {@code maxTime}.
 *
 * @return the offset immediately after the message with the smallest offset and a log event time
 *         equal to {@code targetTime}; or, if no message has a log event time equal to
 *         {@code targetTime}, the offset immediately after the message with the largest offset and a
 *         timestamp smaller than ({@code targetTime} - EVENT_DELAY_MILLIS); or the offset the search
 *         started from if no event has a log event time smaller than (targetTime - EVENT_DELAY_MILLIS)
 * @throws OffsetOutOfRangeException if the given offset is out of range
 * @throws NotLeaderForPartitionException if the broker that the consumer is talking to is not the leader
 *         for the given topic and partition
 * @throws UnknownTopicOrPartitionException if the topic or partition is not known by the Kafka server
 * @throws UnknownServerException if the Kafka server responded with an error
 */
private long findStartOffset(SimpleConsumer consumer, int partition, long targetTime) throws KafkaException {
  String topic = config.getTopic();
  long minTime = targetTime - config.getEventDelayMillis();
  // The latest event time we can encounter before bailing out of the search
  long maxTime = targetTime + config.getEventDelayMillis();
  // The offset to start the search from
  long offset = KafkaUtil.getOffsetByTimestamp(consumer, topic, partition, minTime);
  long closestOffset = offset;

  boolean done = false;
  while (!done) {
    ByteBufferMessageSet messageSet = KafkaUtil.fetchMessages(consumer, topic, partition,
                                                              config.getKafkaFetchBufferSize(), offset);
    done = true;
    for (MessageAndOffset messageAndOffset : messageSet) {
      done = false;
      offset = messageAndOffset.nextOffset();
      try {
        long timestamp = serializer.decodeEventTimestamp(messageAndOffset.message().payload());
        if (timestamp == targetTime) {
          LOG.debug("Matching offset found in {}:{} at {} for timestamp {}",
                    topic, partition, messageAndOffset.offset(), targetTime);
          return offset;
        }
        if (timestamp < minTime) {
          closestOffset = offset;
        }
        if (timestamp > maxTime) {
          done = true;
          break;
        }
      } catch (IOException e) {
        // This shouldn't happen. In case it does (e.g. someone published garbage), just skip the message.
        LOG.trace("Failed to decode logging event time {}:{} at offset {}. Skipping it.",
                  topic, partition, messageAndOffset.offset(), e);
      }
    }
  }

  LOG.debug("Failed to find a log event with timestamp {} in {}:{}. The largest offset with event timestamp " +
            "smaller than {} (target event time minus event delay {}) is {}",
            targetTime, topic, partition, minTime, config.getEventDelayMillis(), closestOffset);
  return closestOffset;
}
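Both KafkaUtil.getOffsetByTimestamp and KafkaUtil.fetchMessages are CDAP helpers around the Scala SimpleConsumer client. As a rough sketch of what a fetch helper like KafkaUtil.fetchMessages typically does with that API (the helper below is an assumption for illustration, not CDAP's actual code):

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import org.apache.kafka.common.protocol.Errors;

// Hypothetical fetch helper; CDAP's real KafkaUtil.fetchMessages may differ.
static ByteBufferMessageSet fetchMessages(SimpleConsumer consumer, String topic,
                                          int partition, int fetchSize, long offset) {
  FetchRequest request = new FetchRequestBuilder()
    .clientId(consumer.clientId())
    .addFetch(topic, partition, offset, fetchSize)
    .build();
  FetchResponse response = consumer.fetch(request);
  if (response.hasError()) {
    // Translate the broker error code (e.g. OffsetOutOfRange) into the matching exception
    throw Errors.forCode(response.errorCode(topic, partition)).exception();
  }
  return response.messageSet(topic, partition);
}

Failing fast on hasError() is what allows the javadoc's OffsetOutOfRangeException and friends to surface from a plain fetch call.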
Use of kafka.message.MessageAndOffset in project cdap by caskdata.
The class KafkaOffsetResolver, method getEventTimeByOffset.
/**
 * Fetches a log event with {@code requestOffset} and deserializes it to get the log event time.
 *
 * @return the log event time of the message with {@code requestOffset}
 * @throws NotFoundException if no valid log event message can be found at the given offset
 * @throws OffsetOutOfRangeException if the given offset is out of range
 * @throws NotLeaderForPartitionException if the broker that the consumer is talking to is not the leader
 *         for the given topic and partition
 * @throws UnknownTopicOrPartitionException if the topic or partition is not known by the Kafka server
 * @throws UnknownServerException if the Kafka server responded with an error
 */
private long getEventTimeByOffset(SimpleConsumer consumer, int partition, long requestOffset) throws NotFoundException {
  String topic = config.getTopic();
  ByteBufferMessageSet messageSet = KafkaUtil.fetchMessages(consumer, topic, partition,
                                                            SINGLE_MESSAGE_MAX_SIZE, requestOffset);
  Iterator<MessageAndOffset> iterator = messageSet.iterator();
  if (!iterator.hasNext()) {
    throw new NotFoundException("No message found in " + topic + ":" + partition + " at offset " + requestOffset);
  }
  try {
    return serializer.decodeEventTimestamp(iterator.next().message().payload());
  } catch (IOException e) {
    // A message that fails to deserialize is treated as not found: either way it is not the event we are looking for
    throw new NotFoundException("Invalid log event found in " + topic + ":" + partition + " at offset " + requestOffset);
  }
}
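Note that message().payload() returns a ByteBuffer view into the shared fetch buffer, so a deserializer like serializer.decodeEventTimestamp has to read it without assuming ownership. A common idiom for copying the payload out first (this helper is illustrative, not part of CDAP):

import java.nio.ByteBuffer;
import kafka.message.MessageAndOffset;

// Illustrative helper: copy a message payload out of the shared fetch buffer.
// Duplicating first leaves the original buffer's read position untouched.
static byte[] payloadBytes(MessageAndOffset messageAndOffset) {
  ByteBuffer payload = messageAndOffset.message().payload().duplicate();
  byte[] bytes = new byte[payload.remaining()];
  payload.get(bytes);
  return bytes;
}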
Use of kafka.message.MessageAndOffset in project cdap by caskdata.
The class KafkaLogProcessorPipeline, method fetchAll.
/**
* Fetches messages from Kafka across all partitions simultaneously.
*/
private <T extends Map<Integer, Future<Iterable<MessageAndOffset>>>> T fetchAll(Int2LongMap offsets, T fetchFutures) {
  for (final int partition : config.getPartitions()) {
    final long offset = offsets.get(partition);
    fetchFutures.put(partition, fetchExecutor.submit(new Callable<Iterable<MessageAndOffset>>() {

      @Override
      public Iterable<MessageAndOffset> call() throws Exception {
        return fetchMessages(partition, offset);
      }
    }));
  }
  return fetchFutures;
}
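The method returns the same map it was given, so a caller can fire all fetches and then join them. A hypothetical caller (assuming the usual java.util and java.util.concurrent imports; the real pipeline's processing loop may differ) could look like:

Map<Integer, Future<Iterable<MessageAndOffset>>> futures =
  fetchAll(offsets, new HashMap<Integer, Future<Iterable<MessageAndOffset>>>());
for (Map.Entry<Integer, Future<Iterable<MessageAndOffset>>> entry : futures.entrySet()) {
  try {
    for (MessageAndOffset messageAndOffset : entry.getValue().get()) {
      // process the message, then remember messageAndOffset.nextOffset()
      // as the fetch offset for this partition in the next round
    }
  } catch (ExecutionException e) {
    // the fetch for this partition failed; decide whether to retry or rethrow
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
  }
}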
Use of kafka.message.MessageAndOffset in project jstorm by alibaba.
The class PartitionConsumer, method emit.
public EmitState emit(SpoutOutputCollector collector) {
    if (emittingMessages.isEmpty()) {
        fillMessages();
    }

    int count = 0;
    while (true) {
        MessageAndOffset toEmitMsg = emittingMessages.pollFirst();
        if (toEmitMsg == null) {
            return EmitState.EMIT_END;
        }
        count++;
        Iterable<List<Object>> tups = generateTuples(toEmitMsg.message());
        if (tups != null) {
            for (List<Object> tuple : tups) {
                LOG.debug("emit message {}", new String(Utils.toByteArray(toEmitMsg.message().payload())));
                collector.emit(tuple, new KafkaMessageId(partition, toEmitMsg.offset()));
            }
            if (count >= config.batchSendCount) {
                break;
            }
        } else {
            ack(toEmitMsg.offset());
        }
    }

    if (emittingMessages.isEmpty()) {
        return EmitState.EMIT_END;
    } else {
        return EmitState.EMIT_MORE;
    }
}
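EMIT_END signals that the buffered messages for this partition are exhausted, while EMIT_MORE means more are waiting. A hypothetical driver loop showing how a spout might use that contract (jstorm's actual KafkaSpout wiring differs; partitionConsumers and collector are assumed fields):

@Override
public void nextTuple() {
    for (PartitionConsumer partitionConsumer : partitionConsumers) {
        EmitState state = partitionConsumer.emit(collector);
        if (state == EmitState.EMIT_MORE) {
            // this partition still has buffered messages; resume here on the next call
            return;
        }
        // EMIT_END: this partition is drained for now, move on to the next one
    }
}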
Use of kafka.message.MessageAndOffset in project storm by apache.
The class PartitionManager, method fill.
private void fill() {
    long start = System.currentTimeMillis();
    Long offset;

    // Are there failed tuples? If so, fetch those first.
    offset = this._failedMsgRetryManager.nextFailedMessageToRetry();
    final boolean processingNewTuples = (offset == null);
    if (processingNewTuples) {
        offset = _emittedToOffset;
    }

    ByteBufferMessageSet msgs = null;
    try {
        msgs = KafkaUtils.fetchMessages(_spoutConfig, _consumer, _partition, offset);
    } catch (TopicOffsetOutOfRangeException e) {
        offset = KafkaUtils.getOffset(_consumer, _partition.topic, _partition.partition, kafka.api.OffsetRequest.EarliestTime());
        // fix bug [STORM-643]: remove outdated failed offsets
        if (!processingNewTuples) {
            // For the case of EarliestTime it is better to discard all the failed
            // offsets that are earlier than the actual EarliestTime offset, since
            // they are not there anyway. This saves the corresponding broker API calls.
            Set<Long> omitted = this._failedMsgRetryManager.clearOffsetsBefore(offset);
            // Omitted messages have not been acked and may be lost
            if (null != omitted) {
                _lostMessageCount.incrBy(omitted.size());
            }
            LOG.warn("Removing the failed offsets for {} that are out of range: {}", _partition, omitted);
        }
        if (offset > _emittedToOffset) {
            _lostMessageCount.incrBy(offset - _emittedToOffset);
            _emittedToOffset = offset;
            LOG.warn("{} Using new offset: {}", _partition, _emittedToOffset);
        }
        return;
    }

    long millis = System.currentTimeMillis() - start;
    _fetchAPILatencyMax.update(millis);
    _fetchAPILatencyMean.update(millis);
    _fetchAPICallCount.incr();
    if (msgs != null) {
        int numMessages = 0;
        for (MessageAndOffset msg : msgs) {
            final Long cur_offset = msg.offset();
            if (cur_offset < offset) {
                // Skip any old offsets.
                continue;
            }
            if (processingNewTuples || this._failedMsgRetryManager.shouldReEmitMsg(cur_offset)) {
                numMessages += 1;
                if (!_pending.containsKey(cur_offset)) {
                    _pending.put(cur_offset, System.currentTimeMillis());
                }
                _waitingToEmit.add(msg);
                _emittedToOffset = Math.max(msg.nextOffset(), _emittedToOffset);
                if (_failedMsgRetryManager.shouldReEmitMsg(cur_offset)) {
                    this._failedMsgRetryManager.retryStarted(cur_offset);
                }
            }
        }
        _fetchAPIMessageCount.incrBy(numMessages);
    }
}
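In the out-of-range recovery branch, KafkaUtils.getOffset asks the broker for a valid offset at EarliestTime. A minimal sketch of what such a lookup does with the SimpleConsumer offset API (the helper itself is illustrative; storm's version is similar in spirit):

import java.util.Collections;
import java.util.Map;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.TopicAndPartition;
import kafka.javaapi.OffsetRequest;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.consumer.SimpleConsumer;

// Illustrative offset lookup: ask the broker for the single offset closest
// to startTime (e.g. kafka.api.OffsetRequest.EarliestTime()).
static long getOffset(SimpleConsumer consumer, String topic, int partition, long startTime) {
    TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =
        Collections.singletonMap(topicAndPartition, new PartitionOffsetRequestInfo(startTime, 1));
    OffsetRequest request = new OffsetRequest(
        requestInfo, kafka.api.OffsetRequest.CurrentVersion(), consumer.clientId());
    OffsetResponse response = consumer.getOffsetsBefore(request);
    long[] offsets = response.offsets(topic, partition);
    return offsets.length > 0 ? offsets[0] : -1; // -1: broker returned no offset
}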