use of kafka.javaapi.message.ByteBufferMessageSet in project cdap by caskdata.
the class KafkaOffsetResolver method findStartOffset.
/**
 * Performs a linear search to find the next offset of the message with the smallest offset and a log event time
 * equal to {@code targetTime}. Stops searching when the current message has a log event time
 * later than {@code maxTime} or an offset larger than {@code maxOffset}.
 *
 * @return next offset of the message with the smallest offset and log event time equal to targetTime,
 *         or next offset of the message with the largest offset and a timestamp smaller than
 *         (targetTime - EVENT_DELAY_MILLIS) if no message has log event time equal to targetTime,
 *         or startOffset if no event has a log event time smaller than (targetTime - EVENT_DELAY_MILLIS)
 * @throws OffsetOutOfRangeException if the given offset is out of range
 * @throws NotLeaderForPartitionException if the broker that the consumer is talking to is not the leader
 *         for the given topic and partition
 * @throws UnknownTopicOrPartitionException if the topic or partition is not known by the Kafka server
 * @throws UnknownServerException if the Kafka server responded with an error
 */
private long findStartOffset(SimpleConsumer consumer, int partition, long targetTime) throws KafkaException {
    String topic = config.getTopic();
    long minTime = targetTime - config.getEventDelayMillis();
    // The latest event time we can encounter before bailing out of the search
    long maxTime = targetTime + config.getEventDelayMillis();
    // The offset to start the search from
    long offset = KafkaUtil.getOffsetByTimestamp(consumer, topic, partition, minTime);
    long closestOffset = offset;
    boolean done = false;
    while (!done) {
        ByteBufferMessageSet messageSet = KafkaUtil.fetchMessages(consumer, topic, partition,
                                                                  config.getKafkaFetchBufferSize(), offset);
        done = true;
        for (MessageAndOffset messageAndOffset : messageSet) {
            done = false;
            offset = messageAndOffset.nextOffset();
            try {
                long timestamp = serializer.decodeEventTimestamp(messageAndOffset.message().payload());
                if (timestamp == targetTime) {
                    LOG.debug("Matching offset found in {}:{} at {} for timestamp {}",
                              topic, partition, messageAndOffset.offset(), targetTime);
                    return offset;
                }
                if (timestamp < minTime) {
                    closestOffset = offset;
                }
                if (timestamp > maxTime) {
                    done = true;
                    break;
                }
            } catch (IOException e) {
                // This shouldn't happen. In case it does (e.g. someone published garbage), just skip the message.
                LOG.trace("Fail to decode logging event time {}:{} at offset {}. Skipping it.",
                          topic, partition, messageAndOffset.offset(), e);
            }
        }
    }
    LOG.debug("Fail to find a log event with timestamp {} in {}:{}. "
                  + "The largest offset with event timestamp smaller than {} (target event time minus event delay {}) is {}",
              targetTime, topic, partition, minTime, config.getEventDelayMillis(), closestOffset);
    return closestOffset;
}
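For orientation, here is a minimal sketch of the fetch-and-decode pattern the search above builds on, using only the Kafka 0.8-era SimpleConsumer API (FetchRequestBuilder, FetchResponse, ByteBufferMessageSet, MessageAndOffset). The scanFrom method name, the clientId, and the 1 MB fetch size are illustrative assumptions, and error handling (fetchResponse.hasError()) is omitted for brevity.
// Assumes imports for kafka.api.FetchRequest, kafka.api.FetchRequestBuilder, kafka.javaapi.FetchResponse,
// kafka.javaapi.consumer.SimpleConsumer, kafka.javaapi.message.ByteBufferMessageSet,
// kafka.message.MessageAndOffset and java.nio.ByteBuffer.
private void scanFrom(SimpleConsumer consumer, String topic, int partition, long offset) {
    FetchRequest request = new FetchRequestBuilder()
        .clientId("offset-resolver")                      // hypothetical client id
        .addFetch(topic, partition, offset, 1024 * 1024)  // 1 MB fetch size, illustrative
        .build();
    FetchResponse response = consumer.fetch(request);     // response.hasError() check omitted in this sketch
    ByteBufferMessageSet messageSet = response.messageSet(topic, partition);
    for (MessageAndOffset messageAndOffset : messageSet) {
        ByteBuffer payload = messageAndOffset.message().payload();
        byte[] bytes = new byte[payload.remaining()];
        payload.get(bytes);                               // copy the event bytes out of the buffer
        // decode the log event timestamp from 'bytes' with the project-specific serializer here
        long nextOffset = messageAndOffset.nextOffset();  // where the next fetch/iteration would resume
    }
}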
use of kafka.javaapi.message.ByteBufferMessageSet in project storm by apache.
the class KafkaUtils method fetchMessages.
public static ByteBufferMessageSet fetchMessages(KafkaConfig config, SimpleConsumer consumer, Partition partition,
                                                 long offset)
        throws TopicOffsetOutOfRangeException, FailedFetchException, RuntimeException {
    ByteBufferMessageSet msgs = null;
    String topic = partition.topic;
    int partitionId = partition.partition;
    FetchRequestBuilder builder = new FetchRequestBuilder();
    FetchRequest fetchRequest = builder.addFetch(topic, partitionId, offset, config.fetchSizeBytes)
        .clientId(config.clientId)
        .maxWait(config.fetchMaxWait)
        .minBytes(config.minFetchByte)
        .build();
    FetchResponse fetchResponse;
    try {
        fetchResponse = consumer.fetch(fetchRequest);
    } catch (Exception e) {
        if (e instanceof ConnectException || e instanceof SocketTimeoutException || e instanceof IOException
                || e instanceof UnresolvedAddressException) {
            LOG.warn("Network error when fetching messages:", e);
            throw new FailedFetchException(e);
        } else {
            throw new RuntimeException(e);
        }
    }
    if (fetchResponse.hasError()) {
        KafkaError error = KafkaError.getError(fetchResponse.errorCode(topic, partitionId));
        if (error.equals(KafkaError.OFFSET_OUT_OF_RANGE) && config.useStartOffsetTimeIfOffsetOutOfRange) {
            String msg = partition + " Got fetch request with offset out of range: [" + offset + "]";
            LOG.warn(msg);
            throw new TopicOffsetOutOfRangeException(msg);
        } else {
            String message = "Error fetching data from [" + partition + "] for topic [" + topic + "]: [" + error + "]";
            LOG.error(message);
            throw new FailedFetchException(message);
        }
    } else {
        msgs = fetchResponse.messageSet(topic, partitionId);
    }
    LOG.debug("Messages fetched. [config = {}], [consumer = {}], [partition = {}], [offset = {}], [msgs = {}]",
              config, consumer, partition, offset, msgs);
    return msgs;
}
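A hedged usage sketch of this helper, mirroring the callers shown further down: fetchOnce is a hypothetical method name and the per-message handling is left as a comment, while KafkaUtils.fetchMessages and KafkaUtils.getOffset are the same calls already used on this page.
private long fetchOnce(KafkaConfig spoutConfig, SimpleConsumer consumer, Partition partition, long offset) {
    try {
        ByteBufferMessageSet msgs = KafkaUtils.fetchMessages(spoutConfig, consumer, partition, offset);
        for (MessageAndOffset msg : msgs) {
            // hand msg.message().payload() to the spout's deserialization/emit logic here
            offset = msg.nextOffset();  // resume after the last message we consumed
        }
    } catch (TopicOffsetOutOfRangeException e) {
        // The requested offset has been truncated away; fall back to the broker's earliest retained offset.
        offset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition,
                                      kafka.api.OffsetRequest.EarliestTime());
    }
    return offset;
}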
use of kafka.javaapi.message.ByteBufferMessageSet in project storm by apache.
the class PartitionManager method fill.
private void fill() {
    long start = System.currentTimeMillis();
    Long offset;
    // Are there failed tuples? If so, fetch those first.
    offset = this._failedMsgRetryManager.nextFailedMessageToRetry();
    final boolean processingNewTuples = (offset == null);
    if (processingNewTuples) {
        offset = _emittedToOffset;
    }
    ByteBufferMessageSet msgs = null;
    try {
        msgs = KafkaUtils.fetchMessages(_spoutConfig, _consumer, _partition, offset);
    } catch (TopicOffsetOutOfRangeException e) {
        offset = KafkaUtils.getOffset(_consumer, _partition.topic, _partition.partition,
                                      kafka.api.OffsetRequest.EarliestTime());
        // Fix for [STORM-643]: remove outdated failed offsets.
        if (!processingNewTuples) {
            // When falling back to EarliestTime, discard all failed offsets that are earlier than the
            // actual earliest available offset: they no longer exist on the broker, and dropping them
            // saves the broker API calls that would otherwise be made for them.
            Set<Long> omitted = this._failedMsgRetryManager.clearOffsetsBefore(offset);
            // Omitted messages have not been acked and may be lost.
            if (null != omitted) {
                _lostMessageCount.incrBy(omitted.size());
            }
            LOG.warn("Removing the failed offsets for {} that are out of range: {}", _partition, omitted);
        }
        if (offset > _emittedToOffset) {
            _lostMessageCount.incrBy(offset - _emittedToOffset);
            _emittedToOffset = offset;
            LOG.warn("{} Using new offset: {}", _partition, _emittedToOffset);
        }
        return;
    }
    long millis = System.currentTimeMillis() - start;
    _fetchAPILatencyMax.update(millis);
    _fetchAPILatencyMean.update(millis);
    _fetchAPICallCount.incr();
    if (msgs != null) {
        int numMessages = 0;
        for (MessageAndOffset msg : msgs) {
            final Long cur_offset = msg.offset();
            if (cur_offset < offset) {
                // Skip any old offsets.
                continue;
            }
            if (processingNewTuples || this._failedMsgRetryManager.shouldReEmitMsg(cur_offset)) {
                numMessages += 1;
                if (!_pending.containsKey(cur_offset)) {
                    _pending.put(cur_offset, System.currentTimeMillis());
                }
                _waitingToEmit.add(msg);
                _emittedToOffset = Math.max(msg.nextOffset(), _emittedToOffset);
                if (_failedMsgRetryManager.shouldReEmitMsg(cur_offset)) {
                    this._failedMsgRetryManager.retryStarted(cur_offset);
                }
            }
        }
        _fetchAPIMessageCount.incrBy(numMessages);
    }
}
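A small worked example of the lost-message accounting in the catch block above; the offsets are illustrative only.
// Suppose the spout had emitted up to offset 100 when the broker truncated the log,
// and the earliest offset still retained is now 150:
long emittedToOffset = 100L;
long earliestAvailable = 150L;                     // what KafkaUtils.getOffset(..., EarliestTime()) returns here
long lost = earliestAvailable - emittedToOffset;   // 50 messages expired before they could be emitted
// fill() records this as _lostMessageCount.incrBy(50) and resumes fetching from offset 150.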
use of kafka.javaapi.message.ByteBufferMessageSet in project storm by apache.
the class TridentKafkaEmitter method reEmitPartitionBatch.
/**
 * Re-emits the batch described by the provided metadata.
 *
 * @param attempt the transaction attempt being replayed
 * @param collector collector to emit the tuples to
 * @param partition the Kafka partition the batch was originally read from
 * @param meta metadata of the previously emitted batch (offset, nextOffset, instanceId)
 */
private void reEmitPartitionBatch(TransactionAttempt attempt, TridentCollector collector, Partition partition,
                                  Map meta) {
    LOG.info("re-emitting batch, attempt " + attempt);
    String instanceId = (String) meta.get("instanceId");
    if (!_config.ignoreZkOffsets || instanceId.equals(_topologyInstanceId)) {
        SimpleConsumer consumer = _connections.register(partition);
        long offset = (Long) meta.get("offset");
        long nextOffset = (Long) meta.get("nextOffset");
        ByteBufferMessageSet msgs = null;
        msgs = fetchMessages(consumer, partition, offset);
        if (msgs != null) {
            for (MessageAndOffset msg : msgs) {
                if (offset == nextOffset) {
                    break;
                }
                if (offset > nextOffset) {
                    throw new RuntimeException("Error when re-emitting batch. overshot the end offset");
                }
                emit(collector, msg.message(), partition, msg.offset(), attempt);
                offset = msg.nextOffset();
            }
        }
    }
}
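For reference, a hedged sketch of the batch metadata shape this method expects. The keys come from doEmitNewPartitionBatch in the next snippet; the concrete values are illustrative only.
Map<String, Object> meta = new HashMap<>();
meta.put("instanceId", _topologyInstanceId);  // must match the current topology unless ignoreZkOffsets is set
meta.put("offset", 100L);                     // first offset of the batch to replay
meta.put("nextOffset", 150L);                 // offset immediately after the batch's last message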
use of kafka.javaapi.message.ByteBufferMessageSet in project storm by apache.
the class TridentKafkaEmitter method doEmitNewPartitionBatch.
private Map doEmitNewPartitionBatch(SimpleConsumer consumer, Partition partition, TridentCollector collector,
                                    Map lastMeta, TransactionAttempt attempt) {
    LOG.debug("Emitting new partition batch - [transaction = {}], [lastMeta = {}]", attempt, lastMeta);
    long offset;
    if (lastMeta != null) {
        String lastInstanceId = null;
        Map lastTopoMeta = (Map) lastMeta.get("topology");
        if (lastTopoMeta != null) {
            lastInstanceId = (String) lastTopoMeta.get("id");
        }
        if (_config.ignoreZkOffsets && !_topologyInstanceId.equals(lastInstanceId)) {
            offset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition, _config.startOffsetTime);
        } else {
            offset = (Long) lastMeta.get("nextOffset");
        }
    } else {
        offset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition, _config);
    }
    LOG.debug("[transaction = {}], [OFFSET = {}]", attempt, offset);
    ByteBufferMessageSet msgs = null;
    try {
        msgs = fetchMessages(consumer, partition, offset);
    } catch (TopicOffsetOutOfRangeException e) {
        long newOffset = KafkaUtils.getOffset(consumer, partition.topic, partition.partition,
                                              kafka.api.OffsetRequest.EarliestTime());
        LOG.warn("OffsetOutOfRange: Updating offset from offset = " + offset + " to offset = " + newOffset);
        offset = newOffset;
        msgs = KafkaUtils.fetchMessages(_config, consumer, partition, offset);
    }
    long endoffset = offset;
    for (MessageAndOffset msg : msgs) {
        emit(collector, msg.message(), partition, msg.offset(), attempt);
        endoffset = msg.nextOffset();
    }
    Map newMeta = new HashMap();
    newMeta.put("offset", offset);
    newMeta.put("nextOffset", endoffset);
    newMeta.put("instanceId", _topologyInstanceId);
    newMeta.put("partition", partition.partition);
    newMeta.put("broker", ImmutableMap.of("host", partition.host.host, "port", partition.host.port));
    newMeta.put("topic", partition.topic);
    newMeta.put("topology", ImmutableMap.of("name", _topologyName, "id", _topologyInstanceId));
    LOG.debug("[transaction = {}], [newMeta = {}]", attempt, newMeta);
    return newMeta;
}