use of org.graylog.shaded.kafka09.message.Message in project graylog2-server by Graylog2.
In the class LocalKafkaJournal, the method flushMessages:
/**
 * Wraps the given messages in a ByteBufferMessageSet, appends the set to the Kafka log and returns the offset of
 * the last message written, or -1 if the list of messages was empty.
 */
private long flushMessages(List<Message> messages, long payloadSize) {
    if (messages.isEmpty()) {
        LOG.debug("No messages to flush, not trying to write an empty message set.");
        return -1L;
    }

    final ByteBufferMessageSet messageSet = new ByteBufferMessageSet(JavaConversions.asScalaBuffer(messages).toSeq());

    if (LOG.isDebugEnabled()) {
        LOG.debug("Trying to write ByteBufferMessageSet with size of {} bytes to journal", messageSet.sizeInBytes());
    }

    final LogAppendInfo appendInfo = kafkaLog.append(messageSet, true);
    final long lastWriteOffset = appendInfo.lastOffset();

    if (LOG.isDebugEnabled()) {
        LOG.debug("Wrote {} messages to journal: {} bytes (payload {} bytes), log position {} to {}",
                messages.size(), messageSet.sizeInBytes(), payloadSize, appendInfo.firstOffset(), lastWriteOffset);
    }
    writtenMessages.mark(messages.size());

    return lastWriteOffset;
}
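
For context, here is a minimal sketch (not taken from the Graylog sources) of how the shaded kafka09 Message API used above fits together: a Message pairs the serialized payload with the journal entry id as its key, MessageSet.entrySize() reports the size including the per-entry log overhead, and a ByteBufferMessageSet wraps a batch for appending. The class and method names in the sketch are illustrative, and the import locations of ByteBufferMessageSet and MessageSet are assumed to sit in the same shaded package as Message.

import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;

import org.graylog.shaded.kafka09.message.ByteBufferMessageSet; // assumed shaded location
import org.graylog.shaded.kafka09.message.Message;
import org.graylog.shaded.kafka09.message.MessageSet;           // assumed shaded location

import scala.collection.JavaConversions;

class ShadedMessageSketch {

    // Builds a single-message ByteBufferMessageSet the same way flushMessages() does for a whole batch.
    static ByteBufferMessageSet singleMessageSet(byte[] idBytes, byte[] payload) {
        // The payload is the serialized journal entry, the key is the entry id.
        final Message message = new Message(payload, idBytes);

        // entrySize() includes the per-entry log overhead; this is the value the journal
        // compares against maxMessageSize before accepting an entry.
        final int sizeWithOverhead = MessageSet.entrySize(message);
        System.out.println("entry size incl. overhead: " + sizeWithOverhead + " bytes");

        final List<Message> batch = Collections.singletonList(message);
        return new ByteBufferMessageSet(JavaConversions.asScalaBuffer(batch).toSeq());
    }

    public static void main(String[] args) {
        final byte[] id = "message-id-1".getBytes(StandardCharsets.UTF_8);
        final byte[] payload = "{\"message\":\"hello journal\"}".getBytes(StandardCharsets.UTF_8);
        System.out.println("message set size: " + singleMessageSet(id, payload).sizeInBytes() + " bytes");
    }
}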
use of org.graylog.shaded.kafka09.message.Message in project graylog2-server by Graylog2.
In the class LocalKafkaJournal, the method write:
/**
 * Writes the list of entries to the journal.
 *
 * @param entries journal entries to be written
 * @return the last position written to in the journal
 */
@Override
public long write(List<Entry> entries) {
    try (Timer.Context ignored = writeTime.time()) {
        long payloadSize = 0L;
        long messageSetSize = 0L;
        long lastWriteOffset = 0L;

        final List<Message> messages = new ArrayList<>(entries.size());
        for (final Entry entry : entries) {
            final byte[] messageBytes = entry.getMessageBytes();
            final byte[] idBytes = entry.getIdBytes();

            payloadSize += messageBytes.length;

            final Message newMessage = new Message(messageBytes, idBytes);
            // Calculate the size of the new message in the message set by including the overhead for the log entry.
            final int newMessageSize = MessageSet.entrySize(newMessage);

            if (newMessageSize > maxMessageSize) {
                writeDiscardedMessages.mark();
                LOG.warn("Message with ID <{}> is too large to store in journal, skipping! (size: {} bytes / max: {} bytes)",
                        new String(idBytes, StandardCharsets.UTF_8), newMessageSize, maxMessageSize);
                payloadSize = 0;
                continue;
            }

            // If adding the new message to the message set would overflow the maximum segment size, flush the
            // current list of messages to avoid a MessageSetSizeTooLargeException.
            if ((messageSetSize + newMessageSize) > maxSegmentSize) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Flushing {} bytes message set with {} messages to avoid overflowing segment with max size of {} bytes",
                            messageSetSize, messages.size(), maxSegmentSize);
                }
                lastWriteOffset = flushMessages(messages, payloadSize);
                // Reset the messages list and size counters to start a new batch.
                messages.clear();
                messageSetSize = 0;
                payloadSize = 0;
            }

            messages.add(newMessage);
            messageSetSize += newMessageSize;

            if (LOG.isTraceEnabled()) {
                LOG.trace("Message {} contains bytes {}", bytesToHex(idBytes), bytesToHex(messageBytes));
            }
        }

        // Flush the rest of the messages.
        if (messages.size() > 0) {
            lastWriteOffset = flushMessages(messages, payloadSize);
        }

        return lastWriteOffset;
    }
}
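
A hypothetical caller-side sketch of write(): batching entries and writing them in one call. The class name JournalWriteSketch, the method writeBatch, and the createEntry(idBytes, messageBytes) factory (referenced here via the Journal interface, with Entry as its nested type) are assumptions and not part of the excerpt above.

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

class JournalWriteSketch {

    // "journal" stands for a LocalKafkaJournal instance wired up elsewhere.
    static long writeBatch(LocalKafkaJournal journal, List<String> rawMessages) {
        final List<Journal.Entry> entries = new ArrayList<>(rawMessages.size());
        int i = 0;
        for (final String raw : rawMessages) {
            final byte[] idBytes = ("msg-" + i++).getBytes(StandardCharsets.UTF_8);
            final byte[] messageBytes = raw.getBytes(StandardCharsets.UTF_8);
            // Entries that exceed maxMessageSize will be skipped by write() and counted in the
            // writeDiscardedMessages meter; everything else is batched into message sets that
            // stay below maxSegmentSize.
            entries.add(journal.createEntry(idBytes, messageBytes)); // createEntry() is assumed
        }
        // Returns the offset of the last message written, which can serve as the starting
        // point for a subsequent read().
        return journal.write(entries);
    }
}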
use of org.graylog.shaded.kafka09.message.Message in project graylog2-server by Graylog2.
In the class LocalKafkaJournal, the method read:
/**
 * Read from the journal, starting at the given offset. If the underlying journal implementation returns an empty
 * list of entries, it will be returned even if we know there are more entries in the journal.
 *
 * @param readOffset            Offset to start reading at
 * @param requestedMaximumCount Maximum number of entries to return.
 * @return A list of entries
 */
public List<JournalReadEntry> read(long readOffset, long requestedMaximumCount) {
    // Always read at least one!
    final long maximumCount = Math.max(1, requestedMaximumCount);
    long maxOffset = readOffset + maximumCount;

    if (shuttingDown) {
        return Collections.emptyList();
    }
    final List<JournalReadEntry> messages = new ArrayList<>(Ints.saturatedCast(maximumCount));
    try (Timer.Context ignored = readTime.time()) {
        final long logStartOffset = getLogStartOffset();

        if (readOffset < logStartOffset) {
            LOG.info("Read offset {} before start of log at {}, starting to read from the beginning of the journal.",
                    readOffset, logStartOffset);
            readOffset = logStartOffset;
            maxOffset = readOffset + maximumCount;
        }
        LOG.debug("Requesting to read a maximum of {} messages (or 5MB) from the journal, offset interval [{}, {})",
                maximumCount, readOffset, maxOffset);

        // TODO benchmark and make read-ahead strategy configurable for performance tuning
        final MessageSet messageSet = kafkaLog.read(readOffset, 5 * 1024 * 1024, Option.<Object>apply(maxOffset)).messageSet();

        final Iterator<MessageAndOffset> iterator = messageSet.iterator();
        long firstOffset = Long.MIN_VALUE;
        long lastOffset = Long.MIN_VALUE;
        long totalBytes = 0;
        while (iterator.hasNext()) {
            final MessageAndOffset messageAndOffset = iterator.next();

            if (firstOffset == Long.MIN_VALUE) {
                firstOffset = messageAndOffset.offset();
            }
            // always remember the last seen offset for debug purposes below
            lastOffset = messageAndOffset.offset();

            final byte[] payloadBytes = ByteBufferUtils.readBytes(messageAndOffset.message().payload());
            if (LOG.isTraceEnabled()) {
                final byte[] keyBytes = ByteBufferUtils.readBytes(messageAndOffset.message().key());
                LOG.trace("Read message {} contains {}", bytesToHex(keyBytes), bytesToHex(payloadBytes));
            }
            totalBytes += payloadBytes.length;
            messages.add(new JournalReadEntry(payloadBytes, messageAndOffset.offset()));
            // remember where to read from
            nextReadOffset = messageAndOffset.nextOffset();
        }
        if (messages.isEmpty()) {
            LOG.debug("No messages available to read for offset interval [{}, {}).", readOffset, maxOffset);
        } else {
            LOG.debug("Read {} messages, total payload size {}, from journal, offset interval [{}, {}], requested read at {}",
                    messages.size(), totalBytes, firstOffset, lastOffset, readOffset);
        }
    } catch (OffsetOutOfRangeException e) {
        // This is fine, the reader tries to read faster than the writer committed data. Next read will get the data.
        LOG.debug("Offset out of range, no messages available starting at offset {}", readOffset);
    } catch (Exception e) {
        // sigh.
        if (shuttingDown) {
            LOG.debug("Caught exception during shutdown, ignoring it because we might have been blocked on a read.");
            return Collections.emptyList();
        }
        //noinspection ConstantConditions
        if (e instanceof ClosedByInterruptException) {
            LOG.debug("Interrupted while reading from journal, during shutdown this is harmless and ignored.", e);
        } else {
            throw e;
        }
    }
    readMessages.mark(messages.size());
    return messages;
}
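
A hypothetical reader loop built on read(): it drains the journal from a given offset in pages of up to 100 entries. The class name JournalReadSketch, the method drain, and the getPayload()/getOffset() accessors on JournalReadEntry are assumptions and not shown in the excerpt above.

import java.nio.charset.StandardCharsets;
import java.util.List;

class JournalReadSketch {

    static void drain(LocalKafkaJournal journal, long startOffset) {
        long nextOffset = startOffset;
        while (true) {
            final List<Journal.JournalReadEntry> batch = journal.read(nextOffset, 100);
            if (batch.isEmpty()) {
                // Either nothing is available at this offset yet or the journal is shutting down.
                break;
            }
            for (final Journal.JournalReadEntry entry : batch) {
                final String payload = new String(entry.getPayload(), StandardCharsets.UTF_8); // accessor names assumed
                System.out.println("offset " + entry.getOffset() + ": " + payload);
            }
            // Continue after the last entry seen; read() clamps offsets that fall before the
            // start of the log back to the beginning of the journal.
            nextOffset = batch.get(batch.size() - 1).getOffset() + 1;
        }
    }
}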
use of org.graylog.shaded.kafka09.message.Message in project graylog2-server by Graylog2.
In the class KafkaTransport, the method doLaunchLegacy:
private void doLaunchLegacy(final MessageInput input) {
    final Properties props = new Properties();

    props.put("group.id", configuration.getString(CK_GROUP_ID, DEFAULT_GROUP_ID));
    props.put("client.id", "gl2-" + nodeId.getShortNodeId() + "-" + input.getId());

    props.put("fetch.min.bytes", String.valueOf(configuration.getInt(CK_FETCH_MIN_BYTES)));
    props.put("fetch.wait.max.ms", String.valueOf(configuration.getInt(CK_FETCH_WAIT_MAX)));
    props.put("zookeeper.connect", configuration.getString(CK_ZOOKEEPER));
    props.put("auto.offset.reset", configuration.getString(CK_OFFSET_RESET, DEFAULT_OFFSET_RESET));
    // Default auto commit interval is 60 seconds. Reduce to 1 second to minimize message duplication
    // if something breaks.
    props.put("auto.commit.interval.ms", "1000");
    // Set a consumer timeout to avoid blocking on the consumer iterator.
    props.put("consumer.timeout.ms", "1000");

    insertCustomProperties(props);

    final int numThreads = configuration.getInt(CK_THREADS);
    final ConsumerConfig consumerConfig = new ConsumerConfig(props);
    cc = Consumer.createJavaConsumerConnector(consumerConfig);

    final TopicFilter filter = new Whitelist(configuration.getString(CK_TOPIC_FILTER));
    final List<KafkaStream<byte[], byte[]>> streams = cc.createMessageStreamsByFilter(filter, numThreads);

    // Used during shutdown to first stop all submitted jobs before committing the offsets back to ZooKeeper
    // and then shutting down the connection. This avoids yanking away the connection from the consumer runnables.
    stopLatch = new CountDownLatch(streams.size());

    for (final KafkaStream<byte[], byte[]> stream : streams) {
        executor.submit(new Runnable() {

            @Override
            public void run() {
                final ConsumerIterator<byte[], byte[]> consumerIterator = stream.iterator();
                boolean retry;

                do {
                    retry = false;

                    try {
                        //noinspection WhileLoopReplaceableByForEach
                        while (consumerIterator.hasNext()) {
                            if (paused) {
                                // we try not to spin here, so we wait until the lifecycle goes back to running.
                                LOG.debug("Message processing is paused, blocking until message processing is turned back on.");
                                Uninterruptibles.awaitUninterruptibly(pausedLatch);
                            }
                            // check for being stopped before actually getting the message, otherwise we could end up losing that message
                            if (stopped) {
                                break;
                            }
                            if (isThrottled()) {
                                blockUntilUnthrottled();
                            }

                            // process the message, this will immediately mark the message as having been processed. this gets tricky
                            // if we get an exception about processing it down below.
                            final MessageAndMetadata<byte[], byte[]> message = consumerIterator.next();

                            final byte[] bytes = message.message();

                            // it is possible that the message is null
                            if (bytes == null) {
                                continue;
                            }

                            totalBytesRead.addAndGet(bytes.length);
                            lastSecBytesReadTmp.addAndGet(bytes.length);

                            final RawMessage rawMessage = new RawMessage(bytes);
                            input.processRawMessage(rawMessage);
                        }
                    } catch (ConsumerTimeoutException e) {
                        // Happens when there is nothing to consume, retry to check again.
                        retry = true;
                    } catch (Exception e) {
                        LOG.error("Kafka consumer error, stopping consumer thread.", e);
                    }
                } while (retry && !stopped);

                // explicitly commit our offsets when stopping.
                // this might trigger a couple of times, but it won't hurt
                cc.commitOffsets();
                stopLatch.countDown();
            }
        });
    }
}
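
The stopLatch set up above is what a stop routine would wait on. The following is a rough sketch of the shutdown order the comments describe (it is not the actual KafkaTransport stop code): signal the runnables, wait until each has committed its offsets and counted down the latch, and only then shut down the consumer connector. Field and method names other than the Kafka/Guava APIs are illustrative, and the shaded import location of ConsumerConnector is assumed to mirror the upstream kafka.javaapi package.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import com.google.common.util.concurrent.Uninterruptibles;

import org.graylog.shaded.kafka09.javaapi.consumer.ConsumerConnector; // assumed shaded location

class LegacyConsumerShutdownSketch {
    private volatile boolean stopped = false;
    private ConsumerConnector cc;     // set in doLaunchLegacy()
    private CountDownLatch stopLatch; // one count per consumer stream

    void stopConsumers() {
        // 1. Tell the consumer runnables to leave their read loops.
        stopped = true;

        // 2. Wait until every runnable has committed its offsets and counted down the latch.
        if (stopLatch != null) {
            Uninterruptibles.awaitUninterruptibly(stopLatch, 5, TimeUnit.SECONDS);
        }

        // 3. Only then tear down the connection, so it is not yanked away from the runnables.
        if (cc != null) {
            cc.shutdown();
        }
    }
}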