Example 1 with MessageAndOffset

Use of kafka.message.MessageAndOffset in project pinot by linkedin.

From the class LLRealtimeSegmentDataManager, method processKafkaEvents:

private void processKafkaEvents(Iterable<MessageAndOffset> messagesAndOffsets, Long highWatermark) {
    Iterator<MessageAndOffset> msgIterator = messagesAndOffsets.iterator();
    int indexedMessageCount = 0;
    int kafkaMessageCount = 0;
    boolean canTakeMore = true;
    GenericRow decodedRow = null;
    GenericRow transformedRow = null;
    while (!_shouldStop && !endCriteriaReached() && msgIterator.hasNext()) {
        if (!canTakeMore) {
            // The RealtimeSegmentImpl that we are pushing rows into has indicated that it cannot accept any more
            // rows. This can happen in one of two conditions:
            // 1. We are in INITIAL_CONSUMING state, and we somehow exceeded the max number of rows we are allowed to consume
            //    for this segment. Something is seriously wrong, because endCriteriaReached() should have returned true when
            //    we hit the row limit.
            //    Throw an exception.
            //
            // 2. We are in CATCHING_UP state, and we legally hit this error due to a Kafka unclean leader election, where
            //    offsets get changed with higher generation numbers for some Pinot servers but not others. So, if another
            //    server (which got a larger Kafka offset) asked us to catch up to that offset, but we are connected to a
            //    broker that has smaller offsets, then we may try to push more rows into the buffer than the maximum. This
            //    is a rare case, and we really don't know how to handle this at this time.
            //    Throw an exception.
            //
            segmentLogger.error("Buffer full with {} rows consumed (row limit {})", _numRowsConsumed, _segmentMaxRowCount);
            throw new RuntimeException("Realtime segment full");
        }
        // Index each message
        MessageAndOffset messageAndOffset = msgIterator.next();
        byte[] array = messageAndOffset.message().payload().array();
        int offset = messageAndOffset.message().payload().arrayOffset();
        int length = messageAndOffset.message().payloadSize();
        decodedRow = GenericRow.createOrReuseRow(decodedRow);
        decodedRow = _messageDecoder.decode(array, offset, length, decodedRow);
        // Update lag metric on the first message of each batch
        if (kafkaMessageCount == 0) {
            long messageOffset = messageAndOffset.offset();
            long offsetDifference = highWatermark - messageOffset;
            _serverMetrics.setValueOfTableGauge(_metricKeyName, ServerGauge.KAFKA_PARTITION_OFFSET_LAG, offsetDifference);
        }
        if (decodedRow != null) {
            transformedRow = GenericRow.createOrReuseRow(transformedRow);
            transformedRow = _fieldExtractor.transform(decodedRow, transformedRow);
            if (transformedRow != null) {
                _serverMetrics.addMeteredTableValue(_metricKeyName, ServerMeter.REALTIME_ROWS_CONSUMED, 1);
                indexedMessageCount++;
            } else {
                _serverMetrics.addMeteredTableValue(_metricKeyName, ServerMeter.INVALID_REALTIME_ROWS_DROPPED, 1);
            }
            canTakeMore = _realtimeSegment.index(transformedRow);
        } else {
            _serverMetrics.addMeteredTableValue(_metricKeyName, ServerMeter.INVALID_REALTIME_ROWS_DROPPED, 1);
        }
        _currentOffset = messageAndOffset.nextOffset();
        _numRowsConsumed++;
        kafkaMessageCount++;
    }
    updateCurrentDocumentCountMetrics();
    if (kafkaMessageCount != 0) {
        segmentLogger.debug("Indexed {} messages ({} messages read from Kafka) current offset {}", indexedMessageCount, kafkaMessageCount, _currentOffset);
        _serverMetrics.setValueOfTableGauge(_metricKeyName, ServerGauge.HIGHEST_KAFKA_OFFSET_CONSUMED, _currentOffset);
    } else {
        // If there were no messages to be fetched from Kafka, wait for a little bit so as to avoid hammering the
        // Kafka broker.
        Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
    }
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) MessageAndOffset(kafka.message.MessageAndOffset)
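
The decoder invoked above only has to honor the decode(byte[] payload, int offset, int length, GenericRow destination) call: populate fields on the reusable GenericRow, or return null so the row is dropped and counted as INVALID_REALTIME_ROWS_DROPPED. A minimal sketch of such a decoder, assuming a single-column UTF-8 payload and GenericRow's putField setter; the class name and the "value" column are hypothetical, not Pinot's actual decoder:

public class Utf8SingleColumnDecoder {

    // Mirrors the _messageDecoder.decode(array, offset, length, decodedRow) call in processKafkaEvents above.
    public GenericRow decode(byte[] payload, int offset, int length, GenericRow destination) {
        if (payload == null || length <= 0) {
            // Returning null tells the caller to drop the row.
            return null;
        }
        // Hypothetical single-column format: the whole payload is one UTF-8 string.
        String value = new String(payload, offset, length, java.nio.charset.StandardCharsets.UTF_8);
        destination.putField("value", value);
        return destination;
    }
}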

Example 2 with MessageAndOffset

Use of kafka.message.MessageAndOffset in project kafka by apache.

From the class SimpleConsumerDemo, method printMessages:

private static void printMessages(ByteBufferMessageSet messageSet) throws UnsupportedEncodingException {
    for (MessageAndOffset messageAndOffset : messageSet) {
        ByteBuffer payload = messageAndOffset.message().payload();
        byte[] bytes = new byte[payload.limit()];
        payload.get(bytes);
        System.out.println(new String(bytes, "UTF-8"));
    }
}
Also used : MessageAndOffset(kafka.message.MessageAndOffset) ByteBuffer(java.nio.ByteBuffer)
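
printMessages is handed a ByteBufferMessageSet that the surrounding demo obtains with the legacy SimpleConsumer fetch API (SimpleConsumer, FetchRequestBuilder, and FetchResponse also appear in the aggregations below). A rough sketch of that fetch step; the host, port, timeouts, fetch size, topic, and client id are placeholder values, not taken from the demo:

private static void fetchAndPrint() throws UnsupportedEncodingException {
    // Placeholder connection settings: host, port, socket timeout (ms), buffer size, client id.
    SimpleConsumer consumer = new SimpleConsumer("localhost", 9092, 100000, 64 * 1024, "demo-client");
    try {
        FetchRequest request = new FetchRequestBuilder()
            .clientId("demo-client")
            // topic, partition, start offset, max bytes to fetch -- all placeholders
            .addFetch("test-topic", 0, 0L, 100000)
            .build();
        FetchResponse response = consumer.fetch(request);
        printMessages(response.messageSet("test-topic", 0));
    } finally {
        consumer.close();
    }
}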

Example 3 with MessageAndOffset

Use of kafka.message.MessageAndOffset in project storm by apache.

From the class PartitionManager, method next:

// Returns NO_EMITTED if the current batch is exhausted; otherwise emits tuples for the next message and reports whether more messages are waiting.
public EmitState next(SpoutOutputCollector collector) {
    if (_waitingToEmit.isEmpty()) {
        fill();
    }
    while (true) {
        MessageAndOffset toEmit = _waitingToEmit.pollFirst();
        if (toEmit == null) {
            return EmitState.NO_EMITTED;
        }
        Iterable<List<Object>> tups;
        if (_spoutConfig.scheme instanceof MessageMetadataSchemeAsMultiScheme) {
            tups = KafkaUtils.generateTuples((MessageMetadataSchemeAsMultiScheme) _spoutConfig.scheme, toEmit.message(), _partition, toEmit.offset());
        } else {
            tups = KafkaUtils.generateTuples(_spoutConfig, toEmit.message(), _partition.topic);
        }
        if ((tups != null) && tups.iterator().hasNext()) {
            if (!Strings.isNullOrEmpty(_spoutConfig.outputStreamId)) {
                for (List<Object> tup : tups) {
                    collector.emit(_spoutConfig.outputStreamId, tup, new KafkaMessageId(_partition, toEmit.offset()));
                }
            } else {
                for (List<Object> tup : tups) {
                    collector.emit(tup, new KafkaMessageId(_partition, toEmit.offset()));
                }
            }
            break;
        } else {
            ack(toEmit.offset());
        }
    }
    if (!_waitingToEmit.isEmpty()) {
        return EmitState.EMITTED_MORE_LEFT;
    } else {
        return EmitState.EMITTED_END;
    }
}
Also used : MessageAndOffset(kafka.message.MessageAndOffset)
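
The EmitState returned by next is what the enclosing spout uses to decide whether to keep draining this partition or move on to the next one. A sketch of how a spout's nextTuple might consume it; the coordinator accessor and the round-robin index shown here are assumptions for illustration, not the exact storm-kafka code:

public void nextTuple() {
    // Assumed bookkeeping: a coordinator that hands out this task's PartitionManagers
    // and an index that round-robins across them between calls.
    List<PartitionManager> managers = _coordinator.getMyManagedPartitions();
    for (int i = 0; i < managers.size(); i++) {
        _currPartitionIndex = _currPartitionIndex % managers.size();
        EmitState state = managers.get(_currPartitionIndex).next(_collector);
        if (state != EmitState.EMITTED_MORE_LEFT) {
            // This partition is drained (or had nothing to emit); rotate to the next one.
            _currPartitionIndex = (_currPartitionIndex + 1) % managers.size();
        }
        if (state != EmitState.NO_EMITTED) {
            // Something was emitted; stop here and wait for the next nextTuple() call.
            break;
        }
    }
}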

Example 4 with MessageAndOffset

Use of kafka.message.MessageAndOffset in project storm by apache.

From the class KafkaUtilsTest, method generateTuplesWithKeyAndKeyValueScheme:

@Test
public void generateTuplesWithKeyAndKeyValueScheme() {
    config.scheme = new KeyValueSchemeAsMultiScheme(new StringKeyValueScheme());
    config.useStartOffsetTimeIfOffsetOutOfRange = false;
    String value = "value";
    String key = "key";
    createTopicAndSendMessage(key, value);
    ByteBufferMessageSet messageAndOffsets = getLastMessage();
    for (MessageAndOffset msg : messageAndOffsets) {
        Iterable<List<Object>> lists = KafkaUtils.generateTuples(config, msg.message(), config.topic);
        assertEquals(ImmutableMap.of(key, value), lists.iterator().next().get(0));
    }
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) MessageAndOffset(kafka.message.MessageAndOffset) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet) Test(org.junit.Test)
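
In a topology, the same scheme wiring happens on the spout configuration, so every Kafka record arrives downstream as the one-entry key/value map asserted above. A small sketch of that configuration; the ZooKeeper address, topic, zkRoot, and spout id are placeholder values:

// Placeholder ZooKeeper address, topic, zkRoot, and spout id.
BrokerHosts hosts = new ZkHosts("localhost:2181");
SpoutConfig spoutConfig = new SpoutConfig(hosts, "test-topic", "/kafka-spout", "key-value-spout");
spoutConfig.scheme = new KeyValueSchemeAsMultiScheme(new StringKeyValueScheme());
KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);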

Example 5 with MessageAndOffset

Use of kafka.message.MessageAndOffset in project storm by apache.

From the class TestUtils, method verifyMessage:

public static boolean verifyMessage(String key, String message, KafkaTestBroker broker, SimpleConsumer simpleConsumer) {
    long lastMessageOffset = KafkaUtils.getOffset(simpleConsumer, TestUtils.TOPIC, 0, OffsetRequest.LatestTime()) - 1;
    ByteBufferMessageSet messageAndOffsets = KafkaUtils.fetchMessages(TestUtils.getKafkaConfig(broker), simpleConsumer, new Partition(Broker.fromString(broker.getBrokerConnectionString()), TestUtils.TOPIC, 0), lastMessageOffset);
    MessageAndOffset messageAndOffset = messageAndOffsets.iterator().next();
    Message kafkaMessage = messageAndOffset.message();
    ByteBuffer messageKeyBuffer = kafkaMessage.key();
    String keyString = null;
    String messageString = new String(Utils.toByteArray(kafkaMessage.payload()));
    if (messageKeyBuffer != null) {
        keyString = new String(Utils.toByteArray(messageKeyBuffer));
    }
    assertEquals(key, keyString);
    assertEquals(message, messageString);
    return true;
}
Also used : Message(kafka.message.Message) MessageAndOffset(kafka.message.MessageAndOffset) ByteBufferMessageSet(kafka.javaapi.message.ByteBufferMessageSet) ByteBuffer(java.nio.ByteBuffer)
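
verifyMessage deliberately reads back only the newest record: it asks for the latest offset and steps back by one. The same helpers can be pointed at the start of the partition instead; a sketch reusing the calls above with OffsetRequest.EarliestTime(), where broker and simpleConsumer are assumed to come from the surrounding test fixture:

// Read from the oldest retained offset instead of the newest one.
long earliestOffset = KafkaUtils.getOffset(simpleConsumer, TestUtils.TOPIC, 0, OffsetRequest.EarliestTime());
ByteBufferMessageSet fromStart = KafkaUtils.fetchMessages(TestUtils.getKafkaConfig(broker), simpleConsumer, new Partition(Broker.fromString(broker.getBrokerConnectionString()), TestUtils.TOPIC, 0), earliestOffset);
for (MessageAndOffset messageAndOffset : fromStart) {
    System.out.println(new String(Utils.toByteArray(messageAndOffset.message().payload())));
}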

Aggregations

Classes used together with MessageAndOffset, with the number of occurrences across the indexed examples:

MessageAndOffset (kafka.message.MessageAndOffset) 42
ByteBufferMessageSet (kafka.javaapi.message.ByteBufferMessageSet) 25
ArrayList (java.util.ArrayList) 14
List (java.util.List) 13
IOException (java.io.IOException) 9
ByteBuffer (java.nio.ByteBuffer) 9
Test (org.junit.Test) 8
Message (kafka.message.Message) 7
FetchRequest (kafka.api.FetchRequest) 6
FetchRequestBuilder (kafka.api.FetchRequestBuilder) 6
FetchResponse (kafka.javaapi.FetchResponse) 6
SimpleConsumer (kafka.javaapi.consumer.SimpleConsumer) 6
Checkpoint (co.cask.cdap.logging.meta.Checkpoint) 3
HashMap (java.util.HashMap) 3
LinkedList (java.util.LinkedList) 3
Map (java.util.Map) 3
PartitionMetadata (kafka.javaapi.PartitionMetadata) 2
SchemeAsMultiScheme (org.apache.storm.spout.SchemeAsMultiScheme) 2
ILoggingEvent (ch.qos.logback.classic.spi.ILoggingEvent) 1
NotFoundException (co.cask.cdap.common.NotFoundException) 1