
Example 66 with ConsumerRecord

Use of org.apache.kafka.clients.consumer.ConsumerRecord in the Apache Storm project.

From the class KafkaSpoutEmitTest, method testSpoutWillSkipPartitionsAtTheMaxUncommittedOffsetsLimit.

@Test
public void testSpoutWillSkipPartitionsAtTheMaxUncommittedOffsetsLimit() {
    // This verifies that partitions can't prevent each other from retrying tuples due to the maxUncommittedOffsets limit.
    try (SimulatedTime simulatedTime = new SimulatedTime()) {
        TopicPartition partitionTwo = new TopicPartition(SingleTopicKafkaSpoutConfiguration.TOPIC, 2);
        KafkaSpout<String, String> spout = SpoutWithMockedConsumerSetupHelper.setupSpout(spoutConfig, conf, contextMock, collectorMock, consumerMock, partition, partitionTwo);
        Map<TopicPartition, List<ConsumerRecord<String, String>>> records = new HashMap<>();
        // This is cheating a bit since maxPollRecords would normally spread this across multiple polls
        records.put(partition, SpoutWithMockedConsumerSetupHelper.createRecords(partition, 0, spoutConfig.getMaxUncommittedOffsets()));
        records.put(partitionTwo, SpoutWithMockedConsumerSetupHelper.createRecords(partitionTwo, 0, spoutConfig.getMaxUncommittedOffsets() + 1));
        int numMessages = spoutConfig.getMaxUncommittedOffsets() * 2 + 1;
        when(consumerMock.poll(anyLong())).thenReturn(new ConsumerRecords<>(records));
        for (int i = 0; i < numMessages; i++) {
            spout.nextTuple();
        }
        ArgumentCaptor<KafkaSpoutMessageId> messageIds = ArgumentCaptor.forClass(KafkaSpoutMessageId.class);
        verify(collectorMock, times(numMessages)).emit(anyString(), anyList(), messageIds.capture());
        // Now fail a tuple on partition one and verify that it is allowed to retry, because the failed tuple is below the maxUncommittedOffsets limit
        Optional<KafkaSpoutMessageId> failedMessageIdPartitionOne = messageIds.getAllValues().stream().filter(messageId -> messageId.partition() == partition.partition()).findAny();
        spout.fail(failedMessageIdPartitionOne.get());
        // Also fail the last tuple from partition two. Since the failed tuple is beyond the maxUncommittedOffsets limit, it should not be retried until earlier messages are acked.
        Optional<KafkaSpoutMessageId> failedMessagePartitionTwo = messageIds.getAllValues().stream().filter(messageId -> messageId.partition() == partitionTwo.partition()).max((msgId, msgId2) -> (int) (msgId.offset() - msgId2.offset()));
        spout.fail(failedMessagePartitionTwo.get());
        reset(collectorMock);
        Time.advanceTime(50);
        when(consumerMock.poll(anyLong())).thenReturn(new ConsumerRecords<>(Collections.singletonMap(partition, SpoutWithMockedConsumerSetupHelper.createRecords(partition, failedMessageIdPartitionOne.get().offset(), 1))));
        spout.nextTuple();
        verify(collectorMock, times(1)).emit(anyObject(), anyObject(), anyObject());
        InOrder inOrder = inOrder(consumerMock);
        inOrder.verify(consumerMock).seek(partition, failedMessageIdPartitionOne.get().offset());
        // Should not seek on the paused partition
        inOrder.verify(consumerMock, never()).seek(eq(partitionTwo), anyLong());
        inOrder.verify(consumerMock).pause(Collections.singleton(partitionTwo));
        inOrder.verify(consumerMock).poll(anyLong());
        inOrder.verify(consumerMock).resume(Collections.singleton(partitionTwo));
        reset(collectorMock);
        // Now also check that no more tuples are polled for, since both partitions are at their limits
        spout.nextTuple();
        verify(collectorMock, never()).emit(anyObject(), anyObject(), anyObject());
    }
}
Also used : ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) BeforeEach(org.junit.jupiter.api.BeforeEach) SingleTopicKafkaSpoutConfiguration(org.apache.storm.kafka.spout.config.builder.SingleTopicKafkaSpoutConfiguration) SimulatedTime(org.apache.storm.utils.Time.SimulatedTime) ArgumentMatchers.anyLong(org.mockito.ArgumentMatchers.anyLong) ArgumentMatchers.eq(org.mockito.ArgumentMatchers.eq) ManualPartitioner(org.apache.storm.kafka.spout.subscription.ManualPartitioner) TopologyContext(org.apache.storm.task.TopologyContext) HashMap(java.util.HashMap) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) SingleTopicKafkaSpoutConfiguration.createKafkaSpoutConfigBuilder(org.apache.storm.kafka.spout.config.builder.SingleTopicKafkaSpoutConfiguration.createKafkaSpoutConfigBuilder) ArrayList(java.util.ArrayList) ArgumentCaptor(org.mockito.ArgumentCaptor) Map(java.util.Map) TopicPartition(org.apache.kafka.common.TopicPartition) InOrder(org.mockito.InOrder) ArgumentMatchers.anyObject(org.mockito.ArgumentMatchers.anyObject) Mockito.times(org.mockito.Mockito.times) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) ArgumentMatchers.anyList(org.mockito.ArgumentMatchers.anyList) Mockito.verify(org.mockito.Mockito.verify) Time(org.apache.storm.utils.Time) Test(org.junit.jupiter.api.Test) Mockito.never(org.mockito.Mockito.never) List(java.util.List) TopicFilter(org.apache.storm.kafka.spout.subscription.TopicFilter) Mockito.inOrder(org.mockito.Mockito.inOrder) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Optional(java.util.Optional) SpoutOutputCollector(org.apache.storm.spout.SpoutOutputCollector) Mockito.reset(org.mockito.Mockito.reset) Collections(java.util.Collections) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) Mockito.mock(org.mockito.Mockito.mock)
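The test above fabricates poll results with SpoutWithMockedConsumerSetupHelper.createRecords, which is not shown on this page. A minimal sketch of an equivalent helper (names and key/value formats are illustrative, not the actual Storm helper) could look like this:

import java.util.ArrayList;
import java.util.List;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;

// Hypothetical stand-in for SpoutWithMockedConsumerSetupHelper.createRecords:
// builds count consecutive records for one partition, starting at startOffset,
// so they can be returned from a mocked consumer.poll(...).
static List<ConsumerRecord<String, String>> createRecords(TopicPartition tp, long startOffset, int count) {
    List<ConsumerRecord<String, String>> records = new ArrayList<>();
    for (int i = 0; i < count; i++) {
        long offset = startOffset + i;
        records.add(new ConsumerRecord<>(tp.topic(), tp.partition(), offset, "key-" + offset, "value-" + offset));
    }
    return records;
}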

Example 67 with ConsumerRecord

Use of org.apache.kafka.clients.consumer.ConsumerRecord in the Apache Storm project.

From the class SimpleRecordTranslatorTest, method testBasic.

@Test
public void testBasic() {
    SimpleRecordTranslator<String, String> trans = new SimpleRecordTranslator<>((r) -> new Values(r.value()), new Fields("value"));
    assertEquals(Arrays.asList("default"), trans.streams());
    ConsumerRecord<String, String> cr = new ConsumerRecord<>("TOPIC", 100, 100, "THE KEY", "THE VALUE");
    assertEquals(Arrays.asList("THE VALUE"), trans.apply(cr));
}
Also used : Fields(org.apache.storm.tuple.Fields) Values(org.apache.storm.tuple.Values) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Test(org.junit.Test)
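A SimpleRecordTranslator can also carry the record key and route tuples to a named stream. A minimal sketch in the same style as the test above, assuming the three-argument constructor that takes a stream name (the stream and field names here are illustrative):

import static org.junit.Assert.assertEquals;
import java.util.Arrays;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.storm.kafka.spout.SimpleRecordTranslator;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.junit.Test;

@Test
public void testKeyAndValueToCustomStream() {
    // Emit both key and value, routed to a custom stream instead of "default".
    SimpleRecordTranslator<String, String> trans = new SimpleRecordTranslator<>(
            (r) -> new Values(r.key(), r.value()),
            new Fields("key", "value"),
            "kvStream");
    assertEquals(Arrays.asList("kvStream"), trans.streams());
    ConsumerRecord<String, String> cr = new ConsumerRecord<>("TOPIC", 0, 0, "THE KEY", "THE VALUE");
    assertEquals(Arrays.asList("THE KEY", "THE VALUE"), trans.apply(cr));
}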

Example 68 with ConsumerRecord

Use of org.apache.kafka.clients.consumer.ConsumerRecord in the Apache Storm project.

From the class KafkaSpout, method commitOffsetsForAckedTuples.

private void commitOffsetsForAckedTuples() {
    final Map<TopicPartition, OffsetAndMetadata> nextCommitOffsets = new HashMap<>();
    for (Map.Entry<TopicPartition, OffsetManager> tpOffset : offsetManagers.entrySet()) {
        final OffsetAndMetadata nextCommitOffset = tpOffset.getValue().findNextCommitOffset(commitMetadataManager.getCommitMetadata());
        if (nextCommitOffset != null) {
            nextCommitOffsets.put(tpOffset.getKey(), nextCommitOffset);
        }
    }
    // Commit offsets that are ready to be committed for every topic partition
    if (!nextCommitOffsets.isEmpty()) {
        consumer.commitSync(nextCommitOffsets);
        LOG.debug("Offsets successfully committed to Kafka [{}]", nextCommitOffsets);
        // Instead of iterating again, it would be possible to commit and update the state for each TopicPartition
        // in the prior loop, but the multiple network calls should be more expensive than iterating twice over a small loop
        for (Map.Entry<TopicPartition, OffsetAndMetadata> tpOffset : nextCommitOffsets.entrySet()) {
            // Update the OffsetManager for each committed partition, and update numUncommittedOffsets
            final TopicPartition tp = tpOffset.getKey();
            long position = consumer.position(tp);
            long committedOffset = tpOffset.getValue().offset();
            if (position < committedOffset) {
                /*
                 * The position is behind the committed offset. This can happen in some cases, e.g. if a message failed, lots of (more
                 * than max.poll.records) later messages were acked, and the failed message then gets acked. The consumer may only be
                 * part way through "catching up" to where it was when it went back to retry the failed tuple. Skip the consumer forward
                 * to the committed offset.
                 */
                LOG.debug("Consumer fell behind committed offset. Catching up. Position was [{}], skipping to [{}]", position, committedOffset);
                consumer.seek(tp, committedOffset);
            }
            /*
             * In some cases the waitingToEmit list may contain tuples that have just been committed. Drop these.
             */
            List<ConsumerRecord<K, V>> waitingToEmitForTp = waitingToEmit.get(tp);
            if (waitingToEmitForTp != null) {
                // Discard the pending records that are already committed
                waitingToEmit.put(tp, waitingToEmitForTp.stream().filter(record -> record.offset() >= committedOffset).collect(Collectors.toCollection(LinkedList::new)));
            }
            final OffsetManager offsetManager = offsetManagers.get(tp);
            offsetManager.commit(tpOffset.getValue());
            LOG.debug("[{}] uncommitted offsets for partition [{}] after commit", offsetManager.getNumUncommittedOffsets(), tp);
        }
    } else {
        LOG.trace("No offsets to commit. {}", this);
    }
}
Also used : HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetManager(org.apache.storm.kafka.spout.internal.OffsetManager) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Map(java.util.Map) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) LinkedList(java.util.LinkedList)
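Outside the spout, the commit-then-reposition logic in the method above can be sketched directly against the consumer API. A minimal example under the assumption of a single partition (method name, offsets, and error handling are illustrative):

import java.util.Collections;
import java.util.Map;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

static void commitAndCatchUp(KafkaConsumer<String, String> consumer, TopicPartition tp, long nextOffsetToCommit) {
    // Kafka commits the offset of the next record to consume, not the last processed one.
    Map<TopicPartition, OffsetAndMetadata> offsets =
            Collections.singletonMap(tp, new OffsetAndMetadata(nextOffsetToCommit));
    consumer.commitSync(offsets);
    // If the consumer position fell behind the committed offset (for example after retrying
    // an old failed record), skip forward so already-committed records are not polled again.
    if (consumer.position(tp) < nextOffsetToCommit) {
        consumer.seek(tp, nextOffsetToCommit);
    }
}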

Example 69 with ConsumerRecord

Use of org.apache.kafka.clients.consumer.ConsumerRecord in the Apache Storm project.

From the class KafkaSpout, method ackRetriableOffsetsIfCompactedAway.

private void ackRetriableOffsetsIfCompactedAway(Map<TopicPartition, Long> earliestRetriableOffsets, ConsumerRecords<K, V> consumerRecords) {
    for (Entry<TopicPartition, Long> entry : earliestRetriableOffsets.entrySet()) {
        TopicPartition tp = entry.getKey();
        List<ConsumerRecord<K, V>> records = consumerRecords.records(tp);
        if (!records.isEmpty()) {
            ConsumerRecord<K, V> record = records.get(0);
            long seekOffset = entry.getValue();
            long earliestReceivedOffset = record.offset();
            if (seekOffset < earliestReceivedOffset) {
                // Ack up to the first offset received if the record is not already acked or currently in the topology
                for (long i = seekOffset; i < earliestReceivedOffset; i++) {
                    KafkaSpoutMessageId msgId = retryService.getMessageId(tp, i);
                    if (!offsetManagers.get(tp).contains(msgId) && !emitted.contains(msgId)) {
                        LOG.debug("Record at offset [{}] appears to have been compacted away from topic [{}], marking as acked", i, tp);
                        retryService.remove(msgId);
                        emitted.add(msgId);
                        ack(msgId);
                    }
                }
            }
        }
    }
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord)
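The compaction check above reduces to comparing the offset the spout seeked to with the offset of the first record actually returned by the next poll. A standalone sketch of that comparison (method name, poll timeout, and the returned list are illustrative; the real spout feeds the gap into its retry and ack bookkeeping instead of returning it):

import java.util.ArrayList;
import java.util.List;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

static List<Long> findCompactedAwayOffsets(KafkaConsumer<String, String> consumer, TopicPartition tp, long seekOffset) {
    consumer.seek(tp, seekOffset);
    List<ConsumerRecord<String, String>> records = consumer.poll(200).records(tp);
    List<Long> gap = new ArrayList<>();
    if (!records.isEmpty()) {
        long earliestReceivedOffset = records.get(0).offset();
        // Offsets in [seekOffset, earliestReceivedOffset) were requested but not returned;
        // on a compacted topic they have been compacted away and can be treated as acked.
        for (long offset = seekOffset; offset < earliestReceivedOffset; offset++) {
            gap.add(offset);
        }
    }
    return gap;
}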

Example 70 with ConsumerRecord

Use of org.apache.kafka.clients.consumer.ConsumerRecord in the Apache Storm project.

From the class KafkaTridentSpoutEmitter, method emitPartitionBatchNew.

/**
 * Emit a new batch.
 */
public Map<String, Object> emitPartitionBatchNew(TransactionAttempt tx, TridentCollector collector, KafkaTridentSpoutTopicPartition currBatchPartition, Map<String, Object> lastBatch) {
    LOG.debug("Processing batch: [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], [collector = {}]", tx, currBatchPartition, lastBatch, collector);
    final TopicPartition currBatchTp = currBatchPartition.getTopicPartition();
    throwIfEmittingForUnassignedPartition(currBatchTp);
    KafkaTridentSpoutBatchMetadata lastBatchMeta = lastBatch == null ? null : KafkaTridentSpoutBatchMetadata.fromMap(lastBatch);
    KafkaTridentSpoutBatchMetadata currentBatch = lastBatchMeta;
    Collection<TopicPartition> pausedTopicPartitions = Collections.emptySet();
    try {
        // pause other topic-partitions to only poll from current topic-partition
        pausedTopicPartitions = pauseTopicPartitions(currBatchTp);
        seek(currBatchTp, lastBatchMeta);
        final List<ConsumerRecord<K, V>> records = consumer.poll(pollTimeoutMs).records(currBatchTp);
        LOG.debug("Polled [{}] records from Kafka.", records.size());
        if (!records.isEmpty()) {
            for (ConsumerRecord<K, V> record : records) {
                emitTuple(collector, record);
            }
            // build new metadata based on emitted records
            currentBatch = new KafkaTridentSpoutBatchMetadata(records.get(0).offset(), records.get(records.size() - 1).offset(), topologyContext.getStormId());
        } else {
            // Build new metadata based on the consumer position.
            // We want the next emit to start at the current consumer position,
            // so make a meta that indicates that position - 1 is the last emitted offset
            // This helps us avoid cases like STORM-3279, and simplifies the seek logic.
            long lastEmittedOffset = consumer.position(currBatchTp) - 1;
            currentBatch = new KafkaTridentSpoutBatchMetadata(lastEmittedOffset, lastEmittedOffset, topologyContext.getStormId());
        }
    } finally {
        consumer.resume(pausedTopicPartitions);
        LOG.trace("Resumed topic-partitions {}", pausedTopicPartitions);
    }
    LOG.debug("Emitted batch: [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], " + "[currBatchMetadata = {}], [collector = {}]", tx, currBatchPartition, lastBatch, currentBatch, collector);
    return currentBatch.toMap();
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord)
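The pause/seek/poll/resume dance used above to isolate one partition can also be written against a plain consumer. A minimal sketch (method name and timeout are illustrative; rebalance handling is omitted):

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

static List<ConsumerRecord<String, String>> pollSinglePartition(
        KafkaConsumer<String, String> consumer, TopicPartition target, long startOffset, long pollTimeoutMs) {
    // Pause every other assigned partition so the poll only returns records for the target.
    Collection<TopicPartition> paused = new ArrayList<>(consumer.assignment());
    paused.remove(target);
    consumer.pause(paused);
    try {
        consumer.seek(target, startOffset);
        return consumer.poll(pollTimeoutMs).records(target);
    } finally {
        // Always resume the paused partitions, mirroring the finally block in emitPartitionBatchNew.
        consumer.resume(paused);
    }
}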

Aggregations

ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 314
TopicPartition (org.apache.kafka.common.TopicPartition): 160
Test (org.junit.Test): 145
ArrayList (java.util.ArrayList): 123
List (java.util.List): 100
HashMap (java.util.HashMap): 98
Map (java.util.Map): 70
RecordHeaders (org.apache.kafka.common.header.internals.RecordHeaders): 61
ConsumerRecords (org.apache.kafka.clients.consumer.ConsumerRecords): 51
Test (org.junit.jupiter.api.Test): 35
PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest): 33
KafkaConsumer (org.apache.kafka.clients.consumer.KafkaConsumer): 31
OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 31
LinkedHashMap (java.util.LinkedHashMap): 30
Header (org.apache.kafka.common.header.Header): 29
RecordHeader (org.apache.kafka.common.header.internals.RecordHeader): 28
TimeUnit (java.util.concurrent.TimeUnit): 27
Set (java.util.Set): 24
Collectors (java.util.stream.Collectors): 24
ByteBuffer (java.nio.ByteBuffer): 22