
Example 81 with TopicPartition

use of org.apache.kafka.common.TopicPartition in project apex-malhar by apache.

the class KafkaConsumerWrapper method emitImmediately.

public void emitImmediately(Map<AbstractKafkaPartitioner.PartitionMeta, Pair<Long, Long>> windowData) {
    for (Map.Entry<AbstractKafkaPartitioner.PartitionMeta, Pair<Long, Long>> windowEntry : windowData.entrySet()) {
        AbstractKafkaPartitioner.PartitionMeta meta = windowEntry.getKey();
        Pair<Long, Long> replayOffsetSize = windowEntry.getValue();
        AbstractKafkaConsumer kc = consumers.get(meta.getCluster());
        if (kc == null || !kc.isConsumerContainsPartition(windowEntry.getKey().getTopicPartition())) {
            throw new RuntimeException("Couldn't find consumer to replay the message PartitionMeta : " + meta);
        }
        // pause all partitions other than the one being replayed
        for (TopicPartition tp : kc.getPartitions()) {
            if (meta.getTopicPartition().equals(tp)) {
                kc.resumePartition(tp);
            } else {
                try {
                    kc.positionPartition(tp);
                } catch (NoOffsetForPartitionException e) {
                    // poll() throws an exception if any assigned partition
                    // has no initialized position
                    handleNoOffsetForPartitionException(e, kc);
                }
                kc.pausePartition(tp);
            }
        }
        // set the offset to window start offset
        kc.seekToOffset(meta.getTopicPartition(), replayOffsetSize.getLeft());
        long windowCount = replayOffsetSize.getRight();
        while (windowCount > 0) {
            try {
                ConsumerRecords<byte[], byte[]> records = kc.pollRecords(ownerOperator.getConsumerTimeout());
                for (Iterator<ConsumerRecord<byte[], byte[]>> cri = records.iterator(); cri.hasNext() && windowCount > 0; ) {
                    ownerOperator.emitTuple(meta.getCluster(), cri.next());
                    windowCount--;
                }
            } catch (NoOffsetForPartitionException e) {
                throw new RuntimeException("Couldn't replay the offset", e);
            }
        }
        // set the offset to just after the replayed window
        kc.seekToOffset(meta.getTopicPartition(), replayOffsetSize.getLeft() + replayOffsetSize.getRight());
    }
    // resume all partitions on every consumer
    for (AbstractKafkaConsumer kc : consumers.values()) {
        kc.resumeAllPartitions();
    }
}
Also used : NoOffsetForPartitionException(org.apache.kafka.clients.consumer.NoOffsetForPartitionException) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) TopicPartition(org.apache.kafka.common.TopicPartition) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Pair(org.apache.commons.lang3.tuple.Pair)
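
A minimal sketch of how a caller might build the windowData argument for a replay, assuming the PartitionMeta(cluster, topic, partitionId) constructor shown in the next example; the wrapper variable, cluster, topic, and offsets are illustrative:

// Hypothetical replay request: re-emit 100 tuples from partition 0 of topic "events"
// on cluster "localhost:9092", starting at offset 5000.
Map<AbstractKafkaPartitioner.PartitionMeta, Pair<Long, Long>> windowData = new HashMap<>();
AbstractKafkaPartitioner.PartitionMeta meta =
    new AbstractKafkaPartitioner.PartitionMeta("localhost:9092", "events", 0);
// left = window start offset, right = number of tuples emitted in that window
windowData.put(meta, Pair.of(5000L, 100L));
consumerWrapper.emitImmediately(windowData);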

Example 82 with TopicPartition

use of org.apache.kafka.common.TopicPartition in project apex-malhar by apache.

the class KafkaConsumerWrapper method start.

/**
 * This method is called in the activate method of the operator
 */
public void start(boolean waitForReplay) {
    this.waitForReplay = waitForReplay;
    isAlive.set(true);
    // thread to consume the kafka data
    // create thread pool for consumer threads
    kafkaConsumerExecutor = Executors.newCachedThreadPool(new ThreadFactoryBuilder().setNameFormat("kafka-consumer-%d").build());
    // group list of PartitionMeta by cluster
    Map<String, List<TopicPartition>> consumerAssignment = new HashMap<>();
    Set<AbstractKafkaPartitioner.PartitionMeta> assignments = ownerOperator.assignment();
    for (AbstractKafkaPartitioner.PartitionMeta partitionMeta : assignments) {
        String cluster = partitionMeta.getCluster();
        List<TopicPartition> cAssignment = consumerAssignment.get(cluster);
        if (cAssignment == null) {
            cAssignment = new LinkedList<>();
            consumerAssignment.put(cluster, cAssignment);
        }
        cAssignment.add(new TopicPartition(partitionMeta.getTopic(), partitionMeta.getPartitionId()));
    }
    Map<AbstractKafkaPartitioner.PartitionMeta, Long> currentOffset = ownerOperator.getOffsetTrack();
    // each thread uses one KafkaConsumer to consume from one or more partitions of one or more topics
    for (Map.Entry<String, List<TopicPartition>> e : consumerAssignment.entrySet()) {
        Properties prop = new Properties();
        if (ownerOperator.getConsumerProps() != null) {
            prop.putAll(ownerOperator.getConsumerProps());
        }
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, e.getKey());
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
        // never auto commit the offsets
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        AbstractKafkaInputOperator.InitialOffset initialOffset = AbstractKafkaInputOperator.InitialOffset.valueOf(ownerOperator.getInitialOffset());
        if (initialOffset == AbstractKafkaInputOperator.InitialOffset.APPLICATION_OR_EARLIEST || initialOffset == AbstractKafkaInputOperator.InitialOffset.APPLICATION_OR_LATEST) {
            // commit offsets under a group id derived from the application name when the initial offset is APPLICATION_OR_*
            prop.put(ConsumerConfig.GROUP_ID_CONFIG, ownerOperator.getApplicationName() + "_Consumer");
        }
        AbstractKafkaConsumer kc = ownerOperator.createConsumer(prop);
        kc.assignPartitions(e.getValue());
        if (logger.isInfoEnabled()) {
            logger.info("Create consumer with properties {} ", Joiner.on(";").withKeyValueSeparator("=").join(prop));
            logger.info("Assign consumer to {}", Joiner.on('#').join(e.getValue()));
        }
        if (currentOffset != null && !currentOffset.isEmpty()) {
            for (TopicPartition tp : e.getValue()) {
                AbstractKafkaPartitioner.PartitionMeta partitionKey = new AbstractKafkaPartitioner.PartitionMeta(e.getKey(), tp.topic(), tp.partition());
                if (currentOffset.containsKey(partitionKey)) {
                    kc.seekToOffset(tp, currentOffset.get(partitionKey));
                }
            }
        }
        consumers.put(e.getKey(), kc);
        Future<?> future = kafkaConsumerExecutor.submit(new ConsumerThread(e.getKey(), kc, this));
        kafkaConsumerThreads.add(future);
    }
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Properties(java.util.Properties) TopicPartition(org.apache.kafka.common.TopicPartition) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) ByteArrayDeserializer(org.apache.kafka.common.serialization.ByteArrayDeserializer) Map(java.util.Map)
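
The wrapper's assignPartitions and seekToOffset presumably delegate to the standard consumer calls; a plain-Kafka sketch of the same manual-assignment setup (broker address, topic, and offset are illustrative):

Properties prop = new Properties();
prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());

KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(prop);
TopicPartition tp = new TopicPartition("events", 0);
consumer.assign(Collections.singletonList(tp)); // manual assignment, no consumer-group rebalancing
consumer.seek(tp, 5000L);                       // restore a previously checkpointed offset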

Example 83 with TopicPartition

use of org.apache.kafka.common.TopicPartition in project eventuate-local by eventuate-local.

the class OffsetTracker method offsetsToCommit.

public Map<TopicPartition, OffsetAndMetadata> offsetsToCommit() {
    Map<TopicPartition, OffsetAndMetadata> result = new HashMap<>();
    state.forEach((tp, tpo) -> {
        tpo.offsetToCommit().ifPresent(offset -> {
            result.put(tp, new OffsetAndMetadata(offset + 1, ""));
        });
    });
    return result;
}
Also used : HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata)
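
The offset + 1 follows Kafka's convention that the committed offset is the position of the next record to consume. A sketch of how the result might be passed back to a consumer (how the tracker's state is populated is outside this excerpt):

Map<TopicPartition, OffsetAndMetadata> toCommit = offsetTracker.offsetsToCommit();
if (!toCommit.isEmpty()) {
    // commitSync(Map) is the standard KafkaConsumer API for committing specific offsets
    consumer.commitSync(toCommit);
}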

Example 84 with TopicPartition

use of org.apache.kafka.common.TopicPartition in project wikidata-query-rdf by wikimedia.

the class KafkaPoller method firstBatch.

@Override
public Batch firstBatch() throws RetryableException {
    // consumer.subscribe(topicList);
    Map<TopicPartition, Long> topicParts = newHashMapWithExpectedSize(topics.size());
    // Make a map (topic, partition) -> timestamp
    for (String topic : topics) {
        for (PartitionInfo partition : consumer.partitionsFor(topic)) {
            topicParts.put(new TopicPartition(topic, partition.partition()), firstStartTime.toEpochMilli());
        }
    }
    // Map<TopicPartition, Long> topicParts = topics.stream().flatMap(topic ->
    // consumer.partitionsFor(topic).stream().map(partition ->
    // Maps.immutableEntry(new TopicPartition(topic, partition.partition()), firstStartTime.getTime())
    // )
    // ).collect(ImmutableMap.toImmutableMap(Entry::getKey, Entry::getValue));
    // assign ourselves to all partitions of the topics we want
    consumer.assign(topicParts.keySet());
    log.info("Subscribed to {} topics", topicParts.size());
    // Seek each topic to proper offset
    consumer.offsetsForTimes(topicParts).forEach((topic, offset) -> {
        if (offset == null) {
            log.info("No offset for {}, starting at the end", topic);
            consumer.seekToEnd(Collections.singletonList(topic));
            return;
        }
        consumer.seek(topic, offset.offset());
        log.info("Set topic {} to {}", topic, offset);
    });
    return fetch(firstStartTime);
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) PartitionInfo(org.apache.kafka.common.PartitionInfo)
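
offsetsForTimes returns, for each partition, the earliest offset whose timestamp is at or after the requested time, and null when no such record exists, which is why the method falls back to seekToEnd. A single-partition sketch of the same pattern (the topic name is illustrative):

TopicPartition tp = new TopicPartition("mediawiki.revision-create", 0);
consumer.assign(Collections.singletonList(tp));

Map<TopicPartition, Long> query = Collections.singletonMap(tp, firstStartTime.toEpochMilli());
OffsetAndTimestamp found = consumer.offsetsForTimes(query).get(tp);
if (found != null) {
    consumer.seek(tp, found.offset());                   // earliest record at or after the timestamp
} else {
    consumer.seekToEnd(Collections.singletonList(tp));   // no such record: start at the end
}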

Example 85 with TopicPartition

use of org.apache.kafka.common.TopicPartition in project wikidata-query-rdf by wikimedia.

the class KafkaPollerUnitTest method topicSubscribe.

@Test
public void topicSubscribe() throws RetryableException {
    Instant startTime = Instant.ofEpochMilli(BEGIN_DATE);
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    ImmutableList<PartitionInfo> twoParts = ImmutableList.of(makePartitionInfo(0), makePartitionInfo(1));
    ArgumentCaptor<String> partitionArgs = ArgumentCaptor.forClass(String.class);
    when(consumer.partitionsFor(partitionArgs.capture())).thenReturn(twoParts);
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that timestamps are OK
        map.forEach((k, v) -> assertThat(v, equalTo(BEGIN_DATE)));
        Map<TopicPartition, OffsetAndTimestamp> out = Maps.newHashMapWithExpectedSize(map.size());
        // Return offset 1000 for partition 0 and null for partition 1
        map.forEach((k, v) -> out.put(k, k.partition() == 0 ? new OffsetAndTimestamp(1000, v) : null));
        // Using forEach here because collect() can't handle nulls
        return out;
    });
    ArgumentCaptor<TopicPartition> seekArgs = ArgumentCaptor.forClass(TopicPartition.class);
    doNothing().when(consumer).seek(seekArgs.capture(), eq(1000L));
    ArgumentCaptor<Collection<TopicPartition>> seekBeginningArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).seekToEnd(seekBeginningArgs.capture());
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, startTime, BATCH_SIZE, topics);
    Batch batch = poller.firstBatch();
    // We get partitions for both topics
    verify(consumer, times(2)).partitionsFor(any());
    assertThat(partitionArgs.getAllValues(), contains("topictest", "othertopic"));
    // We assign to 4 partitions - 2 topics x 2 partitions
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue(), hasSize(4));
    // Calling seek on both topics, partition 0
    verify(consumer, times(2)).seek(any(), anyLong());
    assertThat(seekArgs.getAllValues().stream().map(p -> p.topic()).toArray(), arrayContainingInAnyOrder("topictest", "othertopic"));
    Collection<String> sTopics = seekArgs.getAllValues().stream().map(tp -> tp.topic()).collect(Collectors.toList());
    assertThat(sTopics, hasSize(2));
    assertThat(sTopics, containsInAnyOrder("topictest", "othertopic"));
    Collection<Integer> sPartitions = seekArgs.getAllValues().stream().map(tp -> tp.partition()).distinct().collect(Collectors.toList());
    assertThat(sPartitions, hasSize(1));
    assertThat(sPartitions, contains(0));
    // Calling seekToEnd on both topics, partition 1
    verify(consumer, times(2)).seekToEnd(any());
    Collection<String> sbTopics = seekBeginningArgs.getAllValues().stream().flatMap(c -> c.stream()).map(tp -> tp.topic()).collect(Collectors.toList());
    assertThat(sbTopics, hasSize(2));
    assertThat(sbTopics, contains("topictest", "othertopic"));
    Collection<Integer> sbPartitions = seekBeginningArgs.getAllValues().stream().flatMap(c -> c.stream()).map(tp -> tp.partition()).distinct().collect(Collectors.toList());
    assertThat(sbPartitions, hasSize(1));
    assertThat(sbPartitions, contains(1));
    verify(consumer, times(1)).offsetsForTimes(any());
}
Also used : Arrays(java.util.Arrays) Mock(org.mockito.Mock) ChangeEvent(org.wikidata.query.rdf.tool.change.events.ChangeEvent) Matchers.not(org.hamcrest.Matchers.not) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) RevisionCreateEvent(org.wikidata.query.rdf.tool.change.events.RevisionCreateEvent) Matchers.arrayContainingInAnyOrder(org.hamcrest.Matchers.arrayContainingInAnyOrder) Assert.assertThat(org.junit.Assert.assertThat) MockitoAnnotations(org.mockito.MockitoAnnotations) ChangeMatchers.hasRevision(org.wikidata.query.rdf.tool.change.ChangeMatchers.hasRevision) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) Matchers.eq(org.mockito.Matchers.eq) PageDeleteEvent(org.wikidata.query.rdf.tool.change.events.PageDeleteEvent) Map(java.util.Map) Matchers.anyLong(org.mockito.Matchers.anyLong) Matchers.hasSize(org.hamcrest.Matchers.hasSize) RetryableException(org.wikidata.query.rdf.tool.exception.RetryableException) TimestampType(org.apache.kafka.common.record.TimestampType) Uris(org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris) Before(org.junit.Before) TopicPartition(org.apache.kafka.common.TopicPartition) Collection(java.util.Collection) Mockito.times(org.mockito.Mockito.times) ChangeMatchers.hasTitle(org.wikidata.query.rdf.tool.change.ChangeMatchers.hasTitle) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) Mockito.doNothing(org.mockito.Mockito.doNothing) PartitionInfo(org.apache.kafka.common.PartitionInfo) Instant(java.time.Instant) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) Collectors(java.util.stream.Collectors) Maps(com.google.common.collect.Maps) Mockito.verify(org.mockito.Mockito.verify) Matchers.any(org.mockito.Matchers.any) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Matchers.contains(org.hamcrest.Matchers.contains) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Assert.assertFalse(org.junit.Assert.assertFalse) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) EventsMeta(org.wikidata.query.rdf.tool.change.events.EventsMeta) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Batch(org.wikidata.query.rdf.tool.change.KafkaPoller.Batch) Collections(java.util.Collections) ChangeMatchers.hasTitleRevision(org.wikidata.query.rdf.tool.change.ChangeMatchers.hasTitleRevision) Mockito.mock(org.mockito.Mockito.mock) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)
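
The test relies on a makePartitionInfo helper that is not shown in this excerpt; a plausible sketch (the topic name and node metadata are assumptions, and firstBatch() only reads the partition number anyway; Node is org.apache.kafka.common.Node):

private static PartitionInfo makePartitionInfo(int partition) {
    // leader and replica nodes are irrelevant to the poller under test
    return new PartitionInfo("topictest", partition, null, new Node[0], new Node[0]);
}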

Aggregations

TopicPartition (org.apache.kafka.common.TopicPartition)1729 HashMap (java.util.HashMap)744 Test (org.junit.Test)519 ArrayList (java.util.ArrayList)416 Map (java.util.Map)361 Test (org.junit.jupiter.api.Test)347 HashSet (java.util.HashSet)281 List (java.util.List)260 OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata)246 Set (java.util.Set)189 LinkedHashMap (java.util.LinkedHashMap)180 PartitionInfo (org.apache.kafka.common.PartitionInfo)170 ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord)155 TaskId (org.apache.kafka.streams.processor.TaskId)145 Node (org.apache.kafka.common.Node)140 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)109 KafkaException (org.apache.kafka.common.KafkaException)105 Errors (org.apache.kafka.common.protocol.Errors)105 ByteBuffer (java.nio.ByteBuffer)99 Properties (java.util.Properties)93