use of org.apache.kafka.common.TopicPartition in project apex-malhar by apache.
the class KafkaConsumerWrapper method emitImmediately.
/**
 * Replays, for every partition in {@code windowData}, the recorded range of messages and emits
 * them through the owner operator.
 *
 * <p>For each entry the consumer registered for the partition's cluster is looked up, every other
 * partition of that consumer is paused, the consumer is positioned at the recorded start offset,
 * and records are polled and emitted until the recorded number of messages has been emitted.
 * Afterwards the partition is positioned just past the replayed range. Once all entries have been
 * processed, every partition of every consumer is resumed.
 *
 * @param windowData map from partition to a pair whose left value is the offset to start
 *     replaying from and whose right value is the number of messages to replay
 * @throws RuntimeException if no consumer is registered for the partition's cluster, if the
 *     consumer does not contain the partition, or if polling fails with
 *     {@code NoOffsetForPartitionException}
 */
public void emitImmediately(Map<AbstractKafkaPartitioner.PartitionMeta, Pair<Long, Long>> windowData) {
    for (Map.Entry<AbstractKafkaPartitioner.PartitionMeta, Pair<Long, Long>> windowEntry : windowData.entrySet()) {
        AbstractKafkaPartitioner.PartitionMeta meta = windowEntry.getKey();
        Pair<Long, Long> replayOffsetSize = windowEntry.getValue();
        AbstractKafkaConsumer kc = consumers.get(meta.getCluster());
        // Fail when there is no consumer for the cluster OR the consumer does not own the partition.
        // (The previous "kc == null && kc...." form dereferenced null and never checked ownership.)
        if (kc == null || !kc.isConsumerContainsPartition(meta.getTopicPartition())) {
            throw new RuntimeException("Coundn't find consumer to replay the message PartitionMeta : " + meta);
        }
        // pause every partition except the one being replayed
        for (TopicPartition tp : kc.getPartitions()) {
            if (meta.getTopicPartition().equals(tp)) {
                kc.resumePartition(tp);
            } else {
                try {
                    kc.positionPartition(tp);
                } catch (NoOffsetForPartitionException e) {
                    // the poll() method of a consumer will throw exception
                    // if any of subscribed consumers not initialized with position
                    handleNoOffsetForPartitionException(e, kc);
                }
                kc.pausePartition(tp);
            }
        }
        // set the offset to window start offset
        kc.seekToOffset(meta.getTopicPartition(), replayOffsetSize.getLeft());
        long windowCount = replayOffsetSize.getRight();
        // NOTE(review): this loop keeps polling until windowCount records were emitted; it does not
        // terminate if the partition never yields that many records - confirm this is intended.
        while (windowCount > 0) {
            try {
                ConsumerRecords<byte[], byte[]> records = kc.pollRecords(ownerOperator.getConsumerTimeout());
                for (Iterator<ConsumerRecord<byte[], byte[]>> cri = records.iterator(); cri.hasNext() && windowCount > 0; ) {
                    ownerOperator.emitTuple(meta.getCluster(), cri.next());
                    windowCount--;
                }
            } catch (NoOffsetForPartitionException e) {
                throw new RuntimeException("Couldn't replay the offset", e);
            }
        }
        // set the offset after window
        kc.seekToOffset(meta.getTopicPartition(), replayOffsetSize.getLeft() + replayOffsetSize.getRight());
    }
    // resume all topics
    for (AbstractKafkaConsumer kc : consumers.values()) {
        kc.resumeAllPartitions();
    }
}
use of org.apache.kafka.common.TopicPartition in project apex-malhar by apache.
the class KafkaConsumerWrapper method start.
/**
 * This method is called in the activate method of the operator.
 *
 * <p>Marks the wrapper as alive, groups the operator's assigned partitions by cluster, and for
 * each cluster creates one consumer, assigns it that cluster's partitions, seeks to any tracked
 * offsets, registers it in {@code consumers} and submits a {@code ConsumerThread} for it.
 *
 * @param waitForReplay value stored in this wrapper's {@code waitForReplay} field
 */
public void start(boolean waitForReplay) {
    this.waitForReplay = waitForReplay;
    isAlive.set(true);

    // thread pool that runs one consumer task per cluster
    kafkaConsumerExecutor = Executors.newCachedThreadPool(
        new ThreadFactoryBuilder().setNameFormat("kafka-consumer-%d").build());

    // bucket the assigned partitions by the cluster they belong to
    Map<String, List<TopicPartition>> partitionsByCluster = new HashMap<>();
    Set<AbstractKafkaPartitioner.PartitionMeta> assignedMetas = ownerOperator.assignment();
    for (AbstractKafkaPartitioner.PartitionMeta assigned : assignedMetas) {
        String clusterName = assigned.getCluster();
        List<TopicPartition> bucket = partitionsByCluster.get(clusterName);
        if (bucket == null) {
            bucket = new LinkedList<>();
            partitionsByCluster.put(clusterName, bucket);
        }
        bucket.add(new TopicPartition(assigned.getTopic(), assigned.getPartitionId()));
    }

    Map<AbstractKafkaPartitioner.PartitionMeta, Long> trackedOffsets = ownerOperator.getOffsetTrack();

    // one consumer (and one task) per cluster, covering 1+ partition(s) of 1+ topic(s)
    for (Map.Entry<String, List<TopicPartition>> clusterEntry : partitionsByCluster.entrySet()) {
        String cluster = clusterEntry.getKey();
        List<TopicPartition> clusterPartitions = clusterEntry.getValue();

        Properties consumerProps = new Properties();
        if (ownerOperator.getConsumerProps() != null) {
            consumerProps.putAll(ownerOperator.getConsumerProps());
        }
        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster);
        consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
        // offsets are never committed automatically
        consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());

        AbstractKafkaInputOperator.InitialOffset initialOffset =
            AbstractKafkaInputOperator.InitialOffset.valueOf(ownerOperator.getInitialOffset());
        boolean applicationScoped =
            initialOffset == AbstractKafkaInputOperator.InitialOffset.APPLICATION_OR_EARLIEST
            || initialOffset == AbstractKafkaInputOperator.InitialOffset.APPLICATION_OR_LATEST;
        if (applicationScoped) {
            // application-scoped initial offsets use a group id derived from the application name
            consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, ownerOperator.getApplicationName() + "_Consumer");
        }

        AbstractKafkaConsumer clusterConsumer = ownerOperator.createConsumer(consumerProps);
        clusterConsumer.assignPartitions(clusterPartitions);
        if (logger.isInfoEnabled()) {
            logger.info("Create consumer with properties {} ", Joiner.on(";").withKeyValueSeparator("=").join(consumerProps));
            logger.info("Assign consumer to {}", Joiner.on('#').join(clusterPartitions));
        }

        // position the consumer at the tracked offsets, when any were recorded
        boolean hasTrackedOffsets = trackedOffsets != null && !trackedOffsets.isEmpty();
        if (hasTrackedOffsets) {
            for (TopicPartition partition : clusterPartitions) {
                AbstractKafkaPartitioner.PartitionMeta lookupKey =
                    new AbstractKafkaPartitioner.PartitionMeta(cluster, partition.topic(), partition.partition());
                if (trackedOffsets.containsKey(lookupKey)) {
                    clusterConsumer.seekToOffset(partition, trackedOffsets.get(lookupKey));
                }
            }
        }

        consumers.put(cluster, clusterConsumer);
        Future<?> consumerTask = kafkaConsumerExecutor.submit(new ConsumerThread(cluster, clusterConsumer, this));
        kafkaConsumerThreads.add(consumerTask);
    }
}
use of org.apache.kafka.common.TopicPartition in project eventuate-local by eventuate-local.
the class OffsetTracker method offsetsToCommit.
/**
 * Builds the offsets to commit from the tracked per-partition state.
 *
 * <p>Partitions whose state reports no offset to commit are left out. For the others the
 * committed value is the reported offset plus one, with empty metadata.
 *
 * @return a new map from partition to the offset-and-metadata to commit
 */
public Map<TopicPartition, OffsetAndMetadata> offsetsToCommit() {
    Map<TopicPartition, OffsetAndMetadata> commits = new HashMap<>();
    state.forEach((partition, offsets) ->
        offsets.offsetToCommit()
            .map(lastOffset -> new OffsetAndMetadata(lastOffset + 1, ""))
            .ifPresent(toCommit -> commits.put(partition, toCommit)));
    return commits;
}
use of org.apache.kafka.common.TopicPartition in project wikidata-query-rdf by wikimedia.
the class KafkaPoller method firstBatch.
/**
 * Assigns the consumer to every partition of the configured topics, positions each partition
 * according to {@code firstStartTime}, and fetches the first batch.
 *
 * <p>Partitions for which the consumer reports no offset at that timestamp are positioned at
 * their end.
 */
@Override
public Batch firstBatch() throws RetryableException {
    // (topic, partition) -> start timestamp in epoch millis
    Map<TopicPartition, Long> startTimes = newHashMapWithExpectedSize(topics.size());
    for (String topicName : topics) {
        for (PartitionInfo info : consumer.partitionsFor(topicName)) {
            TopicPartition topicPartition = new TopicPartition(topicName, info.partition());
            startTimes.put(topicPartition, firstStartTime.toEpochMilli());
        }
    }

    // take every partition of the wanted topics for ourselves
    consumer.assign(startTimes.keySet());
    log.info("Subscribed to {} topics", startTimes.size());

    // position each partition at the offset matching the start time
    consumer.offsetsForTimes(startTimes).forEach((topicPartition, found) -> {
        if (found != null) {
            consumer.seek(topicPartition, found.offset());
            log.info("Set topic {} to {}", topicPartition, found);
        } else {
            log.info("No offset for {}, starting at the end", topicPartition);
            consumer.seekToEnd(Collections.singletonList(topicPartition));
        }
    });

    return fetch(firstStartTime);
}
use of org.apache.kafka.common.TopicPartition in project wikidata-query-rdf by wikimedia.
the class KafkaPollerUnitTest method topicSubscribe.
/**
 * Checks how {@code firstBatch()} drives the consumer for two topics with two partitions each:
 * one assign call covering four topic-partitions, a seek for partition 0 of each topic (an offset
 * is returned for it), and a seekToEnd for partition 1 of each topic (no offset is returned).
 */
@Test
public void topicSubscribe() throws RetryableException {
    Instant startTime = Instant.ofEpochMilli(BEGIN_DATE);
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    ImmutableList<PartitionInfo> twoParts = ImmutableList.of(makePartitionInfo(0), makePartitionInfo(1));

    ArgumentCaptor<String> partitionsForCaptor = ArgumentCaptor.forClass(String.class);
    when(consumer.partitionsFor(partitionsForCaptor.capture())).thenReturn(twoParts);

    ArgumentCaptor<Collection<TopicPartition>> assignCaptor = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignCaptor.capture());

    when(consumer.offsetsForTimes(any())).thenAnswer(invocation -> {
        Map<TopicPartition, Long> requested = invocation.getArgumentAt(0, Map.class);
        // Every requested timestamp must be the start date
        requested.forEach((tp, timestamp) -> assertThat(timestamp, equalTo(BEGIN_DATE)));
        Map<TopicPartition, OffsetAndTimestamp> answer = Maps.newHashMapWithExpectedSize(requested.size());
        // Offset 1000 for partition 0 and nothing for any other partition.
        // Filled with forEach because collect() can't handle nulls.
        requested.forEach((tp, timestamp) ->
            answer.put(tp, tp.partition() == 0 ? new OffsetAndTimestamp(1000, timestamp) : null));
        return answer;
    });

    ArgumentCaptor<TopicPartition> seekCaptor = ArgumentCaptor.forClass(TopicPartition.class);
    doNothing().when(consumer).seek(seekCaptor.capture(), eq(1000L));

    ArgumentCaptor<Collection<TopicPartition>> seekToEndCaptor = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).seekToEnd(seekToEndCaptor.capture());

    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);

    KafkaPoller poller = new KafkaPoller(consumer, uris, startTime, BATCH_SIZE, topics);
    Batch batch = poller.firstBatch();

    // Partitions are requested for both topics
    verify(consumer, times(2)).partitionsFor(any());
    assertThat(partitionsForCaptor.getAllValues(), contains("topictest", "othertopic"));

    // A single assign covering 4 topic-partitions - 2 topics x 2 partitions
    verify(consumer, times(1)).assign(any());
    assertThat(assignCaptor.getValue(), hasSize(4));

    // seek is called for both topics, partition 0 only
    verify(consumer, times(2)).seek(any(), anyLong());
    assertThat(seekCaptor.getAllValues().stream().map(TopicPartition::topic).toArray(),
        arrayContainingInAnyOrder("topictest", "othertopic"));
    Collection<String> seekTopics = seekCaptor.getAllValues().stream()
        .map(TopicPartition::topic)
        .collect(Collectors.toList());
    assertThat(seekTopics, hasSize(2));
    assertThat(seekTopics, containsInAnyOrder("topictest", "othertopic"));
    Collection<Integer> seekPartitions = seekCaptor.getAllValues().stream()
        .map(TopicPartition::partition)
        .distinct()
        .collect(Collectors.toList());
    assertThat(seekPartitions, hasSize(1));
    assertThat(seekPartitions, contains(0));

    // seekToEnd is called for both topics, partition 1 only
    verify(consumer, times(2)).seekToEnd(any());
    Collection<String> seekToEndTopics = seekToEndCaptor.getAllValues().stream()
        .flatMap(Collection::stream)
        .map(TopicPartition::topic)
        .collect(Collectors.toList());
    assertThat(seekToEndTopics, hasSize(2));
    assertThat(seekToEndTopics, contains("topictest", "othertopic"));
    Collection<Integer> seekToEndPartitions = seekToEndCaptor.getAllValues().stream()
        .flatMap(Collection::stream)
        .map(TopicPartition::partition)
        .distinct()
        .collect(Collectors.toList());
    assertThat(seekToEndPartitions, hasSize(1));
    assertThat(seekToEndPartitions, contains(1));

    verify(consumer, times(1)).offsetsForTimes(any());
}
Aggregations