use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project flink by apache.
the class KafkaDynamicSource method createKafkaSource.
// --------------------------------------------------------------------------------------------
/**
 * Assembles the {@link KafkaSource} from this table source's settings.
 *
 * <p>The source subscribes to {@code topics} when that field is set and to {@code topicPattern}
 * otherwise. Starting offsets are chosen from {@code startupMode}; if the mode matches none of
 * the handled cases, no starting offsets are set on the builder.
 *
 * @param keyDeserialization deserializer handed to {@code createKafkaDeserializationSchema} for the key
 * @param valueDeserialization deserializer handed to {@code createKafkaDeserializationSchema} for the value
 * @param producedTypeInfo type information handed to {@code createKafkaDeserializationSchema}
 * @return the source produced by the configured builder
 */
protected KafkaSource<RowData> createKafkaSource(
        DeserializationSchema<RowData> keyDeserialization,
        DeserializationSchema<RowData> valueDeserialization,
        TypeInformation<RowData> producedTypeInfo) {
    final KafkaDeserializationSchema<RowData> rowDeserializer =
            createKafkaDeserializationSchema(
                    keyDeserialization, valueDeserialization, producedTypeInfo);
    final KafkaSourceBuilder<RowData> builder = KafkaSource.builder();

    // Subscription: an explicit topic list wins, otherwise fall back to the pattern.
    if (topics == null) {
        builder.setTopicPattern(topicPattern);
    } else {
        builder.setTopics(topics);
    }

    // Starting position of the source.
    switch (startupMode) {
        case EARLIEST: {
            builder.setStartingOffsets(OffsetsInitializer.earliest());
            break;
        }
        case LATEST: {
            builder.setStartingOffsets(OffsetsInitializer.latest());
            break;
        }
        case GROUP_OFFSETS: {
            // The reset strategy comes from the consumer properties and defaults to NONE.
            final String configuredReset =
                    properties.getProperty(
                            ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
                            OffsetResetStrategy.NONE.name());
            final OffsetResetStrategy fallback = getResetStrategy(configuredReset);
            builder.setStartingOffsets(OffsetsInitializer.committedOffsets(fallback));
            break;
        }
        case SPECIFIC_OFFSETS: {
            // Re-key the configured offsets by Kafka's own TopicPartition type.
            final Map<TopicPartition, Long> offsetsByPartition = new HashMap<>();
            specificStartupOffsets.forEach(
                    (partition, startOffset) ->
                            offsetsByPartition.put(
                                    new TopicPartition(
                                            partition.getTopic(), partition.getPartition()),
                                    startOffset));
            builder.setStartingOffsets(OffsetsInitializer.offsets(offsetsByPartition));
            break;
        }
        case TIMESTAMP: {
            builder.setStartingOffsets(OffsetsInitializer.timestamp(startupTimestampMillis));
            break;
        }
    }

    builder
            .setProperties(properties)
            .setDeserializer(KafkaRecordDeserializationSchema.of(rowDeserializer));
    return builder.build();
}
use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project beam by apache.
the class KafkaIOTest method mkMockConsumer.
/**
 * Creates a {@link MockConsumer} preloaded with synthetic records distributed among the given
 * topics, each with the given number of partitions. Records are assigned in round-robin order
 * among the partitions.
 *
 * <p>A self-rescheduling poll task re-enqueues, on every poll, all records of the assigned
 * partitions whose offset is at or beyond the partition's current position.
 *
 * @param topics topics to create partitions for
 * @param partitionsPerTopic number of partitions created for each topic
 * @param numElements total number of records to generate
 * @param offsetResetStrategy reset strategy passed to the {@link MockConsumer} constructor
 * @param config read for {@code TIMESTAMP_START_MILLIS_CONFIG}, {@code TIMESTAMP_TYPE_CONFIG}
 *     and {@code "inject.error.at.eof"}
 * @param keyFunction produces the key bytes for the i-th record
 * @param valueFunction produces the value bytes for the i-th record
 * @return the configured mock consumer
 */
private static MockConsumer<byte[], byte[]> mkMockConsumer(
        List<String> topics,
        int partitionsPerTopic,
        int numElements,
        OffsetResetStrategy offsetResetStrategy,
        Map<String, Object> config,
        SerializableFunction<Integer, byte[]> keyFunction,
        SerializableFunction<Integer, byte[]> valueFunction) {
    final List<TopicPartition> partitions = new ArrayList<>();
    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> records = new HashMap<>();
    Map<String, List<PartitionInfo>> partitionMap = new HashMap<>();

    for (String topic : topics) {
        List<PartitionInfo> partIds = new ArrayList<>(partitionsPerTopic);
        for (int i = 0; i < partitionsPerTopic; i++) {
            TopicPartition tp = new TopicPartition(topic, i);
            partitions.add(tp);
            partIds.add(new PartitionInfo(topic, i, null, null, null));
            records.put(tp, new ArrayList<>());
        }
        partitionMap.put(topic, partIds);
    }

    int numPartitions = partitions.size();
    // offsets[p] is the next offset to hand out for partition p, i.e. its record count so far.
    final long[] offsets = new long[numPartitions];
    long timestampStartMillis =
            (Long)
                    config.getOrDefault(
                            TIMESTAMP_START_MILLIS_CONFIG, LOG_APPEND_START_TIME.getMillis());
    TimestampType timestampType =
            TimestampType.forName(
                    (String)
                            config.getOrDefault(
                                    TIMESTAMP_TYPE_CONFIG,
                                    TimestampType.LOG_APPEND_TIME.toString()));

    // Round-robin the records over all partitions; record i is stamped i seconds after the start.
    for (int i = 0; i < numElements; i++) {
        int pIdx = i % numPartitions;
        TopicPartition tp = partitions.get(pIdx);
        byte[] key = keyFunction.apply(i);
        byte[] value = valueFunction.apply(i);
        records
                .get(tp)
                .add(
                        new ConsumerRecord<>(
                                tp.topic(),
                                tp.partition(),
                                offsets[pIdx]++,
                                timestampStartMillis + Duration.standardSeconds(i).getMillis(),
                                timestampType,
                                0,
                                key.length,
                                value.length,
                                key,
                                value));
    }

    // This is updated when reader assigns partitions.
    final AtomicReference<List<TopicPartition>> assignedPartitions =
            new AtomicReference<>(Collections.<TopicPartition>emptyList());

    final MockConsumer<byte[], byte[]> consumer =
            new MockConsumer<byte[], byte[]>(offsetResetStrategy) {
                @Override
                public synchronized void assign(final Collection<TopicPartition> assigned) {
                    super.assign(assigned);
                    assignedPartitions.set(ImmutableList.copyOf(assigned));
                    for (TopicPartition tp : assigned) {
                        updateBeginningOffsets(ImmutableMap.of(tp, 0L));
                        updateEndOffsets(ImmutableMap.of(tp, (long) records.get(tp).size()));
                    }
                }

                // Override offsetsForTimes() in order to look up the offsets by timestamp.
                @Override
                public synchronized Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes(
                        Map<TopicPartition, Long> timestampsToSearch) {
                    // The result is built with put() rather than Collectors.toMap(): a partition
                    // with no matching offset maps to null, and toMap() throws a
                    // NullPointerException on null values.
                    Map<TopicPartition, OffsetAndTimestamp> result = new HashMap<>();
                    for (Map.Entry<TopicPartition, Long> e : timestampsToSearch.entrySet()) {
                        // In test scope, timestamp == offset.
                        long maxOffset = offsets[partitions.indexOf(e.getKey())];
                        long offset = e.getValue();
                        OffsetAndTimestamp value =
                                (offset >= maxOffset)
                                        ? null
                                        : new OffsetAndTimestamp(offset, offset);
                        result.put(e.getKey(), value);
                    }
                    return result;
                }
            };

    for (String topic : topics) {
        consumer.updatePartitions(topic, partitionMap.get(topic));
    }

    // MockConsumer does not maintain any relationship between partition seek position and the
    // records added. e.g. if we add 10 records to a partition and then seek to end of the
    // partition, MockConsumer is still going to return the 10 records in next poll. It is
    // our responsibility to make sure currently enqueued records sync with partition offsets.
    // The following task will be called inside each invocation to MockConsumer.poll().
    // We enqueue only the records with the offset >= partition's current position.
    Runnable recordEnqueueTask =
            new Runnable() {
                @Override
                public void run() {
                    // add all the records with offset >= current partition position.
                    int recordsAdded = 0;
                    for (TopicPartition tp : assignedPartitions.get()) {
                        long curPos = consumer.position(tp);
                        for (ConsumerRecord<byte[], byte[]> r : records.get(tp)) {
                            if (r.offset() >= curPos) {
                                consumer.addRecord(r);
                                recordsAdded++;
                            }
                        }
                    }
                    if (recordsAdded == 0) {
                        if (config.get("inject.error.at.eof") != null) {
                            consumer.setException(
                                    new KafkaException("Injected error in consumer.poll()"));
                        }
                        // MockConsumer.poll(timeout) does not actually wait even when there
                        // aren't any records.
                        // Add a small wait here in order to avoid busy looping in the reader.
                        Uninterruptibles.sleepUninterruptibly(10, TimeUnit.MILLISECONDS);
                        // TODO: BEAM-4086: testUnboundedSourceWithoutBoundedWrapper() occasionally
                        // hangs without this wait. Need to look into it.
                    }
                    // Re-arm the task so that it runs again on the next poll.
                    consumer.schedulePollTask(this);
                }
            };
    consumer.schedulePollTask(recordEnqueueTask);
    return consumer;
}
use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project beam by apache.
the class KafkaTestTable method mkMockConsumer.
/**
 * Creates a {@link MockConsumer} preloaded with this table's test records.
 *
 * <p>Every topic returned by {@code getTopics()} gets {@code partitionsPerTopic} partitions.
 * Each record is placed in the partition selected by its key's hash code, and its offset is its
 * position within that partition. A self-rescheduling poll task re-enqueues, on every poll, all
 * records of the assigned partitions whose offset is at or beyond the current position.
 *
 * @param config read for {@code TIMESTAMP_TYPE_CONFIG} and {@code "inject.error.at.eof"}
 * @return the configured mock consumer, created with the EARLIEST offset reset strategy
 */
private MockConsumer<byte[], byte[]> mkMockConsumer(Map<String, Object> config) {
    OffsetResetStrategy offsetResetStrategy = OffsetResetStrategy.EARLIEST;
    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> kafkaRecords =
            new HashMap<>();
    Map<String, List<PartitionInfo>> partitionInfoMap = new HashMap<>();
    Map<String, List<TopicPartition>> partitionMap = new HashMap<>();

    // Create topic partitions.
    for (String topic : this.getTopics()) {
        List<PartitionInfo> partIds = new ArrayList<>(partitionsPerTopic);
        List<TopicPartition> topicPartitions = new ArrayList<>(partitionsPerTopic);
        for (int i = 0; i < partitionsPerTopic; i++) {
            TopicPartition tp = new TopicPartition(topic, i);
            topicPartitions.add(tp);
            partIds.add(new PartitionInfo(topic, i, null, null, null));
            kafkaRecords.put(tp, new ArrayList<>());
        }
        partitionInfoMap.put(topic, partIds);
        partitionMap.put(topic, topicPartitions);
    }

    TimestampType timestampType =
            TimestampType.forName(
                    (String)
                            config.getOrDefault(
                                    TIMESTAMP_TYPE_CONFIG,
                                    TimestampType.LOG_APPEND_TIME.toString()));

    for (KafkaTestRecord record : this.records) {
        // floorMod keeps the index in [0, partitionsPerTopic) even when hashCode() is negative;
        // the plain % operator would yield a negative index and fail the list lookup below.
        int partitionIndex = Math.floorMod(record.getKey().hashCode(), partitionsPerTopic);
        TopicPartition tp = partitionMap.get(record.getTopic()).get(partitionIndex);
        byte[] key = record.getKey().getBytes(UTF_8);
        byte[] value = record.getValue().toByteArray();
        kafkaRecords
                .get(tp)
                .add(
                        new ConsumerRecord<>(
                                tp.topic(),
                                tp.partition(),
                                kafkaRecords.get(tp).size(),
                                record.getTimeStamp(),
                                timestampType,
                                0,
                                key.length,
                                value.length,
                                key,
                                value));
    }

    // This is updated when reader assigns partitions.
    final AtomicReference<List<TopicPartition>> assignedPartitions =
            new AtomicReference<>(Collections.emptyList());

    final MockConsumer<byte[], byte[]> consumer =
            new MockConsumer<byte[], byte[]>(offsetResetStrategy) {
                @Override
                public synchronized void assign(final Collection<TopicPartition> assigned) {
                    // Swap each requested partition for the instance created above for the same
                    // topic and partition number.
                    Collection<TopicPartition> realPartitions =
                            assigned.stream()
                                    .map(
                                            part ->
                                                    partitionMap
                                                            .get(part.topic())
                                                            .get(part.partition()))
                                    .collect(Collectors.toList());
                    super.assign(realPartitions);
                    assignedPartitions.set(ImmutableList.copyOf(realPartitions));
                    for (TopicPartition tp : realPartitions) {
                        updateBeginningOffsets(ImmutableMap.of(tp, 0L));
                        updateEndOffsets(
                                ImmutableMap.of(tp, (long) kafkaRecords.get(tp).size()));
                    }
                }

                // Override offsetsForTimes() in order to look up the offsets by timestamp.
                @Override
                public synchronized Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes(
                        Map<TopicPartition, Long> timestampsToSearch) {
                    // The result is built with put() rather than Collectors.toMap(): a partition
                    // with no matching offset maps to null, and toMap() throws a
                    // NullPointerException on null values.
                    Map<TopicPartition, OffsetAndTimestamp> result = new HashMap<>();
                    for (Map.Entry<TopicPartition, Long> e : timestampsToSearch.entrySet()) {
                        // The requested timestamp is treated as an offset here.
                        long maxOffset = kafkaRecords.get(e.getKey()).size();
                        long offset = e.getValue();
                        OffsetAndTimestamp value =
                                (offset >= maxOffset)
                                        ? null
                                        : new OffsetAndTimestamp(offset, offset);
                        result.put(e.getKey(), value);
                    }
                    return result;
                }
            };

    for (String topic : getTopics()) {
        consumer.updatePartitions(topic, partitionInfoMap.get(topic));
    }

    Runnable recordEnqueueTask =
            new Runnable() {
                @Override
                public void run() {
                    // add all the records with offset >= current partition position.
                    int recordsAdded = 0;
                    for (TopicPartition tp : assignedPartitions.get()) {
                        long curPos = consumer.position(tp);
                        for (ConsumerRecord<byte[], byte[]> r : kafkaRecords.get(tp)) {
                            if (r.offset() >= curPos) {
                                consumer.addRecord(r);
                                recordsAdded++;
                            }
                        }
                    }
                    if (recordsAdded == 0) {
                        if (config.get("inject.error.at.eof") != null) {
                            consumer.setException(
                                    new KafkaException("Injected error in consumer.poll()"));
                        }
                        // MockConsumer.poll(timeout) does not actually wait even when there
                        // aren't any records.
                        // Add a small wait here in order to avoid busy looping in the reader.
                        Uninterruptibles.sleepUninterruptibly(10, TimeUnit.MILLISECONDS);
                    }
                    // Re-arm the task so that it runs again on the next poll.
                    consumer.schedulePollTask(this);
                }
            };
    consumer.schedulePollTask(recordEnqueueTask);
    return consumer;
}
use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project kafka by apache.
the class Fetcher method resetOffset.
/**
 * Moves the given partition to the offset selected by its offset reset strategy.
 *
 * @param partition the partition whose offset has to be reset
 * @throws org.apache.kafka.clients.consumer.NoOffsetForPartitionException if the strategy is
 *     neither EARLIEST nor LATEST, or if the offset lookup returns nothing for the partition
 */
private void resetOffset(TopicPartition partition) {
    final OffsetResetStrategy resetStrategy = subscriptions.resetStrategy(partition);
    log.debug("Resetting offset for partition {} to {} offset.", partition, resetStrategy.name().toLowerCase(Locale.ROOT));

    // Translate the strategy into the sentinel timestamp used for the offset lookup.
    final long lookupTimestamp;
    switch (resetStrategy) {
        case EARLIEST:
            lookupTimestamp = ListOffsetRequest.EARLIEST_TIMESTAMP;
            break;
        case LATEST:
            lookupTimestamp = ListOffsetRequest.LATEST_TIMESTAMP;
            break;
        default:
            throw new NoOffsetForPartitionException(partition);
    }

    final Map<TopicPartition, OffsetData> lookupResult =
            retrieveOffsetsByTimes(Collections.singletonMap(partition, lookupTimestamp), Long.MAX_VALUE, false);
    final OffsetData resolved = lookupResult.get(partition);
    if (resolved == null) {
        throw new NoOffsetForPartitionException(partition);
    }
    final long newPosition = resolved.offset;

    // The assignment may have been lost while the lookup was in flight; only seek if the
    // partition is still assigned.
    if (!subscriptions.isAssigned(partition)) {
        return;
    }
    this.subscriptions.seek(partition, newPosition);
}
use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project kafka by apache.
the class FetcherTest method testOffsetValidationWithGivenEpochOffset.
/**
 * Runs one offset-validation round for {@code tp0} and checks the outcome for the given reset
 * strategy.
 *
 * <p>The partition is seeked, unvalidated, to offset 5 at epoch 1, and the pending
 * OffsetsForLeaderEpoch request is answered with {@code leaderEpoch} and {@code endOffset}.
 * With {@link OffsetResetStrategy#NONE} the next validation call must throw a
 * {@link LogTruncationException} and leave the partition awaiting validation; with any other
 * strategy the call must succeed and the partition must no longer await validation.
 *
 * @param leaderEpoch leader epoch placed in the OffsetsForLeaderEpoch response
 * @param endOffset end offset placed in the OffsetsForLeaderEpoch response
 * @param offsetResetStrategy reset strategy the fetcher is built with
 */
private void testOffsetValidationWithGivenEpochOffset(int leaderEpoch, long endOffset, OffsetResetStrategy offsetResetStrategy) {
    final int knownEpoch = 1;
    final long startPosition = 5;

    buildFetcher(offsetResetStrategy);
    assignFromUser(singleton(tp0));

    Map<String, Integer> partitionsPerTopic = new HashMap<>();
    partitionsPerTopic.put(tp0.topic(), 4);
    metadata.updateWithCurrentRequestVersion(
            RequestTestUtils.metadataUpdateWithIds(
                    "dummy", 1, Collections.emptyMap(), partitionsPerTopic, ignored -> knownEpoch, topicIds),
            false,
            0L);

    // Offset validation requires OffsetForLeaderEpoch request v3 or higher
    Node broker = metadata.fetch().nodes().get(0);
    apiVersions.update(broker.idString(), NodeApiVersions.create());

    // Seek to a position that still has to be validated against the current leader.
    Metadata.LeaderAndEpoch currentLeader =
            new Metadata.LeaderAndEpoch(metadata.currentLeader(tp0).leader, Optional.of(knownEpoch));
    SubscriptionState.FetchPosition unvalidatedPosition =
            new SubscriptionState.FetchPosition(startPosition, Optional.of(knownEpoch), currentLeader);
    subscriptions.seekUnvalidated(tp0, unvalidatedPosition);

    // Send the validation request and confirm that it is in flight.
    fetcher.validateOffsetsIfNeeded();
    consumerClient.poll(time.timer(Duration.ZERO));
    assertTrue(subscriptions.awaitingValidation(tp0));
    assertTrue(client.hasInFlightRequests());

    // Answer the request with the epoch and end offset under test.
    client.respond(
            offsetsForLeaderEpochRequestMatcher(tp0, knownEpoch, knownEpoch),
            prepareOffsetsForLeaderEpochResponse(tp0, Errors.NONE, leaderEpoch, endOffset));
    consumerClient.poll(time.timer(Duration.ZERO));

    if (offsetResetStrategy != OffsetResetStrategy.NONE) {
        fetcher.validateOffsetsIfNeeded();
        assertFalse(subscriptions.awaitingValidation(tp0));
        return;
    }

    // Without a reset strategy the truncation is reported to the caller.
    LogTruncationException truncation =
            assertThrows(LogTruncationException.class, () -> fetcher.validateOffsetsIfNeeded());
    assertEquals(singletonMap(tp0, startPosition), truncation.offsetOutOfRangePartitions());

    boolean undefinedResponse = endOffset == UNDEFINED_EPOCH_OFFSET || leaderEpoch == UNDEFINED_EPOCH;
    if (undefinedResponse) {
        assertEquals(Collections.emptyMap(), truncation.divergentOffsets());
    } else {
        OffsetAndMetadata divergent = new OffsetAndMetadata(endOffset, Optional.of(leaderEpoch), "");
        assertEquals(singletonMap(tp0, divergent), truncation.divergentOffsets());
    }
    assertTrue(subscriptions.awaitingValidation(tp0));
}
Aggregations