use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.
the class KafkaSourceReader method snapshotState.
@Override
public List<KafkaPartitionSplit> snapshotState(long checkpointId) {
    List<KafkaPartitionSplit> splits = super.snapshotState(checkpointId);
    if (!commitOffsetsOnCheckpoint) {
        return splits;
    }
    if (splits.isEmpty() && offsetsOfFinishedSplits.isEmpty()) {
        offsetsToCommit.put(checkpointId, Collections.emptyMap());
    } else {
        Map<TopicPartition, OffsetAndMetadata> offsetsMap =
                offsetsToCommit.computeIfAbsent(checkpointId, id -> new HashMap<>());
        // Put the offsets of the active splits.
        for (KafkaPartitionSplit split : splits) {
            // If the checkpoint is triggered before the partition starting offset
            // is retrieved, do not commit the offsets for those partitions.
            if (split.getStartingOffset() >= 0) {
                offsetsMap.put(
                        split.getTopicPartition(),
                        new OffsetAndMetadata(split.getStartingOffset()));
            }
        }
        // Put offsets of all the finished splits.
        offsetsMap.putAll(offsetsOfFinishedSplits);
    }
    return splits;
}
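The map populated here is only half of the story: the recorded offsets are committed once the checkpoint is confirmed. Below is a minimal, hypothetical sketch of that counterpart step, assuming an offsetsToCommit field of type SortedMap<Long, Map<TopicPartition, OffsetAndMetadata>> and a plain KafkaConsumer for the commit call (the actual reader hands the commit off to its split fetcher manager instead).
// Hypothetical counterpart: commit the offsets recorded for a finished checkpoint
// and prune the bookkeeping for that checkpoint and all earlier ones.
public void notifyCheckpointComplete(long checkpointId) {
    Map<TopicPartition, OffsetAndMetadata> offsets = offsetsToCommit.get(checkpointId);
    if (offsets != null && !offsets.isEmpty()) {
        consumer.commitAsync(
                offsets,
                (committed, exception) -> {
                    if (exception != null) {
                        LOG.warn("Offset commit for checkpoint {} failed", checkpointId, exception);
                    }
                });
    }
    // Older checkpoints are subsumed by this one, so their entries can be dropped.
    offsetsToCommit.headMap(checkpointId + 1).clear();
}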
use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.
the class UpsertKafkaDynamicTableFactoryTest method assertKafkaSource.
private void assertKafkaSource(ScanTableSource.ScanRuntimeProvider provider) {
    assertThat(provider, instanceOf(DataStreamScanProvider.class));
    final DataStreamScanProvider dataStreamScanProvider = (DataStreamScanProvider) provider;
    final Transformation<RowData> transformation =
            dataStreamScanProvider
                    .produceDataStream(
                            n -> Optional.empty(),
                            StreamExecutionEnvironment.createLocalEnvironment())
                    .getTransformation();
    assertThat(transformation, instanceOf(SourceTransformation.class));
    SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState> sourceTransformation =
            (SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState>) transformation;
    assertThat(sourceTransformation.getSource(), instanceOf(KafkaSource.class));
}
use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.
the class KafkaDynamicTableFactoryTest method assertKafkaSource.
private KafkaSource<?> assertKafkaSource(ScanTableSource.ScanRuntimeProvider provider) {
    assertThat(provider).isInstanceOf(DataStreamScanProvider.class);
    final DataStreamScanProvider dataStreamScanProvider = (DataStreamScanProvider) provider;
    final Transformation<RowData> transformation =
            dataStreamScanProvider
                    .produceDataStream(
                            n -> Optional.empty(),
                            StreamExecutionEnvironment.createLocalEnvironment())
                    .getTransformation();
    assertThat(transformation).isInstanceOf(SourceTransformation.class);
    SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState> sourceTransformation =
            (SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState>) transformation;
    assertThat(sourceTransformation.getSource()).isInstanceOf(KafkaSource.class);
    return (KafkaSource<?>) sourceTransformation.getSource();
}
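For context, here is a hedged sketch of how a test might call this helper. The names createTableSource, SCHEMA, and getBasicSourceOptions are placeholders for whatever utilities the test class actually uses to build the table source; ScanRuntimeProviderContext.INSTANCE is the usual context passed to getScanRuntimeProvider.
// Hypothetical call site for the helper above; the factory utilities are placeholders.
final DynamicTableSource tableSource = createTableSource(SCHEMA, getBasicSourceOptions());
final ScanTableSource.ScanRuntimeProvider provider =
        ((ScanTableSource) tableSource).getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
final KafkaSource<?> kafkaSource = assertKafkaSource(provider);
assertThat(kafkaSource.getBoundedness()).isEqualTo(Boundedness.CONTINUOUS_UNBOUNDED);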
use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.
the class KafkaSourceEnumStateSerializer method deserialize.
@Override
public KafkaSourceEnumState deserialize(int version, byte[] serialized) throws IOException {
    if (version == CURRENT_VERSION) {
        final Set<TopicPartition> assignedPartitions = deserializeTopicPartitions(serialized);
        return new KafkaSourceEnumState(assignedPartitions);
    }
    // Backward compatibility
    if (version == VERSION_0) {
        Map<Integer, Set<KafkaPartitionSplit>> currentPartitionAssignment =
                SerdeUtils.deserializeSplitAssignments(
                        serialized, new KafkaPartitionSplitSerializer(), HashSet::new);
        Set<TopicPartition> currentAssignedSplits = new HashSet<>();
        currentPartitionAssignment.forEach(
                (reader, splits) ->
                        splits.forEach(
                                split -> currentAssignedSplits.add(split.getTopicPartition())));
        return new KafkaSourceEnumState(currentAssignedSplits);
    }
    throw new IOException(
            String.format(
                    "The bytes are serialized with version %d, "
                            + "while this deserializer only supports version up to %d",
                    version, CURRENT_VERSION));
}
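The helper deserializeTopicPartitions is not shown in this snippet. Assuming the current-version format is simply a partition count followed by one (topic, partition) pair per entry, a plausible sketch of it could look like the following; the exact byte layout is an assumption, not taken from the serializer itself.
// Sketch under the assumption that the current-version format is a count
// followed by one (topic, partition) pair per entry.
private static Set<TopicPartition> deserializeTopicPartitions(byte[] serialized) throws IOException {
    try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(serialized))) {
        final int numPartitions = in.readInt();
        final Set<TopicPartition> topicPartitions = new HashSet<>(numPartitions);
        for (int i = 0; i < numPartitions; i++) {
            final String topic = in.readUTF();
            final int partition = in.readInt();
            topicPartitions.add(new TopicPartition(topic, partition));
        }
        return topicPartitions;
    }
}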
use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.
the class KafkaSourceEnumerator method initializePartitionSplits.
/**
* Initialize splits for newly discovered partitions.
*
* <p>Enumerator will be responsible for fetching offsets when initializing splits if:
*
* <ul>
* <li>using timestamp for initializing offset
* <li>or using specified offset, but the offset is not provided for the newly discovered
* partitions
* </ul>
*
* <p>Otherwise offsets will be initialized by readers.
*
* <p>NOTE: This method should only be invoked in the worker executor thread, because it
* potentially requires network I/O with Kafka brokers for fetching offsets.
*
* @param partitionChange Newly discovered and removed partitions
* @return {@link KafkaPartitionSplit} of new partitions and {@link TopicPartition} of removed
* partitions
*/
private PartitionSplitChange initializePartitionSplits(PartitionChange partitionChange) {
    Set<TopicPartition> newPartitions =
            Collections.unmodifiableSet(partitionChange.getNewPartitions());
    OffsetsInitializer.PartitionOffsetsRetriever offsetsRetriever = getOffsetsRetriever();
    Map<TopicPartition, Long> startingOffsets =
            startingOffsetInitializer.getPartitionOffsets(newPartitions, offsetsRetriever);
    Map<TopicPartition, Long> stoppingOffsets =
            stoppingOffsetInitializer.getPartitionOffsets(newPartitions, offsetsRetriever);
    Set<KafkaPartitionSplit> partitionSplits = new HashSet<>(newPartitions.size());
    for (TopicPartition tp : newPartitions) {
        Long startingOffset = startingOffsets.get(tp);
        long stoppingOffset =
                stoppingOffsets.getOrDefault(tp, KafkaPartitionSplit.NO_STOPPING_OFFSET);
        partitionSplits.add(new KafkaPartitionSplit(tp, startingOffset, stoppingOffset));
    }
    return new PartitionSplitChange(partitionSplits, partitionChange.getRemovedPartitions());
}
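The Javadoc above distinguishes the cases in which the enumerator itself must query the brokers for offsets. As a point of reference, the following hedged sketch shows a source configuration that puts the enumerator in that position by using a timestamp-based starting offset; the broker address, topic, and group id are placeholders.
// Sketch: with a timestamp-based OffsetsInitializer the enumerator has to look up
// the matching offsets on the brokers while building the KafkaPartitionSplits.
KafkaSource<String> source =
        KafkaSource.<String>builder()
                .setBootstrapServers("broker-1:9092")                  // placeholder address
                .setTopics("input-topic")                              // placeholder topic
                .setGroupId("my-group")                                // placeholder group id
                .setStartingOffsets(OffsetsInitializer.timestamp(1_600_000_000_000L))
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();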