
Example 11 with KafkaPartitionSplit

Use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.

From the class KafkaSourceReader, method snapshotState:

@Override
public List<KafkaPartitionSplit> snapshotState(long checkpointId) {
    List<KafkaPartitionSplit> splits = super.snapshotState(checkpointId);
    if (!commitOffsetsOnCheckpoint) {
        return splits;
    }
    if (splits.isEmpty() && offsetsOfFinishedSplits.isEmpty()) {
        offsetsToCommit.put(checkpointId, Collections.emptyMap());
    } else {
        Map<TopicPartition, OffsetAndMetadata> offsetsMap = offsetsToCommit.computeIfAbsent(checkpointId, id -> new HashMap<>());
        // Put the offsets of the active splits.
        for (KafkaPartitionSplit split : splits) {
            // If the checkpoint is triggered before the partition starting offsets
            // is retrieved, do not commit the offsets for those partitions.
            if (split.getStartingOffset() >= 0) {
                offsetsMap.put(split.getTopicPartition(), new OffsetAndMetadata(split.getStartingOffset()));
            }
        }
        // Put offsets of all the finished splits.
        offsetsMap.putAll(offsetsOfFinishedSplits);
    }
    return splits;
}
Also used : KafkaPartitionSplit(org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata)
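
The guard on getStartingOffset() >= 0 matters because KafkaPartitionSplit encodes "offset not yet resolved" as a negative sentinel, and committing such a value to Kafka would be wrong. The following minimal sketch isolates that filtering step; the splitsToCommittableOffsets helper is hypothetical and written here only for illustration, it is not part of the Flink API.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

class OffsetCommitSketch {

    // Hypothetical helper mirroring the loop in snapshotState() above: splits whose
    // starting offset is still a negative sentinel (not yet resolved against the
    // broker) are skipped so that no bogus offset is ever committed.
    static Map<TopicPartition, OffsetAndMetadata> splitsToCommittableOffsets(
            List<KafkaPartitionSplit> splits) {
        Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>(splits.size());
        for (KafkaPartitionSplit split : splits) {
            if (split.getStartingOffset() >= 0) {
                offsets.put(
                        split.getTopicPartition(),
                        new OffsetAndMetadata(split.getStartingOffset()));
            }
        }
        return offsets;
    }
}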

Example 12 with KafkaPartitionSplit

Use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.

From the class UpsertKafkaDynamicTableFactoryTest, method assertKafkaSource:

private void assertKafkaSource(ScanTableSource.ScanRuntimeProvider provider) {
    assertThat(provider, instanceOf(DataStreamScanProvider.class));
    final DataStreamScanProvider dataStreamScanProvider = (DataStreamScanProvider) provider;
    final Transformation<RowData> transformation = dataStreamScanProvider.produceDataStream(n -> Optional.empty(), StreamExecutionEnvironment.createLocalEnvironment()).getTransformation();
    assertThat(transformation, instanceOf(SourceTransformation.class));
    SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState> sourceTransformation = (SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState>) transformation;
    assertThat(sourceTransformation.getSource(), instanceOf(KafkaSource.class));
}
Also used : KafkaPartitionSplit(org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit) SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) Transformation(org.apache.flink.api.dag.Transformation) DataStreamScanProvider(org.apache.flink.table.connector.source.DataStreamScanProvider) ScanTableSource(org.apache.flink.table.connector.source.ScanTableSource) RowData(org.apache.flink.table.data.RowData) KafkaSource(org.apache.flink.connector.kafka.source.KafkaSource) KafkaSourceEnumState(org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumState) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Optional(java.util.Optional) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf)
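
A helper like assertKafkaSource is only useful together with a call site that builds the table source and requests its scan runtime provider. The sketch below shows one plausible way to drive it; the SCHEMA constant, the topic name, and the exact connector options are assumptions for illustration and are not copied from UpsertKafkaDynamicTableFactoryTest.

@Test
public void testScanRuntimeProviderIsKafkaSource() {
    // Assumed fixture: SCHEMA is a ResolvedSchema with a declared primary key,
    // as the upsert-kafka connector requires; the option values are illustrative only.
    final Map<String, String> options = new HashMap<>();
    options.put("connector", "upsert-kafka");
    options.put("topic", "test-topic");
    options.put("properties.bootstrap.servers", "dummy:9092");
    options.put("key.format", "csv");
    options.put("value.format", "csv");

    final DynamicTableSource tableSource = createTableSource(SCHEMA, options);
    assertThat(tableSource, instanceOf(ScanTableSource.class));

    final ScanTableSource.ScanRuntimeProvider provider =
            ((ScanTableSource) tableSource)
                    .getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);

    // Delegates to the helper above, which checks for a KafkaSource-backed
    // SourceTransformation.
    assertKafkaSource(provider);
}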

Example 13 with KafkaPartitionSplit

Use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.

From the class KafkaDynamicTableFactoryTest, method assertKafkaSource:

private KafkaSource<?> assertKafkaSource(ScanTableSource.ScanRuntimeProvider provider) {
    assertThat(provider).isInstanceOf(DataStreamScanProvider.class);
    final DataStreamScanProvider dataStreamScanProvider = (DataStreamScanProvider) provider;
    final Transformation<RowData> transformation = dataStreamScanProvider.produceDataStream(n -> Optional.empty(), StreamExecutionEnvironment.createLocalEnvironment()).getTransformation();
    assertThat(transformation).isInstanceOf(SourceTransformation.class);
    SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState> sourceTransformation = (SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState>) transformation;
    assertThat(sourceTransformation.getSource()).isInstanceOf(KafkaSource.class);
    return (KafkaSource<?>) sourceTransformation.getSource();
}
Also used : KafkaPartitionSplit(org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit) SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) Transformation(org.apache.flink.api.dag.Transformation) DataStreamScanProvider(org.apache.flink.table.connector.source.DataStreamScanProvider) ScanTableSource(org.apache.flink.table.connector.source.ScanTableSource) RowData(org.apache.flink.table.data.RowData) KafkaSource(org.apache.flink.connector.kafka.source.KafkaSource) KafkaSourceEnumState(org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumState) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Optional(java.util.Optional) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat)
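
Because this variant returns the typed KafkaSource<?>, a caller can continue asserting on the source object itself. The follow-up check below is an illustrative sketch, not a line taken from KafkaDynamicTableFactoryTest; it relies on the standard Source#getBoundedness() accessor and assumes an unbounded table scan.

// 'provider' obtained as in the call pattern above, via getScanRuntimeProvider(..).
// Requires: import org.apache.flink.api.connector.source.Boundedness;
final KafkaSource<?> kafkaSource = assertKafkaSource(provider);
assertThat(kafkaSource.getBoundedness()).isEqualTo(Boundedness.CONTINUOUS_UNBOUNDED);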

Example 14 with KafkaPartitionSplit

Use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.

From the class KafkaSourceEnumStateSerializer, method deserialize:

@Override
public KafkaSourceEnumState deserialize(int version, byte[] serialized) throws IOException {
    if (version == CURRENT_VERSION) {
        final Set<TopicPartition> assignedPartitions = deserializeTopicPartitions(serialized);
        return new KafkaSourceEnumState(assignedPartitions);
    }
    // Backward compatibility
    if (version == VERSION_0) {
        Map<Integer, Set<KafkaPartitionSplit>> currentPartitionAssignment = SerdeUtils.deserializeSplitAssignments(serialized, new KafkaPartitionSplitSerializer(), HashSet::new);
        Set<TopicPartition> currentAssignedSplits = new HashSet<>();
        currentPartitionAssignment.forEach((reader, splits) -> splits.forEach(split -> currentAssignedSplits.add(split.getTopicPartition())));
        return new KafkaSourceEnumState(currentAssignedSplits);
    }
    throw new IOException(String.format("The bytes are serialized with version %d, " + "while this deserializer only supports version up to %d", version, CURRENT_VERSION));
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) DataInputStream(java.io.DataInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Collection(java.util.Collection) Set(java.util.Set) IOException(java.io.IOException) HashSet(java.util.HashSet) KafkaPartitionSplitSerializer(org.apache.flink.connector.kafka.source.split.KafkaPartitionSplitSerializer) ByteArrayInputStream(java.io.ByteArrayInputStream) DataOutputStream(java.io.DataOutputStream) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) Map(java.util.Map) SerdeUtils(org.apache.flink.connector.base.source.utils.SerdeUtils) Internal(org.apache.flink.annotation.Internal) KafkaPartitionSplit(org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit)
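
A natural way to exercise the current-version branch of deserialize is a round trip through the SimpleVersionedSerializer contract. The sketch below is illustrative rather than an existing Flink test; in particular it assumes KafkaSourceEnumState exposes its partition set via an assignedPartitions() accessor, which is an assumption about the method name rather than something shown above.

// Requires: import static org.junit.jupiter.api.Assertions.assertEquals;
@Test
public void testEnumStateRoundTrip() throws Exception {
    KafkaSourceEnumStateSerializer serializer = new KafkaSourceEnumStateSerializer();

    Set<TopicPartition> assigned = new HashSet<>();
    assigned.add(new TopicPartition("topic-a", 0));
    assigned.add(new TopicPartition("topic-a", 1));

    KafkaSourceEnumState state = new KafkaSourceEnumState(assigned);

    // Serialize with the serializer's own (current) version and read it back.
    byte[] bytes = serializer.serialize(state);
    KafkaSourceEnumState restored = serializer.deserialize(serializer.getVersion(), bytes);

    // assignedPartitions() is the assumed accessor for the partition set.
    assertEquals(assigned, restored.assignedPartitions());
}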

Example 15 with KafkaPartitionSplit

Use of org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit in project flink by apache.

From the class KafkaSourceEnumerator, method initializePartitionSplits:

/**
 * Initialize splits for newly discovered partitions.
 *
 * <p>Enumerator will be responsible for fetching offsets when initializing splits if:
 *
 * <ul>
 *   <li>using timestamp for initializing offset
 *   <li>or using specified offset, but the offset is not provided for the newly discovered
 *       partitions
 * </ul>
 *
 * <p>Otherwise offsets will be initialized by readers.
 *
 * <p>NOTE: This method should only be invoked in the worker executor thread, because it
 * potentially requires network I/O with Kafka brokers for fetching offsets.
 *
 * @param partitionChange Newly discovered and removed partitions
 * @return {@link KafkaPartitionSplit} of new partitions and {@link TopicPartition} of removed
 *     partitions
 */
private PartitionSplitChange initializePartitionSplits(PartitionChange partitionChange) {
    Set<TopicPartition> newPartitions = Collections.unmodifiableSet(partitionChange.getNewPartitions());
    OffsetsInitializer.PartitionOffsetsRetriever offsetsRetriever = getOffsetsRetriever();
    Map<TopicPartition, Long> startingOffsets = startingOffsetInitializer.getPartitionOffsets(newPartitions, offsetsRetriever);
    Map<TopicPartition, Long> stoppingOffsets = stoppingOffsetInitializer.getPartitionOffsets(newPartitions, offsetsRetriever);
    Set<KafkaPartitionSplit> partitionSplits = new HashSet<>(newPartitions.size());
    for (TopicPartition tp : newPartitions) {
        Long startingOffset = startingOffsets.get(tp);
        long stoppingOffset = stoppingOffsets.getOrDefault(tp, KafkaPartitionSplit.NO_STOPPING_OFFSET);
        partitionSplits.add(new KafkaPartitionSplit(tp, startingOffset, stoppingOffset));
    }
    return new PartitionSplitChange(partitionSplits, partitionChange.getRemovedPartitions());
}
Also used : KafkaPartitionSplit(org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit) OffsetsInitializer(org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer) TopicPartition(org.apache.kafka.common.TopicPartition) HashSet(java.util.HashSet)
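
When the starting-offset initializer defers the decision to the readers (for example, start-from-earliest), the split produced for a newly discovered partition carries sentinel values rather than concrete offsets. A minimal sketch of such a split is shown below; the topic name is illustrative, and EARLIEST_OFFSET / NO_STOPPING_OFFSET are assumed to be the public sentinel constants on KafkaPartitionSplit.

// Sketch: a split for a freshly discovered partition of an (illustrative) topic,
// unbounded on the stopping side and starting from the earliest-offset sentinel
// that the reader will later resolve against the broker.
TopicPartition newPartition = new TopicPartition("orders", 3);
KafkaPartitionSplit split =
        new KafkaPartitionSplit(
                newPartition,
                KafkaPartitionSplit.EARLIEST_OFFSET,
                KafkaPartitionSplit.NO_STOPPING_OFFSET);

Set<KafkaPartitionSplit> newSplits = new HashSet<>();
newSplits.add(split);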

Aggregations

KafkaPartitionSplit (org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit): 25
TopicPartition (org.apache.kafka.common.TopicPartition): 20
Properties (java.util.Properties): 10
Test (org.junit.jupiter.api.Test): 9
HashSet (java.util.HashSet): 7
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 6
HashMap (java.util.HashMap): 5
Map (java.util.Map): 5
AdminClient (org.apache.kafka.clients.admin.AdminClient): 5
ArrayList (java.util.ArrayList): 4
Collection (java.util.Collection): 4
Test (org.junit.Test): 4
Collections (java.util.Collections): 3
List (java.util.List): 3
Set (java.util.Set): 3
Consumer (java.util.function.Consumer): 3
MockSplitEnumeratorContext (org.apache.flink.api.connector.source.mocks.MockSplitEnumeratorContext): 3
OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 3
Arrays (java.util.Arrays): 2
Optional (java.util.Optional): 2