
Example 1 with StartupMode

Use of org.apache.flink.streaming.connectors.kafka.config.StartupMode in project flink by apache.

The class FlinkKafkaConsumerBase, method open().

// ------------------------------------------------------------------------
// Work methods
// ------------------------------------------------------------------------
@Override
public void open(Configuration configuration) throws Exception {
    // determine the offset commit mode
    this.offsetCommitMode = OffsetCommitModes.fromConfiguration(getIsAutoCommitEnabled(), enableCommitOnCheckpoints, ((StreamingRuntimeContext) getRuntimeContext()).isCheckpointingEnabled());
    // create the partition discoverer
    this.partitionDiscoverer = createPartitionDiscoverer(topicsDescriptor, getRuntimeContext().getIndexOfThisSubtask(), getRuntimeContext().getNumberOfParallelSubtasks());
    this.partitionDiscoverer.open();
    subscribedPartitionsToStartOffsets = new HashMap<>();
    final List<KafkaTopicPartition> allPartitions = partitionDiscoverer.discoverPartitions();
    if (restoredState != null) {
        for (KafkaTopicPartition partition : allPartitions) {
            if (!restoredState.containsKey(partition)) {
                restoredState.put(partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET);
            }
        }
        for (Map.Entry<KafkaTopicPartition, Long> restoredStateEntry : restoredState.entrySet()) {
            // seed the start offsets from the restored state while filtering out
            // restored partitions that should not be subscribed by this subtask
            if (KafkaTopicPartitionAssigner.assign(restoredStateEntry.getKey(), getRuntimeContext().getNumberOfParallelSubtasks()) == getRuntimeContext().getIndexOfThisSubtask()) {
                subscribedPartitionsToStartOffsets.put(restoredStateEntry.getKey(), restoredStateEntry.getValue());
            }
        }
        if (filterRestoredPartitionsWithCurrentTopicsDescriptor) {
            subscribedPartitionsToStartOffsets.entrySet().removeIf(entry -> {
                if (!topicsDescriptor.isMatchingTopic(entry.getKey().getTopic())) {
                    LOG.warn("{} is removed from subscribed partitions since it is no longer associated with topics descriptor of current execution.", entry.getKey());
                    return true;
                }
                return false;
            });
        }
        LOG.info("Consumer subtask {} will start reading {} partitions with offsets in restored state: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets);
    } else {
        // use the partition discoverer to fetch the initial seed partitions,
        // and set their initial offsets depending on the startup mode.
        // for SPECIFIC_OFFSETS and TIMESTAMP modes, we set the specific offsets now;
        // for other modes (EARLIEST, LATEST, and GROUP_OFFSETS), the offset is
        // lazily determined when the partition is actually read.
        switch(startupMode) {
            case SPECIFIC_OFFSETS:
                if (specificStartupOffsets == null) {
                    throw new IllegalStateException("Startup mode for the consumer set to " + StartupMode.SPECIFIC_OFFSETS + ", but no specific offsets were specified.");
                }
                for (KafkaTopicPartition seedPartition : allPartitions) {
                    Long specificOffset = specificStartupOffsets.get(seedPartition);
                    if (specificOffset != null) {
                        // since the specified offsets represent the next record to read,
                        // we subtract it by one so that the initial state of the consumer
                        // will be correct
                        subscribedPartitionsToStartOffsets.put(seedPartition, specificOffset - 1);
                    } else {
                        // default to group offset behaviour if the user-provided specific
                        // offsets do not contain a value for this partition
                        subscribedPartitionsToStartOffsets.put(seedPartition, KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
                    }
                }
                break;
            case TIMESTAMP:
                if (startupOffsetsTimestamp == null) {
                    throw new IllegalStateException("Startup mode for the consumer set to " + StartupMode.TIMESTAMP + ", but no startup timestamp was specified.");
                }
                for (Map.Entry<KafkaTopicPartition, Long> partitionToOffset : fetchOffsetsWithTimestamp(allPartitions, startupOffsetsTimestamp).entrySet()) {
                    subscribedPartitionsToStartOffsets.put(partitionToOffset.getKey(), (partitionToOffset.getValue() == null) ? // if no offset was found for the timestamp, default to the latest offset for the partition
                    KafkaTopicPartitionStateSentinel.LATEST_OFFSET : // otherwise subtract one so that the initial state of the consumer will be correct
                    partitionToOffset.getValue() - 1);
                }
                break;
            default:
                for (KafkaTopicPartition seedPartition : allPartitions) {
                    subscribedPartitionsToStartOffsets.put(seedPartition, startupMode.getStateSentinel());
                }
        }
        if (!subscribedPartitionsToStartOffsets.isEmpty()) {
            switch(startupMode) {
                case EARLIEST:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from the earliest offsets: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
                    break;
                case LATEST:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from the latest offsets: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
                    break;
                case TIMESTAMP:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from timestamp {}: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), startupOffsetsTimestamp, subscribedPartitionsToStartOffsets.keySet());
                    break;
                case SPECIFIC_OFFSETS:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from the specified startup offsets {}: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), specificStartupOffsets, subscribedPartitionsToStartOffsets.keySet());
                    List<KafkaTopicPartition> partitionsDefaultedToGroupOffsets = new ArrayList<>(subscribedPartitionsToStartOffsets.size());
                    for (Map.Entry<KafkaTopicPartition, Long> subscribedPartition : subscribedPartitionsToStartOffsets.entrySet()) {
                        if (subscribedPartition.getValue() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
                            partitionsDefaultedToGroupOffsets.add(subscribedPartition.getKey());
                        }
                    }
                    if (partitionsDefaultedToGroupOffsets.size() > 0) {
                        LOG.warn("Consumer subtask {} cannot find offsets for the following {} partitions in the specified startup offsets: {}" + "; their startup offsets will be defaulted to their committed group offsets in Kafka.", getRuntimeContext().getIndexOfThisSubtask(), partitionsDefaultedToGroupOffsets.size(), partitionsDefaultedToGroupOffsets);
                    }
                    break;
                case GROUP_OFFSETS:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from the committed group offsets in Kafka: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
            }
        } else {
            LOG.info("Consumer subtask {} initially has no partitions to read from.", getRuntimeContext().getIndexOfThisSubtask());
        }
    }
    this.deserializer.open(RuntimeContextInitializationContextAdapters.deserializationAdapter(getRuntimeContext(), metricGroup -> metricGroup.addGroup("user")));
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) COMMITS_SUCCEEDED_METRICS_COUNTER(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.COMMITS_SUCCEEDED_METRICS_COUNTER) LoggerFactory(org.slf4j.LoggerFactory) ExceptionUtils(org.apache.flink.util.ExceptionUtils) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) ListState(org.apache.flink.api.common.state.ListState) KafkaTopicPartitionAssigner(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionAssigner) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) KAFKA_CONSUMER_METRICS_GROUP(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.KAFKA_CONSUMER_METRICS_GROUP) Map(java.util.Map) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) CheckpointedFunction(org.apache.flink.streaming.api.checkpoint.CheckpointedFunction) FunctionInitializationContext(org.apache.flink.runtime.state.FunctionInitializationContext) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) Collection(java.util.Collection) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) AbstractFetcher(org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher) KafkaTopicsDescriptor(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) MetricGroup(org.apache.flink.metrics.MetricGroup) KafkaCommitCallback(org.apache.flink.streaming.connectors.kafka.internals.KafkaCommitCallback) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) ResultTypeQueryable(org.apache.flink.api.java.typeutils.ResultTypeQueryable) AssignerWithPunctuatedWatermarksAdapter(org.apache.flink.streaming.runtime.operators.util.AssignerWithPunctuatedWatermarksAdapter) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Pattern(java.util.regex.Pattern) Counter(org.apache.flink.metrics.Counter) KafkaTopicPartitionStateSentinel(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionStateSentinel) AssignerWithPeriodicWatermarksAdapter(org.apache.flink.streaming.runtime.operators.util.AssignerWithPeriodicWatermarksAdapter) HashMap(java.util.HashMap) RuntimeContextInitializationContextAdapters(org.apache.flink.api.common.serialization.RuntimeContextInitializationContextAdapters) AbstractPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) AssignerWithPeriodicWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks) OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) 
OffsetCommitMode(org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode) LongSerializer(org.apache.flink.api.common.typeutils.base.LongSerializer) Logger(org.slf4j.Logger) Properties(java.util.Properties) Configuration(org.apache.flink.configuration.Configuration) COMMITS_FAILED_METRICS_COUNTER(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.COMMITS_FAILED_METRICS_COUNTER) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) OffsetCommitModes(org.apache.flink.streaming.connectors.kafka.config.OffsetCommitModes) StartupMode(org.apache.flink.streaming.connectors.kafka.config.StartupMode) TreeMap(java.util.TreeMap) Internal(org.apache.flink.annotation.Internal) ClosureCleaner(org.apache.flink.api.java.ClosureCleaner) LinkedMap(org.apache.commons.collections.map.LinkedMap) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext)
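
The open() method above only resolves offsets for whichever StartupMode was configured on the consumer; the mode itself is chosen through the setStartFrom* methods of the legacy FlinkKafkaConsumer API. A minimal sketch of that mapping follows; the broker address, topic name, group id, and offset values are placeholders, not taken from the example above.

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

public class StartupModeExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties props = new Properties();
        // placeholder connection settings
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "demo-group");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("demo-topic", new SimpleStringSchema(), props);

        // each setter corresponds to one StartupMode branch handled in open() above
        consumer.setStartFromEarliest();                    // StartupMode.EARLIEST
        // consumer.setStartFromLatest();                   // StartupMode.LATEST
        // consumer.setStartFromGroupOffsets();             // StartupMode.GROUP_OFFSETS (default)
        // consumer.setStartFromTimestamp(1640995200000L);  // StartupMode.TIMESTAMP

        // StartupMode.SPECIFIC_OFFSETS: partitions missing from the map fall back to
        // their committed group offsets, as the warning in open() explains
        Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
        specificOffsets.put(new KafkaTopicPartition("demo-topic", 0), 23L);
        // consumer.setStartFromSpecificOffsets(specificOffsets);

        env.addSource(consumer).print();
        env.execute("startup-mode-demo");
    }
}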

Example 2 with StartupMode

Use of org.apache.flink.streaming.connectors.kafka.config.StartupMode in project flink by apache.

The class KafkaConnectorOptionsUtil, method getStartupOptions().

public static StartupOptions getStartupOptions(ReadableConfig tableOptions) {
    final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
    final StartupMode startupMode = tableOptions.getOptional(SCAN_STARTUP_MODE).map(KafkaConnectorOptionsUtil::fromOption).orElse(StartupMode.GROUP_OFFSETS);
    if (startupMode == StartupMode.SPECIFIC_OFFSETS) {
        // This will be refactored once specific offsets are supported for multiple topics
        // in FLINK-18602. validateScanStartupMode() has already checked that
        // tableOptions.get(TOPIC) contains exactly one topic.
        buildSpecificOffsets(tableOptions, tableOptions.get(TOPIC).get(0), specificOffsets);
    }
    final StartupOptions options = new StartupOptions();
    options.startupMode = startupMode;
    options.specificOffsets = specificOffsets;
    if (startupMode == StartupMode.TIMESTAMP) {
        options.startupTimestampMillis = tableOptions.get(SCAN_STARTUP_TIMESTAMP_MILLIS);
    }
    return options;
}
Also used : HashMap(java.util.HashMap) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) ScanStartupMode(org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.ScanStartupMode) StartupMode(org.apache.flink.streaming.connectors.kafka.config.StartupMode)
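
getStartupOptions() above maps the table options onto a StartupMode plus either specific offsets or a startup timestamp. A minimal sketch of the DDL that supplies those options, submitted through a TableEnvironment; the topic, broker, column names, and offset values are placeholders.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class ScanStartupModeDdlExample {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        // 'scan.startup.mode' is read by getStartupOptions(); the specific-offsets
        // string is parsed by buildSpecificOffsets() for the single configured topic
        tEnv.executeSql(
                "CREATE TABLE orders (\n"
                        + "  order_id STRING,\n"
                        + "  amount DOUBLE\n"
                        + ") WITH (\n"
                        + "  'connector' = 'kafka',\n"
                        + "  'topic' = 'orders',\n"
                        + "  'properties.bootstrap.servers' = 'localhost:9092',\n"
                        + "  'format' = 'json',\n"
                        + "  'scan.startup.mode' = 'specific-offsets',\n"
                        + "  'scan.startup.specific-offsets' = 'partition:0,offset:42;partition:1,offset:300'\n"
                        + ")");

        // other values of 'scan.startup.mode' map to the remaining StartupMode constants:
        //   'earliest-offset', 'latest-offset', 'group-offsets' (the default), and
        //   'timestamp' together with 'scan.startup.timestamp-millis'
    }
}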

Example 3 with StartupMode

Use of org.apache.flink.streaming.connectors.kafka.config.StartupMode in project flink by apache.

The class UpsertKafkaDynamicTableFactory, method createDynamicTableSource().

@Override
public DynamicTableSource createDynamicTableSource(Context context) {
    FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
    ReadableConfig tableOptions = helper.getOptions();
    DecodingFormat<DeserializationSchema<RowData>> keyDecodingFormat = helper.discoverDecodingFormat(DeserializationFormatFactory.class, KEY_FORMAT);
    DecodingFormat<DeserializationSchema<RowData>> valueDecodingFormat = helper.discoverDecodingFormat(DeserializationFormatFactory.class, VALUE_FORMAT);
    // Validate the option data type.
    helper.validateExcept(PROPERTIES_PREFIX);
    validateSource(tableOptions, keyDecodingFormat, valueDecodingFormat, context.getPrimaryKeyIndexes());
    Tuple2<int[], int[]> keyValueProjections = createKeyValueProjections(context.getCatalogTable());
    String keyPrefix = tableOptions.getOptional(KEY_FIELDS_PREFIX).orElse(null);
    Properties properties = getKafkaProperties(context.getCatalogTable().getOptions());
    // always use earliest to keep data integrity
    StartupMode earliest = StartupMode.EARLIEST;
    return new KafkaDynamicSource(context.getPhysicalRowDataType(), keyDecodingFormat, new DecodingFormatWrapper(valueDecodingFormat), keyValueProjections.f0, keyValueProjections.f1, keyPrefix, getSourceTopics(tableOptions), getSourceTopicPattern(tableOptions), properties, earliest, Collections.emptyMap(), 0, true, context.getObjectIdentifier().asSummaryString());
}
Also used : ReadableConfig(org.apache.flink.configuration.ReadableConfig) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) StartupMode(org.apache.flink.streaming.connectors.kafka.config.StartupMode) KafkaConnectorOptionsUtil.getKafkaProperties(org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptionsUtil.getKafkaProperties) Properties(java.util.Properties) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema)
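
Because createDynamicTableSource() above hard-codes StartupMode.EARLIEST, an upsert-kafka table exposes no scan.startup.mode option at all. A minimal sketch of a matching source table DDL; topic, broker, and column names are placeholders.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class UpsertKafkaSourceDdlExample {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        // upsert-kafka always reads from the earliest offsets so that the full
        // changelog can be reconstructed; the primary key defines the Kafka record key
        tEnv.executeSql(
                "CREATE TABLE user_profiles (\n"
                        + "  user_id STRING,\n"
                        + "  city STRING,\n"
                        + "  PRIMARY KEY (user_id) NOT ENFORCED\n"
                        + ") WITH (\n"
                        + "  'connector' = 'upsert-kafka',\n"
                        + "  'topic' = 'user_profiles',\n"
                        + "  'properties.bootstrap.servers' = 'localhost:9092',\n"
                        + "  'key.format' = 'json',\n"
                        + "  'value.format' = 'json'\n"
                        + ")");
    }
}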

Aggregations

StartupMode (org.apache.flink.streaming.connectors.kafka.config.StartupMode)3
HashMap (java.util.HashMap)2
Properties (java.util.Properties)2
ArrayList (java.util.ArrayList)1
Collection (java.util.Collection)1
List (java.util.List)1
Map (java.util.Map)1
TreeMap (java.util.TreeMap)1
AtomicReference (java.util.concurrent.atomic.AtomicReference)1
Pattern (java.util.regex.Pattern)1
LinkedMap (org.apache.commons.collections.map.LinkedMap)1
Internal (org.apache.flink.annotation.Internal)1
VisibleForTesting (org.apache.flink.annotation.VisibleForTesting)1
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)1
WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)1
DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema)1
RuntimeContextInitializationContextAdapters (org.apache.flink.api.common.serialization.RuntimeContextInitializationContextAdapters)1
CheckpointListener (org.apache.flink.api.common.state.CheckpointListener)1
ListState (org.apache.flink.api.common.state.ListState)1
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)1