
Example 1 with WorkerErrantRecordReporter

Use of org.apache.kafka.connect.runtime.errors.WorkerErrantRecordReporter in project kafka by apache.

From the class WorkerSinkTask, the method commitOffsets:

private void commitOffsets(long now, boolean closing, Collection<TopicPartition> topicPartitions) {
    log.trace("Committing offsets for partitions {}", topicPartitions);
    if (workerErrantRecordReporter != null) {
        log.trace("Awaiting reported errors to be completed");
        workerErrantRecordReporter.awaitFutures(topicPartitions);
        log.trace("Completed reported errors");
    }
    Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = currentOffsets.entrySet().stream()
            .filter(e -> topicPartitions.contains(e.getKey()))
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    if (offsetsToCommit.isEmpty())
        return;
    committing = true;
    commitSeqno += 1;
    commitStarted = now;
    sinkTaskMetricsGroup.recordOffsetSequenceNumber(commitSeqno);
    Map<TopicPartition, OffsetAndMetadata> lastCommittedOffsetsForPartitions = this.lastCommittedOffsets.entrySet().stream()
            .filter(e -> offsetsToCommit.containsKey(e.getKey()))
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    final Map<TopicPartition, OffsetAndMetadata> taskProvidedOffsets;
    try {
        log.trace("{} Calling task.preCommit with current offsets: {}", this, offsetsToCommit);
        taskProvidedOffsets = task.preCommit(new HashMap<>(offsetsToCommit));
    } catch (Throwable t) {
        if (closing) {
            log.warn("{} Offset commit failed during close", this);
        } else {
            log.error("{} Offset commit failed, rewinding to last committed offsets", this, t);
            for (Map.Entry<TopicPartition, OffsetAndMetadata> entry : lastCommittedOffsetsForPartitions.entrySet()) {
                log.debug("{} Rewinding topic partition {} to offset {}", this, entry.getKey(), entry.getValue().offset());
                consumer.seek(entry.getKey(), entry.getValue().offset());
            }
            currentOffsets.putAll(lastCommittedOffsetsForPartitions);
        }
        onCommitCompleted(t, commitSeqno, null);
        return;
    } finally {
        if (closing) {
            log.trace("{} Closing the task before committing the offsets: {}", this, offsetsToCommit);
            task.close(topicPartitions);
        }
    }
    if (taskProvidedOffsets.isEmpty()) {
        log.debug("{} Skipping offset commit, task opted-out by returning no offsets from preCommit", this);
        onCommitCompleted(null, commitSeqno, null);
        return;
    }
    Collection<TopicPartition> allAssignedTopicPartitions = consumer.assignment();
    final Map<TopicPartition, OffsetAndMetadata> committableOffsets = new HashMap<>(lastCommittedOffsetsForPartitions);
    for (Map.Entry<TopicPartition, OffsetAndMetadata> taskProvidedOffsetEntry : taskProvidedOffsets.entrySet()) {
        final TopicPartition partition = taskProvidedOffsetEntry.getKey();
        final OffsetAndMetadata taskProvidedOffset = taskProvidedOffsetEntry.getValue();
        if (committableOffsets.containsKey(partition)) {
            long taskOffset = taskProvidedOffset.offset();
            long currentOffset = offsetsToCommit.get(partition).offset();
            if (taskOffset <= currentOffset) {
                committableOffsets.put(partition, taskProvidedOffset);
            } else {
                log.warn("{} Ignoring invalid task provided offset {}/{} -- not yet consumed, taskOffset={} currentOffset={}", this, partition, taskProvidedOffset, taskOffset, currentOffset);
            }
        } else if (!allAssignedTopicPartitions.contains(partition)) {
            log.warn("{} Ignoring invalid task provided offset {}/{} -- partition not assigned, assignment={}", this, partition, taskProvidedOffset, allAssignedTopicPartitions);
        } else {
            log.debug("{} Ignoring task provided offset {}/{} -- partition not requested, requested={}", this, partition, taskProvidedOffset, committableOffsets.keySet());
        }
    }
    if (committableOffsets.equals(lastCommittedOffsetsForPartitions)) {
        log.debug("{} Skipping offset commit, no change since last commit", this);
        onCommitCompleted(null, commitSeqno, null);
        return;
    }
    doCommit(committableOffsets, closing, commitSeqno);
}
Also used : Max(org.apache.kafka.common.metrics.stats.Max) Rate(org.apache.kafka.common.metrics.stats.Rate) ConnectorTaskId(org.apache.kafka.connect.util.ConnectorTaskId) LoggerFactory(org.slf4j.LoggerFactory) KafkaException(org.apache.kafka.common.KafkaException) HashMap(java.util.HashMap) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) TOPIC_TRACKING_ENABLE_CONFIG(org.apache.kafka.connect.runtime.WorkerConfig.TOPIC_TRACKING_ENABLE_CONFIG) ConnectHeaders(org.apache.kafka.connect.header.ConnectHeaders) ArrayList(java.util.ArrayList) Headers(org.apache.kafka.connect.header.Headers) HeaderConverter(org.apache.kafka.connect.storage.HeaderConverter) Collections.singleton(java.util.Collections.singleton) Converter(org.apache.kafka.connect.storage.Converter) Duration(java.time.Duration) Map(java.util.Map) OffsetCommitCallback(org.apache.kafka.clients.consumer.OffsetCommitCallback) WorkerErrantRecordReporter(org.apache.kafka.connect.runtime.errors.WorkerErrantRecordReporter) MetricGroup(org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup) Value(org.apache.kafka.common.metrics.stats.Value) SinkTask(org.apache.kafka.connect.sink.SinkTask) Utils(org.apache.kafka.common.utils.Utils) TopicPartition(org.apache.kafka.common.TopicPartition) Sensor(org.apache.kafka.common.metrics.Sensor) ConnectUtils(org.apache.kafka.connect.util.ConnectUtils) Logger(org.slf4j.Logger) Time(org.apache.kafka.common.utils.Time) WakeupException(org.apache.kafka.common.errors.WakeupException) SchemaAndValue(org.apache.kafka.connect.data.SchemaAndValue) ClusterConfigState(org.apache.kafka.connect.runtime.distributed.ClusterConfigState) Collection(java.util.Collection) CumulativeSum(org.apache.kafka.common.metrics.stats.CumulativeSum) Collectors(java.util.stream.Collectors) StatusBackingStore(org.apache.kafka.connect.storage.StatusBackingStore) ConsumerRebalanceListener(org.apache.kafka.clients.consumer.ConsumerRebalanceListener) RetriableException(org.apache.kafka.connect.errors.RetriableException) List(java.util.List) Stage(org.apache.kafka.connect.runtime.errors.Stage) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) UncheckedCloseable(org.apache.kafka.common.utils.Utils.UncheckedCloseable) Avg(org.apache.kafka.common.metrics.stats.Avg) ConnectException(org.apache.kafka.connect.errors.ConnectException) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) RetryWithToleranceOperator(org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator) Pattern(java.util.regex.Pattern) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)
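
A sketch of the task-side API whose futures commitOffsets waits on: a sink task can obtain an ErrantRecordReporter from its context and report records that fail processing, and WorkerErrantRecordReporter tracks the returned futures so awaitFutures(...) can block on them before offsets are committed. The connector class, the process(...) helper, and the version string below are hypothetical; only SinkTask, SinkTaskContext.errantRecordReporter(), and ErrantRecordReporter.report(...) are Kafka Connect APIs.

import java.util.Collection;
import java.util.Map;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.sink.ErrantRecordReporter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;

public class ExampleSinkTask extends SinkTask {

    private ErrantRecordReporter reporter;

    @Override
    public void start(Map<String, String> props) {
        // Null when the worker or connector configuration does not enable error reporting.
        reporter = context.errantRecordReporter();
    }

    @Override
    public void put(Collection<SinkRecord> records) {
        for (SinkRecord record : records) {
            try {
                process(record); // hypothetical sink-specific write
            } catch (Exception e) {
                if (reporter != null) {
                    // The returned Future is tracked by WorkerErrantRecordReporter;
                    // commitOffsets() awaits it before committing this partition's offset.
                    reporter.report(record, e);
                } else {
                    throw new ConnectException("Failed to process record", e);
                }
            }
        }
    }

    @Override
    public Map<TopicPartition, OffsetAndMetadata> preCommit(Map<TopicPartition, OffsetAndMetadata> currentOffsets) {
        // Returning the offsets unchanged accepts the framework's view; an empty map
        // would make commitOffsets() skip the commit (see the isEmpty() check above).
        return currentOffsets;
    }

    @Override
    public void stop() {
    }

    @Override
    public String version() {
        return "0.0.1";
    }
}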

Example 2 with WorkerErrantRecordReporter

Use of org.apache.kafka.connect.runtime.errors.WorkerErrantRecordReporter in project kafka by apache.

From the class Worker, the method buildWorkerTask:

private WorkerTask buildWorkerTask(ClusterConfigState configState, ConnectorConfig connConfig, ConnectorTaskId id, Task task, TaskStatus.Listener statusListener, TargetState initialState, Converter keyConverter, Converter valueConverter, HeaderConverter headerConverter, ClassLoader loader) {
    ErrorHandlingMetrics errorHandlingMetrics = errorHandlingMetrics(id);
    final Class<? extends Connector> connectorClass = plugins.connectorClass(connConfig.getString(ConnectorConfig.CONNECTOR_CLASS_CONFIG));
    RetryWithToleranceOperator retryWithToleranceOperator = new RetryWithToleranceOperator(connConfig.errorRetryTimeout(), connConfig.errorMaxDelayInMillis(), connConfig.errorToleranceType(), Time.SYSTEM);
    retryWithToleranceOperator.metrics(errorHandlingMetrics);
    // Decide which type of worker task we need based on the type of task.
    if (task instanceof SourceTask) {
        SourceConnectorConfig sourceConfig = new SourceConnectorConfig(plugins, connConfig.originalsStrings(), config.topicCreationEnable());
        retryWithToleranceOperator.reporters(sourceTaskReporters(id, sourceConfig, errorHandlingMetrics));
        TransformationChain<SourceRecord> transformationChain = new TransformationChain<>(sourceConfig.<SourceRecord>transformations(), retryWithToleranceOperator);
        log.info("Initializing: {}", transformationChain);
        CloseableOffsetStorageReader offsetReader = new OffsetStorageReaderImpl(offsetBackingStore, id.connector(), internalKeyConverter, internalValueConverter);
        OffsetStorageWriter offsetWriter = new OffsetStorageWriter(offsetBackingStore, id.connector(), internalKeyConverter, internalValueConverter);
        Map<String, Object> producerProps = producerConfigs(id, "connector-producer-" + id, config, sourceConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId);
        KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps);
        TopicAdmin admin;
        Map<String, TopicCreationGroup> topicCreationGroups;
        if (config.topicCreationEnable() && sourceConfig.usesTopicCreation()) {
            Map<String, Object> adminProps = adminConfigs(id, "connector-adminclient-" + id, config, sourceConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId);
            admin = new TopicAdmin(adminProps);
            topicCreationGroups = TopicCreationGroup.configuredGroups(sourceConfig);
        } else {
            admin = null;
            topicCreationGroups = null;
        }
        // Note we pass the configState as it performs dynamic transformations under the covers
        return new WorkerSourceTask(id, (SourceTask) task, statusListener, initialState, keyConverter, valueConverter, headerConverter, transformationChain, producer, admin, topicCreationGroups, offsetReader, offsetWriter, config, configState, metrics, loader, time, retryWithToleranceOperator, herder.statusBackingStore(), executor);
    } else if (task instanceof SinkTask) {
        TransformationChain<SinkRecord> transformationChain = new TransformationChain<>(connConfig.<SinkRecord>transformations(), retryWithToleranceOperator);
        log.info("Initializing: {}", transformationChain);
        SinkConnectorConfig sinkConfig = new SinkConnectorConfig(plugins, connConfig.originalsStrings());
        retryWithToleranceOperator.reporters(sinkTaskReporters(id, sinkConfig, errorHandlingMetrics, connectorClass));
        WorkerErrantRecordReporter workerErrantRecordReporter = createWorkerErrantRecordReporter(sinkConfig, retryWithToleranceOperator, keyConverter, valueConverter, headerConverter);
        Map<String, Object> consumerProps = consumerConfigs(id, config, connConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId);
        KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerProps);
        return new WorkerSinkTask(id, (SinkTask) task, statusListener, initialState, config, configState, metrics, keyConverter, valueConverter, headerConverter, transformationChain, consumer, loader, time, retryWithToleranceOperator, workerErrantRecordReporter, herder.statusBackingStore());
    } else {
        log.error("Tasks must be a subclass of either SourceTask or SinkTask and current is {}", task);
        throw new ConnectException("Tasks must be a subclass of either SourceTask or SinkTask");
    }
}
Also used : OffsetStorageWriter(org.apache.kafka.connect.storage.OffsetStorageWriter) KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) TopicCreationGroup(org.apache.kafka.connect.util.TopicCreationGroup) SourceRecord(org.apache.kafka.connect.source.SourceRecord) CloseableOffsetStorageReader(org.apache.kafka.connect.storage.CloseableOffsetStorageReader) ErrorHandlingMetrics(org.apache.kafka.connect.runtime.errors.ErrorHandlingMetrics) ConnectException(org.apache.kafka.connect.errors.ConnectException) WorkerErrantRecordReporter(org.apache.kafka.connect.runtime.errors.WorkerErrantRecordReporter) RetryWithToleranceOperator(org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator) SinkTask(org.apache.kafka.connect.sink.SinkTask) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) TopicAdmin(org.apache.kafka.connect.util.TopicAdmin) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) OffsetStorageReaderImpl(org.apache.kafka.connect.storage.OffsetStorageReaderImpl) SourceTask(org.apache.kafka.connect.source.SourceTask) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)
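
createWorkerErrantRecordReporter(...) only hands a WorkerErrantRecordReporter to the WorkerSinkTask when the sink connector's error-handling configuration calls for one. A minimal sketch of such a configuration, assuming a hypothetical connector class and topic names; the errors.* keys are standard Kafka Connect connector properties:

import java.util.HashMap;
import java.util.Map;

public class ErrantRecordReporterConfigExample {

    // Connector properties that enable dead-letter-queue based error reporting.
    // Names such as "example-sink" and the connector class are made up.
    public static Map<String, String> sinkConnectorProps() {
        Map<String, String> props = new HashMap<>();
        props.put("name", "example-sink");
        props.put("connector.class", "com.example.ExampleSinkConnector");
        props.put("topics", "example-topic");
        props.put("tasks.max", "1");
        // Tolerate record-level failures instead of failing the task.
        props.put("errors.tolerance", "all");
        // Route failed records to a dead letter queue topic, keeping error context headers.
        props.put("errors.deadletterqueue.topic.name", "example-sink-dlq");
        props.put("errors.deadletterqueue.context.headers.enable", "true");
        // Also log failed operations.
        props.put("errors.log.enable", "true");
        return props;
    }
}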

Aggregations

HashMap (java.util.HashMap): 2
Map (java.util.Map): 2
KafkaConsumer (org.apache.kafka.clients.consumer.KafkaConsumer): 2
ConnectException (org.apache.kafka.connect.errors.ConnectException): 2
RetryWithToleranceOperator (org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator): 2
WorkerErrantRecordReporter (org.apache.kafka.connect.runtime.errors.WorkerErrantRecordReporter): 2
SinkRecord (org.apache.kafka.connect.sink.SinkRecord): 2
SinkTask (org.apache.kafka.connect.sink.SinkTask): 2
Duration (java.time.Duration): 1
ArrayList (java.util.ArrayList): 1
Collection (java.util.Collection): 1
Collections.singleton (java.util.Collections.singleton): 1
List (java.util.List): 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1
ConcurrentMap (java.util.concurrent.ConcurrentMap): 1
Pattern (java.util.regex.Pattern): 1
Collectors (java.util.stream.Collectors): 1
ConsumerRebalanceListener (org.apache.kafka.clients.consumer.ConsumerRebalanceListener): 1
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 1
ConsumerRecords (org.apache.kafka.clients.consumer.ConsumerRecords): 1