
Example 1 with KafkaMetricWrapper

Use of org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper in project flink by apache.

From the class KafkaConsumerThread, method run().

// ------------------------------------------------------------------------
@Override
public void run() {
    // early exit check
    if (!running) {
        return;
    }
    // this is the means to talk to FlinkKafkaConsumer's main thread
    final Handover handover = this.handover;
    // This method initializes the KafkaConsumer and guarantees it is torn down properly.
    // This is important, because the consumer has multi-threading issues,
    // including concurrent 'close()' calls.
    final KafkaConsumer<byte[], byte[]> consumer;
    try {
        consumer = new KafkaConsumer<>(kafkaProperties);
    } catch (Throwable t) {
        handover.reportError(t);
        return;
    }
    // from here on, the consumer is guaranteed to be closed properly
    try {
        // The callback invoked by Kafka once an offset commit is complete
        final OffsetCommitCallback offsetCommitCallback = new CommitCallback();
        // tell the consumer which partitions to work with
        consumerCallBridge.assignPartitions(consumer, convertKafkaPartitions(subscribedPartitionStates));
        // register Kafka's very own metrics in Flink's metric reporters
        if (useMetrics) {
            // register Kafka metrics to Flink
            Map<MetricName, ? extends Metric> metrics = consumer.metrics();
            if (metrics == null) {
                // MapR's Kafka implementation returns null here.
                log.info("Consumer implementation does not support metrics");
            } else {
                // we have Kafka metrics, register them
                for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                    kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
                }
            }
        }
        // early exit check
        if (!running) {
            return;
        }
        // offsets of the subscribed partitions may still be placeholder sentinel
        // values; replace those with actual offsets, according to what the sentinel values represent
        for (KafkaTopicPartitionState<TopicPartition> partition : subscribedPartitionStates) {
            if (partition.getOffset() == KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET) {
                consumerCallBridge.seekPartitionToBeginning(consumer, partition.getKafkaPartitionHandle());
                partition.setOffset(consumer.position(partition.getKafkaPartitionHandle()) - 1);
            } else if (partition.getOffset() == KafkaTopicPartitionStateSentinel.LATEST_OFFSET) {
                consumerCallBridge.seekPartitionToEnd(consumer, partition.getKafkaPartitionHandle());
                partition.setOffset(consumer.position(partition.getKafkaPartitionHandle()) - 1);
            } else if (partition.getOffset() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
                // the KafkaConsumer by default will automatically seek the consumer position
                // to the committed group offset, so we do not need to do it.
                partition.setOffset(consumer.position(partition.getKafkaPartitionHandle()) - 1);
            } else {
                consumer.seek(partition.getKafkaPartitionHandle(), partition.getOffset() + 1);
            }
        }
        // from now on, external operations may call the consumer
        this.consumer = consumer;
        // the latest bulk of records. May carry across the loop if the thread is woken up
        // from blocking on the handover
        ConsumerRecords<byte[], byte[]> records = null;
        // main fetch loop
        while (running) {
            // check if there is something to commit
            if (!commitInProgress) {
                // get and reset the work-to-be committed, so we don't repeatedly commit the same
                final Map<TopicPartition, OffsetAndMetadata> toCommit = nextOffsetsToCommit.getAndSet(null);
                if (toCommit != null) {
                    log.debug("Sending async offset commit request to Kafka broker");
                    // also record that a commit is already in progress
                    // the order here matters! first set the flag, then send the commit command.
                    commitInProgress = true;
                    consumer.commitAsync(toCommit, offsetCommitCallback);
                }
            }
            // get the next batch of records, unless we did not manage to hand the old batch over
            if (records == null) {
                try {
                    records = consumer.poll(pollTimeout);
                } catch (WakeupException we) {
                    continue;
                }
            }
            try {
                handover.produce(records);
                records = null;
            } catch (Handover.WakeupException e) {
            // fall through the loop
            }
        }
    // end main fetch loop
    } catch (Throwable t) {
        // let the main thread know and exit
        // it may be that this exception comes because the main thread closed the handover, in
        // which case the below reporting is irrelevant, but does not hurt either
        handover.reportError(t);
    } finally {
        // make sure the handover is closed if it is not already closed or has an error
        handover.close();
        // make sure the KafkaConsumer is closed
        try {
            consumer.close();
        } catch (Throwable t) {
            log.warn("Error while closing Kafka consumer", t);
        }
    }
}
Also used : OffsetCommitCallback(org.apache.kafka.clients.consumer.OffsetCommitCallback) WakeupException(org.apache.kafka.common.errors.WakeupException) MetricName(org.apache.kafka.common.MetricName) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) KafkaMetricWrapper(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper) Map(java.util.Map)
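
The wrapper being registered in the loop above is the glue between the two metric systems: it adapts a Kafka org.apache.kafka.common.Metric to a Flink Gauge so that Flink's metric reporters can read the current value on demand. Below is a minimal sketch of such an adapter, assuming the Kafka Metric exposes its reading via metricValue(); the class name is illustrative, and the actual KafkaMetricWrapper shipped with Flink may read the value differently.

import org.apache.flink.metrics.Gauge;
import org.apache.kafka.common.Metric;

// Minimal sketch (illustrative name) of a Kafka-to-Flink metric adapter.
public class KafkaMetricGaugeSketch implements Gauge<Double> {

    private final Metric kafkaMetric;

    public KafkaMetricGaugeSketch(Metric kafkaMetric) {
        this.kafkaMetric = kafkaMetric;
    }

    @Override
    public Double getValue() {
        // Kafka reports the value as Object; non-numeric readings fall back to NaN here.
        Object value = kafkaMetric.metricValue();
        return value instanceof Number ? ((Number) value).doubleValue() : Double.NaN;
    }
}

Registering such an adapter then mirrors the loop above, e.g. kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricGaugeSketch(metric.getValue())).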

Example 2 with KafkaMetricWrapper

Use of org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper in project flink by apache.

From the class FlinkKafkaProducerBase, method open().

// ----------------------------------- Utilities --------------------------
/**
 * Initializes the connection to Kafka.
 */
@Override
public void open(Configuration configuration) throws Exception {
    if (schema instanceof KeyedSerializationSchemaWrapper) {
        ((KeyedSerializationSchemaWrapper<IN>) schema).getSerializationSchema().open(RuntimeContextInitializationContextAdapters.serializationAdapter(getRuntimeContext(), metricGroup -> metricGroup.addGroup("user")));
    }
    producer = getKafkaProducer(this.producerConfig);
    RuntimeContext ctx = getRuntimeContext();
    if (null != flinkKafkaPartitioner) {
        flinkKafkaPartitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks());
    }
    LOG.info("Starting FlinkKafkaProducer ({}/{}) to produce into default topic {}", ctx.getIndexOfThisSubtask() + 1, ctx.getNumberOfParallelSubtasks(), defaultTopicId);
    // register Kafka metrics to Flink accumulators
    if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
        Map<MetricName, ? extends Metric> metrics = this.producer.metrics();
        if (metrics == null) {
            // MapR's Kafka implementation returns null here.
            LOG.info("Producer implementation does not support metrics");
        } else {
            final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
            for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
            }
        }
    }
    if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
        LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
        flushOnCheckpoint = false;
    }
    if (logFailuresOnly) {
        callback = new Callback() {

            @Override
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
                }
                acknowledgeMessage();
            }
        };
    } else {
        callback = new Callback() {

            @Override
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null && asyncException == null) {
                    asyncException = exception;
                }
                acknowledgeMessage();
            }
        };
    }
}
Also used : RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) KeyedSerializationSchemaWrapper(org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RuntimeContextInitializationContextAdapters(org.apache.flink.api.common.serialization.RuntimeContextInitializationContextAdapters) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) NetUtils(org.apache.flink.util.NetUtils) ArrayList(java.util.ArrayList) KafkaMetricWrapper(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper) KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) ByteArraySerializer(org.apache.kafka.common.serialization.ByteArraySerializer) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Metric(org.apache.kafka.common.Metric) MetricName(org.apache.kafka.common.MetricName) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) Logger(org.slf4j.Logger) Properties(java.util.Properties) CheckpointedFunction(org.apache.flink.streaming.api.checkpoint.CheckpointedFunction) FunctionInitializationContext(org.apache.flink.runtime.state.FunctionInitializationContext) Configuration(org.apache.flink.configuration.Configuration) PartitionInfo(org.apache.kafka.common.PartitionInfo) RecordMetadata(org.apache.kafka.clients.producer.RecordMetadata) FlinkKafkaPartitioner(org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner) KeyedSerializationSchema(org.apache.flink.streaming.util.serialization.KeyedSerializationSchema) RichSinkFunction(org.apache.flink.streaming.api.functions.sink.RichSinkFunction) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) SerializableObject(org.apache.flink.util.SerializableObject) MetricGroup(org.apache.flink.metrics.MetricGroup) List(java.util.List) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Internal(org.apache.flink.annotation.Internal) ClosureCleaner(org.apache.flink.api.java.ClosureCleaner) Comparator(java.util.Comparator) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) Callback(org.apache.kafka.clients.producer.Callback) Collections(java.util.Collections)
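
Metric forwarding on the producer side is opt-out: open() only registers the gauges when the KEY_DISABLE_METRICS property parses to false. A short sketch of building a producer configuration that suppresses the registration is shown below; the helper class and the bootstrap address are hypothetical, and the exact visibility and value of the KEY_DISABLE_METRICS constant should be checked against the Flink version in use.

import java.util.Properties;

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducerBase;

// Sketch: producer properties that skip the Kafka-metric gauge registration in open().
public final class DisableKafkaMetricsExample {

    public static Properties producerConfigWithoutMetrics(String bootstrapServers) {
        Properties producerConfig = new Properties();
        // hypothetical broker address supplied by the caller
        producerConfig.setProperty("bootstrap.servers", bootstrapServers);
        // KEY_DISABLE_METRICS is the key checked in open(); "true" disables forwarding
        producerConfig.setProperty(FlinkKafkaProducerBase.KEY_DISABLE_METRICS, "true");
        return producerConfig;
    }
}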

Example 3 with KafkaMetricWrapper

Use of org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper in project flink by apache.

From the class KafkaConsumerThread, method run().

// ------------------------------------------------------------------------
@Override
public void run() {
    // early exit check
    if (!running) {
        return;
    }
    // this is the means to talk to FlinkKafkaConsumer's main thread
    final Handover handover = this.handover;
    // This method initializes the KafkaConsumer and guarantees it is torn down properly.
    // This is important, because the consumer has multi-threading issues,
    // including concurrent 'close()' calls.
    try {
        this.consumer = getConsumer(kafkaProperties);
    } catch (Throwable t) {
        handover.reportError(t);
        return;
    }
    // from here on, the consumer is guaranteed to be closed properly
    try {
        // register Kafka's very own metrics in Flink's metric reporters
        if (useMetrics) {
            // register Kafka metrics to Flink
            Map<MetricName, ? extends Metric> metrics = consumer.metrics();
            if (metrics == null) {
                // MapR's Kafka implementation returns null here.
                log.info("Consumer implementation does not support metrics");
            } else {
                // we have Kafka metrics, register them
                for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                    consumerMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
                    // TODO this metric is kept for compatibility purposes; should remove in the
                    // future
                    subtaskMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
                }
            }
        }
        // early exit check
        if (!running) {
            return;
        }
        // the latest bulk of records. May carry across the loop if the thread is woken up
        // from blocking on the handover
        ConsumerRecords<byte[], byte[]> records = null;
        // reused variable to hold found unassigned new partitions.
        // found partitions are not carried across loops using this variable;
        // they are carried across via re-adding them to the unassigned partitions queue
        List<KafkaTopicPartitionState<T, TopicPartition>> newPartitions;
        // main fetch loop
        while (running) {
            // check if there is something to commit
            if (!commitInProgress) {
                // get and reset the work-to-be committed, so we don't repeatedly commit the
                // same
                final Tuple2<Map<TopicPartition, OffsetAndMetadata>, KafkaCommitCallback> commitOffsetsAndCallback = nextOffsetsToCommit.getAndSet(null);
                if (commitOffsetsAndCallback != null) {
                    log.debug("Sending async offset commit request to Kafka broker");
                    // also record that a commit is already in progress
                    // the order here matters! first set the flag, then send the commit command.
                    commitInProgress = true;
                    consumer.commitAsync(commitOffsetsAndCallback.f0, new CommitCallback(commitOffsetsAndCallback.f1));
                }
            }
            try {
                if (hasAssignedPartitions) {
                    newPartitions = unassignedPartitionsQueue.pollBatch();
                } else {
                    // if no assigned partitions block until we get at least one
                    // instead of hot spinning this loop. We rely on a fact that
                    // unassignedPartitionsQueue will be closed on a shutdown, so
                    // we don't block indefinitely
                    newPartitions = unassignedPartitionsQueue.getBatchBlocking();
                }
                if (newPartitions != null) {
                    reassignPartitions(newPartitions);
                }
            } catch (AbortedReassignmentException e) {
                continue;
            }
            if (!hasAssignedPartitions) {
                // Without assigned partitions KafkaConsumer.poll will throw an exception
                continue;
            }
            // get the next batch of records, unless we did not manage to hand the old batch over
            if (records == null) {
                try {
                    records = consumer.poll(pollTimeout);
                } catch (WakeupException we) {
                    continue;
                }
            }
            try {
                handover.produce(records);
                records = null;
            } catch (Handover.WakeupException e) {
            // fall through the loop
            }
        }
    // end main fetch loop
    } catch (Throwable t) {
        // let the main thread know and exit
        // it may be that this exception comes because the main thread closed the handover, in
        // which case the below reporting is irrelevant, but does not hurt either
        handover.reportError(t);
    } finally {
        // make sure the handover is closed if it is not already closed or has an error
        handover.close();
        // make sure the KafkaConsumer is closed
        try {
            consumer.close();
        } catch (Throwable t) {
            log.warn("Error while closing Kafka consumer", t);
        }
    }
}
Also used : OffsetCommitCallback(org.apache.kafka.clients.consumer.OffsetCommitCallback) WakeupException(org.apache.kafka.common.errors.WakeupException) MetricName(org.apache.kafka.common.MetricName) KafkaMetricWrapper(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper) HashMap(java.util.HashMap) Map(java.util.Map)
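
The comment in the fetch loop ("the order here matters! first set the flag, then send the commit command") describes a small handshake: commitInProgress is raised before commitAsync is issued, and the completion callback is the only place that lowers it, so the loop never stacks overlapping commit requests. A condensed sketch of that handshake, with illustrative names and without the error forwarding done by the real CommitCallback, is given below.

import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

// Sketch of the async-commit handshake used by the fetch loop above (names illustrative).
class AsyncCommitHandshakeSketch {

    // offsets handed over by the checkpointing thread; getAndSet(null) claims them exactly once
    private final AtomicReference<Map<TopicPartition, OffsetAndMetadata>> nextOffsetsToCommit =
            new AtomicReference<>();

    private volatile boolean commitInProgress;

    void maybeCommit(KafkaConsumer<byte[], byte[]> consumer) {
        if (commitInProgress) {
            // a previous commit is still in flight; try again on the next loop iteration
            return;
        }
        Map<TopicPartition, OffsetAndMetadata> toCommit = nextOffsetsToCommit.getAndSet(null);
        if (toCommit != null) {
            // order matters: raise the flag before sending the commit command
            commitInProgress = true;
            consumer.commitAsync(toCommit, (offsets, exception) -> {
                // the callback is the only place the flag is cleared again
                commitInProgress = false;
                if (exception != null) {
                    System.err.println("Async offset commit failed: " + exception.getMessage());
                }
            });
        }
    }
}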

Aggregations

Map (java.util.Map) 3
KafkaMetricWrapper (org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper) 3
MetricName (org.apache.kafka.common.MetricName) 3
HashMap (java.util.HashMap) 2
OffsetCommitCallback (org.apache.kafka.clients.consumer.OffsetCommitCallback) 2
WakeupException (org.apache.kafka.common.errors.WakeupException) 2
ArrayList (java.util.ArrayList) 1
Collections (java.util.Collections) 1
Comparator (java.util.Comparator) 1
List (java.util.List) 1
Objects.requireNonNull (java.util.Objects.requireNonNull) 1
Properties (java.util.Properties) 1
Internal (org.apache.flink.annotation.Internal) 1
VisibleForTesting (org.apache.flink.annotation.VisibleForTesting) 1
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 1
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext) 1
RuntimeContextInitializationContextAdapters (org.apache.flink.api.common.serialization.RuntimeContextInitializationContextAdapters) 1
ClosureCleaner (org.apache.flink.api.java.ClosureCleaner) 1
Configuration (org.apache.flink.configuration.Configuration) 1
MetricGroup (org.apache.flink.metrics.MetricGroup) 1