Example 11 with MetricGroup

the class CheckpointStatsTrackerTest method testMetrics.

/** Tests the registered metrics. */
public void testMetrics() throws Exception {
    MetricGroup metricGroup = mock(MetricGroup.class);
    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getJobVertexId()).thenReturn(new JobVertexID());
    new CheckpointStatsTracker(0, Collections.singletonList(jobVertex), mock(JobSnapshottingSettings.class), metricGroup);
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_IN_PROGRESS_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_COMPLETED_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_FAILED_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_RESTORED_CHECKPOINT_TIMESTAMP_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_SIZE_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_DURATION_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_ALIGNMENT_BUFFERED_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_EXTERNAL_PATH_METRIC), any(Gauge.class));
    // Make sure this test is adjusted when further metrics are added
    verify(metricGroup, times(9)).gauge(any(String.class), any(Gauge.class));
Also used : ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) MetricGroup(org.apache.flink.metrics.MetricGroup) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) Gauge(org.apache.flink.metrics.Gauge) Test(org.junit.Test)

Example 12 with MetricGroup

the class FlinkKafkaProducerBase method open.

// ----------------------------------- Utilities --------------------------
/** Initializes the connection to Kafka. */
public void open(Configuration configuration) {
    // Overview of the visible steps: obtain the Kafka producer from producerConfig, collect the
    // partition ids of the default topic when a partitioner is set, register the producer's
    // metrics as Flink gauges unless disabled, and install the send callback.
    // NOTE(review): this listing has lost its closing braces and parts of several statements;
    // the affected lines are marked below and should be checked against the original source.
    producer = getKafkaProducer(this.producerConfig);
    RuntimeContext ctx = getRuntimeContext();
    if (partitioner != null) {
        // the fetched list is immutable, so we're creating a mutable copy in order to sort it
        List<PartitionInfo> partitionsList = new ArrayList<>(producer.partitionsFor(defaultTopicId));
        // sort the partitions by partition id to make sure the fetched partition list is the same across subtasks
        Collections.sort(partitionsList, new Comparator<PartitionInfo>() {

            public int compare(PartitionInfo o1, PartitionInfo o2) {
                // NOTE(review): the return expression is truncated; presumably it compares
                // o1.partition() with o2.partition() — confirm.
                return, o2.partition());
        // copy the sorted partition ids into the partitions array
        partitions = new int[partitionsList.size()];
        for (int i = 0; i < partitions.length; i++) {
            partitions[i] = partitionsList.get(i).partition();
        // NOTE(review): the call that follows the loop has lost its receiver and first argument;
        // only the parallelism and the partitions array remain visible.
        }, ctx.getNumberOfParallelSubtasks(), partitions);
    // NOTE(review): the leading call expression of the next statement is missing
    // (presumably a log call, given the message format — confirm).
    }"Starting FlinkKafkaProducer ({}/{}) to produce into topic {}", ctx.getIndexOfThisSubtask() + 1, ctx.getNumberOfParallelSubtasks(), defaultTopicId);
    // register Kafka metrics to Flink accumulators
    // (metrics are registered unless KEY_DISABLE_METRICS is set to "true" in producerConfig)
    if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
        Map<MetricName, ? extends Metric> metrics = this.producer.metrics();
        if (metrics == null) {
            // MapR's Kafka implementation returns null here.
            // NOTE(review): the call expression preceding this message is missing (presumably a log call).
  "Producer implementation does not support metrics");
        } else {
            // each Kafka metric is exposed as a gauge, named after the metric, in the "KafkaProducer" group
            final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
            for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
    // flushing on checkpoint is switched off when the runtime context reports checkpointing as disabled
    if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
        LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
        flushOnCheckpoint = false;
    // choose the send callback: either only log failures, or remember the first failure in asyncException
    // NOTE(review): the callback bodies may be missing trailing statements in this listing — confirm.
    if (logFailuresOnly) {
        callback = new Callback() {

            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
    } else {
        callback = new Callback() {

            public void onCompletion(RecordMetadata metadata, Exception exception) {
                // only the first exception is kept; later ones do not overwrite it
                if (exception != null && asyncException == null) {
                    asyncException = exception;
Also used : StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) ArrayList(java.util.ArrayList) MetricGroup(org.apache.flink.metrics.MetricGroup) RecordMetadata(org.apache.kafka.clients.producer.RecordMetadata) MetricName(org.apache.kafka.common.MetricName) Callback(org.apache.kafka.clients.producer.Callback) KafkaMetricWrapper(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper) PartitionInfo(org.apache.kafka.common.PartitionInfo) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) Map(java.util.Map)

Example 13 with MetricGroup

the class AbstractFetcher method addOffsetStateGauge.

// ------------------------- Metrics ----------------------------------
/**
 * Adds current and committed offsets to the metric group.
 *
 * @param metricGroup The metric group to use
 */
protected void addOffsetStateGauge(MetricGroup metricGroup) {
    // add current offsets to gage
    MetricGroup currentOffsets = metricGroup.addGroup("current-offsets");
    MetricGroup committedOffsets = metricGroup.addGroup("committed-offsets");
    for (KafkaTopicPartitionState<?> ktp : subscribedPartitionStates()) {
        currentOffsets.gauge(ktp.getTopic() + "-" + ktp.getPartition(), new OffsetGauge(ktp, OffsetGaugeType.CURRENT_OFFSET));
        committedOffsets.gauge(ktp.getTopic() + "-" + ktp.getPartition(), new OffsetGauge(ktp, OffsetGaugeType.COMMITTED_OFFSET));
Also used : MetricGroup(org.apache.flink.metrics.MetricGroup)

Example 14 with MetricGroup

the class CollectionExecutor method executeUnaryOperator.

private <IN, OUT> List<OUT> executeUnaryOperator(SingleInputOperator<?, ?, ?> operator, int superStep) throws Exception {
    Operator<?> inputOp = operator.getInput();
    if (inputOp == null) {
        throw new InvalidProgramException("The unary operation " + operator.getName() + " has no input.");
    @SuppressWarnings("unchecked") List<IN> inputData = (List<IN>) execute(inputOp, superStep);
    @SuppressWarnings("unchecked") SingleInputOperator<IN, OUT, ?> typedOp = (SingleInputOperator<IN, OUT, ?>) operator;
    // build the runtime context and compute broadcast variables, if necessary
    TaskInfo taskInfo = new TaskInfo(typedOp.getName(), 1, 0, 1, 0);
    RuntimeUDFContext ctx;
    MetricGroup metrics = new UnregisteredMetricsGroup();
    if (RichFunction.class.isAssignableFrom(typedOp.getUserCodeWrapper().getUserCodeClass())) {
        ctx = superStep == 0 ? new RuntimeUDFContext(taskInfo, classLoader, executionConfig, cachedFiles, accumulators, metrics) : new IterationRuntimeUDFContext(taskInfo, classLoader, executionConfig, cachedFiles, accumulators, metrics);
        for (Map.Entry<String, Operator<?>> bcInputs : operator.getBroadcastInputs().entrySet()) {
            List<?> bcData = execute(bcInputs.getValue());
            ctx.setBroadcastVariable(bcInputs.getKey(), bcData);
    } else {
        ctx = null;
    return typedOp.executeOnCollections(inputData, ctx, executionConfig);
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) MetricGroup(org.apache.flink.metrics.MetricGroup) TaskInfo(org.apache.flink.api.common.TaskInfo) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 15 with MetricGroup

the class Kafka08Fetcher method runFetchLoop.

// ------------------------------------------------------------------------
//  Main Work Loop
// ------------------------------------------------------------------------
public void runFetchLoop() throws Exception {
    // Overview of the visible steps: resolve sentinel start offsets (group offsets are read via the
    // ZookeeperOffsetHandler), optionally create a periodic offset committer, then repeatedly take
    // unassigned partitions from the queue and hand them to one consumer thread per leader broker;
    // the finally section stops and joins all spawned threads.
    // NOTE(review): this listing has lost its closing braces and a number of statements; several
    // bodies below appear empty or truncated and should be checked against the original source.
    // the map from broker to the thread that is connected to that broker
    final Map<Node, SimpleConsumerThread<T>> brokerToThread = new HashMap<>();
    // this holds possible the exceptions from the concurrent broker connection threads
    final ExceptionProxy errorHandler = new ExceptionProxy(Thread.currentThread());
    // the offset handler handles the communication with ZooKeeper, to commit externally visible offsets
    final ZookeeperOffsetHandler zookeeperOffsetHandler = new ZookeeperOffsetHandler(kafkaConfig);
    this.zookeeperOffsetHandler = zookeeperOffsetHandler;
    PeriodicOffsetCommitter periodicCommitter = null;
    try {
        // partition offsets may still hold sentinel values at this point; replace those with
        // actual offsets, according to what each sentinel value represents.
        for (KafkaTopicPartitionState<TopicAndPartition> partition : subscribedPartitionStates()) {
            if (partition.getOffset() == KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET) {
                // this will be replaced by an actual offset in SimpleConsumerThread
            } else if (partition.getOffset() == KafkaTopicPartitionStateSentinel.LATEST_OFFSET) {
                // this will be replaced by an actual offset in SimpleConsumerThread
            } else if (partition.getOffset() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
                Long committedOffset = zookeeperOffsetHandler.getCommittedOffset(partition.getKafkaTopicPartition());
                if (committedOffset != null) {
                    // the committed offset in ZK represents the next record to process,
                    // so we subtract it by 1 to correctly represent internal state
                    partition.setOffset(committedOffset - 1);
                } else {
                    // if we can't find an offset for a partition in ZK when using GROUP_OFFSETS,
                    // we default to "auto.offset.reset" like the Kafka high-level consumer
                    // NOTE(review): only the warning is visible here; any statement that applies the
                    // fallback offset is not shown in this listing.
                    LOG.warn("No group offset can be found for partition {} in Zookeeper;" + " resetting starting offset to 'auto.offset.reset'", partition);
            } else {
            // the partition already has a specific start offset and is ready to be consumed
        // start the periodic offset committer thread, if necessary
        if (autoCommitInterval > 0) {
            // NOTE(review): the call expression preceding this message is missing (presumably a log call).
  "Starting periodic offset committer, with commit interval of {}ms", autoCommitInterval);
            periodicCommitter = new PeriodicOffsetCommitter(zookeeperOffsetHandler, subscribedPartitionStates(), errorHandler, autoCommitInterval);
            periodicCommitter.setName("Periodic Kafka partition offset committer");
            // NOTE(review): no statement starting the committer is visible in this listing.
        // register offset metrics
        if (useMetrics) {
            // NOTE(review): the group is created but no use of it is visible in this listing.
            final MetricGroup kafkaMetricGroup = runtimeContext.getMetricGroup().addGroup("KafkaConsumer");
        // Main loop polling elements from the unassignedPartitions queue to the threads
        while (running) {
            // re-throw any exception from the concurrent fetcher threads
            // NOTE(review): the statement this comment refers to is not visible in this listing.
            // wait for max 5 seconds trying to get partitions to assign
            // if threads shut down, this poll returns earlier, because the threads inject the
            // special marker into the queue
            List<KafkaTopicPartitionState<TopicAndPartition>> partitionsToAssign = unassignedPartitionsQueue.getBatchBlocking(5000);
            if (!partitionsToAssign.isEmpty()) {
                // NOTE(review): the call expression preceding this message is missing (presumably a log call).
      "Assigning {} partitions to broker threads", partitionsToAssign.size());
                Map<Node, List<KafkaTopicPartitionState<TopicAndPartition>>> partitionsWithLeaders = findLeaderForPartitions(partitionsToAssign, kafkaConfig);
                // assign the partitions to the leaders (maybe start the threads)
                for (Map.Entry<Node, List<KafkaTopicPartitionState<TopicAndPartition>>> partitionsWithLeader : partitionsWithLeaders.entrySet()) {
                    final Node leader = partitionsWithLeader.getKey();
                    final List<KafkaTopicPartitionState<TopicAndPartition>> partitions = partitionsWithLeader.getValue();
                    SimpleConsumerThread<T> brokerThread = brokerToThread.get(leader);
                    // NOTE(review): the body of this check is not visible in this listing.
                    if (!running) {
                    if (brokerThread == null || !brokerThread.getNewPartitionsQueue().isOpen()) {
                        // start new thread
                        brokerThread = createAndStartSimpleConsumerThread(partitions, leader, errorHandler);
                        brokerToThread.put(leader, brokerThread);
                    } else {
                        // put elements into queue of thread
                        ClosableBlockingQueue<KafkaTopicPartitionState<TopicAndPartition>> newPartitionsQueue = brokerThread.getNewPartitionsQueue();
                        for (KafkaTopicPartitionState<TopicAndPartition> fp : partitions) {
                            if (!newPartitionsQueue.addIfOpen(fp)) {
                                // we were unable to add the partition to the broker's queue
                                // the broker has closed in the meantime (the thread will shut down)
                                // create a new thread for connecting to this broker
                                // NOTE(review): as shown, seedPartitions is passed on empty; a statement
                                // adding fp to it may be missing from this listing — confirm.
                                List<KafkaTopicPartitionState<TopicAndPartition>> seedPartitions = new ArrayList<>();
                                brokerThread = createAndStartSimpleConsumerThread(seedPartitions, leader, errorHandler);
                                brokerToThread.put(leader, brokerThread);
                                // update queue for the subsequent partitions
                                newPartitionsQueue = brokerThread.getNewPartitionsQueue();
            } else {
                // there were no partitions to assign. Check if any broker threads shut down.
                // we get into this section of the code, if either the poll timed out, or the
                // blocking poll was woken up by the marker element
                Iterator<SimpleConsumerThread<T>> bttIterator = brokerToThread.values().iterator();
                while (bttIterator.hasNext()) {
                    // NOTE(review): the right-hand side of this assignment is truncated
                    // (presumably the iterator's next element — confirm).
                    SimpleConsumerThread<T> thread =;
                    if (!thread.getNewPartitionsQueue().isOpen()) {
                        // NOTE(review): the call expression preceding this message is missing, and no
                        // removal from the iterator is visible in this listing.
              "Removing stopped consumer thread {}", thread.getName());
            // stop once no broker threads remain and the unassigned-partitions queue could be closed
            if (brokerToThread.size() == 0 && unassignedPartitionsQueue.isEmpty()) {
                if (unassignedPartitionsQueue.close()) {
                    // NOTE(review): the call expression preceding this message is missing, and no
                    // statement leaving the loop is visible in this listing.
          "All consumer threads are finished, there are no more unassigned partitions. Stopping fetcher");
            // we end up here if somebody added something to the queue in the meantime --> continue to poll queue again
    } catch (InterruptedException e) {
        // this may be thrown because an exception on one of the concurrent fetcher threads
        // woke this thread up. make sure we throw the root exception instead in that case
        // NOTE(review): the statement that would surface the root exception is not visible here.
        // no other root exception, throw the interrupted exception
        throw e;
    } finally {
        this.running = false;
        this.zookeeperOffsetHandler = null;
        // if we run a periodic committer thread, shut that down
        // NOTE(review): the body of this check is not visible in this listing.
        if (periodicCommitter != null) {
        // clear the interruption flag
        // this allows the joining on consumer threads (on best effort) to happen in
        // case the initial interrupt already
        // make sure that in any case (completion, abort, error), all spawned threads are stopped
        try {
            int runningThreads;
            do {
                // check whether threads are alive and cancel them
                runningThreads = 0;
                Iterator<SimpleConsumerThread<T>> threads = brokerToThread.values().iterator();
                while (threads.hasNext()) {
                    // NOTE(review): the right-hand side of this assignment is truncated
                    // (presumably the iterator's next element — confirm).
                    SimpleConsumerThread<?> t =;
                    // NOTE(review): both branches below appear empty in this listing; the counting /
                    // cancelling / removal statements are not shown.
                    if (t.isAlive()) {
                    } else {
                // wait for the threads to finish, before issuing a cancel call again
                if (runningThreads > 0) {
                    for (SimpleConsumerThread<?> t : brokerToThread.values()) {
                        // the 500 ms budget is split across the running threads (plus 1 ms each)
                        t.join(500 / runningThreads + 1);
            } while (runningThreads > 0);
        } catch (InterruptedException ignored) {
            // waiting for the thread shutdown apparently got interrupted
            // restore interrupted state and continue
            // NOTE(review): the statement restoring the interrupt flag is not visible in this listing.
        } catch (Throwable t) {
            // we catch all here to preserve the original exception
            LOG.error("Exception while shutting down consumer threads", t);
        // NOTE(review): the body of this try is not visible; judging by the error message below it
        // presumably shuts down the ZookeeperOffsetHandler — confirm.
        try {
        } catch (Throwable t) {
            // we catch all here to preserve the original exception
            LOG.error("Exception while shutting down ZookeeperOffsetHandler", t);
Also used : HashMap(java.util.HashMap) Node(org.apache.kafka.common.Node) MetricGroup(org.apache.flink.metrics.MetricGroup) ArrayList(java.util.ArrayList) TopicAndPartition(kafka.common.TopicAndPartition) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)


