Use of org.apache.flink.metrics.MetricGroup in project flink by apache.
The class CheckpointStatsTrackerTest, method testMetrics.
/**
 * Tests the registered metrics.
 */
@Test
public void testMetrics() throws Exception {
    MetricGroup metricGroup = mock(MetricGroup.class);

    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getJobVertexId()).thenReturn(new JobVertexID());
    when(jobVertex.getParallelism()).thenReturn(1);

    new CheckpointStatsTracker(0, Collections.singletonList(jobVertex), mock(JobSnapshottingSettings.class), metricGroup);

    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_IN_PROGRESS_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_COMPLETED_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_FAILED_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_RESTORED_CHECKPOINT_TIMESTAMP_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_SIZE_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_DURATION_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_ALIGNMENT_BUFFERED_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_EXTERNAL_PATH_METRIC), any(Gauge.class));

    // make sure this test is adjusted when further metrics are added
    verify(metricGroup, times(9)).gauge(any(String.class), any(Gauge.class));
}
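For context, the MetricGroup.gauge call that this test verifies is the same API a user function uses to register its own gauges. The following is a minimal sketch of registering a gauge from a RichMapFunction; the class name, gauge name, and counting logic are illustrative and not taken from the Flink sources above.

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Gauge;

// Illustrative example: registers a Gauge named "recordsSeen" on this task's metric group.
public class GaugeRegisteringMapper extends RichMapFunction<String, String> {

    private transient long recordsSeen;

    @Override
    public void open(Configuration parameters) {
        // getMetricGroup() returns the MetricGroup scoped to this operator/task
        getRuntimeContext().getMetricGroup().gauge("recordsSeen", new Gauge<Long>() {
            @Override
            public Long getValue() {
                return recordsSeen;
            }
        });
    }

    @Override
    public String map(String value) {
        recordsSeen++;
        return value;
    }
}

The gauge is polled lazily by the metric reporters, so getValue() should be cheap and side-effect free.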
Use of org.apache.flink.metrics.MetricGroup in project flink by apache.
The class FlinkKafkaProducerBase, method open.
// ----------------------------------- Utilities --------------------------
/**
 * Initializes the connection to Kafka.
 */
@Override
public void open(Configuration configuration) {
    producer = getKafkaProducer(this.producerConfig);

    RuntimeContext ctx = getRuntimeContext();

    if (partitioner != null) {
        // the fetched list is immutable, so we're creating a mutable copy in order to sort it
        List<PartitionInfo> partitionsList = new ArrayList<>(producer.partitionsFor(defaultTopicId));

        // sort the partitions by partition id to make sure the fetched partition list is the same across subtasks
        Collections.sort(partitionsList, new Comparator<PartitionInfo>() {
            @Override
            public int compare(PartitionInfo o1, PartitionInfo o2) {
                return Integer.compare(o1.partition(), o2.partition());
            }
        });

        partitions = new int[partitionsList.size()];
        for (int i = 0; i < partitions.length; i++) {
            partitions[i] = partitionsList.get(i).partition();
        }

        partitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks(), partitions);
    }

    LOG.info("Starting FlinkKafkaProducer ({}/{}) to produce into topic {}", ctx.getIndexOfThisSubtask() + 1, ctx.getNumberOfParallelSubtasks(), defaultTopicId);

    // register Kafka metrics to Flink accumulators
    if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
        Map<MetricName, ? extends Metric> metrics = this.producer.metrics();

        if (metrics == null) {
            // MapR's Kafka implementation returns null here.
            LOG.info("Producer implementation does not support metrics");
        } else {
            final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
            for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
            }
        }
    }

    if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
        LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
        flushOnCheckpoint = false;
    }

    if (logFailuresOnly) {
        callback = new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
                }
                acknowledgeMessage();
            }
        };
    } else {
        callback = new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null && asyncException == null) {
                    asyncException = exception;
                }
                acknowledgeMessage();
            }
        };
    }
}
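The KafkaMetricWrapper used above adapts a Kafka-internal metric to Flink's Gauge interface. A minimal sketch of such an adapter could look like the following; it assumes the Kafka client's Metric interface exposes a numeric value() method (as the 0.8/0.9 clients do) and is not necessarily identical to Flink's own class.

import org.apache.flink.metrics.Gauge;
import org.apache.kafka.common.Metric;

// Sketch of an adapter that exposes a Kafka client metric as a Flink gauge.
public class KafkaMetricGaugeAdapter implements Gauge<Double> {

    private final Metric kafkaMetric;

    public KafkaMetricGaugeAdapter(Metric kafkaMetric) {
        this.kafkaMetric = kafkaMetric;
    }

    @Override
    public Double getValue() {
        // value() reads the current value of the underlying Kafka metric each time the gauge is sampled
        return kafkaMetric.value();
    }
}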
Use of org.apache.flink.metrics.MetricGroup in project flink by apache.
The class AbstractFetcher, method addOffsetStateGauge.
// ------------------------- Metrics ----------------------------------
/**
 * Adds current and committed offsets to the metric group.
 *
 * @param metricGroup The metric group to use
 */
protected void addOffsetStateGauge(MetricGroup metricGroup) {
    // add current offsets to gauge
    MetricGroup currentOffsets = metricGroup.addGroup("current-offsets");
    MetricGroup committedOffsets = metricGroup.addGroup("committed-offsets");

    for (KafkaTopicPartitionState<?> ktp : subscribedPartitionStates()) {
        currentOffsets.gauge(ktp.getTopic() + "-" + ktp.getPartition(), new OffsetGauge(ktp, OffsetGaugeType.CURRENT_OFFSET));
        committedOffsets.gauge(ktp.getTopic() + "-" + ktp.getPartition(), new OffsetGauge(ktp, OffsetGaugeType.COMMITTED_OFFSET));
    }
}
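The OffsetGauge referenced above reads the current or committed offset from the partition state each time a reporter samples the metric. A hedged sketch of what such a gauge could look like is shown below; the view interface, field names, and accessor names are assumptions for illustration and are not copied from Flink's AbstractFetcher.

import org.apache.flink.metrics.Gauge;

// Illustrative sketch: a gauge that reports one offset of one partition on demand.
// PartitionOffsetView is a hypothetical read-only view of a partition's offsets.
public class PartitionOffsetGauge implements Gauge<Long> {

    public enum OffsetKind { CURRENT, COMMITTED }

    public interface PartitionOffsetView {
        long getCurrentOffset();
        long getCommittedOffset();
    }

    private final PartitionOffsetView partition;
    private final OffsetKind kind;

    public PartitionOffsetGauge(PartitionOffsetView partition, OffsetKind kind) {
        this.partition = partition;
        this.kind = kind;
    }

    @Override
    public Long getValue() {
        // the offset is read lazily, i.e. at the moment the metric reporter samples the gauge
        return kind == OffsetKind.CURRENT ? partition.getCurrentOffset() : partition.getCommittedOffset();
    }
}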
Use of org.apache.flink.metrics.MetricGroup in project flink by apache.
The class CollectionExecutor, method executeUnaryOperator.
private <IN, OUT> List<OUT> executeUnaryOperator(SingleInputOperator<?, ?, ?> operator, int superStep) throws Exception {
    Operator<?> inputOp = operator.getInput();
    if (inputOp == null) {
        throw new InvalidProgramException("The unary operation " + operator.getName() + " has no input.");
    }

    @SuppressWarnings("unchecked")
    List<IN> inputData = (List<IN>) execute(inputOp, superStep);

    @SuppressWarnings("unchecked")
    SingleInputOperator<IN, OUT, ?> typedOp = (SingleInputOperator<IN, OUT, ?>) operator;

    // build the runtime context and compute broadcast variables, if necessary
    TaskInfo taskInfo = new TaskInfo(typedOp.getName(), 1, 0, 1, 0);
    RuntimeUDFContext ctx;
    MetricGroup metrics = new UnregisteredMetricsGroup();

    if (RichFunction.class.isAssignableFrom(typedOp.getUserCodeWrapper().getUserCodeClass())) {
        ctx = superStep == 0 ?
                new RuntimeUDFContext(taskInfo, classLoader, executionConfig, cachedFiles, accumulators, metrics) :
                new IterationRuntimeUDFContext(taskInfo, classLoader, executionConfig, cachedFiles, accumulators, metrics);

        for (Map.Entry<String, Operator<?>> bcInputs : operator.getBroadcastInputs().entrySet()) {
            List<?> bcData = execute(bcInputs.getValue());
            ctx.setBroadcastVariable(bcInputs.getKey(), bcData);
        }
    } else {
        ctx = null;
    }

    return typedOp.executeOnCollections(inputData, ctx, executionConfig);
}
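The UnregisteredMetricsGroup used here is a no-op MetricGroup: user functions executed in collection mode can still register metrics through the runtime context, but nothing is ever attached to a reporter. A small sketch of that behavior follows, assuming the UnregisteredMetricsGroup class from flink-core as used in the snippet above; the gauge name and value are illustrative.

import org.apache.flink.metrics.Gauge;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.metrics.groups.UnregisteredMetricsGroup;

public class UnregisteredMetricsExample {

    public static void main(String[] args) {
        MetricGroup metrics = new UnregisteredMetricsGroup();

        Gauge<Integer> myGauge = new Gauge<Integer>() {
            @Override
            public Integer getValue() {
                return 42;
            }
        };

        // the registration call is accepted, but the gauge is never wired to any reporter
        metrics.gauge("collectionModeGauge", myGauge);

        // the gauge object itself keeps working locally
        System.out.println("gauge value: " + myGauge.getValue());
    }
}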
Use of org.apache.flink.metrics.MetricGroup in project flink by apache.
The class Kafka08Fetcher, method runFetchLoop.
// ------------------------------------------------------------------------
//  Main Work Loop
// ------------------------------------------------------------------------

@Override
public void runFetchLoop() throws Exception {
    // the map from broker to the thread that is connected to that broker
    final Map<Node, SimpleConsumerThread<T>> brokerToThread = new HashMap<>();

    // this holds the possible exceptions from the concurrent broker connection threads
    final ExceptionProxy errorHandler = new ExceptionProxy(Thread.currentThread());

    // the offset handler handles the communication with ZooKeeper, to commit externally visible offsets
    final ZookeeperOffsetHandler zookeeperOffsetHandler = new ZookeeperOffsetHandler(kafkaConfig);
    this.zookeeperOffsetHandler = zookeeperOffsetHandler;

    PeriodicOffsetCommitter periodicCommitter = null;
    try {
        // offsets may still be sentinel values if we have not received actual values yet;
        // replace those with actual offsets, according to what the sentinel values represent
        for (KafkaTopicPartitionState<TopicAndPartition> partition : subscribedPartitionStates()) {
            if (partition.getOffset() == KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET) {
                // this will be replaced by an actual offset in SimpleConsumerThread
                partition.setOffset(OffsetRequest.EarliestTime());
            } else if (partition.getOffset() == KafkaTopicPartitionStateSentinel.LATEST_OFFSET) {
                // this will be replaced by an actual offset in SimpleConsumerThread
                partition.setOffset(OffsetRequest.LatestTime());
            } else if (partition.getOffset() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
                Long committedOffset = zookeeperOffsetHandler.getCommittedOffset(partition.getKafkaTopicPartition());
                if (committedOffset != null) {
                    // the committed offset in ZK represents the next record to process,
                    // so we subtract 1 from it to correctly represent our internal state
                    partition.setOffset(committedOffset - 1);
                } else {
                    // if we can't find an offset for a partition in ZK when using GROUP_OFFSETS,
                    // we default to "auto.offset.reset" like the Kafka high-level consumer
                    LOG.warn("No group offset can be found for partition {} in Zookeeper;" + " resetting starting offset to 'auto.offset.reset'", partition);
                    partition.setOffset(invalidOffsetBehavior);
                }
            } else {
                // the partition already has a specific start offset and is ready to be consumed
            }
        }

        // start the periodic offset committer thread, if necessary
        if (autoCommitInterval > 0) {
            LOG.info("Starting periodic offset committer, with commit interval of {}ms", autoCommitInterval);

            periodicCommitter = new PeriodicOffsetCommitter(zookeeperOffsetHandler, subscribedPartitionStates(), errorHandler, autoCommitInterval);
            periodicCommitter.setName("Periodic Kafka partition offset committer");
            periodicCommitter.setDaemon(true);
            periodicCommitter.start();
        }

        // register offset metrics
        if (useMetrics) {
            final MetricGroup kafkaMetricGroup = runtimeContext.getMetricGroup().addGroup("KafkaConsumer");
            addOffsetStateGauge(kafkaMetricGroup);
        }

        // main loop polling elements from the unassignedPartitions queue to the threads
        while (running) {
            // re-throw any exception from the concurrent fetcher threads
            errorHandler.checkAndThrowException();

            // wait for max 5 seconds trying to get partitions to assign
            // if threads shut down, this poll returns earlier, because the threads inject the
            // special marker into the queue
            List<KafkaTopicPartitionState<TopicAndPartition>> partitionsToAssign = unassignedPartitionsQueue.getBatchBlocking(5000);
            partitionsToAssign.remove(MARKER);

            if (!partitionsToAssign.isEmpty()) {
                LOG.info("Assigning {} partitions to broker threads", partitionsToAssign.size());
                Map<Node, List<KafkaTopicPartitionState<TopicAndPartition>>> partitionsWithLeaders = findLeaderForPartitions(partitionsToAssign, kafkaConfig);

                // assign the partitions to the leaders (maybe start the threads)
                for (Map.Entry<Node, List<KafkaTopicPartitionState<TopicAndPartition>>> partitionsWithLeader : partitionsWithLeaders.entrySet()) {
                    final Node leader = partitionsWithLeader.getKey();
                    final List<KafkaTopicPartitionState<TopicAndPartition>> partitions = partitionsWithLeader.getValue();
                    SimpleConsumerThread<T> brokerThread = brokerToThread.get(leader);

                    if (!running) {
                        break;
                    }

                    if (brokerThread == null || !brokerThread.getNewPartitionsQueue().isOpen()) {
                        // start new thread
                        brokerThread = createAndStartSimpleConsumerThread(partitions, leader, errorHandler);
                        brokerToThread.put(leader, brokerThread);
                    } else {
                        // put elements into queue of thread
                        ClosableBlockingQueue<KafkaTopicPartitionState<TopicAndPartition>> newPartitionsQueue = brokerThread.getNewPartitionsQueue();

                        for (KafkaTopicPartitionState<TopicAndPartition> fp : partitions) {
                            if (!newPartitionsQueue.addIfOpen(fp)) {
                                // we were unable to add the partition to the broker's queue
                                // the broker has closed in the meantime (the thread will shut down)
                                // create a new thread for connecting to this broker
                                List<KafkaTopicPartitionState<TopicAndPartition>> seedPartitions = new ArrayList<>();
                                seedPartitions.add(fp);
                                brokerThread = createAndStartSimpleConsumerThread(seedPartitions, leader, errorHandler);
                                brokerToThread.put(leader, brokerThread);
                                // update queue for the subsequent partitions
                                newPartitionsQueue = brokerThread.getNewPartitionsQueue();
                            }
                        }
                    }
                }
            } else {
                // there were no partitions to assign. Check if any broker threads shut down.
                // we get into this section of the code if either the poll timed out, or the
                // blocking poll was woken up by the marker element
                Iterator<SimpleConsumerThread<T>> bttIterator = brokerToThread.values().iterator();
                while (bttIterator.hasNext()) {
                    SimpleConsumerThread<T> thread = bttIterator.next();
                    if (!thread.getNewPartitionsQueue().isOpen()) {
                        LOG.info("Removing stopped consumer thread {}", thread.getName());
                        bttIterator.remove();
                    }
                }
            }

            if (brokerToThread.size() == 0 && unassignedPartitionsQueue.isEmpty()) {
                if (unassignedPartitionsQueue.close()) {
                    LOG.info("All consumer threads are finished, there are no more unassigned partitions. Stopping fetcher");
                    break;
                }
                // we end up here if somebody added something to the queue in the meantime --> continue to poll the queue again
            }
        }
    } catch (InterruptedException e) {
        // this may be thrown because an exception on one of the concurrent fetcher threads
        // woke this thread up. make sure we throw the root exception instead in that case
        errorHandler.checkAndThrowException();

        // no other root exception, throw the interrupted exception
        throw e;
    } finally {
        this.running = false;
        this.zookeeperOffsetHandler = null;

        // if we run a periodic committer thread, shut that down
        if (periodicCommitter != null) {
            periodicCommitter.shutdown();
        }

        // clear the interruption flag
        // this allows the joining on consumer threads (on a best-effort basis) to happen in
        // case the initial interrupt has already occurred
        Thread.interrupted();

        // make sure that in any case (completion, abort, error), all spawned threads are stopped
        try {
            int runningThreads;
            do {
                // check whether threads are alive and cancel them
                runningThreads = 0;

                Iterator<SimpleConsumerThread<T>> threads = brokerToThread.values().iterator();
                while (threads.hasNext()) {
                    SimpleConsumerThread<?> t = threads.next();
                    if (t.isAlive()) {
                        t.cancel();
                        runningThreads++;
                    } else {
                        threads.remove();
                    }
                }

                // wait for the threads to finish, before issuing a cancel call again
                if (runningThreads > 0) {
                    for (SimpleConsumerThread<?> t : brokerToThread.values()) {
                        t.join(500 / runningThreads + 1);
                    }
                }
            } while (runningThreads > 0);
        } catch (InterruptedException ignored) {
            // waiting for the thread shutdown apparently got interrupted
            // restore interrupted state and continue
            Thread.currentThread().interrupt();
        } catch (Throwable t) {
            // we catch all here to preserve the original exception
            LOG.error("Exception while shutting down consumer threads", t);
        }

        try {
            zookeeperOffsetHandler.close();
        } catch (Throwable t) {
            // we catch all here to preserve the original exception
            LOG.error("Exception while shutting down ZookeeperOffsetHandler", t);
        }
    }
}
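As in the producer snippet earlier, addGroup is how connector-specific metrics are namespaced under the operator's metric group before individual gauges are registered. The following is a minimal sketch of building such a hierarchy from a rich function; the group and gauge names ("MyConnector", "current-offsets", "example-topic-0") are illustrative and not defined by Flink.

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Gauge;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.util.Collector;

// Illustrative example: nests a gauge under <operator scope>.MyConnector.current-offsets.
public class NestedMetricGroupExample extends RichFlatMapFunction<String, String> {

    private transient long lastOffset;

    @Override
    public void open(Configuration parameters) {
        MetricGroup connectorGroup = getRuntimeContext().getMetricGroup().addGroup("MyConnector");
        MetricGroup offsetsGroup = connectorGroup.addGroup("current-offsets");

        // one gauge per tracked "partition"; here just a single illustrative entry
        offsetsGroup.gauge("example-topic-0", new Gauge<Long>() {
            @Override
            public Long getValue() {
                return lastOffset;
            }
        });
    }

    @Override
    public void flatMap(String value, Collector<String> out) {
        lastOffset++;
        out.collect(value);
    }
}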