Search in sources :

Example 1 with RawMetricType

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType in project cruise-control by linkedin.

the class PrometheusMetricSampler method retrieveMetricsForProcessing.

/**
 * Runs every configured Prometheus query over the sampling window and turns each returned result
 * into broker, topic, or partition metrics, depending on the scope of the queried metric type.
 *
 * @param metricSamplerOptions supplies the cluster and the start/end time of the sampling window.
 * @return the total of the counts reported by the per-scope add-metrics helpers.
 * @throws SamplingException if querying Prometheus fails with an {@link IOException}.
 */
@Override
protected int retrieveMetricsForProcessing(MetricSamplerOptions metricSamplerOptions) throws SamplingException {
    int addedTotal = 0;
    int skippedTotal = 0;
    for (Map.Entry<RawMetricType, String> queryByType : _metricToPrometheusQueryMap.entrySet()) {
        final RawMetricType type = queryByType.getKey();
        final String query = queryByType.getValue();

        final List<PrometheusQueryResult> queryResults;
        try {
            queryResults = _prometheusAdapter.queryMetric(query,
                                                          metricSamplerOptions.startTimeMs(),
                                                          metricSamplerOptions.endTimeMs());
        } catch (IOException e) {
            LOG.error("Error when attempting to query Prometheus metrics", e);
            throw new SamplingException("Could not query metrics from Prometheus");
        }

        for (PrometheusQueryResult queryResult : queryResults) {
            try {
                final int added;
                switch (type.metricScope()) {
                    case BROKER:
                        added = addBrokerMetrics(metricSamplerOptions.cluster(), type, queryResult);
                        break;
                    case TOPIC:
                        added = addTopicMetrics(metricSamplerOptions.cluster(), type, queryResult);
                        break;
                    case PARTITION:
                        added = addPartitionMetrics(metricSamplerOptions.cluster(), type, queryResult);
                        break;
                    default:
                        // Any other scope is not supported and contributes nothing.
                        added = 0;
                        break;
                }
                addedTotal += added;
            } catch (InvalidPrometheusResultException e) {
                // Invalid or malformed Prometheus results are skipped on purpose: for example a result whose
                // hostname cannot be matched to any broker, or one whose topic name is null. Such records are
                // simply not converted to metrics. This is legitimate when, for instance, one Prometheus server
                // stores metrics from several Kafka clusters, so a hostname may not belong to this cluster.
                //
                // Because this can happen very often, it is logged at trace level only.
                LOG.trace("Invalid query result received from Prometheus for query {}", query, e);
                skippedTotal++;
            }
        }
    }
    LOG.info("Added {} metric values. Skipped {} invalid query results.", addedTotal, skippedTotal);
    return addedTotal;
}
Also used : RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) PrometheusQueryResult(com.linkedin.kafka.cruisecontrol.monitor.sampling.prometheus.model.PrometheusQueryResult) IOException(java.io.IOException) SamplingException(com.linkedin.kafka.cruisecontrol.exception.SamplingException) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with RawMetricType

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType in project cruise-control by linkedin.

the class RawMetricsHolder method recordCruiseControlMetric.

/**
 * Records the value and time of the given Cruise Control metric in the value holder kept for its
 * raw metric type, creating that holder first if none exists yet.
 *
 * @param ccm the {@link CruiseControlMetric} to record.
 */
void recordCruiseControlMetric(CruiseControlMetric ccm) {
    RawMetricType type = ccm.rawMetricType();
    // The mapping function receives the same key that is being looked up.
    ValueHolder holderForType = _rawMetricsByType.computeIfAbsent(type, absentType -> getValueHolderFor(absentType));
    double metricValue = ccm.value();
    long metricTime = ccm.time();
    holderForType.recordValue(metricValue, metricTime);
}
Also used : RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType)

Example 3 with RawMetricType

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType in project cruise-control by linkedin.

the class SamplingUtils method buildBrokerMetricSample.

/**
 * Builds a {@link BrokerMetricSample} for the given node, fills it with the broker's metrics, closes it
 * at the given timestamp, and returns it.
 *
 * @param node Node hosting the broker.
 * @param brokerLoadById Broker load information keyed by broker id.
 * @param maxMetricTimestamp Maximum timestamp of the sampled metric during the sampling process; used to
 *                           close the sample.
 * @return The populated broker metric sample, or {@code null} if sample generation is skipped.
 */
static BrokerMetricSample buildBrokerMetricSample(Node node, Map<Integer, BrokerLoad> brokerLoadById, long maxMetricTimestamp) throws UnknownVersionException {
    BrokerLoad load = brokerLoadById.get(node.id());
    if (skipBuildingBrokerMetricSample(load, node.id())) {
        return null;
    }
    MetricDef metricDef = KafkaMetricDef.brokerMetricDef();
    BrokerMetricSample sample = new BrokerMetricSample(node.host(), node.id(), load.brokerSampleDeserializationVersion());
    for (Map.Entry<Byte, Set<RawMetricType>> typesByVersion : RawMetricType.brokerMetricTypesDiffByVersion().entrySet()) {
        for (RawMetricType type : typesByVersion.getValue()) {
            // Every broker metric type must be reported (nullable values included); otherwise the broker is skipped.
            if (!load.brokerMetricAvailable(type)) {
                LOG.warn("{}broker {} because it does not have {} metrics (serde version {}) or the metrics are inconsistent.", SKIP_BUILDING_SAMPLE_PREFIX, node.id(), type, typesByVersion.getKey());
                return null;
            }
            String metricName = KafkaMetricDef.forRawMetricType(type).name();
            MetricInfo info = metricDef.metricInfo(metricName);
            double value = load.brokerMetric(type);
            sample.record(info, value);
        }
    }
    // Disk usage is not among the broker raw metric types, so it is recorded separately.
    MetricInfo diskUsageInfo = metricDef.metricInfo(KafkaMetricDef.DISK_USAGE.name());
    sample.record(diskUsageInfo, load.diskUsage());
    sample.close(maxMetricTimestamp);
    return sample;
}
Also used : Set(java.util.Set) BrokerLoad(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerLoad) RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) MetricInfo(com.linkedin.cruisecontrol.metricdef.MetricInfo) MetricDef(com.linkedin.cruisecontrol.metricdef.MetricDef) KafkaMetricDef(com.linkedin.kafka.cruisecontrol.monitor.metricdefinition.KafkaMetricDef) BrokerMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerMetricSample) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with RawMetricType

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType in project cruise-control by linkedin.

the class BrokerLoad method prepareBrokerMetrics.

/**
 * Because of the yammer metric exponential decaying mechanism, a broker metric and the sum of the
 * partition metrics on the same broker may differ by a lot. Experience shows that in that case the sum
 * of the topic/partition level metrics is more accurate, so the following metrics are replaced with the
 * sum of the topic/partition level metrics:
 * <ul>
 *   <li>BrokerProduceRate</li>
 *   <li>BrokerFetchRate</li>
 *   <li>BrokerLeaderBytesInRate</li>
 *   <li>BrokerLeaderBytesOutRate</li>
 *   <li>BrokerReplicationBytesInRate</li>
 *   <li>BrokerReplicationBytesOutRate</li>
 *   <li>BrokerMessagesInRate</li>
 * </ul>
 *
 * The cluster metadata is used to check whether the reported topic level metrics are complete. If they
 * are not, the broker metric sample is ignored by setting the _minRequiredBrokerMetricsAvailable flag
 * to false.
 *
 * @param cluster The Kafka cluster.
 * @param brokerId The broker id to prepare metrics for.
 * @param time The last sample time.
 */
public void prepareBrokerMetrics(Cluster cluster, int brokerId, long time) {
    boolean hasEnoughTopicPartitionMetrics = enoughTopicPartitionMetrics(cluster, brokerId);
    // Topic level sums are only computed when enough topic level metrics were reported.
    if (hasEnoughTopicPartitionMetrics) {
        Map<RawMetricType, Double> topicSums = new HashMap<>();
        for (String topic : _dotHandledTopicsWithPartitionSizeReported) {
            for (RawMetricType topicType : METRIC_TYPES_TO_SUM.keySet()) {
                double topicValue = topicMetrics(topic, topicType, false);
                // Start from zero the first time a type is seen, then keep accumulating.
                double runningSum = topicSums.getOrDefault(topicType, 0.0);
                topicSums.put(topicType, runningSum + topicValue);
            }
        }
        for (Map.Entry<RawMetricType, Double> sumByType : topicSums.entrySet()) {
            double total = sumByType.getValue();
            // METRIC_TYPES_TO_SUM maps the summed topic metric type to the broker metric type it overrides.
            _brokerMetrics.setRawMetricValue(METRIC_TYPES_TO_SUM.get(sumByType.getKey()), total, time);
        }
    }
    // Check if all the broker raw metrics are available.
    maybeSetBrokerRawMetrics(cluster, brokerId, time);
    // The broker metrics count as available only with enough valid topic metrics and with nothing
    // recorded in _missingBrokerMetricsInMinSupportedVersion.
    _minRequiredBrokerMetricsAvailable = hasEnoughTopicPartitionMetrics && _missingBrokerMetricsInMinSupportedVersion.isEmpty();
}
Also used : HashMap(java.util.HashMap) RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with RawMetricType

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType in project cruise-control by linkedin.

the class BrokerLoad method recordMetric.

/**
 * Routes the given Cruise Control metric to the holder that matches the scope of its raw metric type:
 * the broker-level holder, the holder of its topic, or the holder of its topic partition.
 *
 * @param ccm Cruise Control metric.
 * @throws IllegalStateException if the metric scope is not one of BROKER, TOPIC, or PARTITION.
 */
public void recordMetric(CruiseControlMetric ccm) {
    RawMetricType type = ccm.rawMetricType();
    switch (type.metricScope()) {
        case BROKER: {
            _brokerMetrics.recordCruiseControlMetric(ccm);
            break;
        }
        case TOPIC: {
            TopicMetric topicMetric = (TopicMetric) ccm;
            // Create the per-topic holder lazily on first use.
            RawMetricsHolder topicHolder =
                _dotHandledTopicMetrics.computeIfAbsent(topicMetric.topic(), topic -> new RawMetricsHolder());
            topicHolder.recordCruiseControlMetric(ccm);
            break;
        }
        case PARTITION: {
            PartitionMetric partitionMetric = (PartitionMetric) ccm;
            TopicPartition topicPartition = new TopicPartition(partitionMetric.topic(), partitionMetric.partition());
            // Create the per-partition holder lazily on first use.
            RawMetricsHolder partitionHolder =
                _dotHandledPartitionMetrics.computeIfAbsent(topicPartition, partition -> new RawMetricsHolder());
            partitionHolder.recordCruiseControlMetric(ccm);
            // Remember the topic as one for which a partition-scope metric has been recorded.
            _dotHandledTopicsWithPartitionSizeReported.add(partitionMetric.topic());
            break;
        }
        default:
            throw new IllegalStateException(String.format("Should never be here. Unrecognized metric scope %s", type.metricScope()));
    }
}
Also used : PartitionMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.PartitionMetric) HolderUtils.allowMissingBrokerMetric(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.allowMissingBrokerMetric) TopicPartition(org.apache.kafka.common.TopicPartition) Logger(org.slf4j.Logger) MetricScope(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType.MetricScope) TopicMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.TopicMetric) HolderUtils.convertUnit(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.convertUnit) MISSING_BROKER_METRIC_VALUE(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.MISSING_BROKER_METRIC_VALUE) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) HashMap(java.util.HashMap) PartitionInfo(org.apache.kafka.common.PartitionInfo) SamplingUtils.replaceDotsWithUnderscores(com.linkedin.kafka.cruisecontrol.monitor.sampling.SamplingUtils.replaceDotsWithUnderscores) Utils.validateNotNull(com.linkedin.cruisecontrol.common.utils.Utils.validateNotNull) HashSet(java.util.HashSet) List(java.util.List) Cluster(org.apache.kafka.common.Cluster) RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HolderUtils.sanityCheckMetricScope(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.sanityCheckMetricScope) Map(java.util.Map) CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) METRIC_TYPES_TO_SUM(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.METRIC_TYPES_TO_SUM) Collections(java.util.Collections) RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) TopicPartition(org.apache.kafka.common.TopicPartition) PartitionMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.PartitionMetric) 
TopicMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.TopicMetric)

Aggregations

RawMetricType (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType)10 HashMap (java.util.HashMap)6 HashSet (java.util.HashSet)4 Map (java.util.Map)4 CruiseControlMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric)3 TopicMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.TopicMetric)3 MetricDef (com.linkedin.cruisecontrol.metricdef.MetricDef)2 MetricInfo (com.linkedin.cruisecontrol.metricdef.MetricInfo)2 PartitionMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.PartitionMetric)2 KafkaMetricDef (com.linkedin.kafka.cruisecontrol.monitor.metricdefinition.KafkaMetricDef)2 MetricSamplerOptions (com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricSamplerOptions)2 BrokerLoad (com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerLoad)2 PartitionMetricSample (com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample)2 Set (java.util.Set)2 Cluster (org.apache.kafka.common.Cluster)2 TopicPartition (org.apache.kafka.common.TopicPartition)2 Test (org.junit.Test)2 Utils.validateNotNull (com.linkedin.cruisecontrol.common.utils.Utils.validateNotNull)1 SamplingException (com.linkedin.kafka.cruisecontrol.exception.SamplingException)1 BrokerMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.BrokerMetric)1