Search in sources :

Example 6 with CruiseControlMetric

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric in project cruise-control by linkedin.

the class CruiseControlMetricsReporter method sendCruiseControlMetric.

/**
 * Send a CruiseControlMetric to the Kafka topic.
 *
 * <p>The record is keyed by topic name for topic metrics and by the broker id (as a string) for every
 * other metric class, and it carries the metric's own time as the record timestamp. The outcome of the
 * send is observed in the producer callback: a failure is logged together with its cause and counted in
 * {@code _numMetricSendFailure}; it is not rethrown to the caller.
 *
 * @param ccm the Cruise Control metric to send.
 */
public void sendCruiseControlMetric(CruiseControlMetric ccm) {
    // Use topic name as key if existing so that the same sampler will be able to collect all the information
    // of a topic.
    String key = ccm.metricClassId() == CruiseControlMetric.MetricClassId.TOPIC_METRIC ? ((TopicMetric) ccm).topic() : Integer.toString(ccm.brokerId());
    // The partition argument is null, so partition selection is left to the producer.
    ProducerRecord<String, CruiseControlMetric> producerRecord = new ProducerRecord<>(_cruiseControlMetricsTopic, null, ccm.time(), key, ccm);
    LOG.debug("Sending Cruise Control metric {}.", ccm);
    _producer.send(producerRecord, new Callback() {

        @Override
        public void onCompletion(RecordMetadata recordMetadata, Exception e) {
            if (e != null) {
                // Pass the exception as the trailing argument so the logger records the failure cause and
                // stack trace instead of silently dropping it.
                LOG.warn("Failed to send Cruise Control metric {}", ccm, e);
                _numMetricSendFailure++;
            }
        }
    });
}
Also used : RecordMetadata(org.apache.kafka.clients.producer.RecordMetadata) CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) Callback(org.apache.kafka.clients.producer.Callback) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) TimeoutException(java.util.concurrent.TimeoutException) KafkaException(org.apache.kafka.common.KafkaException) TopicExistsException(org.apache.kafka.common.errors.TopicExistsException) CruiseControlMetricsReporterException(com.linkedin.kafka.cruisecontrol.metricsreporter.exception.CruiseControlMetricsReporterException) InterruptException(org.apache.kafka.common.errors.InterruptException) IOException(java.io.IOException) ConfigException(org.apache.kafka.common.config.ConfigException) ExecutionException(java.util.concurrent.ExecutionException) ReassignmentInProgressException(org.apache.kafka.common.errors.ReassignmentInProgressException)

Example 7 with CruiseControlMetric

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric in project cruise-control by linkedin.

the class CruiseControlMetricsProcessorTest method testBrokerMetricInvalid.

/**
 * Feeds the processor the standard metric set, except that the ALL_TOPIC_BYTES_IN metric of broker 0 is
 * swapped for a freshly built BrokerMetric with value 1000.0, and then checks that exactly one partition
 * sample and one broker sample come out of processing.
 */
@Test
public void testBrokerMetricInvalid() {
    CruiseControlMetricsProcessor metricsProcessor = new CruiseControlMetricsProcessor();
    Set<CruiseControlMetric> allMetrics = getCruiseControlMetrics();
    for (CruiseControlMetric current : allMetrics) {
        boolean isBroker0BytesIn = current.metricType() == MetricType.ALL_TOPIC_BYTES_IN && current.brokerId() == BROKER_ID_0;
        if (!isBroker0BytesIn) {
            // Every other metric is passed through untouched.
            metricsProcessor.addMetric(current);
            continue;
        }
        // Substitute the broker 0 bytes-in metric with the replacement value.
        metricsProcessor.addMetric(new BrokerMetric(MetricType.ALL_TOPIC_BYTES_IN, _time.milliseconds(), BROKER_ID_0, 1000.0));
    }
    MetricSampler.Samples result = metricsProcessor.process(
        getCluster(),
        Arrays.asList(T1P0, T1P1, T2P0, T2P1),
        MetricSampler.SamplingMode.ALL,
        METRIC_DEF);
    assertEquals("Should have ignored partitions on broker 0", 1, result.partitionMetricSamples().size());
    assertEquals("Should have ignored broker 0", 1, result.brokerMetricSamples().size());
}
Also used : CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) BrokerMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.BrokerMetric) Test(org.junit.Test)

Example 8 with CruiseControlMetric

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric in project cruise-control by linkedin.

the class CruiseControlMetricsReporterSampler method getSamples.

/**
 * Consume the metrics reporter topic and turn the metrics whose time falls in
 * [{@code startTimeMs}, {@code endTimeMs}) into samples.
 *
 * <p>Steps: wait until the consumer has an assignment, seek every assigned partition to the offset for
 * {@code startTimeMs} (or to its end offset when no such offset is returned), then poll until a poll adds
 * no metric and the wall clock has reached {@code endTimeMs}. A partition is paused once a record at or
 * beyond {@code endTimeMs} is seen on it. The shared metrics processor is always cleared before returning.
 *
 * @param cluster the cluster passed through to the metrics processor.
 * @param assignedPartitions the partitions passed through to the metrics processor.
 * @param startTimeMs inclusive lower bound of the metric time.
 * @param endTimeMs exclusive upper bound of the metric time.
 * @param mode the sampling mode passed through to the metrics processor.
 * @param metricDef the metric definition passed through to the metrics processor.
 * @return the processed samples, or empty samples when no metric was collected.
 * @throws MetricSamplingException declared by the sampler contract.
 */
@Override
public Samples getSamples(Cluster cluster, Set<TopicPartition> assignedPartitions, long startTimeMs, long endTimeMs, SamplingMode mode, MetricDef metricDef) throws MetricSamplingException {
    // Ensure we have an assignment.
    long pollerCount = 0L;
    while (_metricConsumer.assignment().isEmpty()) {
        pollerCount++;
        _metricConsumer.poll(10);
        // Only warn once every 12000 polls to avoid flooding the log while waiting.
        if (pollerCount % (12000) == 0) {
            LOG.warn("metricConsumer Assignment is empty .. Did you copy the cruise-control-metrics-reporter.jar to Kafka libs ?");
        }
    }
    // Now seek to the startTimeMs.
    Map<TopicPartition, Long> timestampToSeek = new HashMap<>();
    for (TopicPartition tp : _metricConsumer.assignment()) {
        timestampToSeek.put(tp, startTimeMs);
    }
    Set<TopicPartition> assignment = new HashSet<>(_metricConsumer.assignment());
    Map<TopicPartition, Long> endOffsets = _metricConsumer.endOffsets(assignment);
    Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes = _metricConsumer.offsetsForTimes(timestampToSeek);
    // If some of the partitions does not have data, we simply seek to the end offset. To avoid losing metrics, we use
    // the end offsets before the timestamp query.
    assignment.removeAll(offsetsForTimes.keySet());
    for (TopicPartition tp : assignment) {
        _metricConsumer.seek(tp, endOffsets.get(tp));
    }
    // For the partition that returned an offset, seek to the returned offsets.
    for (Map.Entry<TopicPartition, OffsetAndTimestamp> entry : offsetsForTimes.entrySet()) {
        TopicPartition tp = entry.getKey();
        OffsetAndTimestamp offsetAndTimestamp = entry.getValue();
        if (offsetAndTimestamp != null) {
            _metricConsumer.seek(tp, offsetAndTimestamp.offset());
        } else {
            _metricConsumer.seek(tp, endOffsets.get(tp));
        }
    }
    LOG.debug("Starting consuming from metrics reporter topic.");
    _metricConsumer.resume(_metricConsumer.paused());
    int numMetricsAdded;
    int totalMetricsAdded = 0;
    long maxTimeStamp = -1L;
    do {
        numMetricsAdded = 0;
        ConsumerRecords<String, CruiseControlMetric> records = _metricConsumer.poll(5000L);
        for (ConsumerRecord<String, CruiseControlMetric> record : records) {
            CruiseControlMetric metric = record == null ? null : record.value();
            if (metric == null) {
                // This means we cannot parse the metrics. It might happen when a newer type of metrics has been added and
                // the current code is still old. We simply ignore that metric in this case.
                // An unparseable metric surfaces as a record with a null value, so the value must be checked
                // too; otherwise reading its time below would throw a NullPointerException.
                continue;
            }
            long metricTime = metric.time();
            if (startTimeMs <= metricTime && metricTime < endTimeMs) {
                METRICS_PROCESSOR.addMetric(metric);
                maxTimeStamp = Math.max(maxTimeStamp, metricTime);
                numMetricsAdded++;
                totalMetricsAdded++;
            } else if (metricTime >= endTimeMs) {
                // This partition has reached the end of the sampling window; stop fetching from it.
                TopicPartition tp = new TopicPartition(record.topic(), record.partition());
                _metricConsumer.pause(Collections.singleton(tp));
            }
        }
    } while (numMetricsAdded != 0 || System.currentTimeMillis() < endTimeMs);
    LOG.debug("Finished sampling for time range [{},{}]. Collected {} metrics.", startTimeMs, endTimeMs, totalMetricsAdded);
    try {
        if (totalMetricsAdded > 0) {
            return METRICS_PROCESSOR.process(cluster, assignedPartitions, mode, metricDef);
        } else {
            return new Samples(Collections.emptySet(), Collections.emptySet());
        }
    } finally {
        // Always reset the shared processor so the next sampling round starts clean.
        METRICS_PROCESSOR.clear();
    }
}
Also used : CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp)

Example 9 with CruiseControlMetric

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric in project cruise-control by linkedin.

the class BrokerLoad method recordMetric.

/**
 * Route the given Cruise Control metric to the holder that matches the scope of its raw metric type.
 *
 * <p>Broker-scoped metrics are recorded in the broker-level holder. Topic-scoped and partition-scoped
 * metrics are recorded in a per-topic or per-partition holder that is created on first use; a
 * partition-scoped metric additionally adds its topic to the set of topics with partition size reported.
 *
 * @param ccm Cruise Control metric.
 * @throws IllegalStateException if the metric scope is none of BROKER, TOPIC or PARTITION.
 */
public void recordMetric(CruiseControlMetric ccm) {
    RawMetricType type = ccm.rawMetricType();
    switch (type.metricScope()) {
        case BROKER: {
            _brokerMetrics.recordCruiseControlMetric(ccm);
            break;
        }
        case TOPIC: {
            TopicMetric topicMetric = (TopicMetric) ccm;
            _dotHandledTopicMetrics
                .computeIfAbsent(topicMetric.topic(), key -> new RawMetricsHolder())
                .recordCruiseControlMetric(ccm);
            break;
        }
        case PARTITION: {
            PartitionMetric partitionMetric = (PartitionMetric) ccm;
            TopicPartition topicPartition = new TopicPartition(partitionMetric.topic(), partitionMetric.partition());
            _dotHandledPartitionMetrics
                .computeIfAbsent(topicPartition, key -> new RawMetricsHolder())
                .recordCruiseControlMetric(ccm);
            _dotHandledTopicsWithPartitionSizeReported.add(partitionMetric.topic());
            break;
        }
        default:
            throw new IllegalStateException(String.format("Should never be here. Unrecognized metric scope %s", type.metricScope()));
    }
}
Also used : PartitionMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.PartitionMetric) HolderUtils.allowMissingBrokerMetric(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.allowMissingBrokerMetric) TopicPartition(org.apache.kafka.common.TopicPartition) Logger(org.slf4j.Logger) MetricScope(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType.MetricScope) TopicMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.TopicMetric) HolderUtils.convertUnit(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.convertUnit) MISSING_BROKER_METRIC_VALUE(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.MISSING_BROKER_METRIC_VALUE) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) HashMap(java.util.HashMap) PartitionInfo(org.apache.kafka.common.PartitionInfo) SamplingUtils.replaceDotsWithUnderscores(com.linkedin.kafka.cruisecontrol.monitor.sampling.SamplingUtils.replaceDotsWithUnderscores) Utils.validateNotNull(com.linkedin.cruisecontrol.common.utils.Utils.validateNotNull) HashSet(java.util.HashSet) List(java.util.List) Cluster(org.apache.kafka.common.Cluster) RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HolderUtils.sanityCheckMetricScope(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.sanityCheckMetricScope) Map(java.util.Map) CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) METRIC_TYPES_TO_SUM(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.HolderUtils.METRIC_TYPES_TO_SUM) Collections(java.util.Collections) RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) TopicPartition(org.apache.kafka.common.TopicPartition) PartitionMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.PartitionMetric) 
TopicMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.TopicMetric)

Example 10 with CruiseControlMetric

use of com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric in project cruise-control by linkedin.

the class CruiseControlMetricsReporterSampler method retrieveMetricsForProcessing.

/**
 * Consume the metrics reporter topic and hand every metric that belongs to the sampling period to
 * {@code addMetricForProcessing}.
 *
 * <p>Returns 0 immediately when {@code refreshPartitionAssignment()} returns true. Otherwise every
 * currently assigned partition is positioned at the offset for the sampling start time (or at its end
 * offset when no such offset is returned), and the topic is polled until {@code consumptionDone} reports
 * completion or the wall clock reaches the timeout from the sampler options. Metrics older than the start
 * time by more than the acceptable produce delay are discarded; a partition is paused once a metric at or
 * beyond the end time is seen on it.
 *
 * @param metricSamplerOptions supplies the start time, end time and timeout of this sampling round.
 * @return the number of metrics added for processing.
 * @throws SamplingException declared by the sampler contract.
 */
@Override
protected int retrieveMetricsForProcessing(MetricSamplerOptions metricSamplerOptions) throws SamplingException {
    if (refreshPartitionAssignment()) {
        return 0;
    }
    // Now seek to the startTimeMs.
    Map<TopicPartition, Long> timestampToSeek = new HashMap<>();
    for (TopicPartition tp : _currentPartitionAssignment) {
        timestampToSeek.put(tp, metricSamplerOptions.startTimeMs());
    }
    Set<TopicPartition> assignment = new HashSet<>(_currentPartitionAssignment);
    Map<TopicPartition, Long> endOffsets = _metricConsumer.endOffsets(assignment);
    Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes = _metricConsumer.offsetsForTimes(timestampToSeek);
    sanityCheckOffsetFetch(endOffsets, offsetsForTimes);
    // If some partitions do not have data, we simply seek to the end offset. To avoid losing metrics, we use the end
    // offsets before the timestamp query.
    assignment.removeAll(offsetsForTimes.keySet());
    assignment.forEach(tp -> _metricConsumer.seek(tp, endOffsets.get(tp)));
    // For the partition that returned an offset, seek to the returned offsets.
    for (Map.Entry<TopicPartition, OffsetAndTimestamp> entry : offsetsForTimes.entrySet()) {
        TopicPartition tp = entry.getKey();
        OffsetAndTimestamp offsetAndTimestamp = entry.getValue();
        _metricConsumer.seek(tp, offsetAndTimestamp != null ? offsetAndTimestamp.offset() : endOffsets.get(tp));
    }
    // Sorted partition ids are collected only for readable log output.
    SortedSet<Integer> partitionIds = _currentPartitionAssignment.stream().map(TopicPartition::partition).collect(Collectors.toCollection(TreeSet::new));
    LOG.debug("Starting consuming from metrics reporter topic {} for partitions {}.", _metricReporterTopic, partitionIds);
    _metricConsumer.resume(_metricConsumer.paused());
    int totalMetricsAdded = 0;
    Set<TopicPartition> partitionsToPause = new HashSet<>();
    do {
        ConsumerRecords<String, CruiseControlMetric> records = _metricConsumer.poll(METRIC_REPORTER_CONSUMER_POLL_TIMEOUT);
        for (ConsumerRecord<String, CruiseControlMetric> record : records) {
            CruiseControlMetric metric = record == null ? null : record.value();
            if (metric == null) {
                // This means we cannot parse the metrics. It might happen when a newer type of metrics has been added and
                // the current code is still old. We simply ignore that metric in this case.
                // An unparseable metric surfaces as a record with a null value, so the value must be checked
                // too; otherwise reading its time below would throw a NullPointerException.
                LOG.warn("Cannot parse record, please update your Cruise Control version.");
                continue;
            }
            long recordTime = metric.time();
            if (recordTime + _acceptableMetricRecordProduceDelayMs < metricSamplerOptions.startTimeMs()) {
                LOG.debug("Discarding metric {} because its timestamp is more than {} ms earlier than the start time of sampling period {}.", metric, _acceptableMetricRecordProduceDelayMs, metricSamplerOptions.startTimeMs());
            } else if (recordTime >= metricSamplerOptions.endTimeMs()) {
                TopicPartition tp = new TopicPartition(record.topic(), record.partition());
                LOG.debug("Saw metric {} whose timestamp is larger than the end time of sampling period {}. Pausing " + "partition {} at offset {}.", metric, metricSamplerOptions.endTimeMs(), tp, record.offset());
                partitionsToPause.add(tp);
            } else {
                addMetricForProcessing(metric);
                totalMetricsAdded++;
            }
        }
        // Pause in one batch after the records of this poll have been handled.
        if (!partitionsToPause.isEmpty()) {
            _metricConsumer.pause(partitionsToPause);
            partitionsToPause.clear();
        }
    } while (!consumptionDone(_metricConsumer, endOffsets) && System.currentTimeMillis() < metricSamplerOptions.timeoutMs());
    LOG.info("Finished sampling from topic {} for partitions {} in time range [{},{}]. Collected {} metrics.", _metricReporterTopic, partitionIds, metricSamplerOptions.startTimeMs(), metricSamplerOptions.endTimeMs(), totalMetricsAdded);
    return totalMetricsAdded;
}
Also used : CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp)

Aggregations

CruiseControlMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric)14 Test (org.junit.Test)9 Cluster (org.apache.kafka.common.Cluster)8 HashSet (java.util.HashSet)6 PartitionMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.PartitionMetric)3 RawMetricType (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType)3 TopicMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.TopicMetric)3 HashMap (java.util.HashMap)3 Map (java.util.Map)3 TopicPartition (org.apache.kafka.common.TopicPartition)3 BrokerCapacityConfigResolver (com.linkedin.kafka.cruisecontrol.config.BrokerCapacityConfigResolver)2 BrokerCapacityInfo (com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo)2 BrokerMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.BrokerMetric)2 PartitionMetricSample (com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample)2 TimeoutException (java.util.concurrent.TimeoutException)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 OffsetAndTimestamp (org.apache.kafka.clients.consumer.OffsetAndTimestamp)2 Node (org.apache.kafka.common.Node)2 Utils.validateNotNull (com.linkedin.cruisecontrol.common.utils.Utils.validateNotNull)1 BrokerCapacityResolutionException (com.linkedin.kafka.cruisecontrol.exception.BrokerCapacityResolutionException)1