Search in sources :

Example 6 with PartitionMetricSample

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by linkedin.

In class SamplingUtils, method buildPartitionMetricSample:

/**
 * Build a {@link PartitionMetricSample} for a single partition from the load of the broker that currently leads it.
 *
 * <p>Each topic-level metric of the leader broker is divided by the number of leaders that broker hosts for the
 * topic (or recorded as zero when that number is zero). The partition size is recorded as disk usage and the
 * estimated leader CPU utilization is recorded as CPU usage. Every time the sample cannot be built, the counter
 * of the corresponding broker in {@code skippedPartitionByBroker} is incremented and {@code null} is returned.
 *
 * @param cluster Kafka cluster.
 * @param leaderDistribution Leader counts keyed by broker id, then by topic name.
 * @param tpDotNotHandled Topic partition carrying the original topic name, which may contain dots.
 * @param brokerLoadById Load information for brokers by the broker id.
 * @param maxMetricTimestamp Maximum timestamp of the sampled metric during the sampling process; used to close the sample.
 * @param cachedNumCoresByBroker Cached number of cores by broker.
 * @param skippedPartitionByBroker Number of skipped partition samples by broker ids; updated in place.
 * @return Metric sample populated with topic and partition metrics, or {@code null} if sample generation is skipped.
 */
static PartitionMetricSample buildPartitionMetricSample(Cluster cluster,
                                                        Map<Integer, Map<String, Integer>> leaderDistribution,
                                                        TopicPartition tpDotNotHandled,
                                                        Map<Integer, BrokerLoad> brokerLoadById,
                                                        long maxMetricTimestamp,
                                                        Map<Integer, Short> cachedNumCoresByBroker,
                                                        Map<Integer, Integer> skippedPartitionByBroker) {
    Node leader = cluster.leaderFor(tpDotNotHandled);
    if (leader == null) {
        // Without a leader there is no broker to attribute the skip to, so it is counted under the placeholder id.
        LOG.trace("Partition {} has no current leader.", tpDotNotHandled);
        skippedPartitionByBroker.merge(UNRECOGNIZED_BROKER_ID, 1, Integer::sum);
        return null;
    }

    int leaderBrokerId = leader.id();
    // TODO: switch to linear regression model without computing partition level CPU usage.
    BrokerLoad leaderLoad = brokerLoadById.get(leaderBrokerId);
    TopicPartition tpWithDotHandled = partitionHandleDotInTopicName(tpDotNotHandled);
    boolean skipSample = skipBuildingPartitionMetricSample(tpDotNotHandled, tpWithDotHandled, leaderBrokerId,
                                                           leaderLoad, cachedNumCoresByBroker);
    if (skipSample) {
        skippedPartitionByBroker.merge(leaderBrokerId, 1, Integer::sum);
        return null;
    }

    // Record the common, topic-level metrics, split evenly among the leaders of the topic on this broker.
    MetricDef metricDef = KafkaMetricDef.commonMetricDef();
    PartitionMetricSample sample = new PartitionMetricSample(leaderBrokerId, tpDotNotHandled);
    int leaderCount = leaderDistribution.get(leaderBrokerId).get(tpDotNotHandled.topic());
    String topicWithDotHandled = tpWithDotHandled.topic();
    for (RawMetricType topicMetricType : RawMetricType.topicMetricTypes()) {
        double perPartitionValue;
        if (leaderCount == 0) {
            perPartitionValue = 0;
        } else {
            perPartitionValue = leaderLoad.topicMetrics(topicWithDotHandled, topicMetricType) / leaderCount;
        }
        MetricInfo topicMetricInfo = metricDef.metricInfo(KafkaMetricDef.forRawMetricType(topicMetricType).name());
        sample.record(topicMetricInfo, perPartitionValue);
    }

    // Disk utilization is not a topic metric type; it comes from the partition-level size metric.
    Double partitionSize = leaderLoad.partitionMetric(topicWithDotHandled, tpWithDotHandled.partition(), PARTITION_SIZE);
    if (partitionSize == null) {
        skippedPartitionByBroker.merge(leaderBrokerId, 1, Integer::sum);
        return null;
    }
    sample.record(metricDef.metricInfo(KafkaMetricDef.DISK_USAGE.name()), partitionSize);

    // CPU utilization is estimated from the metrics recorded so far together with the broker load.
    Double leaderCpuUtil = estimateLeaderCpuUtil(sample, leaderLoad, metricDef, cachedNumCoresByBroker.get(leaderBrokerId));
    if (leaderCpuUtil == null) {
        skippedPartitionByBroker.merge(leaderBrokerId, 1, Integer::sum);
        return null;
    }
    sample.record(metricDef.metricInfo(KafkaMetricDef.CPU_USAGE.name()), leaderCpuUtil);

    sample.close(maxMetricTimestamp);
    return sample;
}
Also used : BrokerLoad(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerLoad) TopicPartition(org.apache.kafka.common.TopicPartition) RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) MetricInfo(com.linkedin.cruisecontrol.metricdef.MetricInfo) Node(org.apache.kafka.common.Node) MetricDef(com.linkedin.cruisecontrol.metricdef.MetricDef) KafkaMetricDef(com.linkedin.kafka.cruisecontrol.monitor.metricdefinition.KafkaMetricDef) PartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample)

Example 7 with PartitionMetricSample

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by linkedin.

In class CruiseControlMetricsProcessorTest, method testMissingTopicBytesInMetric:

/**
 * Feed the processor every test metric except the topic bytes-in/out and replication bytes-in/out metrics of
 * {@code TOPIC1} on {@code BROKER_ID_0}, then check that all samples are still produced and that the sample for
 * {@code T1P0} reports zero for the three usage values while keeping its size.
 */
@Test
public void testMissingTopicBytesInMetric() throws TimeoutException, BrokerCapacityResolutionException {
    CruiseControlMetricsProcessor processor = new CruiseControlMetricsProcessor(mockBrokerCapacityConfigResolver(), false);
    Set<CruiseControlMetric> allMetrics = getCruiseControlMetrics();
    Set<RawMetricType> droppedTypes = new HashSet<>(Arrays.asList(TOPIC_BYTES_IN,
                                                                  TOPIC_BYTES_OUT,
                                                                  TOPIC_REPLICATION_BYTES_IN,
                                                                  TOPIC_REPLICATION_BYTES_OUT));
    for (CruiseControlMetric candidate : allMetrics) {
        boolean dropped = false;
        if (droppedTypes.contains(candidate.rawMetricType())) {
            // Only the metrics of TOPIC1 on BROKER_ID_0 are withheld; other topics/brokers keep theirs.
            TopicMetric topicMetric = (TopicMetric) candidate;
            dropped = topicMetric.brokerId() == BROKER_ID_0 && topicMetric.topic().equals(TOPIC1);
        }
        if (!dropped) {
            processor.addMetric(candidate);
        }
    }

    Cluster cluster = getCluster();
    MetricSampler.Samples samples = processor.process(cluster, TEST_PARTITIONS, MetricSampler.SamplingMode.ALL);
    assertEquals(4, samples.partitionMetricSamples().size());
    assertEquals(2, samples.brokerMetricSamples().size());

    for (PartitionMetricSample partitionSample : samples.partitionMetricSamples()) {
        if (!partitionSample.entity().tp().equals(T1P0)) {
            continue;
        }
        // T1P0 should not have any IO or CPU usage.
        validatePartitionMetricSample(partitionSample, _time.milliseconds() + 2, 0.0, 0.0, 0.0, T1P0_BYTES_SIZE);
    }
}
Also used : CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) RawMetricType(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType) Cluster(org.apache.kafka.common.Cluster) PartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample) TopicMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.TopicMetric) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 8 with PartitionMetricSample

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by linkedin.

In class LoadMonitorTaskRunnerTest, method testSamplingError:

/**
 * Start a load monitor task runner backed by a {@code MockSampler} and verify that exactly
 * {@code NUM_TOPICS * NUM_PARTITIONS} partition metric samples reach the mock aggregator within ten seconds.
 *
 * <p>The fetcher manager is shut down in a {@code finally} block so that it is also released when the assertion
 * fails or the sampling phase throws.
 */
@Test
public void testSamplingError() {
    KafkaCruiseControlConfig config = new KafkaCruiseControlConfig(getLoadMonitorProperties());
    Metadata metadata = new Metadata(METADATA_REFRESH_BACKOFF, METADATA_EXPIRY_MS, new LogContext(), new ClusterResourceListeners());
    MetadataClient metadataClient = new MetadataClient(config, metadata, -1L, TIME);
    assertNotNull(metadataClient.cluster().clusterResource().clusterId());
    MockPartitionMetricSampleAggregator mockMetricSampleAggregator = new MockPartitionMetricSampleAggregator(config, metadata);
    KafkaBrokerMetricSampleAggregator mockBrokerMetricSampleAggregator = EasyMock.mock(KafkaBrokerMetricSampleAggregator.class);
    MetricRegistry dropwizardMetricRegistry = new MetricRegistry();
    // NOTE(review): the meaning of the MockSampler argument is not visible here - presumably it selects the
    // error-injection behavior this test is named after; confirm against MockSampler.
    MetricSampler sampler = new MockSampler(0);
    MetricFetcherManager fetcherManager = new MetricFetcherManager(config, mockMetricSampleAggregator, mockBrokerMetricSampleAggregator, metadataClient, METRIC_DEF, TIME, dropwizardMetricRegistry, null, sampler);
    try {
        LoadMonitorTaskRunner loadMonitorTaskRunner = new LoadMonitorTaskRunner(config, fetcherManager, mockMetricSampleAggregator, mockBrokerMetricSampleAggregator, metadataClient, TIME);
        // Keep refreshing until the metadata reports at least 100 topics.
        while (metadata.fetch().topics().size() < 100) {
            metadataClient.refreshMetadata();
        }
        loadMonitorTaskRunner.start(true);
        int numSamples = 0;
        long startMs = System.currentTimeMillis();
        BlockingQueue<PartitionMetricSample> sampleQueue = mockMetricSampleAggregator.metricSampleQueue();
        // Drain samples for up to 10 seconds. The count bound is ten times the expected number, so in the passing
        // case the loop runs until the deadline, which lets the assertion below also catch surplus samples.
        while (numSamples < (NUM_PARTITIONS * NUM_TOPICS) * 10 && System.currentTimeMillis() < startMs + 10000) {
            PartitionMetricSample sample = sampleQueue.poll();
            if (sample != null) {
                numSamples++;
            }
        }
        int expectedNumSamples = NUM_TOPICS * NUM_PARTITIONS;
        assertEquals("Only see " + numSamples + " samples. Expecting " + expectedNumSamples + " samples", expectedNumSamples, numSamples);
    } finally {
        // Always release the fetcher manager, even when the assertion above fails.
        fetcherManager.shutdown();
    }
}
Also used : MetricFetcherManager(com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricFetcherManager) ClusterResourceListeners(org.apache.kafka.common.internals.ClusterResourceListeners) MetricSampler(com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricSampler) MetricRegistry(com.codahale.metrics.MetricRegistry) Metadata(org.apache.kafka.clients.Metadata) LogContext(org.apache.kafka.common.utils.LogContext) PartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample) KafkaBrokerMetricSampleAggregator(com.linkedin.kafka.cruisecontrol.monitor.sampling.aggregator.KafkaBrokerMetricSampleAggregator) MetadataClient(com.linkedin.kafka.cruisecontrol.common.MetadataClient) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) Test(org.junit.Test)

Aggregations

PartitionMetricSample (com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample)8 Test (org.junit.Test)5 TopicPartition (org.apache.kafka.common.TopicPartition)4 KafkaCruiseControlConfig (com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig)3 HashSet (java.util.HashSet)3 Metadata (org.apache.kafka.clients.Metadata)3 MetricRegistry (com.codahale.metrics.MetricRegistry)2 MetricDef (com.linkedin.cruisecontrol.metricdef.MetricDef)2 MetadataClient (com.linkedin.kafka.cruisecontrol.common.MetadataClient)2 CruiseControlMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric)2 RawMetricType (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType)2 KafkaMetricDef (com.linkedin.kafka.cruisecontrol.monitor.metricdefinition.KafkaMetricDef)2 MetricFetcherManager (com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricFetcherManager)2 MetricSampler (com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricSampler)2 KafkaBrokerMetricSampleAggregator (com.linkedin.kafka.cruisecontrol.monitor.sampling.aggregator.KafkaBrokerMetricSampleAggregator)2 Cluster (org.apache.kafka.common.Cluster)2 Node (org.apache.kafka.common.Node)2 ClusterResourceListeners (org.apache.kafka.common.internals.ClusterResourceListeners)2 LogContext (org.apache.kafka.common.utils.LogContext)2 MetricInfo (com.linkedin.cruisecontrol.metricdef.MetricInfo)1