Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by LinkedIn.
Class SamplingUtils, method buildPartitionMetricSample.
/**
 * Build the {@link PartitionMetricSample} of a single partition from the load reported by the broker that currently
 * leads it.
 *
 * <p>Every topic-level metric of the leader broker is divided by the number of leaders that broker hosts for the
 * topic (or recorded as zero when that number is zero). The partition size is then recorded as disk usage and an
 * estimated leader CPU utilization as CPU usage. Whenever the sample cannot be built, the skip is counted in
 * {@code skippedPartitionByBroker} and {@code null} is returned.
 *
 * @param cluster Kafka cluster.
 * @param leaderDistribution The leader count per topic/broker.
 * @param tpDotNotHandled The original topic partition, whose topic name may contain dots.
 * @param brokerLoadById Load information for brokers by the broker id.
 * @param maxMetricTimestamp Maximum timestamp of the sampled metric during the sampling process.
 * @param cachedNumCoresByBroker Cached number of cores by broker.
 * @param skippedPartitionByBroker Number of skipped partition samples by broker ids; updated by this method.
 * @return Metric sample populated with topic and partition metrics, or {@code null} if sample generation is skipped.
 */
static PartitionMetricSample buildPartitionMetricSample(Cluster cluster,
                                                        Map<Integer, Map<String, Integer>> leaderDistribution,
                                                        TopicPartition tpDotNotHandled,
                                                        Map<Integer, BrokerLoad> brokerLoadById,
                                                        long maxMetricTimestamp,
                                                        Map<Integer, Short> cachedNumCoresByBroker,
                                                        Map<Integer, Integer> skippedPartitionByBroker) {
  Node leader = cluster.leaderFor(tpDotNotHandled);
  if (leader == null) {
    // Without a leader there is no broker to attribute the skip to, so it is counted under the placeholder id.
    LOG.trace("Partition {} has no current leader.", tpDotNotHandled);
    skippedPartitionByBroker.merge(UNRECOGNIZED_BROKER_ID, 1, Integer::sum);
    return null;
  }

  final int leaderBrokerId = leader.id();
  // TODO: switch to linear regression model without computing partition level CPU usage.
  final BrokerLoad leaderLoad = brokerLoadById.get(leaderBrokerId);
  final TopicPartition dotHandledTp = partitionHandleDotInTopicName(tpDotNotHandled);
  boolean shouldSkip = skipBuildingPartitionMetricSample(tpDotNotHandled, dotHandledTp, leaderBrokerId, leaderLoad,
                                                         cachedNumCoresByBroker);
  if (shouldSkip) {
    skippedPartitionByBroker.merge(leaderBrokerId, 1, Integer::sum);
    return null;
  }

  // Topic-level metrics: each one is split evenly among the leaders the broker hosts for this topic.
  MetricDef metricDef = KafkaMetricDef.commonMetricDef();
  PartitionMetricSample sample = new PartitionMetricSample(leaderBrokerId, tpDotNotHandled);
  int leaderCount = leaderDistribution.get(leaderBrokerId).get(tpDotNotHandled.topic());
  for (RawMetricType topicMetricType : RawMetricType.topicMetricTypes()) {
    double share;
    if (leaderCount == 0) {
      share = 0;
    } else {
      share = leaderLoad.topicMetrics(dotHandledTp.topic(), topicMetricType) / leaderCount;
    }
    sample.record(metricDef.metricInfo(KafkaMetricDef.forRawMetricType(topicMetricType).name()), share);
  }

  // Disk usage comes from the partition-level size metric, which is not a topic metric type.
  Double diskUsage = leaderLoad.partitionMetric(dotHandledTp.topic(), dotHandledTp.partition(), PARTITION_SIZE);
  if (diskUsage == null) {
    skippedPartitionByBroker.merge(leaderBrokerId, 1, Integer::sum);
    return null;
  }
  sample.record(metricDef.metricInfo(KafkaMetricDef.DISK_USAGE.name()), diskUsage);

  // CPU usage is estimated from what has been recorded in the sample so far plus the leader broker's load.
  Double cpuUsage = estimateLeaderCpuUtil(sample, leaderLoad, metricDef, cachedNumCoresByBroker.get(leaderBrokerId));
  if (cpuUsage == null) {
    skippedPartitionByBroker.merge(leaderBrokerId, 1, Integer::sum);
    return null;
  }
  sample.record(metricDef.metricInfo(KafkaMetricDef.CPU_USAGE.name()), cpuUsage);

  sample.close(maxMetricTimestamp);
  return sample;
}
Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by LinkedIn.
Class CruiseControlMetricsProcessorTest, method testMissingTopicBytesInMetric.
/**
 * Feeds the processor every metric except the topic bytes-in/out and replication bytes-in/out metrics of
 * {@code TOPIC1} on {@code BROKER_ID_0}, then checks that all samples are still produced and that the sample of
 * {@code T1P0} is validated with zeros for the three values preceding its size.
 */
@Test
public void testMissingTopicBytesInMetric() throws TimeoutException, BrokerCapacityResolutionException {
  CruiseControlMetricsProcessor processor = new CruiseControlMetricsProcessor(mockBrokerCapacityConfigResolver(), false);
  Set<CruiseControlMetric> allMetrics = getCruiseControlMetrics();
  Set<RawMetricType> excludedTypes =
      new HashSet<>(Arrays.asList(TOPIC_BYTES_IN, TOPIC_BYTES_OUT, TOPIC_REPLICATION_BYTES_IN, TOPIC_REPLICATION_BYTES_OUT));

  for (CruiseControlMetric candidate : allMetrics) {
    // A metric is withheld only when it is one of the excluded types AND belongs to TOPIC1 on broker 0.
    boolean withheld = false;
    if (excludedTypes.contains(candidate.rawMetricType())) {
      TopicMetric topicMetric = (TopicMetric) candidate;
      withheld = topicMetric.brokerId() == BROKER_ID_0 && topicMetric.topic().equals(TOPIC1);
    }
    if (!withheld) {
      processor.addMetric(candidate);
    }
  }

  Cluster cluster = getCluster();
  MetricSampler.Samples samples = processor.process(cluster, TEST_PARTITIONS, MetricSampler.SamplingMode.ALL);
  assertEquals(4, samples.partitionMetricSamples().size());
  assertEquals(2, samples.brokerMetricSamples().size());

  for (PartitionMetricSample partitionSample : samples.partitionMetricSamples()) {
    if (!partitionSample.entity().tp().equals(T1P0)) {
      continue;
    }
    // T1P0 should not have any IO or CPU usage.
    validatePartitionMetricSample(partitionSample, _time.milliseconds() + 2, 0.0, 0.0, 0.0, T1P0_BYTES_SIZE);
  }
}
Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by LinkedIn.
Class LoadMonitorTaskRunnerTest, method testSamplingError.
/**
 * Starts a {@link LoadMonitorTaskRunner} backed by a {@code MockSampler} and verifies that exactly
 * {@code NUM_TOPICS * NUM_PARTITIONS} partition samples reach the mock aggregator's queue.
 *
 * <p>NOTE(review): the test name suggests {@code new MockSampler(0)} injects sampling errors; that behavior is
 * defined outside this method and should be confirmed there.
 */
@Test
public void testSamplingError() {
  KafkaCruiseControlConfig config = new KafkaCruiseControlConfig(getLoadMonitorProperties());
  Metadata metadata = new Metadata(METADATA_REFRESH_BACKOFF, METADATA_EXPIRY_MS, new LogContext(), new ClusterResourceListeners());
  MetadataClient metadataClient = new MetadataClient(config, metadata, -1L, TIME);
  assertNotNull(metadataClient.cluster().clusterResource().clusterId());
  MockPartitionMetricSampleAggregator mockMetricSampleAggregator = new MockPartitionMetricSampleAggregator(config, metadata);
  KafkaBrokerMetricSampleAggregator mockBrokerMetricSampleAggregator = EasyMock.mock(KafkaBrokerMetricSampleAggregator.class);
  MetricRegistry dropwizardMetricRegistry = new MetricRegistry();
  MetricSampler sampler = new MockSampler(0);
  MetricFetcherManager fetcherManager = new MetricFetcherManager(config, mockMetricSampleAggregator, mockBrokerMetricSampleAggregator,
                                                                 metadataClient, METRIC_DEF, TIME, dropwizardMetricRegistry, null, sampler);
  try {
    LoadMonitorTaskRunner loadMonitorTaskRunner = new LoadMonitorTaskRunner(config, fetcherManager, mockMetricSampleAggregator,
                                                                            mockBrokerMetricSampleAggregator, metadataClient, TIME);
    // Keep refreshing until the metadata reports at least 100 topics before starting the runner.
    while (metadata.fetch().topics().size() < 100) {
      metadataClient.refreshMetadata();
    }
    loadMonitorTaskRunner.start(true);

    int numSamples = 0;
    long startMs = System.currentTimeMillis();
    BlockingQueue<PartitionMetricSample> sampleQueue = mockMetricSampleAggregator.metricSampleQueue();
    // Drain the queue for up to 10 seconds, stopping early only once ten times the expected number of samples
    // has been seen; this way surplus samples are counted and make the assertion below fail.
    while (numSamples < (NUM_PARTITIONS * NUM_TOPICS) * 10 && System.currentTimeMillis() < startMs + 10000) {
      PartitionMetricSample sample = sampleQueue.poll();
      if (sample != null) {
        numSamples++;
      }
    }

    int expectedNumSamples = NUM_TOPICS * NUM_PARTITIONS;
    assertEquals("Only see " + numSamples + " samples. Expecting " + expectedNumSamples + " samples", expectedNumSamples, numSamples);
  } finally {
    // Shut the fetcher manager down even when an assertion or a call above fails, so a failing run does not
    // skip the cleanup that a passing run performs.
    fetcherManager.shutdown();
  }
}
Aggregations