Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in the cruise-control project by LinkedIn.
From the class KafkaPartitionMetricSampleAggregatorTest, method testExcludeInvalidMetricSample.
/**
 * Adds two invalid samples on top of a populated aggregator (one reporting a leader other than the one in
 * the metadata, one carrying only the CPU metric) and checks that the aggregated values of the latest
 * requested window still match the values expected from the populated samples alone.
 */
@Test
public void testExcludeInvalidMetricSample() throws NotEnoughValidWindowsException {
  KafkaCruiseControlConfig cruiseControlConfig = new KafkaCruiseControlConfig(getLoadMonitorProperties());
  Metadata clusterMetadata = getMetadata(Collections.singleton(TP));
  KafkaPartitionMetricSampleAggregator aggregator =
      new KafkaPartitionMetricSampleAggregator(cruiseControlConfig, clusterMetadata);
  MetricDef commonDef = KafkaMetricDef.commonMetricDef();
  populateSampleAggregator(NUM_WINDOWS + 1, MIN_SAMPLES_PER_WINDOW, aggregator);

  // Invalid sample #1: complete metrics, but the leader is node 1, which differs from the leader in the metadata.
  PartitionMetricSample wrongLeaderSample = new PartitionMetricSample(1, TP);
  wrongLeaderSample.record(commonDef.metricInfo(DISK_USAGE.name()), 10000);
  wrongLeaderSample.record(commonDef.metricInfo(CPU_USAGE.name()), 10000);
  wrongLeaderSample.record(commonDef.metricInfo(LEADER_BYTES_IN.name()), 10000);
  wrongLeaderSample.record(commonDef.metricInfo(LEADER_BYTES_OUT.name()), 10000);
  wrongLeaderSample.close(0);

  // Invalid sample #2: only the CPU metric is recorded.
  PartitionMetricSample cpuOnlySample = new PartitionMetricSample(0, TP);
  cpuOnlySample.record(commonDef.metricInfo(CPU_USAGE.name()), 10000);
  cpuOnlySample.close(0);

  aggregator.addSample(wrongLeaderSample);
  aggregator.addSample(cpuOnlySample);

  // Aggregate and verify that neither of the two samples above contributed to the window values.
  Map<PartitionEntity, ValuesAndExtrapolations> aggregated =
      aggregator.aggregate(clusterMetadata.fetch(), NUM_WINDOWS * WINDOW_MS, new OperationProgress())
                .valuesAndExtrapolations();
  ValuesAndExtrapolations partitionResult = aggregated.get(PE);
  for (Resource resource : Resource.cachedValues()) {
    Collection<Short> metricIds = KafkaMetricDef.resourceToMetricIds(resource);
    // Expected value per metric of the resource; DISK uses a different expectation than the other resources.
    double perMetricValue = resource == Resource.DISK ? MIN_SAMPLES_PER_WINDOW - 1
                                                      : (MIN_SAMPLES_PER_WINDOW - 1) / 2.0;
    // Only the CPU expectation is divided by UNIT_INTERVAL_TO_PERCENTAGE.
    double divisor = resource == Resource.CPU ? UNIT_INTERVAL_TO_PERCENTAGE : 1.0;
    double expectedValue = perMetricValue / divisor * metricIds.size();
    double actualValue = partitionResult.metricValues()
                                        .valuesForGroup(resource.name(), KafkaMetricDef.commonMetricDef(), true)
                                        .get(NUM_WINDOWS - 1);
    assertEquals("The utilization for " + resource + " should be " + expectedValue,
                 expectedValue, actualValue, 0.01);
  }
}
Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in the cruise-control project by LinkedIn.
From the class CruiseControlMetricsProcessorTest, method testBasic.
/**
 * Feeds the test metrics into a {@code CruiseControlMetricsProcessor}, processes them for the test
 * partitions, and validates the cached core counts plus the resulting partition and broker samples.
 */
@Test
public void testBasic() throws TimeoutException, BrokerCapacityResolutionException {
  CruiseControlMetricsProcessor metricsProcessor =
      new CruiseControlMetricsProcessor(mockBrokerCapacityConfigResolver(), false);
  Set<CruiseControlMetric> rawMetrics = getCruiseControlMetrics();
  Cluster kafkaCluster = getCluster();
  for (CruiseControlMetric rawMetric : rawMetrics) {
    metricsProcessor.addMetric(rawMetric);
  }

  MetricSampler.Samples processed =
      metricsProcessor.process(kafkaCluster, TEST_PARTITIONS, MetricSampler.SamplingMode.ALL);

  // Every broker in the cluster must have the mocked number of cores cached.
  for (Node broker : kafkaCluster.nodes()) {
    assertEquals(MOCK_NUM_CPU_CORES, (short) metricsProcessor.cachedNumCoresByBroker().get(broker.id()));
  }

  assertEquals(4, processed.partitionMetricSamples().size());
  assertEquals(2, processed.brokerMetricSamples().size());

  // Each partition sample must belong to one of the four known partitions and carry its expected values.
  for (PartitionMetricSample partitionSample : processed.partitionMetricSamples()) {
    if (partitionSample.entity().tp().equals(T1P0)) {
      validatePartitionMetricSample(partitionSample, _time.milliseconds() + 2, CPU_UTIL.get(T1P0),
                                    B0_TOPIC1_BYTES_IN, B0_TOPIC1_BYTES_OUT, T1P0_BYTES_SIZE);
      continue;
    }
    if (partitionSample.entity().tp().equals(T1P1)) {
      validatePartitionMetricSample(partitionSample, _time.milliseconds() + 2, CPU_UTIL.get(T1P1),
                                    B1_TOPIC1_BYTES_IN, B1_TOPIC1_BYTES_OUT, T1P1_BYTES_SIZE);
      continue;
    }
    if (partitionSample.entity().tp().equals(T2P0)) {
      validatePartitionMetricSample(partitionSample, _time.milliseconds() + 2, CPU_UTIL.get(T2P0),
                                    B0_TOPIC2_BYTES_IN / 2, B0_TOPIC2_BYTES_OUT / 2, T2P0_BYTES_SIZE);
      continue;
    }
    if (partitionSample.entity().tp().equals(T2P1)) {
      validatePartitionMetricSample(partitionSample, _time.milliseconds() + 2, CPU_UTIL.get(T2P1),
                                    B0_TOPIC2_BYTES_IN / 2, B0_TOPIC2_BYTES_OUT / 2, T2P1_BYTES_SIZE);
      continue;
    }
    fail("Should never have partition " + partitionSample.entity().tp());
  }

  // Broker samples are told apart by their CPU usage value.
  for (BrokerMetricSample brokerSample : processed.brokerMetricSamples()) {
    if (brokerSample.metricValue(CPU_USAGE) == B0_CPU) {
      assertEquals(B0_TOPIC1_REPLICATION_BYTES_IN, brokerSample.metricValue(REPLICATION_BYTES_IN_RATE), DELTA);
      continue;
    }
    if (brokerSample.metricValue(CPU_USAGE) == B1_CPU) {
      assertEquals(B1_TOPIC1_REPLICATION_BYTES_IN + B1_TOPIC2_REPLICATION_BYTES_IN,
                   brokerSample.metricValue(REPLICATION_BYTES_IN_RATE), DELTA);
      continue;
    }
    fail("Should never have broker cpu util " + brokerSample.metricValue(CPU_USAGE));
  }

  assertFalse(processed.partitionMetricSamples().isEmpty());
}
Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in the cruise-control project by LinkedIn.
From the class LoadMonitorTaskRunnerTest, method testSimpleFetch.
/**
 * Starts a {@code LoadMonitorTaskRunner} backed by a mock sampler and checks that, within ten seconds,
 * exactly one partition sample arrives for every partition of every test topic and that no sample is
 * left in the queue after the fetcher manager has been shut down.
 */
@Test
public void testSimpleFetch() throws InterruptedException {
  KafkaCruiseControlConfig cruiseControlConfig = new KafkaCruiseControlConfig(getLoadMonitorProperties());
  Metadata clusterMetadata =
      new Metadata(METADATA_REFRESH_BACKOFF, METADATA_EXPIRY_MS, new LogContext(), new ClusterResourceListeners());
  MetadataClient metadataClient = new MetadataClient(cruiseControlConfig, clusterMetadata, -1L, TIME);
  assertNotNull(metadataClient.cluster().clusterResource().clusterId());

  MockPartitionMetricSampleAggregator partitionAggregator =
      new MockPartitionMetricSampleAggregator(cruiseControlConfig, clusterMetadata);
  KafkaBrokerMetricSampleAggregator brokerAggregator = EasyMock.mock(KafkaBrokerMetricSampleAggregator.class);
  MetricRegistry metricRegistry = new MetricRegistry();
  MetricSampler mockSampler = new MockSampler(0);
  MetricFetcherManager fetcherManager =
      new MetricFetcherManager(cruiseControlConfig, partitionAggregator, brokerAggregator, metadataClient,
                               METRIC_DEF, TIME, metricRegistry, null, mockSampler);
  LoadMonitorTaskRunner taskRunner =
      new LoadMonitorTaskRunner(cruiseControlConfig, fetcherManager, partitionAggregator, brokerAggregator,
                                metadataClient, TIME);

  // Keep refreshing until the metadata reports all test topics.
  while (clusterMetadata.fetch().topics().size() < NUM_TOPICS) {
    Thread.sleep(10);
    metadataClient.refreshMetadata();
  }
  taskRunner.start(true);

  // Every partition of every test topic is expected to be sampled.
  Set<TopicPartition> pendingPartitions = new HashSet<>();
  for (int topicIdx = 0; topicIdx < NUM_TOPICS; topicIdx++) {
    for (int partitionIdx = 0; partitionIdx < NUM_PARTITIONS; partitionIdx++) {
      pendingPartitions.add(new TopicPartition(TOPIC_PREFIX + topicIdx, partitionIdx));
    }
  }

  long deadlineMs = System.currentTimeMillis() + 10000;
  BlockingQueue<PartitionMetricSample> sampleQueue = partitionAggregator.metricSampleQueue();
  // Drain the queue until all partitions have been seen or the deadline has passed.
  while (!pendingPartitions.isEmpty() && System.currentTimeMillis() < deadlineMs) {
    PartitionMetricSample polled = sampleQueue.poll();
    if (polled == null) {
      continue;
    }
    TopicPartition sampledPartition = polled.entity().tp();
    assertTrue("The topic partition should have been sampled and sampled only once.",
               pendingPartitions.contains(sampledPartition));
    pendingPartitions.remove(sampledPartition);
  }

  assertTrue("Did not see sample for partitions " + Arrays.toString(pendingPartitions.toArray()),
             pendingPartitions.isEmpty());
  fetcherManager.shutdown();
  assertTrue(sampleQueue.isEmpty());
}
Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in the cruise-control project by LinkedIn.
From the class SamplingFetcher, method usePartitionMetricSamples.
/**
 * Closes each collected partition metric sample that belongs to an assigned partition and offers it to
 * the partition metric sample aggregator. Samples that the aggregator does not accept are removed from
 * the given set. A summary is logged at the end.
 *
 * @param partitionMetricSamples The samples returned by the metric sampler; may be {@code null} if the
 *                               sampler did not return anything, in which case only a warning is logged.
 *                               The set is modified in place: rejected samples are removed.
 */
@Override
protected void usePartitionMetricSamples(Set<PartitionMetricSample> partitionMetricSamples) {
  // Ignore the null value if the metric sampler did not return a sample.
  if (partitionMetricSamples == null) {
    LOG.warn("Failed to collect partition metric samples for {} assigned partitions", _assignedPartitions.size());
    return;
  }

  // Only assigned partitions are ever added below, so sizing the set for all of them (with headroom for
  // the default 0.75 load factor) guarantees that it never has to be resized.
  Set<TopicPartition> returnedPartitions = new HashSet<>((int) (_assignedPartitions.size() / 0.75f) + 1);
  int discarded = 0;
  Iterator<PartitionMetricSample> iter = partitionMetricSamples.iterator();
  while (iter.hasNext()) {
    PartitionMetricSample partitionMetricSample = iter.next();
    TopicPartition tp = partitionMetricSample.entity().tp();
    if (!_assignedPartitions.contains(tp)) {
      // NOTE(review): the sample is only reported here; it is not removed from the set, so it is still
      // counted in the summary below and remains visible to whoever uses the set afterwards - confirm
      // that this is intended.
      LOG.warn("Collected partition metric sample for partition {} which is not an assigned partition. "
               + "The metric sample will be ignored.", tp);
      continue;
    }
    // We fill in the cpu utilization based on the model in case user did not fill it in.
    if (_useLinearRegressionModel && ModelParameters.trainingCompleted()) {
      partitionMetricSample.record(KafkaMetricDef.commonMetricDef().metricInfo(CPU_USAGE.name()),
                                   estimateLeaderCpuUtilUsingLinearRegressionModel(partitionMetricSample));
    }
    // We close the metric sample in case the implementation forgot to do so.
    partitionMetricSample.close(_endTimeMs);
    // We remove the sample from the returning set if it is not accepted.
    if (_partitionMetricSampleAggregator.addSample(partitionMetricSample, _leaderValidation)) {
      LOG.trace("Enqueued partition metric sample {}", partitionMetricSample);
    } else {
      iter.remove();
      discarded++;
      LOG.trace("Failed to add partition metric sample {}", partitionMetricSample);
    }
    // The partition counts as returned whether or not its sample was accepted.
    returnedPartitions.add(tp);
  }
  LOG.info("Collected {}{} partition metric samples for {} partitions. Total partition assigned: {}.",
           partitionMetricSamples.size(), discarded > 0 ? String.format("(%d discarded)", discarded) : "",
           returnedPartitions.size(), _assignedPartitions.size());
}
Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in the cruise-control project by LinkedIn.
From the class CruiseControlMetricsProcessor, method addPartitionMetricSamples.
/**
 * Build a metric sample for each of the given partitions and put every successfully built sample into
 * the provided set. A partition whose sample comes back as {@code null} contributes nothing to the set.
 * A partition whose sample building throws is logged and counted as skipped under
 * {@link SamplingUtils#UNRECOGNIZED_BROKER_ID}.
 *
 * @param cluster Kafka cluster
 * @param partitionsDotNotHandled The partitions to get samples. The topic partition name may have dots.
 * @param partitionMetricSamples The set to add the partition samples to.
 * @return The number of skipped partitions by broker ids. A broker id of {@link SamplingUtils#UNRECOGNIZED_BROKER_ID}
 * indicates unrecognized broker.
 */
private Map<Integer, Integer> addPartitionMetricSamples(Cluster cluster,
                                                        Set<TopicPartition> partitionsDotNotHandled,
                                                        Set<PartitionMetricSample> partitionMetricSamples) {
  Map<Integer, Map<String, Integer>> leadersByBroker = leaderDistribution(cluster);
  // The same map is handed to the sample builder and updated here on failures.
  Map<Integer, Integer> skippedCounts = new HashMap<>();
  for (TopicPartition partition : partitionsDotNotHandled) {
    try {
      PartitionMetricSample built = buildPartitionMetricSample(cluster, leadersByBroker, partition, _brokerLoad,
                                                               _maxMetricTimestamp, _cachedNumCoresByBroker,
                                                               skippedCounts);
      if (built == null) {
        continue;
      }
      LOG.trace("Added partition metrics sample for {}.", partition);
      partitionMetricSamples.add(built);
    } catch (Exception e) {
      // Best effort: a failure for one partition must not prevent sampling of the remaining ones.
      LOG.error("Error building partition metric sample for {}.", partition, e);
      skippedCounts.merge(UNRECOGNIZED_BROKER_ID, 1, Integer::sum);
    }
  }
  return skippedCounts;
}
Aggregations