Search in sources :

Example 1 with PartitionMetricSample

Use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in the cruise-control project by LinkedIn.

the class KafkaPartitionMetricSampleAggregatorTest method testExcludeInvalidMetricSample.

/**
 * Adds two samples that the test expects the aggregator to exclude: one whose leader differs from
 * the leader in the metadata, and one that only carries the CPU metric. It then checks that the
 * aggregated value of the last window is what the pre-populated samples alone would produce.
 */
@Test
public void testExcludeInvalidMetricSample() throws NotEnoughValidWindowsException {
    KafkaCruiseControlConfig cruiseControlConfig = new KafkaCruiseControlConfig(getLoadMonitorProperties());
    Metadata clusterMetadata = getMetadata(Collections.singleton(TP));
    KafkaPartitionMetricSampleAggregator aggregator = new KafkaPartitionMetricSampleAggregator(cruiseControlConfig, clusterMetadata);
    MetricDef commonDef = KafkaMetricDef.commonMetricDef();
    populateSampleAggregator(NUM_WINDOWS + 1, MIN_SAMPLES_PER_WINDOW, aggregator);

    // First invalid sample: complete metrics, but reported by node 1 rather than the leader in the metadata.
    PartitionMetricSample wrongLeaderSample = new PartitionMetricSample(1, TP);
    wrongLeaderSample.record(commonDef.metricInfo(DISK_USAGE.name()), 10000);
    wrongLeaderSample.record(commonDef.metricInfo(CPU_USAGE.name()), 10000);
    wrongLeaderSample.record(commonDef.metricInfo(LEADER_BYTES_IN.name()), 10000);
    wrongLeaderSample.record(commonDef.metricInfo(LEADER_BYTES_OUT.name()), 10000);
    wrongLeaderSample.close(0);

    // Second invalid sample: reported by node 0, but only the CPU metric is recorded.
    PartitionMetricSample cpuOnlySample = new PartitionMetricSample(0, TP);
    cpuOnlySample.record(commonDef.metricInfo(CPU_USAGE.name()), 10000);
    cpuOnlySample.close(0);

    aggregator.addSample(wrongLeaderSample);
    aggregator.addSample(cpuOnlySample);

    // Aggregate and verify that neither of the two samples above leaked into the window values.
    Map<PartitionEntity, ValuesAndExtrapolations> aggregated =
        aggregator.aggregate(clusterMetadata.fetch(), NUM_WINDOWS * WINDOW_MS, new OperationProgress()).valuesAndExtrapolations();
    ValuesAndExtrapolations partitionResult = aggregated.get(PE);
    for (Resource resource : Resource.cachedValues()) {
        Collection<Short> resourceMetricIds = KafkaMetricDef.resourceToMetricIds(resource);
        // DISK expects MIN_SAMPLES_PER_WINDOW - 1 per metric; every other resource expects half of that.
        double perMetricValue = resource == Resource.DISK ? MIN_SAMPLES_PER_WINDOW - 1 : (MIN_SAMPLES_PER_WINDOW - 1) / 2.0;
        // Only the CPU expectation is divided by UNIT_INTERVAL_TO_PERCENTAGE.
        double divisor = resource == Resource.CPU ? UNIT_INTERVAL_TO_PERCENTAGE : 1.0;
        double expectedValue = perMetricValue / divisor * resourceMetricIds.size();
        assertEquals("The utilization for " + resource + " should be " + expectedValue,
                     expectedValue,
                     partitionResult.metricValues().valuesForGroup(resource.name(), KafkaMetricDef.commonMetricDef(), true).get(NUM_WINDOWS - 1),
                     0.01);
    }
}
Also used : OperationProgress(com.linkedin.kafka.cruisecontrol.async.progress.OperationProgress) PartitionEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionEntity) MonitorUnitTestUtils.getMetadata(com.linkedin.kafka.cruisecontrol.monitor.MonitorUnitTestUtils.getMetadata) Metadata(org.apache.kafka.clients.Metadata) MetricDef(com.linkedin.cruisecontrol.metricdef.MetricDef) KafkaMetricDef(com.linkedin.kafka.cruisecontrol.monitor.metricdefinition.KafkaMetricDef) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) PartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample) ValuesAndExtrapolations(com.linkedin.cruisecontrol.monitor.sampling.aggregator.ValuesAndExtrapolations) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) Test(org.junit.Test)

Example 2 with PartitionMetricSample

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by linkedin.

the class CruiseControlMetricsProcessorTest method testBasic.

/**
 * Feeds the metrics from {@code getCruiseControlMetrics()} into a {@link CruiseControlMetricsProcessor},
 * processes them for {@code TEST_PARTITIONS} in {@code SamplingMode.ALL}, and validates the cached
 * core counts as well as every returned partition and broker metric sample.
 */
@Test
public void testBasic() throws TimeoutException, BrokerCapacityResolutionException {
    CruiseControlMetricsProcessor processor = new CruiseControlMetricsProcessor(mockBrokerCapacityConfigResolver(), false);
    Set<CruiseControlMetric> metrics = getCruiseControlMetrics();
    Cluster cluster = getCluster();
    metrics.forEach(processor::addMetric);
    MetricSampler.Samples samples = processor.process(cluster, TEST_PARTITIONS, MetricSampler.SamplingMode.ALL);
    // After processing, every node of the cluster must have MOCK_NUM_CPU_CORES cached.
    for (Node node : cluster.nodes()) {
        assertEquals(MOCK_NUM_CPU_CORES, (short) processor.cachedNumCoresByBroker().get(node.id()));
    }
    // Exactly four partition samples and two broker samples are expected.
    assertEquals(4, samples.partitionMetricSamples().size());
    assertEquals(2, samples.brokerMetricSamples().size());
    // Each partition sample is matched by its topic partition and validated against that partition's expectations.
    for (PartitionMetricSample sample : samples.partitionMetricSamples()) {
        if (sample.entity().tp().equals(T1P0)) {
            validatePartitionMetricSample(sample, _time.milliseconds() + 2, CPU_UTIL.get(T1P0), B0_TOPIC1_BYTES_IN, B0_TOPIC1_BYTES_OUT, T1P0_BYTES_SIZE);
        } else if (sample.entity().tp().equals(T1P1)) {
            validatePartitionMetricSample(sample, _time.milliseconds() + 2, CPU_UTIL.get(T1P1), B1_TOPIC1_BYTES_IN, B1_TOPIC1_BYTES_OUT, T1P1_BYTES_SIZE);
        } else if (sample.entity().tp().equals(T2P0)) {
            // T2P0 and T2P1 are both checked against half of B0_TOPIC2_BYTES_IN / B0_TOPIC2_BYTES_OUT.
            validatePartitionMetricSample(sample, _time.milliseconds() + 2, CPU_UTIL.get(T2P0), B0_TOPIC2_BYTES_IN / 2, B0_TOPIC2_BYTES_OUT / 2, T2P0_BYTES_SIZE);
        } else if (sample.entity().tp().equals(T2P1)) {
            validatePartitionMetricSample(sample, _time.milliseconds() + 2, CPU_UTIL.get(T2P1), B0_TOPIC2_BYTES_IN / 2, B0_TOPIC2_BYTES_OUT / 2, T2P1_BYTES_SIZE);
        } else {
            // Any other topic partition means the processor produced an unexpected sample.
            fail("Should never have partition " + sample.entity().tp());
        }
    }
    // Broker samples are told apart by their CPU usage value (exact equality with B0_CPU / B1_CPU).
    for (BrokerMetricSample sample : samples.brokerMetricSamples()) {
        if (sample.metricValue(CPU_USAGE) == B0_CPU) {
            assertEquals(B0_TOPIC1_REPLICATION_BYTES_IN, sample.metricValue(REPLICATION_BYTES_IN_RATE), DELTA);
        } else if (sample.metricValue(CPU_USAGE) == B1_CPU) {
            // The second broker's expected replication rate is the sum over both topics.
            assertEquals(B1_TOPIC1_REPLICATION_BYTES_IN + B1_TOPIC2_REPLICATION_BYTES_IN, sample.metricValue(REPLICATION_BYTES_IN_RATE), DELTA);
        } else {
            fail("Should never have broker cpu util " + sample.metricValue(CPU_USAGE));
        }
    }
    // NOTE(review): already implied by the size == 4 assertion above; kept as-is.
    assertFalse(samples.partitionMetricSamples().isEmpty());
}
Also used : CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) Node(org.apache.kafka.common.Node) Cluster(org.apache.kafka.common.Cluster) PartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample) BrokerMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerMetricSample) Test(org.junit.Test)

Example 3 with PartitionMetricSample

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by linkedin.

the class LoadMonitorTaskRunnerTest method testSimpleFetch.

/**
 * Starts a {@link LoadMonitorTaskRunner} wired to a {@code MockSampler} and a
 * {@code MockPartitionMetricSampleAggregator}, then verifies that a sample shows up for every
 * {@code TOPIC_PREFIX + i} / partition {@code j} combination within 10 seconds, that no partition
 * is sampled twice, and that the sample queue is empty once the fetcher manager is shut down.
 *
 * @throws InterruptedException if the thread is interrupted while waiting for metadata or samples.
 */
@Test
public void testSimpleFetch() throws InterruptedException {
    KafkaCruiseControlConfig config = new KafkaCruiseControlConfig(getLoadMonitorProperties());
    Metadata metadata = new Metadata(METADATA_REFRESH_BACKOFF, METADATA_EXPIRY_MS, new LogContext(), new ClusterResourceListeners());
    MetadataClient metadataClient = new MetadataClient(config, metadata, -1L, TIME);
    assertNotNull(metadataClient.cluster().clusterResource().clusterId());
    MockPartitionMetricSampleAggregator mockPartitionMetricSampleAggregator = new MockPartitionMetricSampleAggregator(config, metadata);
    KafkaBrokerMetricSampleAggregator mockBrokerMetricSampleAggregator = EasyMock.mock(KafkaBrokerMetricSampleAggregator.class);
    MetricRegistry dropwizardMetricRegistry = new MetricRegistry();
    MetricSampler sampler = new MockSampler(0);
    MetricFetcherManager fetcherManager = new MetricFetcherManager(config, mockPartitionMetricSampleAggregator, mockBrokerMetricSampleAggregator, metadataClient, METRIC_DEF, TIME, dropwizardMetricRegistry, null, sampler);
    LoadMonitorTaskRunner loadMonitorTaskRunner = new LoadMonitorTaskRunner(config, fetcherManager, mockPartitionMetricSampleAggregator, mockBrokerMetricSampleAggregator, metadataClient, TIME);
    // Wait until the metadata knows about all the topics before starting the runner.
    while (metadata.fetch().topics().size() < NUM_TOPICS) {
        Thread.sleep(10);
        metadataClient.refreshMetadata();
    }
    loadMonitorTaskRunner.start(true);
    // Build the full set of partitions for which a sample is expected.
    Set<TopicPartition> partitionsToSample = new HashSet<>();
    for (int i = 0; i < NUM_TOPICS; i++) {
        for (int j = 0; j < NUM_PARTITIONS; j++) {
            partitionsToSample.add(new TopicPartition(TOPIC_PREFIX + i, j));
        }
    }
    long startMs = System.currentTimeMillis();
    BlockingQueue<PartitionMetricSample> sampleQueue = mockPartitionMetricSampleAggregator.metricSampleQueue();
    while (!partitionsToSample.isEmpty() && System.currentTimeMillis() < startMs + 10000) {
        // Use a short timed poll so the loop waits for a sample instead of spinning on an empty queue;
        // the short timeout keeps the 10-second deadline check above responsive.
        PartitionMetricSample sample = sampleQueue.poll(10, java.util.concurrent.TimeUnit.MILLISECONDS);
        if (sample != null) {
            // remove() returns false when the partition is unexpected or was already sampled.
            assertTrue("The topic partition should have been sampled and sampled only once.", partitionsToSample.remove(sample.entity().tp()));
        }
    }
    assertTrue("Did not see sample for partitions " + Arrays.toString(partitionsToSample.toArray()), partitionsToSample.isEmpty());
    fetcherManager.shutdown();
    assertTrue(sampleQueue.isEmpty());
}
Also used : MetricFetcherManager(com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricFetcherManager) ClusterResourceListeners(org.apache.kafka.common.internals.ClusterResourceListeners) MetricSampler(com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricSampler) MetricRegistry(com.codahale.metrics.MetricRegistry) Metadata(org.apache.kafka.clients.Metadata) LogContext(org.apache.kafka.common.utils.LogContext) PartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample) KafkaBrokerMetricSampleAggregator(com.linkedin.kafka.cruisecontrol.monitor.sampling.aggregator.KafkaBrokerMetricSampleAggregator) MetadataClient(com.linkedin.kafka.cruisecontrol.common.MetadataClient) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with PartitionMetricSample

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by linkedin.

the class SamplingFetcher method usePartitionMetricSamples.

/**
 * Consumes the partition metric samples returned by the metric sampler.
 *
 * <p>For every sample that belongs to an assigned partition, the sample is optionally given a CPU
 * usage estimate from the linear regression model, closed with {@code _endTimeMs}, and offered to
 * the partition metric sample aggregator. Samples the aggregator rejects are removed from
 * {@code partitionMetricSamples}. Samples for partitions that are not assigned are only logged;
 * they are left in the set.
 *
 * @param partitionMetricSamples the samples returned by the sampler, or {@code null} if it returned
 *                               none. The set is modified in place (rejected samples are removed).
 */
@Override
protected void usePartitionMetricSamples(Set<PartitionMetricSample> partitionMetricSamples) {
    // Ignore the null value if the metric sampler did not return a sample
    if (partitionMetricSamples == null) {
        LOG.warn("Failed to collect partition metric samples for {} assigned partitions", _assignedPartitions.size());
        return;
    }
    // Give an initial capacity to avoid resizing. Only assigned partitions are ever added, so the
    // number of assigned partitions (adjusted for the default 0.75 load factor) is an upper bound.
    Set<TopicPartition> returnedPartitions = new HashSet<>((int) (_assignedPartitions.size() / 0.75f) + 1);
    int discarded = 0;
    for (Iterator<PartitionMetricSample> iter = partitionMetricSamples.iterator(); iter.hasNext();) {
        PartitionMetricSample partitionMetricSample = iter.next();
        TopicPartition tp = partitionMetricSample.entity().tp();
        if (!_assignedPartitions.contains(tp)) {
            LOG.warn("Collected partition metric sample for partition {} which is not an assigned partition. " + "The metric sample will be ignored.", tp);
            continue;
        }
        // we fill in the cpu utilization based on the model in case user did not fill it in.
        if (_useLinearRegressionModel && ModelParameters.trainingCompleted()) {
            partitionMetricSample.record(KafkaMetricDef.commonMetricDef().metricInfo(CPU_USAGE.name()), estimateLeaderCpuUtilUsingLinearRegressionModel(partitionMetricSample));
        }
        // we close the metric sample in case the implementation forgot to do so.
        partitionMetricSample.close(_endTimeMs);
        // We remove the sample from the returning set if it is not accepted.
        if (_partitionMetricSampleAggregator.addSample(partitionMetricSample, _leaderValidation)) {
            LOG.trace("Enqueued partition metric sample {}", partitionMetricSample);
        } else {
            iter.remove();
            discarded++;
            LOG.trace("Failed to add partition metric sample {}", partitionMetricSample);
        }
        // The partition counts as returned whether or not the aggregator accepted its sample.
        returnedPartitions.add(tp);
    }
    LOG.info("Collected {}{} partition metric samples for {} partitions. Total partition assigned: {}.", partitionMetricSamples.size(), discarded > 0 ? String.format("(%d discarded)", discarded) : "", returnedPartitions.size(), _assignedPartitions.size());
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) PartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample) HashSet(java.util.HashSet)

Example 5 with PartitionMetricSample

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample in project cruise-control by linkedin.

the class CruiseControlMetricsProcessor method addPartitionMetricSamples.

/**
 * Builds a partition metric sample for each requested partition and collects the ones that could
 * be built into the given set.
 *
 * <p>A failure while building the sample of one partition is logged and counted against
 * {@link SamplingUtils#UNRECOGNIZED_BROKER_ID}; the remaining partitions are still processed.
 *
 * @param cluster Kafka cluster
 * @param partitionsDotNotHandled The partitions to get samples. The topic partition name may have dots.
 * @param partitionMetricSamples The set to add the partition samples to.
 * @return The number of skipped partitions by broker ids. A broker id of {@link SamplingUtils#UNRECOGNIZED_BROKER_ID}
 *         indicates unrecognized broker.
 */
private Map<Integer, Integer> addPartitionMetricSamples(Cluster cluster, Set<TopicPartition> partitionsDotNotHandled, Set<PartitionMetricSample> partitionMetricSamples) {
    Map<Integer, Integer> skippedCountByBroker = new HashMap<>();
    Map<Integer, Map<String, Integer>> leadersByBroker = leaderDistribution(cluster);
    for (TopicPartition partition : partitionsDotNotHandled) {
        try {
            PartitionMetricSample built = buildPartitionMetricSample(cluster, leadersByBroker, partition, _brokerLoad, _maxMetricTimestamp, _cachedNumCoresByBroker, skippedCountByBroker);
            if (built == null) {
                // No sample was produced for this partition; there is nothing to add.
                continue;
            }
            LOG.trace("Added partition metrics sample for {}.", partition);
            partitionMetricSamples.add(built);
        } catch (Exception e) {
            // Best effort: a failure for one partition must not abort the others.
            LOG.error("Error building partition metric sample for {}.", partition, e);
            skippedCountByBroker.merge(UNRECOGNIZED_BROKER_ID, 1, Integer::sum);
        }
    }
    return skippedCountByBroker;
}
Also used : HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) PartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample) SamplingUtils.buildPartitionMetricSample(com.linkedin.kafka.cruisecontrol.monitor.sampling.SamplingUtils.buildPartitionMetricSample) HashMap(java.util.HashMap) Map(java.util.Map) UnknownVersionException(com.linkedin.kafka.cruisecontrol.metricsreporter.exception.UnknownVersionException) TimeoutException(java.util.concurrent.TimeoutException) BrokerCapacityResolutionException(com.linkedin.kafka.cruisecontrol.exception.BrokerCapacityResolutionException)

Aggregations

PartitionMetricSample (com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.PartitionMetricSample)8 Test (org.junit.Test)5 TopicPartition (org.apache.kafka.common.TopicPartition)4 KafkaCruiseControlConfig (com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig)3 HashSet (java.util.HashSet)3 Metadata (org.apache.kafka.clients.Metadata)3 MetricRegistry (com.codahale.metrics.MetricRegistry)2 MetricDef (com.linkedin.cruisecontrol.metricdef.MetricDef)2 MetadataClient (com.linkedin.kafka.cruisecontrol.common.MetadataClient)2 CruiseControlMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric)2 RawMetricType (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.RawMetricType)2 KafkaMetricDef (com.linkedin.kafka.cruisecontrol.monitor.metricdefinition.KafkaMetricDef)2 MetricFetcherManager (com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricFetcherManager)2 MetricSampler (com.linkedin.kafka.cruisecontrol.monitor.sampling.MetricSampler)2 KafkaBrokerMetricSampleAggregator (com.linkedin.kafka.cruisecontrol.monitor.sampling.aggregator.KafkaBrokerMetricSampleAggregator)2 Cluster (org.apache.kafka.common.Cluster)2 Node (org.apache.kafka.common.Node)2 ClusterResourceListeners (org.apache.kafka.common.internals.ClusterResourceListeners)2 LogContext (org.apache.kafka.common.utils.LogContext)2 MetricInfo (com.linkedin.cruisecontrol.metricdef.MetricInfo)1