Search in sources:

Example 6 with BrokerCapacityInfo

use of com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo in project cruise-control by linkedin.

the class MonitorUtils method populatePartitionLoad.

/**
 * Create replicas of the partition with the given (1) identifier and (2) load information to populate the given cluster model.
 * If partition with the given identifier does not exist in the given cluster, do nothing.
 *
 * <p>For every replica of the partition, the capacity of the hosting broker is resolved and handed to the cluster
 * model before the replica itself is created. If the partition has no leader, the replicas are not created, but the
 * hosting brokers are still handed to the cluster model.
 *
 * @param cluster Kafka cluster.
 * @param clusterModel The cluster model to populate load information.
 * @param tp Topic partition that identifies the partition to populate the load for.
 * @param valuesAndExtrapolations The values and extrapolations of the leader replica.
 * @param replicaPlacementInfo The distribution of replicas over broker logdirs if available, {@code null} otherwise.
 * @param brokerCapacityConfigResolver The resolver for retrieving broker capacities.
 * @param allowCapacityEstimation Whether to allow capacity estimation in the cluster model if the underlying live broker
 *                                capacity is unavailable.
 * @throws TimeoutException If the capacity of an alive broker hosting a replica cannot be retrieved. The original
 *                          failure is attached as the cause.
 */
static void populatePartitionLoad(Cluster cluster, ClusterModel clusterModel, TopicPartition tp, ValuesAndExtrapolations valuesAndExtrapolations, Map<TopicPartition, Map<Integer, String>> replicaPlacementInfo, BrokerCapacityConfigResolver brokerCapacityConfigResolver, boolean allowCapacityEstimation) throws TimeoutException {
    PartitionInfo partitionInfo = cluster.partition(tp);
    // If partition info does not exist, the topic may have been deleted.
    if (partitionInfo == null) {
        return;
    }
    Set<Integer> aliveBrokers = cluster.nodes().stream().map(Node::id).collect(Collectors.toSet());
    // These lookups do not depend on the replica being processed, so resolve them once.
    Node[] replicas = partitionInfo.replicas();
    Node leader = partitionInfo.leader();
    // The placement info may be absent altogether, or may lack an entry for this partition; both yield null logdirs.
    Map<Integer, String> logdirByBroker = replicaPlacementInfo == null ? null : replicaPlacementInfo.get(tp);
    boolean needToAdjustCpuUsage = true;
    Set<Integer> deadBrokersWithUnknownCapacity = new HashSet<>();
    for (int index = 0; index < replicas.length; index++) {
        Node replica = replicas[index];
        int brokerId = replica.id();
        boolean isAlive = aliveBrokers.contains(brokerId);
        String rack = getRackHandleNull(replica);
        BrokerCapacityInfo brokerCapacity;
        try {
            // Do not allow capacity estimation for dead brokers.
            brokerCapacity = brokerCapacityConfigResolver.capacityForBroker(rack, replica.host(), brokerId, BROKER_CAPACITY_FETCH_TIMEOUT_MS, isAlive && allowCapacityEstimation);
        } catch (TimeoutException | BrokerCapacityResolutionException e) {
            if (isAlive) {
                String errorMessage = String.format("Unable to retrieve capacity for broker %d. This may be caused by churn in the cluster, please retry.", brokerId);
                LOG.warn(errorMessage, e);
                // TimeoutException has no cause-accepting constructor, so attach the original failure explicitly.
                TimeoutException timeoutException = new TimeoutException(errorMessage);
                timeoutException.initCause(e);
                throw timeoutException;
            }
            // Capacity resolver may not be able to return the capacity information of dead brokers.
            brokerCapacity = new BrokerCapacityInfo(EMPTY_BROKER_CAPACITY);
            deadBrokersWithUnknownCapacity.add(brokerId);
        }
        clusterModel.handleDeadBroker(rack, brokerId, brokerCapacity);
        if (leader == null) {
            LOG.warn("Detected offline partition {}-{}, skipping", partitionInfo.topic(), partitionInfo.partition());
            continue;
        }
        boolean isLeader = brokerId == leader.id();
        boolean isOffline = Arrays.stream(partitionInfo.offlineReplicas()).anyMatch(offlineReplica -> offlineReplica.id() == brokerId);
        // If the replica's logdir is null, it is either because replica placement information is not populated for the cluster
        // model or this replica is hosted on a dead disk and is not considered for intra-broker replica operations.
        String logdir = logdirByBroker == null ? null : logdirByBroker.get(brokerId);
        clusterModel.createReplica(rack, brokerId, tp, index, isLeader, isOffline, logdir, false);
        clusterModel.setReplicaLoad(rack, brokerId, tp, getAggregatedMetricValues(valuesAndExtrapolations, partitionInfo, isLeader, needToAdjustCpuUsage), valuesAndExtrapolations.windows());
        // Only the first replica for which a load is set is processed with the CPU adjustment flag enabled.
        needToAdjustCpuUsage = false;
    }
    if (!deadBrokersWithUnknownCapacity.isEmpty()) {
        LOG.info("Assign empty capacity to brokers {} because they are dead and capacity resolver is unable to fetch their capacity.", deadBrokersWithUnknownCapacity);
    }
}
Also used : BrokerCapacityResolutionException(com.linkedin.kafka.cruisecontrol.exception.BrokerCapacityResolutionException) BrokerCapacityInfo(com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo) Node(org.apache.kafka.common.Node) PartitionInfo(org.apache.kafka.common.PartitionInfo) HashSet(java.util.HashSet) TimeoutException(java.util.concurrent.TimeoutException)

Example 7 with BrokerCapacityInfo

use of com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo in project cruise-control by linkedin.

the class CruiseControlMetricsProcessorTest method testMissingBrokerCapacity.

/**
 * Feeds the test metrics into a processor whose capacity resolver always answers with a capacity info built from
 * empty capacity maps, then processes the test cluster. The test passes only if an
 * {@link IllegalArgumentException} is raised.
 */
@Test(expected = IllegalArgumentException.class)
public void testMissingBrokerCapacity() throws TimeoutException, BrokerCapacityResolutionException {
    Set<CruiseControlMetric> metrics = getCruiseControlMetrics();
    // The mocked resolver answers every capacity lookup, any number of times, with the same (empty) capacity info.
    BrokerCapacityConfigResolver resolver = EasyMock.mock(BrokerCapacityConfigResolver.class);
    EasyMock.expect(resolver.capacityForBroker(EasyMock.anyString(),
                                               EasyMock.anyString(),
                                               EasyMock.anyInt(),
                                               EasyMock.anyLong(),
                                               EasyMock.anyBoolean()))
            .andReturn(new BrokerCapacityInfo(Collections.emptyMap(), Collections.emptyMap(), MOCK_NUM_CPU_CORES))
            .anyTimes();
    EasyMock.replay(resolver);

    CruiseControlMetricsProcessor processor = new CruiseControlMetricsProcessor(resolver, false);
    metrics.forEach(processor::addMetric);
    EasyMock.verify(resolver);

    processor.process(getCluster(), TEST_PARTITIONS, MetricSampler.SamplingMode.ALL);
}
Also used : CruiseControlMetric(com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric) BrokerCapacityInfo(com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo) BrokerCapacityConfigResolver(com.linkedin.kafka.cruisecontrol.config.BrokerCapacityConfigResolver) Cluster(org.apache.kafka.common.Cluster) Test(org.junit.Test)

Example 8 with BrokerCapacityInfo

use of com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo in project cruise-control by linkedin.

the class CruiseControlMetricsProcessorTest method mockBrokerCapacityConfigResolver.

/**
 * Build a replayed EasyMock capacity resolver that answers every {@code capacityForBroker} call, any number of
 * times, with a capacity info created from {@code EMPTY_BROKER_CAPACITY}, an empty map and
 * {@code MOCK_NUM_CPU_CORES}.
 *
 * @return The mocked resolver, already switched to replay mode.
 */
private static BrokerCapacityConfigResolver mockBrokerCapacityConfigResolver() throws TimeoutException, BrokerCapacityResolutionException {
    BrokerCapacityConfigResolver resolver = EasyMock.mock(BrokerCapacityConfigResolver.class);
    EasyMock.expect(resolver.capacityForBroker(EasyMock.anyString(),
                                               EasyMock.anyString(),
                                               EasyMock.anyInt(),
                                               EasyMock.anyLong(),
                                               EasyMock.anyBoolean()))
            .andReturn(new BrokerCapacityInfo(EMPTY_BROKER_CAPACITY, Collections.emptyMap(), MOCK_NUM_CPU_CORES))
            .anyTimes();
    EasyMock.replay(resolver);
    return resolver;
}
Also used : BrokerCapacityInfo(com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo) BrokerCapacityConfigResolver(com.linkedin.kafka.cruisecontrol.config.BrokerCapacityConfigResolver)

Example 9 with BrokerCapacityInfo

use of com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo in project cruise-control by linkedin.

the class KafkaAssignerDiskUsageDistributionGoalTest method createClusterModel.

/**
 * Build the cluster model used by the tests: 4 racks, 5 brokers and 9 partitions with 3 replicas each.
 * The replica distribution is as below.
 *
 * L - Leader
 * F - Follower
 * S - Secondary Follower
 *
 *         r0             r1         r2          r3
 *        /  \             |          |           |
 *      b0    b1          b2         b3          b4
 *   T0P0(L)  T0P1(L)   T0P2(L)    T1P0(L)     T1P1(L)
 *   T1P2(L)  T2P0(L)   T2P1(L)    T2P2(L)     T0P0(F)
 *   T0P2(F)  T1P0(F)   T0P1(F)    T1P1(F)     T1P2(F)
 *   T2P1(F)  T2P2(F)   T2P0(F)    T0P0(S)     T0P1(S)
 *   T1P1(S)            T1P0(S)    T0P2(S)     T2P0(S)
 *                      T1P2(S)    T2P1(S)     T2P2(S)
 * The sizes of each broker are:
 * b0: 190
 * b1: 260
 * b2: 360
 * b3: 250
 * b4: 290
 *
 * The average broker size should be: 270
 * @return Cluster model with the documented properties for testing.
 */
private ClusterModel createClusterModel() {
    Map<TopicPartition, Float> partitionSize = new HashMap<>();
    partitionSize.put(T0P0, 10f);
    partitionSize.put(T0P1, 90f);
    partitionSize.put(T0P2, 20f);
    partitionSize.put(T1P0, 80f);
    partitionSize.put(T1P1, 30f);
    partitionSize.put(T1P2, 70f);
    partitionSize.put(T2P0, 40f);
    partitionSize.put(T2P1, 60f);
    partitionSize.put(T2P2, 50f);

    final int numRacks = 4;
    // Rack of each broker, indexed by broker id: b0 and b1 share r0, every other broker has its own rack.
    final String[] rackByBroker = {"r0", "r0", "r1", "r2", "r3"};
    // Replica layout indexed as [position in replica list][broker id] -> partitions, transcribed from the diagram.
    final TopicPartition[][][] replicasByPositionAndBroker = {
        // Leaders (position 0).
        {{T0P0, T1P2}, {T0P1, T2P0}, {T0P2, T2P1}, {T1P0, T2P2}, {T1P1}},
        // Followers (position 1).
        {{T0P2, T2P1}, {T1P0, T2P2}, {T0P1, T2P0}, {T1P1}, {T0P0, T1P2}},
        // Secondary followers (position 2).
        {{T1P1}, {}, {T1P0, T1P2}, {T0P0, T0P2, T2P1}, {T0P1, T2P0, T2P2}}
    };

    ClusterModel model = new ClusterModel(new ModelGeneration(0, 0), 1.0);
    for (int rackIndex = 0; rackIndex < numRacks; rackIndex++) {
        model.createRack("r" + rackIndex);
    }

    BrokerCapacityInfo commonBrokerCapacityInfo = new BrokerCapacityInfo(TestConstants.BROKER_CAPACITY);
    for (int brokerId = 0; brokerId < rackByBroker.length; brokerId++) {
        model.createBroker(rackByBroker[brokerId], "h" + brokerId, brokerId, commonBrokerCapacityInfo, false);
    }

    // Replicas are created position by position (all leaders first), and by ascending broker id within a position.
    for (int position = 0; position < replicasByPositionAndBroker.length; position++) {
        boolean isLeader = position == 0;
        TopicPartition[][] replicasByBroker = replicasByPositionAndBroker[position];
        for (int brokerId = 0; brokerId < replicasByBroker.length; brokerId++) {
            for (TopicPartition partition : replicasByBroker[brokerId]) {
                model.createReplica(rackByBroker[brokerId], brokerId, partition, position, isLeader);
            }
        }
    }

    // Loads are set broker by broker; within a broker, leaders come first, then followers, then secondary followers.
    List<Long> windows = Collections.singletonList(1L);
    for (int brokerId = 0; brokerId < rackByBroker.length; brokerId++) {
        for (TopicPartition[][] replicasByBroker : replicasByPositionAndBroker) {
            for (TopicPartition partition : replicasByBroker[brokerId]) {
                model.setReplicaLoad(rackByBroker[brokerId], brokerId, partition, getAggregatedMetricValues(partitionSize.get(partition)), windows);
            }
        }
    }
    return model;
}
Also used : ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) BrokerCapacityInfo(com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo) HashMap(java.util.HashMap) ModelGeneration(com.linkedin.kafka.cruisecontrol.monitor.ModelGeneration) TopicPartition(org.apache.kafka.common.TopicPartition) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)

Example 10 with BrokerCapacityInfo

use of com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo in project cruise-control by linkedin.

the class RandomClusterTest method testNewBrokers.

/**
 * Rebalances a cluster, copies its racks, brokers, replicas and replica loads into a fresh cluster model, adds two
 * more brokers marked as {@code NEW} to the copy, and asserts that the goals can be executed successfully on it.
 */
public void testNewBrokers() throws Exception {
    ClusterModel clusterModel = rebalance();
    ClusterModel clusterWithNewBroker = new ClusterModel(new ModelGeneration(0, 0L), 1.0);

    // Copy every broker, together with its rack and per-resource capacity, into the new model.
    for (Broker broker : clusterModel.brokers()) {
        String rackId = broker.rack().id();
        clusterWithNewBroker.createRack(rackId);
        Map<Resource, Double> capacityByResource = new HashMap<>();
        for (Resource resource : Resource.cachedValues()) {
            capacityByResource.put(resource, broker.capacityFor(resource));
        }
        clusterWithNewBroker.createBroker(rackId, Integer.toString(broker.id()), broker.id(), new BrokerCapacityInfo(capacityByResource), false);
    }

    // Copy the replicas, keeping each replica's position within its partition.
    for (List<Partition> partitions : clusterModel.getPartitionsByTopic().values()) {
        for (Partition partition : partitions) {
            int position = 0;
            for (Replica replica : partition.replicas()) {
                Broker host = replica.broker();
                clusterWithNewBroker.createReplica(host.rack().id(), host.id(), partition.topicPartition(), position, replica.isLeader());
                position++;
            }
        }
    }

    // Copy the load of every replica.
    for (Broker broker : clusterModel.brokers()) {
        for (Replica replica : broker.replicas()) {
            AggregatedMetricValues replicaLoad = clusterModel.broker(broker.id()).replica(replica.topicPartition()).load().loadByWindows();
            clusterWithNewBroker.setReplicaLoad(broker.rack().id(), broker.id(), replica.topicPartition(), replicaLoad, clusterModel.load().windows());
        }
    }

    // Add two brokers whose ids directly follow the number of existing brokers, and mark them as new.
    BrokerCapacityInfo commonBrokerCapacityInfo = new BrokerCapacityInfo(TestConstants.BROKER_CAPACITY);
    int existingBrokerCount = clusterModel.brokers().size();
    for (int i = 1; i < 3; i++) {
        int newBrokerId = i + existingBrokerCount - 1;
        clusterWithNewBroker.createBroker(Integer.toString(i), Integer.toString(newBrokerId), newBrokerId, commonBrokerCapacityInfo, false);
        clusterWithNewBroker.setBrokerState(newBrokerId, Broker.State.NEW);
    }

    boolean improved = OptimizationVerifier.executeGoalsFor(_balancingConstraint, clusterWithNewBroker, _goalNameByPriority, _verifications);
    assertTrue("Random Cluster Test failed to improve the existing state with new brokers.", improved);
}
Also used : Partition(com.linkedin.kafka.cruisecontrol.model.Partition) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) BrokerCapacityInfo(com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo) HashMap(java.util.HashMap) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) AggregatedMetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues) Replica(com.linkedin.kafka.cruisecontrol.model.Replica) ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) ModelGeneration(com.linkedin.kafka.cruisecontrol.monitor.ModelGeneration) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

BrokerCapacityInfo (com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo)10 ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel)4 ModelGeneration (com.linkedin.kafka.cruisecontrol.monitor.ModelGeneration)4 Node (org.apache.kafka.common.Node)4 BrokerCapacityConfigResolver (com.linkedin.kafka.cruisecontrol.config.BrokerCapacityConfigResolver)3 BrokerCapacityResolutionException (com.linkedin.kafka.cruisecontrol.exception.BrokerCapacityResolutionException)3 TimeoutException (java.util.concurrent.TimeoutException)3 Cluster (org.apache.kafka.common.Cluster)3 CruiseControlMetric (com.linkedin.kafka.cruisecontrol.metricsreporter.metric.CruiseControlMetric)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 PartitionInfo (org.apache.kafka.common.PartitionInfo)2 TopicPartition (org.apache.kafka.common.TopicPartition)2 Test (org.junit.Test)2 AggregatedMetricValues (com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues)1 BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)1 Resource (com.linkedin.kafka.cruisecontrol.common.Resource)1 Broker (com.linkedin.kafka.cruisecontrol.model.Broker)1 Partition (com.linkedin.kafka.cruisecontrol.model.Partition)1