Search in sources :

Example 41 with Broker

use of com.linkedin.kafka.cruisecontrol.model.Broker in project cruise-control by linkedin.

the class AbstractGoal method optimize.

/**
 * Runs this goal's optimization over the given cluster model.
 *
 * <p>Cluster statistics are captured before and after the optimization. The optimization itself repeatedly
 * calls {@code rebalanceForBroker} for every broker returned by {@code brokersToBalance} and then
 * {@code updateGoalState}, until {@code _finished} becomes true (presumably set by {@code updateGoalState};
 * verify against the concrete goal). When the cluster had no dead brokers at the start, the post-optimization
 * statistics are compared with the pre-optimization ones using this goal's comparator.
 *
 * @param clusterModel   The state of the cluster; it is modified by the rebalancing steps.
 * @param optimizedGoals Goals that have already been optimized.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @return The value of {@code _succeeded} at the end of the run. It is set to {@code true} at the start of this
 * method; other methods of the goal may change it while the optimization runs.
 * @throws OptimizationFailureException If there were no dead brokers and the comparator prefers the statistics
 * from before the optimization, or if a rebalancing step fails.
 */
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
    _succeeded = true;
    LOG.debug("Starting optimization for {}.", name());
    // Initialize pre-optimized stats.
    ClusterModelStats statsBeforeOptimization = clusterModel.getClusterStats(_balancingConstraint);
    LOG.trace("[PRE - {}] {}", name(), statsBeforeOptimization);
    _finished = false;
    long goalStartTime = System.currentTimeMillis();
    initGoalState(clusterModel, excludedTopics);
    // Read the dead brokers before rebalancing; the self-healing check below relies on this value.
    Collection<Broker> deadBrokers = clusterModel.deadBrokers();
    while (!_finished) {
        for (Broker broker : brokersToBalance(clusterModel)) {
            rebalanceForBroker(broker, clusterModel, optimizedGoals, excludedTopics);
        }
        updateGoalState(clusterModel, excludedTopics);
    }
    ClusterModelStats statsAfterOptimization = clusterModel.getClusterStats(_balancingConstraint);
    LOG.trace("[POST - {}] {}", name(), statsAfterOptimization);
    LOG.debug("Finished optimization for {} in {}ms.", name(), System.currentTimeMillis() - goalStartTime);
    LOG.trace("Cluster after optimization is {}", clusterModel);
    // We only ensure the optimization did not make stats worse when it is not self-healing.
    if (deadBrokers.isEmpty()) {
        ClusterModelStatsComparator comparator = clusterModelStatsComparator();
        // Throw exception when the stats before optimization is preferred.
        if (comparator.compare(statsAfterOptimization, statsBeforeOptimization) < 0) {
            // The original message was missing the space between "optimized" and "result".
            throw new OptimizationFailureException("Optimization for Goal " + name()
                + " failed because the optimized result is worse than before. Detail reason: "
                + comparator.explainLastComparison());
        }
    }
    return _succeeded;
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) ClusterModelStats(com.linkedin.kafka.cruisecontrol.model.ClusterModelStats)

Example 42 with Broker

use of com.linkedin.kafka.cruisecontrol.model.Broker in project cruise-control by linkedin.

the class AbstractGoal method maybeApplyBalancingAction.

/**
 * Tries to apply the given balancing action to the given replica, using the candidate brokers as potential
 * destinations for a replica movement, or as the location of followers for a leadership transfer.
 *
 * <p>The eligible brokers are visited in the order returned by {@code getEligibleBrokers}. The first broker for
 * which the move is legit, self-satisfied and accepted by the already optimized goals receives the action, and
 * that broker is returned.
 *
 * @param clusterModel     The state of the cluster; it is updated when an action is applied.
 * @param replica          Replica to be applied the given balancing action.
 * @param candidateBrokers Candidate brokers as the potential destination brokers for replica movement or the
 *                         location of followers for leadership transfer.
 * @param action           Balancing action.
 * @param optimizedGoals   Optimized goals.
 * @return The destination broker if the action was applied, {@code null} otherwise.
 */
protected Broker maybeApplyBalancingAction(ClusterModel clusterModel, Replica replica, Collection<Broker> candidateBrokers, ActionType action, Set<Goal> optimizedGoals) {
    // In self-healing mode (some broker is dead), a move from a healthy broker is only traced here;
    // it is not rejected.
    boolean hasDeadBrokers = !clusterModel.deadBrokers().isEmpty();
    if (hasDeadBrokers && replica.originalBroker().isAlive()) {
        LOG.trace("Applying {} to a replica in a healthy broker in self-healing mode.", action);
    }

    for (Broker destination : getEligibleBrokers(clusterModel, replica, candidateBrokers)) {
        BalancingAction candidateAction =
            new BalancingAction(replica.topicPartition(), replica.broker().id(), destination.id(), action);

        if (!legitMove(replica, destination, action)) {
            LOG.trace("Replica move is not legit for {}.", candidateAction);
            continue;
        }
        if (!selfSatisfied(clusterModel, candidateAction)) {
            LOG.trace("Unable to self-satisfy proposal {}.", candidateAction);
            continue;
        }

        ActionAcceptance verdict =
            AnalyzerUtils.isProposalAcceptableForOptimizedGoals(optimizedGoals, candidateAction, clusterModel);
        LOG.trace("Trying to apply legit and self-satisfied action {}, actionAcceptance = {}", candidateAction, verdict);
        if (verdict != ACCEPT) {
            // Rejected by a previously optimized goal; try the next eligible broker.
            continue;
        }

        // Accepted: mutate the cluster model according to the action type and report the destination.
        if (action == ActionType.LEADERSHIP_MOVEMENT) {
            clusterModel.relocateLeadership(replica.topicPartition(), replica.broker().id(), destination.id());
        } else if (action == ActionType.REPLICA_MOVEMENT) {
            clusterModel.relocateReplica(replica.topicPartition(), replica.broker().id(), destination.id());
        }
        return destination;
    }
    // No eligible broker accepted the action.
    return null;
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) BalancingAction(com.linkedin.kafka.cruisecontrol.analyzer.BalancingAction) ActionAcceptance(com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance)

Example 43 with Broker

use of com.linkedin.kafka.cruisecontrol.model.Broker in project cruise-control by linkedin.

the class CapacityGoal method rebalanceForBroker.

/**
 * Brings the utilization of the given broker (and/or its host) for this goal's resource under the capacity limit.
 * Returns immediately if the utilization is already under the limit.
 *
 * <p>
 * (1) REBALANCE BY LEADERSHIP MOVEMENT:
 * Perform leadership movement to ensure that the load on brokers and/or hosts (see {@link Resource#isHostResource()}
 * and {@link Resource#isBrokerResource()}) for the outbound network load and CPU is under the capacity limit.
 * This step is attempted only when the resource is {@link Resource#NW_OUT} or {@link Resource#CPU}.
 *
 * <p>
 * (2) REBALANCE BY REPLICA MOVEMENT:
 * Perform optimization via replica movement for the given resource to ensure rebalance: The load on brokers and/or
 * hosts (see {@link Resource#isHostResource()} and {@link Resource#isBrokerResource()}) for the given resource is
 * under the capacity limit. This step runs only if the utilization is still over the limit after step (1).
 *
 * @param broker         Broker to be balanced.
 * @param clusterModel   The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @throws OptimizationFailureException If the utilization is still over the capacity limit after both steps.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
    LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);
    Resource currentResource = resource();
    // Both limits are the raw capacity scaled by the configured capacity threshold for this resource.
    double capacityThreshold = _balancingConstraint.capacityThreshold(currentResource);
    double brokerCapacityLimit = broker.capacityFor(currentResource) * capacityThreshold;
    double hostCapacityLimit = broker.host().capacityFor(currentResource) * capacityThreshold;
    boolean isUtilizationOverLimit = isUtilizationOverLimit(broker, currentResource, brokerCapacityLimit, hostCapacityLimit);
    if (!isUtilizationOverLimit) {
        // The utilization of source broker and/or host for the current resource is already under the capacity limit.
        return;
    }
    // First try REBALANCE BY LEADERSHIP MOVEMENT:
    if (currentResource == Resource.NW_OUT || currentResource == Resource.CPU) {
        // Sort replicas by descending order of preference to relocate. Preference is based on resource cost.
        // Only leaders in the source broker are sorted.
        List<Replica> sortedLeadersInSourceBroker = broker.sortedLeadersFor(currentResource);
        for (Replica leader : sortedLeadersInSourceBroker) {
            if (shouldExclude(leader, excludedTopics)) {
                continue;
            }
            // Get followers of this leader and sort them in ascending order by their broker resource utilization.
            List<Replica> followers = clusterModel.partition(leader.topicPartition()).followers();
            clusterModel.sortReplicasInAscendingOrderByBrokerResourceUtilization(followers, currentResource);
            List<Broker> eligibleBrokers = followers.stream().map(Replica::broker).collect(Collectors.toList());
            Broker b = maybeApplyBalancingAction(clusterModel, leader, eligibleBrokers, ActionType.LEADERSHIP_MOVEMENT, optimizedGoals);
            if (b == null) {
                LOG.debug("Failed to move leader replica {} to any other brokers in {}", leader, eligibleBrokers);
            }
            // Re-evaluate after every attempt, whether or not the leadership move was applied.
            isUtilizationOverLimit = isUtilizationOverLimit(broker, currentResource, brokerCapacityLimit, hostCapacityLimit);
            // Broker utilization has successfully been reduced under the capacity limit for the current resource.
            if (!isUtilizationOverLimit) {
                break;
            }
        }
    }
    // If leader movement did not work, move replicas.
    if (isUtilizationOverLimit) {
        // Get sorted healthy brokers under host and/or broker capacity limit (depending on the current resource).
        List<Broker> sortedHealthyBrokersUnderCapacityLimit = clusterModel.sortedHealthyBrokersUnderThreshold(currentResource, capacityThreshold);
        // Try to move the replicas of the source broker, in the order returned by sortedReplicas, until the
        // utilization gets under the capacity limit. If the capacity limit still cannot
        // be satisfied, throw an exception.
        for (Replica replica : broker.sortedReplicas(currentResource)) {
            if (shouldExclude(replica, excludedTopics)) {
                continue;
            }
            // Unless the target broker would go over the host- and/or broker-level capacity,
            // the movement will be successful.
            Broker b = maybeApplyBalancingAction(clusterModel, replica, sortedHealthyBrokersUnderCapacityLimit, ActionType.REPLICA_MOVEMENT, optimizedGoals);
            if (b == null) {
                LOG.debug("Failed to move replica {} to any broker in {}", replica, sortedHealthyBrokersUnderCapacityLimit);
            }
            // If capacity limit was not satisfied before, check if it is satisfied now.
            isUtilizationOverLimit = isUtilizationOverLimit(broker, currentResource, brokerCapacityLimit, hostCapacityLimit);
            // Broker utilization has successfully been reduced under the capacity limit for the current resource.
            if (!isUtilizationOverLimit) {
                break;
            }
        }
    }
    // Still over the limit after both strategies: report the violation at broker or host level,
    // depending on whether the resource is a host resource.
    if (isUtilizationOverLimit) {
        if (!currentResource.isHostResource()) {
            // Utilization is above the capacity limit after all replicas in the given source broker were checked.
            throw new OptimizationFailureException("Violated capacity limit of " + brokerCapacityLimit + " via broker " + "utilization of " + broker.load().expectedUtilizationFor(currentResource) + " with broker id " + broker.id() + " for resource " + currentResource);
        } else {
            throw new OptimizationFailureException("Violated capacity limit of " + hostCapacityLimit + " via host " + "utilization of " + broker.host().load().expectedUtilizationFor(currentResource) + " with hostname " + broker.host().name() + " for resource " + currentResource);
        }
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 44 with Broker

use of com.linkedin.kafka.cruisecontrol.model.Broker in project cruise-control by linkedin.

the class CapacityGoal method actionAcceptance.

/**
 * Decides whether this goal accepts the given action. An action is acceptable by a goal if it satisfies the
 * requirements of the goal. Requirements (hard goal): Capacity.
 *
 * <p>
 * ## Leadership Movement: impacts only (1) network outbound and (2) CPU resources (See
 * {@link DiskCapacityGoal#actionAcceptance(BalancingAction, ClusterModel)} and
 * {@link NetworkInboundCapacityGoal#actionAcceptance(BalancingAction, ClusterModel)}).
 *   (1) Check if leadership NW_OUT movement is acceptable: NW_OUT movement carries all of leader's NW_OUT load.
 *   (2) Check if leadership CPU movement is acceptable: In reality, CPU movement carries only a fraction of
 * leader's CPU load.
 * To optimize CC performance, we avoid calculation of the expected leadership CPU utilization, and assume that
 * if (action.balancingAction() == ActionType.LEADERSHIP_MOVEMENT && resource() == Resource.CPU),
 * then the expected leadership CPU utilization would be the full CPU utilization of the leader.
 * <p>
 * ## Replica Movement: impacts any resource.
 * ## Replica Swap: impacts any resource.
 *
 * @param action Action to be checked for acceptance.
 * @param clusterModel The state of the cluster.
 * @return {@link ActionAcceptance#ACCEPT} if the action is acceptable by this goal,
 * {@link ActionAcceptance#REPLICA_REJECT} otherwise.
 * @throws IllegalArgumentException If the balancing action type is not swap, replica movement or leadership
 * movement.
 */
@Override
public ActionAcceptance actionAcceptance(BalancingAction action, ClusterModel clusterModel) {
    // Resolve both ends of the action up front, before looking at the action type.
    Replica replicaToMove = clusterModel.broker(action.sourceBrokerId()).replica(action.topicPartition());
    Broker targetBroker = clusterModel.broker(action.destinationBrokerId());

    boolean withinCapacity;
    switch (action.balancingAction()) {
        case REPLICA_SWAP: {
            // A swap also involves a replica on the destination side.
            Replica replicaToSwapIn = targetBroker.replica(action.destinationTopicPartition());
            withinCapacity = isSwapAcceptableForCapacity(replicaToMove, replicaToSwapIn);
            break;
        }
        case REPLICA_MOVEMENT:
        case LEADERSHIP_MOVEMENT:
            // Both movement types share the same capacity check.
            withinCapacity = isMovementAcceptableForCapacity(replicaToMove, targetBroker);
            break;
        default:
            throw new IllegalArgumentException("Unsupported balancing action " + action.balancingAction() + " is provided.");
    }

    if (withinCapacity) {
        return ACCEPT;
    }
    return REPLICA_REJECT;
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 45 with Broker

use of com.linkedin.kafka.cruisecontrol.model.Broker in project cruise-control by linkedin.

the class LeaderBytesInDistributionGoal method brokersToBalance.

/**
 * Selects the brokers whose leadership inbound network utilization is over the balance threshold.
 *
 * <p>The set obtained from {@code clusterModel.brokers()} is pruned in place through its iterator and then
 * returned. NOTE(review): this assumes {@code brokers()} hands out a modifiable set that is safe to prune;
 * confirm against {@code ClusterModel}.
 *
 * @param clusterModel The state of the cluster.
 * @return The brokers whose leadership {@link Resource#NW_IN} utilization exceeds their balance threshold.
 */
@Override
protected SortedSet<Broker> brokersToBalance(ClusterModel clusterModel) {
    SortedSet<Broker> candidates = clusterModel.brokers();
    Iterator<Broker> cursor = candidates.iterator();
    while (cursor.hasNext()) {
        Broker candidate = cursor.next();
        double leaderNwInUtilization = candidate.leadershipLoadForNwResources().expectedUtilizationFor(Resource.NW_IN);
        // Brokers at or below the threshold are not eligible for balancing; drop them.
        boolean withinThreshold = leaderNwInUtilization <= balanceThreshold(clusterModel, candidate.id());
        if (withinThreshold) {
            cursor.remove();
        }
    }
    return candidates;
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker)

Aggregations

Broker (com.linkedin.kafka.cruisecontrol.model.Broker)50 Replica (com.linkedin.kafka.cruisecontrol.model.Replica)27 BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)15 OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException)12 HashSet (java.util.HashSet)12 ArrayList (java.util.ArrayList)10 TreeSet (java.util.TreeSet)10 BalancingAction (com.linkedin.kafka.cruisecontrol.analyzer.BalancingAction)9 Resource (com.linkedin.kafka.cruisecontrol.common.Resource)9 ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel)9 List (java.util.List)9 ActionAcceptance (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance)8 ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats)7 ActionType (com.linkedin.kafka.cruisecontrol.analyzer.ActionType)6 ModelCompletenessRequirements (com.linkedin.kafka.cruisecontrol.monitor.ModelCompletenessRequirements)6 Set (java.util.Set)6 Logger (org.slf4j.Logger)6 LoggerFactory (org.slf4j.LoggerFactory)6 ACCEPT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.ACCEPT)5 REPLICA_REJECT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.REPLICA_REJECT)5