Search in sources :

Example 36 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class PotentialNwOutGoal method rebalanceForBroker.

/**
 * Attempt to bring the estimated maximum possible outbound network utilization of the given broker under its
 * limit by moving replicas off the broker.
 *
 * The limit is the broker's {@link Resource#NW_OUT} capacity multiplied by the capacity threshold of the balancing
 * constraint. Nothing is done when the broker has no replicas or when its potential leadership load for
 * {@link Resource#NW_OUT} is already at or below that limit. If the broker is still over the limit after every
 * replica has been considered, a warning is logged and the goal is marked as not succeeded.
 *
 * Note: a destination broker is dropped from the candidate set by comparing its potential outbound network load
 * against the limit of the <em>source</em> broker (the one being balanced).
 *
 * @param broker         Broker to be balanced.
 * @param clusterModel   The state of the cluster.
 * @param optimizedGoals Optimized goals; handed to the balancing action attempt.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) {
    double nwOutLimit = broker.capacityFor(Resource.NW_OUT) * _balancingConstraint.capacityThreshold(Resource.NW_OUT);
    boolean overLimit = !broker.replicas().isEmpty()
        && clusterModel.potentialLeadershipLoadFor(broker.id()).expectedUtilizationFor(Resource.NW_OUT) > nwOutLimit;
    if (!overLimit) {
        // The broker's estimated max possible utilization is already within the limit.
        return;
    }

    // While only dead brokers are being healed every healthy broker is a candidate; otherwise only the brokers
    // reported as being under the estimated max possible network out are.
    Set<Broker> candidateBrokers = _selfHealingDeadBrokersOnly
        ? clusterModel.healthyBrokers()
        : brokersUnderEstimatedMaxPossibleNwOut(clusterModel);

    // Iterate over a sorted copy, so the loop does not walk the broker's own replica collection while replicas
    // are being relocated.
    SortedSet<Replica> sourceReplicas = new TreeSet<>(broker.replicas());
    for (Replica candidate : sourceReplicas) {
        if (shouldExclude(candidate, excludedTopics)) {
            continue;
        }

        // Candidate destinations are all candidate brokers that do not already host a replica of this partition,
        // ordered by descending leadership outbound network utilization.
        List<Broker> destinations = new ArrayList<>(candidateBrokers);
        destinations.removeAll(clusterModel.partition(candidate.topicPartition()).partitionBrokers());
        destinations.sort((left, right) -> {
            double rightNwOut = right.leadershipLoadForNwResources().expectedUtilizationFor(Resource.NW_OUT);
            double leftNwOut = left.leadershipLoadForNwResources().expectedUtilizationFor(Resource.NW_OUT);
            return Double.compare(rightNwOut, leftNwOut);
        });

        Broker chosen = maybeApplyBalancingAction(clusterModel, candidate, destinations, ActionType.REPLICA_MOVEMENT, optimizedGoals);
        if (chosen == null) {
            // No movement was applied for this replica; try the next one.
            continue;
        }
        int chosenId = chosen.id();

        // Re-evaluate the source broker now that a replica has been moved away.
        overLimit = !broker.replicas().isEmpty()
            && clusterModel.potentialLeadershipLoadFor(broker.id()).expectedUtilizationFor(Resource.NW_OUT) > nwOutLimit;
        if (!overLimit) {
            break;
        }

        // Stop offering the destination broker once its own potential outbound load exceeds the limit.
        double chosenPotentialNwOut = clusterModel.potentialLeadershipLoadFor(chosenId).expectedUtilizationFor(Resource.NW_OUT);
        if (!_selfHealingDeadBrokersOnly && chosenPotentialNwOut > nwOutLimit) {
            candidateBrokers.remove(clusterModel.broker(chosenId));
        }
    }

    if (overLimit) {
        // Every replica on the source broker was checked and the broker is still above the limit.
        LOG.warn("Violated estimated max possible network out limit for broker id:{} limit:{} utilization:{}.", broker.id(), nwOutLimit, clusterModel.potentialLeadershipLoadFor(broker.id()).expectedUtilizationFor(Resource.NW_OUT));
        _succeeded = false;
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Replica(com.linkedin.kafka.cruisecontrol.model.Replica) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)

Example 37 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class PotentialNwOutGoal method isReplicaRelocationAcceptable.

/**
 * Decide whether the given replica relocation (i.e. move or swap) is acceptable by this goal. The relocation is
 * acceptable if it satisfies any of the following:
 *
 * (1) it satisfies {@link #selfSatisfied},
 * (2) for a replica movement: the potential nw outbound of the destination broker plus the nw outbound of the moved
 * partition's leader does not exceed the max of the initial potential nw outbound on the source and destination
 * brokers,
 * (3) for a replica swap: neither the source nor the destination broker ends up with a potential nw outbound above
 * the max of the initial values on the two brokers.
 *
 * @param action Replica relocation action (i.e. move or swap) to be checked for acceptance.
 * @param clusterModel The state of the cluster.
 * @return {@link ActionAcceptance#ACCEPT} if the action is acceptable by this goal,
 * {@link ActionAcceptance#REPLICA_REJECT} otherwise.
 * @throws IllegalArgumentException if the action is not self-satisfied and its type is neither a replica swap nor a
 * replica movement.
 */
private ActionAcceptance isReplicaRelocationAcceptable(BalancingAction action, ClusterModel clusterModel) {
    if (selfSatisfied(clusterModel, action)) {
        // Condition (1): nothing else needs to be checked.
        return ACCEPT;
    }

    Replica sourceReplica = clusterModel.broker(action.sourceBrokerId()).replica(action.topicPartition());
    int destinationId = clusterModel.broker(action.destinationBrokerId()).id();
    double destBrokerNwOut = clusterModel.potentialLeadershipLoadFor(destinationId).expectedUtilizationFor(Resource.NW_OUT);
    double srcBrokerNwOut = clusterModel.potentialLeadershipLoadFor(sourceReplica.broker().id()).expectedUtilizationFor(Resource.NW_OUT);
    // The load of the partition's leader is used, whichever replica of the partition is being relocated.
    double srcLeaderNwOut = clusterModel.partition(sourceReplica.topicPartition()).leader().load().expectedUtilizationFor(Resource.NW_OUT);
    // Neither broker may end up above the larger of the two initial values.
    double ceiling = Math.max(destBrokerNwOut, srcBrokerNwOut);

    switch (action.balancingAction()) {
        case REPLICA_MOVEMENT: {
            if (destBrokerNwOut + srcLeaderNwOut <= ceiling) {
                return ACCEPT;
            }
            return REPLICA_REJECT;
        }
        case REPLICA_SWAP: {
            double destLeaderNwOut = clusterModel.partition(action.destinationTopicPartition()).leader().load().expectedUtilizationFor(Resource.NW_OUT);
            double projectedSource = srcBrokerNwOut + destLeaderNwOut - srcLeaderNwOut;
            double projectedDestination = destBrokerNwOut + srcLeaderNwOut - destLeaderNwOut;
            // The source broker is checked first, then the destination broker.
            if (projectedSource > ceiling) {
                return REPLICA_REJECT;
            }
            if (projectedDestination <= ceiling) {
                return ACCEPT;
            }
            return REPLICA_REJECT;
        }
        default:
            throw new IllegalArgumentException("Unsupported balancing action " + action.balancingAction() + " is provided.");
    }
}
Also used : Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 38 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class PotentialNwOutGoal method updateGoalState.

/**
 * Update goal state after one round of self-healing / rebalance.
 *
 * If a self-healing eligible replica is still on a broker that is not alive, the goal switches to the mode in which
 * only dead brokers are healed and returns without finishing. If that mode was already active, the optimization is
 * reported as failed instead. When no such replica is found, {@code finish()} is called.
 *
 * @param clusterModel   The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization action (not read by this method).
 * @throws OptimizationFailureException if a self-healing eligible replica remains on a dead broker while the goal
 * is already healing dead brokers only.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
    // Sanity check: No self-healing eligible replica should remain at a decommissioned broker.
    for (Replica eligibleReplica : clusterModel.selfHealingEligibleReplicas()) {
        if (!eligibleReplica.broker().isAlive()) {
            if (_selfHealingDeadBrokersOnly) {
                // The limit was already being ignored in favor of healing, and a replica is still stranded.
                throw new OptimizationFailureException("Self healing failed to move the replica away from decommissioned brokers.");
            }
            // Switch to healing dead brokers only and return without finishing.
            _selfHealingDeadBrokersOnly = true;
            LOG.warn("Ignoring potential network outbound limit to relocate remaining replicas from dead brokers to healthy ones.");
            return;
        }
    }
    finish();
}
Also used : OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 39 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class RackAwareGoal method ensureRackAware.

/**
 * Sanity check that the final replica distribution is rack aware: for every leader replica whose topic is not
 * excluded, the leader's broker and the partition's follower brokers must all be in racks with distinct ids.
 *
 * @param clusterModel   The state of the cluster.
 * @param excludedTopics The topics whose partitions are skipped by this check.
 * @throws OptimizationFailureException if two or more of a partition's replica brokers share a rack id.
 */
private void ensureRackAware(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
    for (Replica leader : clusterModel.leaderReplicas()) {
        String topic = leader.topicPartition().topic();
        if (excludedTopics.contains(topic)) {
            continue;
        }

        Set<Broker> followers = new HashSet<>(clusterModel.partition(leader.topicPartition()).followerBrokers());
        // Gather the rack ids of the followers first, then of the leader.
        Set<String> rackIds = new HashSet<>();
        for (Broker follower : followers) {
            rackIds.add(follower.rack().id());
        }
        rackIds.add(leader.broker().rack().id());

        // One distinct rack id is expected per follower broker, plus one for the leader.
        int expectedRackCount = followers.size() + 1;
        if (rackIds.size() != expectedRackCount) {
            throw new OptimizationFailureException("Optimization for goal " + name() + " failed for rack-awareness of partition " + leader.topicPartition());
        }
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Replica(com.linkedin.kafka.cruisecontrol.model.Replica) HashSet(java.util.HashSet)

Example 40 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class RackAwareGoal method rebalanceForBroker.

/**
 * Resolve rack-awareness violations on the given broker with replica movements.
 *
 * A replica is left in place when the broker is alive and the replica already satisfies rack awareness, or when the
 * replica should be excluded. A replica movement is attempted for every other replica, which includes every
 * non-excluded replica of a broker that is not alive.
 *
 * @param broker         Broker to be balanced.
 * @param clusterModel   The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @throws OptimizationFailureException if no replica movement could be applied for a replica that has to move.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
    LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);
    // Work on a sorted copy of the broker's replicas while movements are applied.
    SortedSet<Replica> brokerReplicas = new TreeSet<>(broker.replicas());
    for (Replica current : brokerReplicas) {
        boolean alreadyRackAware = broker.isAlive() && satisfiedRackAwareness(current, clusterModel);
        if (alreadyRackAware) {
            continue;
        }
        if (shouldExclude(current, excludedTopics)) {
            continue;
        }
        // The replica has to move: hand the rack-aware eligible brokers to the balancing action attempt.
        if (maybeApplyBalancingAction(clusterModel, current, rackAwareEligibleBrokers(current, clusterModel), ActionType.REPLICA_MOVEMENT, optimizedGoals) != null) {
            continue;
        }
        throw new OptimizationFailureException("Violated rack-awareness requirement for broker with id " + broker.id() + ".");
    }
}
Also used : OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) TreeSet(java.util.TreeSet) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Aggregations

Replica (com.linkedin.kafka.cruisecontrol.model.Replica)40 Broker (com.linkedin.kafka.cruisecontrol.model.Broker)26 BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)13 OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException)12 ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel)9 HashSet (java.util.HashSet)9 TreeSet (java.util.TreeSet)8 Resource (com.linkedin.kafka.cruisecontrol.common.Resource)7 ActionAcceptance (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance)6 ActionType (com.linkedin.kafka.cruisecontrol.analyzer.ActionType)6 BalancingAction (com.linkedin.kafka.cruisecontrol.analyzer.BalancingAction)6 ArrayList (java.util.ArrayList)6 List (java.util.List)6 ACCEPT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.ACCEPT)5 REPLICA_REJECT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.REPLICA_REJECT)5 ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats)5 ModelCompletenessRequirements (com.linkedin.kafka.cruisecontrol.monitor.ModelCompletenessRequirements)5 Set (java.util.Set)5 SortedSet (java.util.SortedSet)5 Collectors (java.util.stream.Collectors)5