Search in sources:

Example 6 with ProvisionRecommendation

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation in project cruise-control by linkedin.

the class ReplicaCapacityGoal method rebalanceForBroker.

/**
 * Tries to move replicas off the given broker until it holds at most
 * {@code _balancingConstraint.maxReplicasPerBroker()} replicas and the replica being inspected is not offline.
 * Every move is attempted through {@code maybeApplyBalancingAction}, which receives the optimized goals and the
 * optimization options.
 *
 * @param broker         Broker to be balanced.
 * @param clusterModel   The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If a replica cannot be moved to any eligible broker and either the given
 * broker is not alive or the replica is currently offline.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);
    for (Replica replica : broker.trackedSortedReplicas(replicaSortName(this, false, false)).sortedReplicas(true)) {
        boolean offline = replica.isCurrentOffline();
        boolean overLimit = broker.replicas().size() > _balancingConstraint.maxReplicasPerBroker();
        if (!overLimit && !offline) {
            // The broker is within the per-broker limit and this replica is online: nothing left to do here.
            // NOTE(review): stopping early presumably relies on offline replicas being iterated first -- confirm
            // against the sort registered under this replica sort name.
            break;
        }

        // The goal requirements are violated: look for another broker that can take this replica.
        List<Broker> candidates = eligibleBrokers(replica, clusterModel)
            .stream()
            .map(BrokerReplicaCount::broker)
            .collect(Collectors.toList());
        Broker destination = maybeApplyBalancingAction(clusterModel, replica, candidates, ActionType.INTER_BROKER_REPLICA_MOVEMENT, optimizedGoals, optimizationOptions);
        if (destination != null) {
            // The replica has been relocated; carry on with the next one.
            continue;
        }

        boolean onDeadBroker = !broker.isAlive();
        if (onDeadBroker || offline) {
            // A replica on a dead broker, or an offline replica, must be moved; failing to do so is fatal.
            ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED).numBrokers(1).build();
            // The dead-broker wording takes precedence when both conditions hold.
            String template = onDeadBroker
                ? "[%s] Failed to move dead broker replica %s of partition %s to a broker in %s. Per broker limit: " + "%d for brokers: %s"
                : "[%s] Failed to move offline replica %s of partition %s to a broker in %s. Per broker limit: " + "%d for brokers: %s";
            throw new OptimizationFailureException(
                String.format(template, name(), replica, clusterModel.partition(replica.topicPartition()), candidates, _balancingConstraint.maxReplicasPerBroker(), clusterModel.brokers()),
                recommendation);
        }
        // An online replica on an alive broker that could not be moved is only logged.
        LOG.debug("Failed to move replica {} to any broker in {}.", replica, candidates);
    }
}
Also used : ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 7 with ProvisionRecommendation

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation in project cruise-control by linkedin.

the class CapacityGoal method initGoalState.

/**
 * Sanity-checks the cluster before optimization and registers the sorted replica views used by this goal.
 *
 * <p>The expected cluster utilization for {@code resource()} must not exceed the capacity available with the allowed
 * replica moves multiplied by the capacity threshold. If it does, the method fails with an
 * {@link ProvisionStatus#UNDER_PROVISIONED} recommendation.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If the allowed capacity is below the existing utilization.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    // Sanity check -- i.e. not enough resources.
    Load clusterLoad = clusterModel.load();
    // Proposals skip the excluded topics, but this utilization still counts replicas of the excluded topics.
    double utilization = clusterLoad.expectedUtilizationFor(resource());
    double totalCapacity = clusterModel.capacityWithAllowedReplicaMovesFor(resource(), optimizationOptions);
    double usableCapacity = totalCapacity * _balancingConstraint.capacityThreshold(resource());

    if (usableCapacity < utilization) {
        Set<Integer> movableBrokerIds = GoalUtils.aliveBrokersNotExcludedForReplicaMove(clusterModel, optimizationOptions);
        if (movableBrokerIds.isEmpty()) {
            // Every alive broker is excluded from replica moves.
            ProvisionRecommendation noMovableBrokers = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
                .numBrokers(clusterModel.maxReplicationFactor())
                .build();
            throw new OptimizationFailureException(String.format("[%s] All alive brokers are excluded from replica moves.", name()), noMovableBrokers);
        }

        // Use the first broker id returned by the set's iterator as the "typical" broker for the recommendation.
        int sampleBrokerId = movableBrokerIds.iterator().next();
        double sampleCapacity = clusterModel.broker(sampleBrokerId).capacityFor(resource());
        double shortfall = utilization - usableCapacity;
        // Each added broker contributes its capacity scaled by the same threshold.
        double usablePerBroker = sampleCapacity * _balancingConstraint.capacityThreshold(resource());
        int numBrokersToAdd = (int) Math.ceil(shortfall / usablePerBroker);
        ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
            .numBrokers(numBrokersToAdd)
            .typicalBrokerCapacity(sampleCapacity)
            .typicalBrokerId(sampleBrokerId)
            .resource(resource())
            .build();
        throw new OptimizationFailureException(
            String.format("[%s] Insufficient capacity for %s (Utilization %.2f, Allowed Capacity %.2f, Threshold: %.2f).", name(), resource(), utilization, usableCapacity, _balancingConstraint.capacityThreshold(resource())),
            recommendation);
    }

    Set<String> topicsToExclude = optimizationOptions.excludedTopics();
    boolean immigrantsOnly = optimizationOptions.onlyMoveImmigrantReplicas();

    // Track all replicas of each broker, scored by utilization of this goal's resource.
    new SortedReplicasHelper()
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectImmigrants(), immigrantsOnly)
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectReplicasBasedOnExcludedTopics(topicsToExclude), !topicsToExclude.isEmpty())
        .maybeAddPriorityFunc(ReplicaSortFunctionFactory.prioritizeOfflineReplicas(), !clusterModel.selfHealingEligibleReplicas().isEmpty())
        .maybeAddPriorityFunc(ReplicaSortFunctionFactory.prioritizeImmigrants(), !immigrantsOnly)
        .setScoreFunc(ReplicaSortFunctionFactory.reverseSortByMetricGroupValue(resource().name()))
        .trackSortedReplicasFor(replicaSortName(this, true, false), clusterModel);

    // Track only the leader replicas of each broker, scored the same way.
    new SortedReplicasHelper()
        .addSelectionFunc(ReplicaSortFunctionFactory.selectLeaders())
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectImmigrants(), immigrantsOnly)
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectReplicasBasedOnExcludedTopics(topicsToExclude), !topicsToExclude.isEmpty())
        .maybeAddPriorityFunc(ReplicaSortFunctionFactory.prioritizeImmigrants(), !immigrantsOnly)
        .setScoreFunc(ReplicaSortFunctionFactory.reverseSortByMetricGroupValue(resource().name()))
        .trackSortedReplicasFor(replicaSortName(this, true, true), clusterModel);
}
Also used : Load(com.linkedin.kafka.cruisecontrol.model.Load) ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) SortedReplicasHelper(com.linkedin.kafka.cruisecontrol.model.SortedReplicasHelper) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)

Example 8 with ProvisionRecommendation

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation in project cruise-control by linkedin.

the class CapacityGoal method ensureUtilizationUnderCapacity.

/**
 * Verifies, for every broker in the cluster model, that the expected utilization of this goal's resource does not
 * exceed capacity multiplied by the capacity threshold. The check runs at host level when
 * {@link Resource#isHostResource()} holds and at broker level when {@link Resource#isBrokerResource()} holds.
 * Hosts and brokers without replicas are never reported.
 *
 * @param clusterModel Cluster model.
 * @throws OptimizationFailureException On the first host or broker whose utilization is above its capacity limit.
 */
private void ensureUtilizationUnderCapacity(ClusterModel clusterModel) throws OptimizationFailureException {
    Resource resource = resource();
    double threshold = _balancingConstraint.capacityThreshold(resource);
    for (Broker broker : clusterModel.brokers()) {
        // Host-level violation check.
        if (resource.isHostResource()) {
            double hostUtilization = broker.host().load().expectedUtilizationFor(resource);
            double hostLimit = broker.host().capacityFor(resource) * threshold;
            boolean hostHasReplicas = !broker.host().replicas().isEmpty();
            if (hostHasReplicas && hostLimit < hostUtilization) {
                // The host carries replicas and its utilization is over the limit.
                ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
                    .numBrokers(1)
                    .resource(resource())
                    .build();
                String message = String.format("[%s] %s utilization for host %s (%.2f) is above capacity limit (%.2f).", name(), resource, broker.host().name(), hostUtilization, hostLimit);
                throw new OptimizationFailureException(message, recommendation);
            }
        }

        // Broker-level violation check.
        if (resource.isBrokerResource()) {
            double brokerUtilization = broker.load().expectedUtilizationFor(resource);
            double brokerLimit = broker.capacityFor(resource) * threshold;
            boolean brokerHasReplicas = !broker.replicas().isEmpty();
            if (brokerHasReplicas && brokerLimit < brokerUtilization) {
                // The broker carries replicas and its utilization is over the limit.
                ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
                    .numBrokers(1)
                    .resource(resource())
                    .build();
                String message = String.format("[%s] %s utilization for broker %d (%.2f) is above capacity limit (%.2f).", name(), resource, broker.id(), brokerUtilization, brokerLimit);
                throw new OptimizationFailureException(message, recommendation);
            }
        }
    }
}
Also used : ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Resource(com.linkedin.kafka.cruisecontrol.common.Resource)

Example 9 with ProvisionRecommendation

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation in project cruise-control by linkedin.

the class AbstractRackAwareGoal method rebalanceForBroker.

/**
 * Walks the tracked sorted replicas of the given broker and tries to relocate each one that may not stay where it
 * is. A replica is left alone only when the broker is alive, the replica is not among the broker's current offline
 * replicas, and {@code shouldKeepInTheCurrentBroker} accepts it.
 *
 * @param broker Broker to be balanced.
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param optimizationOptions Options to take into account during optimization.
 * @param throwExceptionIfCannotMove {@code true} to throw an {@link OptimizationFailureException} when a required
 * move fails for all rack-aware eligible brokers, {@code false} to only log the failure and continue with the next
 * replica.
 * @throws OptimizationFailureException If a replica cannot be moved and {@code throwExceptionIfCannotMove} is set.
 */
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions, boolean throwExceptionIfCannotMove) throws OptimizationFailureException {
    for (Replica replica : broker.trackedSortedReplicas(replicaSortName(this, false, false)).sortedReplicas(true)) {
        boolean mayStay = broker.isAlive()
                          && !broker.currentOfflineReplicas().contains(replica)
                          && shouldKeepInTheCurrentBroker(replica, clusterModel);
        if (mayStay) {
            continue;
        }

        // The relevant rack awareness condition is violated: try to place the replica on an eligible broker.
        SortedSet<Broker> candidates = rackAwareEligibleBrokers(replica, clusterModel);
        Broker destination = maybeApplyBalancingAction(clusterModel, replica, candidates, ActionType.INTER_BROKER_REPLICA_MOVEMENT, optimizedGoals, optimizationOptions);
        if (destination != null) {
            continue;
        }

        if (!throwExceptionIfCannotMove) {
            LOG.debug("Cannot move replica {} to any broker in {}", replica, candidates);
            continue;
        }

        // Collect the racks that already host this partition so the recommendation can exclude them.
        Set<String> occupiedRackIds = clusterModel.partition(replica.topicPartition())
            .partitionBrokers()
            .stream()
            .map(partitionBroker -> partitionBroker.rack().id())
            .collect(Collectors.toSet());
        ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
            .numBrokers(1)
            .excludedRackIds(occupiedRackIds)
            .build();
        throw new OptimizationFailureException(String.format("[%s] Cannot move %s to %s.", name(), replica, candidates), recommendation);
    }
}
Also used : Replica(com.linkedin.kafka.cruisecontrol.model.Replica) BROKER_REJECT(com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.BROKER_REJECT) Logger(org.slf4j.Logger) OptimizationOptions(com.linkedin.kafka.cruisecontrol.analyzer.OptimizationOptions) SortedSet(java.util.SortedSet) REPLICA_REJECT(com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.REPLICA_REJECT) MIN_NUM_VALID_WINDOWS_FOR_SELF_HEALING(com.linkedin.kafka.cruisecontrol.analyzer.goals.GoalUtils.MIN_NUM_VALID_WINDOWS_FOR_SELF_HEALING) ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) ACCEPT(com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.ACCEPT) ActionType(com.linkedin.kafka.cruisecontrol.analyzer.ActionType) Collectors(java.util.stream.Collectors) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) BalancingAction(com.linkedin.kafka.cruisecontrol.analyzer.BalancingAction) ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) GoalUtils.replicaSortName(com.linkedin.kafka.cruisecontrol.analyzer.goals.GoalUtils.replicaSortName) ProvisionStatus(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionStatus) ActionAcceptance(com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance) ModelCompletenessRequirements(com.linkedin.kafka.cruisecontrol.monitor.ModelCompletenessRequirements) ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 10 with ProvisionRecommendation

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation in project cruise-control by linkedin.

the class IntraBrokerDiskCapacityGoal method initGoalState.

/**
 * Sanity-checks every alive broker and then registers the sorted replica view used by this goal.
 *
 * <p>For each alive broker, the expected utilization of {@code RESOURCE} must not exceed the broker capacity
 * multiplied by the capacity threshold. The first broker that violates this aborts the optimization with an
 * {@link ProvisionStatus#UNDER_PROVISIONED} recommendation carrying the capacity that would be required.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If an alive broker's utilization is above its allowed capacity.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    // Proposals skip the excluded topics, but the utilization below still counts replicas of the excluded topics.
    for (Broker broker : clusterModel.aliveBrokers()) {
        double utilization = broker.load().expectedUtilizationFor(RESOURCE);
        double usableCapacity = broker.capacityFor(RESOURCE) * _balancingConstraint.capacityThreshold(RESOURCE);
        if (utilization > usableCapacity) {
            // Capacity at which the current utilization would sit exactly at the threshold.
            double neededCapacity = utilization / _balancingConstraint.capacityThreshold(RESOURCE);
            ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
                .numBrokers(1)
                .totalCapacity(neededCapacity)
                .build();
            String message = String.format("[%s] Insufficient disk capacity at broker %d (Utilization %.2f, Allowed " + "Capacity %.2f).", name(), broker.id(), utilization, usableCapacity);
            throw new OptimizationFailureException(message, recommendation);
        }
    }

    Set<String> topicsToExclude = optimizationOptions.excludedTopics();
    // Track the online replicas (minus excluded topics), prioritizing disk immigrants and scoring by the
    // RESOURCE metric group.
    new SortedReplicasHelper()
        .addSelectionFunc(ReplicaSortFunctionFactory.selectOnlineReplicas())
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectReplicasBasedOnExcludedTopics(topicsToExclude), !topicsToExclude.isEmpty())
        .addPriorityFunc(ReplicaSortFunctionFactory.prioritizeDiskImmigrants())
        .setScoreFunc(ReplicaSortFunctionFactory.reverseSortByMetricGroupValue(RESOURCE.name()))
        .trackSortedReplicasFor(replicaSortName(this, true, false), clusterModel);
}
Also used : ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) SortedReplicasHelper(com.linkedin.kafka.cruisecontrol.model.SortedReplicasHelper)

Aggregations

ProvisionRecommendation (com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation): 22; OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException): 18; Broker (com.linkedin.kafka.cruisecontrol.model.Broker): 11; BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint): 8; Replica (com.linkedin.kafka.cruisecontrol.model.Replica): 7; SortedReplicasHelper (com.linkedin.kafka.cruisecontrol.model.SortedReplicasHelper): 7; HashSet (java.util.HashSet): 4; ProvisionResponse (com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse): 3; Map (java.util.Map): 3; Resource (com.linkedin.kafka.cruisecontrol.common.Resource): 2; ActionAcceptance (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance): 1; ACCEPT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.ACCEPT): 1; BROKER_REJECT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.BROKER_REJECT): 1; REPLICA_REJECT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.REPLICA_REJECT): 1; ActionType (com.linkedin.kafka.cruisecontrol.analyzer.ActionType): 1; BalancingAction (com.linkedin.kafka.cruisecontrol.analyzer.BalancingAction): 1; OptimizationOptions (com.linkedin.kafka.cruisecontrol.analyzer.OptimizationOptions): 1; ProvisionStatus (com.linkedin.kafka.cruisecontrol.analyzer.ProvisionStatus): 1; MIN_NUM_VALID_WINDOWS_FOR_SELF_HEALING (com.linkedin.kafka.cruisecontrol.analyzer.goals.GoalUtils.MIN_NUM_VALID_WINDOWS_FOR_SELF_HEALING): 1; GoalUtils.replicaSortName (com.linkedin.kafka.cruisecontrol.analyzer.goals.GoalUtils.replicaSortName): 1