Search in sources :

Example 1 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class RackAwareGoal method initGoalState.

/**
 * Prepare this goal for an optimization pass.
 * As a hard goal, its proposals are not restricted to dead broker replicas during self-healing.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>Rack sufficiency check. With excluded topics, the largest replication factor among the included topics is
 *       compared against the number of alive racks; without excluded topics, the cluster-wide maximum replication
 *       factor is used instead.</li>
 *   <li>Over-provisioning detection. If the racks left over after subtracting the cluster-wide maximum replication
 *       factor reach the configured minimum of extra racks, the provision response is set to over-provisioned.</li>
 *   <li>Registration of the sorted replicas tracked for this goal, filtered by the optimization options.</li>
 * </ol>
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If the alive racks are fewer than the replication factor that must be satisfied.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    final int aliveRackCount = clusterModel.numAliveRacks();
    final Set<String> topicsToExclude = optimizationOptions.excludedTopics();

    if (!topicsToExclude.isEmpty()) {
        // Only topics that are not excluded contribute to the rack requirement.
        int highestIncludedRf = 1;
        Map<String, Integer> rfByTopic = clusterModel.replicationFactorByTopic();
        for (Map.Entry<String, Integer> topicAndRf : rfByTopic.entrySet()) {
            if (topicsToExclude.contains(topicAndRf.getKey())) {
                continue;
            }
            highestIncludedRf = Math.max(highestIncludedRf, topicAndRf.getValue());
            if (highestIncludedRf <= aliveRackCount) {
                continue;
            }
            // Fail as soon as the running maximum exceeds the number of alive racks.
            ProvisionRecommendation addRacks = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
                .numRacks(highestIncludedRf - aliveRackCount)
                .build();
            String failure = String.format("[%s] Insufficient number of racks to distribute included replicas (Current: %d, Needed: %d).",
                                           name(), aliveRackCount, highestIncludedRf);
            throw new OptimizationFailureException(failure, addRacks);
        }
    } else if (clusterModel.maxReplicationFactor() > aliveRackCount) {
        // No topic is excluded, so every topic's replication factor must fit into the alive racks.
        ProvisionRecommendation addRacks = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
            .numRacks(clusterModel.maxReplicationFactor() - aliveRackCount)
            .build();
        String failure = String.format("[%s] Insufficient number of racks to distribute each replica (Current: %d, Needed: %d).",
                                       name(), aliveRackCount, clusterModel.maxReplicationFactor());
        throw new OptimizationFailureException(failure, addRacks);
    }

    // Racks beyond what the cluster-wide maximum replication factor requires.
    final int spareRacks = aliveRackCount - clusterModel.maxReplicationFactor();
    if (_balancingConstraint.overprovisionedMinExtraRacks() <= spareRacks) {
        final int droppableRacks = 1 + spareRacks - _balancingConstraint.overprovisionedMinExtraRacks();
        ProvisionRecommendation dropRacks = new ProvisionRecommendation.Builder(ProvisionStatus.OVER_PROVISIONED)
            .numRacks(droppableRacks)
            .build();
        _provisionResponse = new ProvisionResponse(ProvisionStatus.OVER_PROVISIONED, dropRacks, name());
    }

    // Track the replicas this goal may act upon; each selection function is added only if its flag is true.
    new SortedReplicasHelper()
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectImmigrants(),
                               optimizationOptions.onlyMoveImmigrantReplicas())
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectReplicasBasedOnExcludedTopics(topicsToExclude),
                               !topicsToExclude.isEmpty())
        .trackSortedReplicasFor(replicaSortName(this, false, false), clusterModel);
}
Also used : ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse) SortedReplicasHelper(com.linkedin.kafka.cruisecontrol.model.SortedReplicasHelper) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint) Map(java.util.Map)

Example 2 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class ResourceDistributionGoal method updateGoalState.

/**
 * Evaluate the balance of alive brokers after a pass, record the outcome, and conclude the goal unless offline
 * replicas force one more pass.
 *
 * <p>The goal is marked as not succeeded if any alive broker is above the balance upper limit, or if any alive broker
 * that is not excluded for replica moves is under the balance lower limit. The provision response is set to
 * over-provisioned when no broker is above the upper limit and the low-utilization flag is set, and to right-sized
 * when every checked broker is within both limits and the flag is not set.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If offline replicas remain while already in the fix-offline-replicas-only
 *         mode, or if the bad-disk sanity check fails.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    Set<Integer> overloadedBrokerIds = new HashSet<>();
    Set<Integer> underloadedBrokerIds = new HashSet<>();

    // Proposals skip the excluded topics, yet the balance check still accounts for the load of their replicas.
    for (Broker aliveBroker : clusterModel.aliveBrokers()) {
        boolean withinUpperLimit = isLoadUnderBalanceUpperLimit(aliveBroker);
        if (!withinUpperLimit) {
            overloadedBrokerIds.add(aliveBroker.id());
        }
        // Brokers excluded for replica moves are never reported as being under the balance lower limit.
        if (!(isExcludedForReplicaMove(aliveBroker) || isLoadAboveBalanceLowerLimit(aliveBroker))) {
            underloadedBrokerIds.add(aliveBroker.id());
        }
    }

    final boolean noneAboveUpperLimit = overloadedBrokerIds.isEmpty();
    if (noneAboveUpperLimit) {
        if (_isLowUtilization) {
            // Low utilization state with every broker under the corresponding balance upper limit.
            _provisionResponse = new ProvisionResponse(ProvisionStatus.OVER_PROVISIONED, _overProvisionedRecommendation, name());
        }
    } else {
        LOG.debug("Utilization for broker ids:{} {} above the balance limit for:{} after {}.",
                  overloadedBrokerIds,
                  overloadedBrokerIds.size() > 1 ? "are" : "is",
                  resource(),
                  clusterModel.selfHealingEligibleReplicas().isEmpty() ? "rebalance" : "self-healing");
        _succeeded = false;
    }

    if (underloadedBrokerIds.isEmpty()) {
        if (noneAboveUpperLimit && !_isLowUtilization) {
            // Every broker sits between the lower and upper balance limits, and utilization is not low.
            _provisionResponse = new ProvisionResponse(ProvisionStatus.RIGHT_SIZED);
        }
    } else {
        LOG.debug("Utilization for broker ids:{} {} under the balance limit for:{} after {}.",
                  underloadedBrokerIds,
                  underloadedBrokerIds.size() > 1 ? "are" : "is",
                  resource(),
                  clusterModel.selfHealingEligibleReplicas().isEmpty() ? "rebalance" : "self-healing");
        _succeeded = false;
    }

    // Sanity check: No self-healing eligible replica should remain at a dead broker/disk.
    try {
        GoalUtils.ensureNoOfflineReplicas(clusterModel, name());
    } catch (OptimizationFailureException ofe) {
        if (!_fixOfflineReplicasOnly) {
            // First failure: switch to the fix-offline-replicas-only mode and return without finishing.
            _fixOfflineReplicasOnly = true;
            LOG.info("Ignoring resource balance limit to move replicas from dead brokers/disks.");
            return;
        }
        // Already in the fix-offline-replicas-only mode, so the failure is propagated.
        throw ofe;
    }

    // Sanity check: No replica should be moved to a broker, which used to host any replica of the same partition on its broken disk.
    GoalUtils.ensureReplicasMoveOffBrokersWithBadDisks(clusterModel, name());
    finish();
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse) HashSet(java.util.HashSet)

Example 3 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class AbstractGoal method optimize.

/**
 * Run this goal against the given cluster model.
 *
 * <p>The goal state is initialized, then brokers are rebalanced and the goal state updated repeatedly until the goal
 * is marked finished. Unless the cluster had broken brokers, or excluded brokers for replica move with replicas,
 * the stats after optimization must not be worse than the stats before it. Sorted replicas tracked during the run
 * are always cleared before returning or throwing.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Goals passed through to the per-broker rebalance step.
 * @param optimizationOptions Options to take into account during optimization.
 * @return The success flag of this goal at the end of the run.
 * @throws OptimizationFailureException If the optimization fails; the message carries mitigation tips when available.
 */
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    try {
        _succeeded = true;
        // Start from an undecided provision response so that repeated runs of the same goal do not reuse a stale one.
        _provisionResponse = new ProvisionResponse(UNDECIDED);
        LOG.debug("Starting optimization for {}.", name());

        // Snapshot of the stats before any change is made.
        ClusterModelStats preStats = clusterModel.getClusterStats(_balancingConstraint, optimizationOptions);
        LOG.trace("[PRE - {}] {}", name(), preStats);
        _finished = false;
        long startMs = System.currentTimeMillis();
        initGoalState(clusterModel, optimizationOptions);
        SortedSet<Broker> brokersWithFailures = clusterModel.brokenBrokers();
        boolean hadExcludedBrokersWithReplicas = hasExcludedBrokersForReplicaMoveWithReplicas(clusterModel, optimizationOptions);

        while (!_finished) {
            for (Broker candidate : brokersToBalance(clusterModel)) {
                rebalanceForBroker(candidate, clusterModel, optimizedGoals, optimizationOptions);
            }
            updateGoalState(clusterModel, optimizationOptions);
        }

        ClusterModelStats postStats = clusterModel.getClusterStats(_balancingConstraint, optimizationOptions);
        LOG.trace("[POST - {}] {}", name(), postStats);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Finished optimization for {} in {}ms.", name(), System.currentTimeMillis() - startMs);
        }
        LOG.trace("Cluster after optimization is {}", clusterModel);

        // Worse stats are tolerated only with (1) broken brokers or (2) excluded brokers for replica move with replicas.
        boolean mustNotRegress = brokersWithFailures.isEmpty() && !hadExcludedBrokersWithReplicas;
        if (mustNotRegress) {
            ClusterModelStatsComparator statsComparator = clusterModelStatsComparator();
            boolean beforeIsPreferred = statsComparator.compare(postStats, preStats) < 0;
            if (beforeIsPreferred) {
                // Worse stats after optimization point to an implementation error in the goal itself.
                throw new IllegalStateException(
                    String.format("Optimization for goal %s failed because the optimized result is worse than before. Reason: %s.",
                                  name(), statsComparator.explainLastComparison()));
            }
        }

        // An over-provisioned verdict is kept only if the cluster has the minimum required number of alive brokers.
        if (_provisionResponse.status() == OVER_PROVISIONED
            && clusterModel.aliveBrokers().size() < _balancingConstraint.overprovisionedMinBrokers()) {
            _provisionResponse = new ProvisionResponse(RIGHT_SIZED);
        }
        return _succeeded;
    } catch (OptimizationFailureException ofe) {
        _provisionResponse = new ProvisionResponse(UNDER_PROVISIONED, ofe.provisionRecommendation(), name());
        // Append mitigation tips (if any) that relate to the optimization options in use.
        String mitigation = GoalUtils.mitigationForOptimizationFailures(optimizationOptions);
        String failure = ofe.getMessage();
        String tips = mitigation.isEmpty() ? "" : String.format(" || Tips: %s", mitigation);
        throw new OptimizationFailureException(String.format("%s%s", failure, tips), ofe.provisionRecommendation());
    } finally {
        // Drop whatever sorted replicas were tracked during this run.
        clusterModel.clearSortedReplicas();
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) ClusterModelStats(com.linkedin.kafka.cruisecontrol.model.ClusterModelStats) ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse)

Example 4 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class RackAwareDistributionGoal method updateGoalState.

/**
 * Verify the outcome of the balancing / self-healing pass and conclude the goal.
 * A single pass either satisfies this goal or reveals that it cannot be satisfied, so the goal always finishes here
 * unless one of the sanity checks throws.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If any of the sanity checks fails.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    final String goalName = name();
    // Sanity check: No self-healing eligible replica should remain at a dead broker/disk.
    GoalUtils.ensureNoOfflineReplicas(clusterModel, goalName);

    // Sanity check: replicas of each partition are evenly distributed across the racks.
    ensureRackAwareDistribution(clusterModel, optimizationOptions);

    // Sanity check: No replica should be moved to a broker, which used to host any replica of the same partition on its broken disk.
    GoalUtils.ensureReplicasMoveOffBrokersWithBadDisks(clusterModel, name());

    // Keep an existing over-provisioned verdict; anything else becomes right-sized.
    boolean flaggedOverProvisioned = _provisionResponse.status() == ProvisionStatus.OVER_PROVISIONED;
    if (!flaggedOverProvisioned) {
        _provisionResponse = new ProvisionResponse(ProvisionStatus.RIGHT_SIZED);
    }
    finish();
}
Also used : ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse)

Example 5 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class RackAwareDistributionGoal method initGoalState.

/**
 * Prepare this goal for an optimization pass.
 * As a hard goal, its proposals are not restricted to dead broker replicas during self-healing.
 *
 * <p>Collects the alive brokers that may take part in replica moves, builds the balance limit, flags the cluster as
 * over-provisioned when enough extra racks are available, and registers the sorted replicas tracked for this goal.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If every alive broker is excluded from replica moves.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    _brokersAllowedReplicaMove = GoalUtils.aliveBrokersNotExcludedForReplicaMove(clusterModel, optimizationOptions);
    if (_brokersAllowedReplicaMove.isEmpty()) {
        // No alive broker may take part in replica moves; recommend as many brokers as the maximum replication factor.
        ProvisionRecommendation addBrokers = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
            .numBrokers(clusterModel.maxReplicationFactor())
            .build();
        String failure = String.format("[%s] All alive brokers are excluded from replica moves.", name());
        throw new OptimizationFailureException(failure, addBrokers);
    }

    _balanceLimit = new BalanceLimit(clusterModel, optimizationOptions);

    // Racks allowed replica moves beyond what the maximum replication factor requires.
    final int spareRacks = _balanceLimit.numAliveRacksAllowedReplicaMoves() - clusterModel.maxReplicationFactor();
    if (_balancingConstraint.overprovisionedMinExtraRacks() <= spareRacks) {
        final int droppableRacks = 1 + spareRacks - _balancingConstraint.overprovisionedMinExtraRacks();
        ProvisionRecommendation dropRacks = new ProvisionRecommendation.Builder(ProvisionStatus.OVER_PROVISIONED)
            .numRacks(droppableRacks)
            .build();
        _provisionResponse = new ProvisionResponse(ProvisionStatus.OVER_PROVISIONED, dropRacks, name());
    }

    // Track the replicas this goal may act upon; each selection function is added only if its flag is true.
    final Set<String> topicsToExclude = optimizationOptions.excludedTopics();
    new SortedReplicasHelper()
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectImmigrants(),
                               optimizationOptions.onlyMoveImmigrantReplicas())
        .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectReplicasBasedOnExcludedTopics(topicsToExclude),
                               !topicsToExclude.isEmpty())
        .trackSortedReplicasFor(replicaSortName(this, false, false), clusterModel);
}
Also used : ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse) SortedReplicasHelper(com.linkedin.kafka.cruisecontrol.model.SortedReplicasHelper) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)

Aggregations

ProvisionResponse (com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse): 9, OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException): 5, ProvisionRecommendation (com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation): 3, BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint): 2, Broker (com.linkedin.kafka.cruisecontrol.model.Broker): 2, SortedReplicasHelper (com.linkedin.kafka.cruisecontrol.model.SortedReplicasHelper): 2, Timer (com.codahale.metrics.Timer): 1, NotEnoughValidWindowsException (com.linkedin.cruisecontrol.exception.NotEnoughValidWindowsException): 1, KafkaCruiseControlUtils.balancednessCostByGoal (com.linkedin.kafka.cruisecontrol.KafkaCruiseControlUtils.balancednessCostByGoal): 1, Goal (com.linkedin.kafka.cruisecontrol.analyzer.goals.Goal): 1, OperationProgress (com.linkedin.kafka.cruisecontrol.async.progress.OperationProgress): 1, KafkaCruiseControlException (com.linkedin.kafka.cruisecontrol.exception.KafkaCruiseControlException): 1, ExecutorState (com.linkedin.kafka.cruisecontrol.executor.ExecutorState): 1, ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel): 1, ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats): 1, HashSet (java.util.HashSet): 1, List (java.util.List): 1, Map (java.util.Map): 1