Search in sources :

Example 36 with ClusterModel

use of com.linkedin.kafka.cruisecontrol.model.ClusterModel in project cruise-control by linkedin.

the class GoalOptimizer method optimizations.

/**
 * Runs the given goals, in priority order, against the provided cluster model and collects the outcome.
 * Depending on the existence of dead/decommissioned brokers in the given cluster:
 * (1) Re-balance: Generates proposals to update the state of the cluster to achieve a final balanced state.
 * (2) Self-healing: Generates proposals to move replicas away from decommissioned brokers.
 *
 * The returned result carries the cluster stats captured after each goal (keyed by goal, in optimization order),
 * the goals considered violated before and after optimization, the proposals computed as the diff between the
 * initial and the final replica/leader distribution, and the broker stats before and after optimization.
 *
 * @param clusterModel The state of the cluster over which the balancing proposal will be applied. Function execution
 *                     updates the cluster state with balancing proposals. Must not be null.
 * @param goalsByPriority the goals ordered by priority.
 * @param operationProgress to report the job progress; one step is added per goal.
 * @return Results of optimization containing the proposals and stats.
 * @throws IllegalArgumentException if the cluster model is null or the cluster is not alive.
 * @throws KafkaCruiseControlException declared for failures propagated from the calls made during optimization.
 */
public OptimizerResult optimizations(ClusterModel clusterModel, Map<Integer, Goal> goalsByPriority, OperationProgress operationProgress) throws KafkaCruiseControlException {
    if (clusterModel == null) {
        throw new IllegalArgumentException("The cluster model cannot be null");
    }
    // Sanity check for optimizing goals.
    if (!clusterModel.isClusterAlive()) {
        throw new IllegalArgumentException("All brokers are dead in the cluster.");
    }
    LOG.trace("Cluster before optimization is {}", clusterModel);
    // Snapshot the initial state so the final proposals can be computed as a diff against it.
    ClusterModel.BrokerStats brokerStatsBeforeOptimization = clusterModel.brokerStats();
    Map<TopicPartition, List<Integer>> initReplicaDistribution = clusterModel.getReplicaDistribution();
    Map<TopicPartition, Integer> initLeaderDistribution = clusterModel.getLeaderDistribution();
    boolean isSelfHealing = !clusterModel.selfHealingEligibleReplicas().isEmpty();
    // Goals that have already been optimized; handed to each subsequent goal's optimize call.
    Set<Goal> optimizedGoals = new HashSet<>();
    Set<Goal> violatedGoalsBeforeOptimization = new HashSet<>();
    Set<Goal> violatedGoalsAfterOptimization = new HashSet<>();
    // Insertion-ordered so that stats are reported in the order the goals were optimized.
    Map<Goal, ClusterModelStats> statsByGoalPriority = new LinkedHashMap<>();
    Map<TopicPartition, List<Integer>> preOptimizedReplicaDistribution = null;
    Map<TopicPartition, Integer> preOptimizedLeaderDistribution = null;
    Set<String> excludedTopics = excludedTopics(clusterModel);
    LOG.debug("Topics excluded from partition movement: {}", excludedTopics);
    for (Goal goal : goalsByPriority.values()) {
        // The first goal is compared against the initial snapshot; later goals against the state left by the
        // previous goal.
        preOptimizedReplicaDistribution = preOptimizedReplicaDistribution == null ? initReplicaDistribution : clusterModel.getReplicaDistribution();
        preOptimizedLeaderDistribution = preOptimizedLeaderDistribution == null ? initLeaderDistribution : clusterModel.getLeaderDistribution();
        OptimizationForGoal step = new OptimizationForGoal(goal.name());
        operationProgress.addStep(step);
        LOG.debug("Optimizing goal {}", goal.name());
        boolean succeeded = goal.optimize(clusterModel, optimizedGoals, excludedTopics);
        optimizedGoals.add(goal);
        statsByGoalPriority.put(goal, clusterModel.getClusterStats(_balancingConstraint));
        Set<ExecutionProposal> goalProposals = AnalyzerUtils.getDiff(preOptimizedReplicaDistribution, preOptimizedLeaderDistribution, clusterModel);
        // A goal that produced proposals, or that could not be satisfied, is counted as violated beforehand.
        if (!goalProposals.isEmpty() || !succeeded) {
            violatedGoalsBeforeOptimization.add(goal);
        }
        if (!succeeded) {
            violatedGoalsAfterOptimization.add(goal);
        }
        logProgress(isSelfHealing, goal.name(), optimizedGoals.size(), goalProposals);
        step.done();
        // Only build the broker stats when they will actually be logged.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Broker level stats after optimization: {}", clusterModel.brokerStats());
        }
    }
    clusterModel.sanityCheck();
    // Broker level stats in the final cluster state.
    if (LOG.isTraceEnabled()) {
        // SLF4J only substitutes "{}"; a printf-style "%n" would be logged literally, so it is not used here.
        LOG.trace("Broker level stats after optimization: {}", clusterModel.brokerStats());
    }
    Set<ExecutionProposal> proposals = AnalyzerUtils.getDiff(initReplicaDistribution, initLeaderDistribution, clusterModel);
    return new OptimizerResult(statsByGoalPriority, violatedGoalsBeforeOptimization, violatedGoalsAfterOptimization, proposals, brokerStatsBeforeOptimization, clusterModel.brokerStats(), clusterModel.generation(), clusterModel.getClusterStats(_balancingConstraint));
}
Also used : OptimizationForGoal(com.linkedin.kafka.cruisecontrol.async.progress.OptimizationForGoal) ClusterModelStats(com.linkedin.kafka.cruisecontrol.model.ClusterModelStats) LinkedHashMap(java.util.LinkedHashMap) ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) OptimizationForGoal(com.linkedin.kafka.cruisecontrol.async.progress.OptimizationForGoal) Goal(com.linkedin.kafka.cruisecontrol.analyzer.goals.Goal) ExecutionProposal(com.linkedin.kafka.cruisecontrol.executor.ExecutionProposal) TopicPartition(org.apache.kafka.common.TopicPartition) ArrayList(java.util.ArrayList) List(java.util.List) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) HashSet(java.util.HashSet)

Example 37 with ClusterModel

use of com.linkedin.kafka.cruisecontrol.model.ClusterModel in project cruise-control by linkedin.

the class KafkaCruiseControl method getOptimizationProposals.

/**
 * Computes optimization proposals for the cluster workload model.
 *
 * The precomputed (cached) proposals are returned unless either of the following holds, in which case a fresh
 * cluster model is generated and optimized:
 * 1. The caller specified a non-empty list of goals.
 * 2. The effective completeness requirements are weaker than the ones used for precomputing.
 *
 * @param goals a list of goals to optimize. When empty all goals will be used.
 * @param requirements the model completeness requirements to enforce when generating the proposals.
 * @param operationProgress the progress of the job to report.
 * @return The optimization result.
 * @throws KafkaCruiseControlException rethrown as-is when raised while generating or optimizing the model; any
 *                                     other exception raised there is wrapped in one.
 */
public GoalOptimizer.OptimizerResult getOptimizationProposals(List<String> goals, ModelCompletenessRequirements requirements, OperationProgress operationProgress) throws KafkaCruiseControlException {
    Map<Integer, Goal> prioritizedGoals = goalsByPriority(goals);
    ModelCompletenessRequirements effectiveRequirements = modelCompletenessRequirements(prioritizedGoals.values()).weaker(requirements);
    ModelCompletenessRequirements cacheRequirements = _goalOptimizer.modelCompletenessRequirementsForPrecomputing();
    // The request is weaker than the cache if it relaxes any one of the three completeness dimensions.
    boolean weakerThanCache =
        cacheRequirements.minMonitoredPartitionsPercentage() > effectiveRequirements.minMonitoredPartitionsPercentage()
            || cacheRequirements.minRequiredNumWindows() > effectiveRequirements.minRequiredNumWindows()
            || (cacheRequirements.includeAllTopics() && !effectiveRequirements.includeAllTopics());
    boolean goalsSpecified = goals != null && !goals.isEmpty();

    // Nothing prevents reuse of the cached best proposals.
    if (!goalsSpecified && !weakerThanCache) {
        return getOptimizationProposals(operationProgress);
    }

    // The cached proposals do not apply to this request, so generate a new model and optimize it.
    try (AutoCloseable ignored = _loadMonitor.acquireForModelGeneration(operationProgress)) {
        ClusterModel freshModel = _loadMonitor.clusterModel(-1, _time.milliseconds(), effectiveRequirements, operationProgress);
        return getOptimizationProposals(freshModel, prioritizedGoals, operationProgress);
    } catch (KafkaCruiseControlException kcce) {
        throw kcce;
    } catch (Exception e) {
        throw new KafkaCruiseControlException(e);
    }
}
Also used : ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) Goal(com.linkedin.kafka.cruisecontrol.analyzer.goals.Goal) GoalOptimizer(com.linkedin.kafka.cruisecontrol.analyzer.GoalOptimizer) KafkaCruiseControlException(com.linkedin.kafka.cruisecontrol.exception.KafkaCruiseControlException) ModelCompletenessRequirements(com.linkedin.kafka.cruisecontrol.monitor.ModelCompletenessRequirements) KafkaCruiseControlException(com.linkedin.kafka.cruisecontrol.exception.KafkaCruiseControlException)

Example 38 with ClusterModel

use of com.linkedin.kafka.cruisecontrol.model.ClusterModel in project cruise-control by linkedin.

the class GoalViolationDetector method run.

/**
 * Runs one round of goal violation detection.
 *
 * The round is skipped when the load monitor's cluster model generation equals the last checked generation, or
 * when the load monitor is not ready. Otherwise each goal whose completeness requirements are met is passed to
 * {@code optimizeForGoal}; the value it returns decides whether a new cluster model is generated for the next
 * goal. Detected violations, if any, are added to the anomalies collection. Exceptions are logged, not propagated.
 */
@Override
public void run() {
    long now = _time.milliseconds();
    if (_loadMonitor.clusterModelGeneration().equals(_lastCheckedModelGeneration)) {
        LOG.debug("Skipping goal violation detection because the model generation hasn't changed. Current model generation {}", _loadMonitor.clusterModelGeneration());
        return;
    }
    AutoCloseable clusterModelSemaphore = null;
    try {
        LoadMonitorTaskRunner.LoadMonitorTaskRunnerState loadMonitorTaskRunnerState = _loadMonitor.taskRunnerState();
        if (!ViolationUtils.isLoadMonitorReady(loadMonitorTaskRunnerState)) {
            LOG.info("Skipping goal violation detection because load monitor is in {} state.", loadMonitorTaskRunnerState);
            return;
        }
        GoalViolations goalViolations = new GoalViolations();
        boolean newModelNeeded = true;
        ClusterModel clusterModel = null;
        for (Map.Entry<Integer, Goal> entry : _goals.entrySet()) {
            Goal goal = entry.getValue();
            if (_loadMonitor.meetCompletenessRequirements(goal.clusterModelCompletenessRequirements())) {
                LOG.debug("Detecting if {} is violated.", goal.name());
                // Because the model generation could be slow, We only get new cluster model if needed.
                if (newModelNeeded) {
                    if (clusterModelSemaphore != null) {
                        // Clear the reference before closing: AutoCloseable.close() is not required to be
                        // idempotent, so the finally block must not close this instance again if close() or the
                        // re-acquisition below throws.
                        AutoCloseable previousSemaphore = clusterModelSemaphore;
                        clusterModelSemaphore = null;
                        previousSemaphore.close();
                    }
                    clusterModelSemaphore = _loadMonitor.acquireForModelGeneration(new OperationProgress());
                    // Make cluster model null before generating a new cluster model so the current one can be GCed.
                    clusterModel = null;
                    clusterModel = _loadMonitor.clusterModel(now, goal.clusterModelCompletenessRequirements(), new OperationProgress());
                }
                int priority = entry.getKey();
                newModelNeeded = optimizeForGoal(clusterModel, priority, goal, goalViolations);
            } else {
                LOG.debug("Skipping goal violation detection for {} because load completeness requirement is not met.", goal);
            }
        }
        // Remember the generation that was checked so an unchanged model is not re-examined next time.
        if (clusterModel != null) {
            _lastCheckedModelGeneration = clusterModel.generation();
        }
        if (!goalViolations.violations().isEmpty()) {
            _anomalies.add(goalViolations);
        }
    } catch (NotEnoughValidWindowsException nevwe) {
        LOG.debug("Skipping goal violation detection because there are not enough valid windows.");
    } catch (KafkaCruiseControlException kcce) {
        LOG.warn("Goal violation detector received exception", kcce);
    } catch (Exception e) {
        LOG.error("Unexpected exception", e);
    } finally {
        // Release the semaphore that is still held, if any.
        if (clusterModelSemaphore != null) {
            try {
                clusterModelSemaphore.close();
            } catch (Exception e) {
                LOG.error("Received exception when closing auto closable semaphore", e);
            }
        }
        LOG.debug("Goal violation detection finished.");
    }
}
Also used : OperationProgress(com.linkedin.kafka.cruisecontrol.async.progress.OperationProgress) LoadMonitorTaskRunner(com.linkedin.kafka.cruisecontrol.monitor.task.LoadMonitorTaskRunner) KafkaCruiseControlException(com.linkedin.kafka.cruisecontrol.exception.KafkaCruiseControlException) NotEnoughValidWindowsException(com.linkedin.cruisecontrol.exception.NotEnoughValidWindowsException) NotEnoughValidWindowsException(com.linkedin.cruisecontrol.exception.NotEnoughValidWindowsException) KafkaCruiseControlException(com.linkedin.kafka.cruisecontrol.exception.KafkaCruiseControlException) ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) Goal(com.linkedin.kafka.cruisecontrol.analyzer.goals.Goal) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap)

Aggregations

ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel)38 TopicPartition (org.apache.kafka.common.TopicPartition)12 ModelCompletenessRequirements (com.linkedin.kafka.cruisecontrol.monitor.ModelCompletenessRequirements)11 Test (org.junit.Test)11 BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)10 Replica (com.linkedin.kafka.cruisecontrol.model.Replica)10 Broker (com.linkedin.kafka.cruisecontrol.model.Broker)9 ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats)9 List (java.util.List)9 HashSet (java.util.HashSet)8 Goal (com.linkedin.kafka.cruisecontrol.analyzer.goals.Goal)7 Resource (com.linkedin.kafka.cruisecontrol.common.Resource)7 Set (java.util.Set)7 Logger (org.slf4j.Logger)7 LoggerFactory (org.slf4j.LoggerFactory)7 ActionAcceptance (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance)6 BalancingAction (com.linkedin.kafka.cruisecontrol.analyzer.BalancingAction)6 OperationProgress (com.linkedin.kafka.cruisecontrol.async.progress.OperationProgress)6 ArrayList (java.util.ArrayList)6 Comparator (java.util.Comparator)6