Search in sources :

Example 11 with Disk

use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.

the class IntraBrokerDiskUsageDistributionGoal method rebalanceBySwappingLoadOut.

/**
 * Try to balance the overloaded disk by swapping its replicas with replicas from other disks of the same broker.
 *
 * <p>Candidate disks are the alive disks of the broker whose utilization percentage is below the broker's balance
 * upper threshold; they are tried in ascending order of utilization. A candidate is put back into the queue only
 * after a swap with it has been applied and it is still below the threshold. A candidate for which no swap could be
 * applied is dropped: its utilization is unchanged, so re-enqueuing it would make it the head of the queue again and
 * the loop would keep retrying the same disk until the per-disk swap timeout without ever trying the other candidates.
 *
 * @param disk                 The disk to balance.
 * @param clusterModel         The current cluster model.
 * @param optimizedGoals       Optimized goals.
 * @param optimizationOptions  Options to take into account during optimization -- e.g. excluded topics.
 */
private void rebalanceBySwappingLoadOut(Disk disk, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
    long swapStartTimeMs = System.currentTimeMillis();
    Broker broker = disk.broker();
    PriorityQueue<Disk> candidateDiskPQ = new PriorityQueue<>(Comparator.comparingDouble(GoalUtils::diskUtilizationPercentage));
    for (Disk candidateDisk : broker.disks()) {
        // Get candidate disk on broker to try to swap replica with -- sorted in the order of trial (ascending load).
        if (candidateDisk.isAlive() && diskUtilizationPercentage(candidateDisk) < _balanceUpperThresholdByBroker.get(broker)) {
            candidateDiskPQ.add(candidateDisk);
        }
    }
    while (!candidateDiskPQ.isEmpty()) {
        Disk candidateDisk = candidateDiskPQ.poll();
        // Tracks whether any swap with this candidate was applied in this round.
        boolean swapped = false;
        for (Replica sourceReplica : disk.trackedSortedReplicas(replicaSortName(this, true, false)).sortedReplicas(false)) {
            // Try swapping the source with the candidate replicas. Get the swapped in replica if successful, null otherwise.
            Replica swappedIn = maybeSwapReplicaBetweenDisks(clusterModel, sourceReplica, candidateDisk.trackedSortedReplicas(replicaSortName(this, false, false)).sortedReplicas(false), optimizedGoals);
            if (swappedIn != null) {
                if (diskUtilizationPercentage(disk) < _balanceUpperThresholdByBroker.get(broker)) {
                    // Successfully balanced this broker by swapping in.
                    return;
                }
                swapped = true;
                break;
            }
        }
        if (remainingPerDiskSwapTimeMs(swapStartTimeMs) <= 0) {
            LOG.debug("Swap load out timeout for disk {}.", disk.logDir());
            break;
        }
        // Only a candidate that actually took part in a swap is worth another round; a candidate that could not
        // swap with any source replica is left out so that the next candidate in the queue is tried.
        if (swapped && diskUtilizationPercentage(candidateDisk) < _balanceUpperThresholdByBroker.get(broker)) {
            candidateDiskPQ.add(candidateDisk);
        }
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) PriorityQueue(java.util.PriorityQueue) Disk(com.linkedin.kafka.cruisecontrol.model.Disk) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 12 with Disk

use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.

the class IntraBrokerDiskUsageDistributionGoal method rebalanceByMovingLoadOut.

/**
 * Attempt to relieve an overloaded disk by relocating its replicas onto other disks of the same broker.
 *
 * <p>Destination disks are the alive disks of the broker whose utilization percentage is below the broker's average
 * disk utilization percentage; they are tried in ascending order of utilization.
 *
 * @param disk                 The disk to balance.
 * @param clusterModel         The current cluster model.
 * @param optimizedGoals       Optimized goals.
 * @param optimizationOptions  Options to take into account during optimization -- e.g. excluded topics.
 * @return {@code true} if the disk to balance is still overloaded, {@code false} otherwise.
 */
private boolean rebalanceByMovingLoadOut(Disk disk, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
    Broker owner = disk.broker();
    double averageUtilization = averageDiskUtilizationPercentage(owner);

    // Least-utilized destination first.
    PriorityQueue<Disk> destinations = new PriorityQueue<>(Comparator.comparingDouble(GoalUtils::diskUtilizationPercentage));
    for (Disk peer : owner.disks()) {
        if (!peer.isAlive()) {
            continue;
        }
        if (diskUtilizationPercentage(peer) < averageUtilization) {
            destinations.add(peer);
        }
    }

    Disk destination;
    while ((destination = destinations.poll()) != null) {
        Iterator<Replica> outgoing = disk.trackedSortedReplicas(replicaSortName(this, true, false)).sortedReplicas(true).iterator();
        while (outgoing.hasNext()) {
            Replica replica = outgoing.next();
            Disk movedTo = maybeMoveReplicaBetweenDisks(clusterModel, replica, Collections.singleton(destination), optimizedGoals);
            if (movedTo == null) {
                // Nothing was moved, so there is no status to check. A destination that never accepts a replica
                // is never re-enqueued.
                continue;
            }
            if (diskUtilizationPercentage(disk) < _balanceUpperThresholdByBroker.get(owner)) {
                // The source disk dropped below the upper threshold: done.
                return false;
            }
            outgoing.remove();
            if (destinations.isEmpty()) {
                // No other destination to switch to; keep filling the current one.
                continue;
            }
            if (diskUtilizationPercentage(destination) > diskUtilizationPercentage(destinations.peek())) {
                // The current destination is now more utilized than the next one in the queue:
                // put it back and switch to the next destination.
                destinations.add(destination);
                break;
            }
        }
    }
    return true;
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) PriorityQueue(java.util.PriorityQueue) Disk(com.linkedin.kafka.cruisecontrol.model.Disk) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 13 with Disk

use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.

the class PreferredLeaderElectionGoal method optimize.

/**
 * Run the preferred leader election in a single pass over the cluster model.
 *
 * <p>Replicas on demoted brokers (or on demoted disks of non-demoted brokers) are first handed to
 * {@code maybeMoveReplicaToEndOfReplicaList}. Afterwards, for each partition under consideration, leadership is
 * relocated to the first replica in the replica list that sits on an alive broker, is not currently offline and is
 * not on a broker excluded for leadership.
 *
 * @param clusterModel         The cluster model to optimize.
 * @param optimizedGoals       Goals that have already been optimized.
 * @param optimizationOptions  Options to take into account during optimization.
 * @return {@code true} if at least one leadership has been relocated, {@code false} otherwise.
 */
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
    sanityCheckOptimizationOptions(optimizationOptions);

    // Step 1: push replicas living on demoted brokers / disks to the end of their replica lists.
    // If all the replicas are demoted, no change is made to the leader.
    boolean demotionPresent = false;
    Set<TopicPartition> affectedPartitions = new HashSet<>();
    for (Broker aliveBroker : clusterModel.aliveBrokers()) {
        if (!aliveBroker.isDemoted()) {
            // The broker itself is not demoted; look for individually demoted disks.
            for (Disk candidateDisk : aliveBroker.disks()) {
                if (candidateDisk.state() != Disk.State.DEMOTED) {
                    continue;
                }
                demotionPresent = true;
                for (Replica onDisk : candidateDisk.replicas()) {
                    maybeMoveReplicaToEndOfReplicaList(onDisk, clusterModel);
                }
                maybeChangeLeadershipForPartition(candidateDisk.leaderReplicas(), affectedPartitions);
            }
            continue;
        }
        demotionPresent = true;
        for (Replica onBroker : aliveBroker.replicas()) {
            maybeMoveReplicaToEndOfReplicaList(onBroker, clusterModel);
        }
        maybeChangeLeadershipForPartition(aliveBroker.leaderReplicas(), affectedPartitions);
    }

    // Step 2: relocate leaderships. Excluded topics are ignored because this goal does not move partitions.
    boolean leadershipMoved = false;
    Set<Integer> leadershipExcludedBrokers = optimizationOptions.excludedBrokersForLeadership();
    for (List<Partition> topicPartitions : clusterModel.getPartitionsByTopic().values()) {
        for (Partition partition : topicPartitions) {
            // With a demotion in progress, only the partitions collected in step 1 are touched.
            if (demotionPresent && !affectedPartitions.contains(partition.topicPartition())) {
                continue;
            }
            for (int idx = 0; idx < partition.replicas().size(); idx++) {
                // Without any demotion, only the first replica of the partition is a leadership candidate.
                if (idx > 0 && !demotionPresent) {
                    break;
                }
                Replica candidate = partition.replicas().get(idx);
                Broker candidateBroker = candidate.broker();
                if (!candidateBroker.isAlive()) {
                    continue;
                }
                if (candidate.isCurrentOffline()) {
                    LOG.warn("The preferred replica of partition {} on broker {} is offline.", partition.topicPartition(), candidateBroker);
                    continue;
                }
                if (!candidate.isLeader()) {
                    // Never hand leadership to a broker excluded for leadership transfer.
                    if (leadershipExcludedBrokers.contains(candidateBroker.id())) {
                        LOG.warn("Skipped leadership transfer of partition {} to broker {} because it is among brokers excluded" + " for leadership {}.", partition.topicPartition(), candidateBroker, leadershipExcludedBrokers);
                        continue;
                    }
                    clusterModel.relocateLeadership(candidate.topicPartition(), partition.leader().broker().id(), candidateBroker.id());
                    leadershipMoved = true;
                }
                if (clusterModel.demotedBrokers().contains(candidateBroker)) {
                    LOG.warn("The leader of partition {} has to be on a demoted broker {} because all the alive " + "replicas are demoted.", partition.topicPartition(), candidateBroker.id());
                }
                if (candidate.disk() != null && candidate.disk().state() == Disk.State.DEMOTED) {
                    LOG.warn("The leader of partition {} has to be on a demoted disk {} of broker {} because all the alive " + "replicas are demoted.", partition.topicPartition(), candidate.disk().logDir(), candidateBroker.id());
                }
                // The leader of this partition is settled; move on to the next partition.
                break;
            }
        }
    }

    // This goal is optimized in one pass.
    finish();
    return leadershipMoved;
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) Partition(com.linkedin.kafka.cruisecontrol.model.Partition) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) TopicPartition(org.apache.kafka.common.TopicPartition) Replica(com.linkedin.kafka.cruisecontrol.model.Replica) Disk(com.linkedin.kafka.cruisecontrol.model.Disk) HashSet(java.util.HashSet)

Example 14 with Disk

use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.

the class IntraBrokerDiskCapacityGoal method updateGoalState.

/**
 * Update goal state.
 * Sanity check: once balancing is complete, verify that no alive disk of the brokers to balance is over its
 * utilization limit, then mark the goal as finished.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If an alive disk is still over the utilization limit; the exception carries
 *                                      an under-provisioned recommendation.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    for (Broker broker : brokersToBalance(clusterModel)) {
        for (Disk disk : broker.disks()) {
            if (!disk.isAlive() || !isUtilizationOverLimit(disk)) {
                continue;
            }
            // The disk is over the limit: derive the capacity that would be needed to hold its current
            // utilization under the capacity threshold, and fail with a matching recommendation.
            double requiredCapacity = disk.utilization() / _balancingConstraint.capacityThreshold(RESOURCE);
            ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
                .numDisks(1)
                .totalCapacity(requiredCapacity)
                .build();
            String failureMessage = String.format("[%s] Utilization (%.2f) for disk %s on broker %d is above capacity limit.", name(), disk.utilization(), disk, broker.id());
            throw new OptimizationFailureException(failureMessage, recommendation);
        }
    }
    finish();
}
Also used : ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Disk(com.linkedin.kafka.cruisecontrol.model.Disk)

Example 15 with Disk

use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.

the class PreferredLeaderElectionGoalTest method testOptimizeWithDemotedDisks.

/**
 * Demotes one disk on broker 0 and one on broker 1, runs the goal, and verifies that partitions whose leader
 * was not on a demoted disk keep their leader, while for the others the first replica is the leader and replicas
 * on the demoted disks sit at the end of the replica list.
 */
@Test
public void testOptimizeWithDemotedDisks() {
    ClusterModel model = createClusterModel(true, true).clusterModel();
    Disk demotedDiskOnBroker0 = model.broker(0).disk(LOGDIR0);
    Disk demotedDiskOnBroker1 = model.broker(1).disk(LOGDIR1);
    demotedDiskOnBroker0.setState(Disk.State.DEMOTED);
    demotedDiskOnBroker1.setState(Disk.State.DEMOTED);

    // Partitions whose leader currently lives on one of the demoted disks.
    Set<TopicPartition> ledFromDemotedDisk = new HashSet<>();
    for (Replica leader : demotedDiskOnBroker0.leaderReplicas()) {
        ledFromDemotedDisk.add(leader.topicPartition());
    }
    for (Replica leader : demotedDiskOnBroker1.leaderReplicas()) {
        ledFromDemotedDisk.add(leader.topicPartition());
    }

    // Snapshot of partition -> leader broker id before the goal runs.
    Map<TopicPartition, Integer> leaderBrokerBefore = new HashMap<>();
    for (Broker broker : model.brokers()) {
        for (Replica leader : broker.leaderReplicas()) {
            leaderBrokerBefore.put(leader.topicPartition(), broker.id());
        }
    }

    PreferredLeaderElectionGoal goal = new PreferredLeaderElectionGoal(false, false, null);
    // Before the optimization, goals are expected to be undecided wrt their provision status.
    assertEquals(ProvisionStatus.UNDECIDED, goal.provisionResponse().status());
    OptimizationOptions options = new OptimizationOptions(Collections.emptySet(), Collections.emptySet(), Collections.emptySet());
    goal.optimize(model, Collections.emptySet(), options);
    // After the optimization, PreferredLeaderElectionGoal is expected to be undecided wrt its provision status.
    assertEquals(ProvisionStatus.UNDECIDED, goal.provisionResponse().status());

    for (String topic : Arrays.asList(TOPIC0, TOPIC1, TOPIC2)) {
        for (int partitionId = 0; partitionId < 3; partitionId++) {
            TopicPartition tp = new TopicPartition(topic, partitionId);
            if (ledFromDemotedDisk.contains(tp)) {
                List<Replica> replicas = model.partition(tp).replicas();
                for (int i = 0; i < 3; i++) {
                    Replica replica = replicas.get(i);
                    // only the first replica should be leader.
                    assertEquals(i == 0, replica.isLeader());
                    boolean onDemotedDisk = demotedDiskOnBroker0.replicas().contains(replica) || demotedDiskOnBroker1.replicas().contains(replica);
                    if (onDemotedDisk) {
                        // The demoted replica should be in the last position.
                        assertEquals(replica.topicPartition() + " broker " + replica.broker().id(), replicas.size() - 1, i);
                    }
                }
            } else {
                // Leadership of untouched partitions must stay where it was.
                int oldLeaderBroker = leaderBrokerBefore.get(tp);
                assertEquals("Tp " + tp, oldLeaderBroker, model.partition(tp).leader().broker().id());
            }
        }
    }
}
Also used : Replica(com.linkedin.kafka.cruisecontrol.model.Replica) Arrays(java.util.Arrays) ReplicaPlacementInfo(com.linkedin.kafka.cruisecontrol.model.ReplicaPlacementInfo) ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) HashMap(java.util.HashMap) PreferredLeaderElectionGoal(com.linkedin.kafka.cruisecontrol.analyzer.goals.PreferredLeaderElectionGoal) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TOPIC0(com.linkedin.kafka.cruisecontrol.common.TestConstants.TOPIC0) Disk(com.linkedin.kafka.cruisecontrol.model.Disk) TOPIC1(com.linkedin.kafka.cruisecontrol.common.TestConstants.TOPIC1) Cluster(org.apache.kafka.common.Cluster) TOPIC2(com.linkedin.kafka.cruisecontrol.common.TestConstants.TOPIC2) KafkaMetricDef(com.linkedin.kafka.cruisecontrol.monitor.metricdefinition.KafkaMetricDef) Map(java.util.Map) AggregatedMetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues) ModelGeneration(com.linkedin.kafka.cruisecontrol.monitor.ModelGeneration) TopicPartition(org.apache.kafka.common.TopicPartition) BrokerCapacityInfo(com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo) Set(java.util.Set) Test(org.junit.Test) PartitionInfo(org.apache.kafka.common.PartitionInfo) MetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.MetricValues) LOGDIR0(com.linkedin.kafka.cruisecontrol.common.TestConstants.LOGDIR0) Collectors(java.util.stream.Collectors) LOGDIR1(com.linkedin.kafka.cruisecontrol.common.TestConstants.LOGDIR1) TestConstants(com.linkedin.kafka.cruisecontrol.common.TestConstants) Broker(com.linkedin.kafka.cruisecontrol.model.Broker) List(java.util.List) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) Node(org.apache.kafka.common.Node) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) PreferredLeaderElectionGoal(com.linkedin.kafka.cruisecontrol.analyzer.goals.PreferredLeaderElectionGoal) 
Replica(com.linkedin.kafka.cruisecontrol.model.Replica) ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) TopicPartition(org.apache.kafka.common.TopicPartition) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

Disk (com.linkedin.kafka.cruisecontrol.model.Disk)16 Broker (com.linkedin.kafka.cruisecontrol.model.Broker)10 Replica (com.linkedin.kafka.cruisecontrol.model.Replica)10 ArrayList (java.util.ArrayList)4 PriorityQueue (java.util.PriorityQueue)4 HashSet (java.util.HashSet)3 TopicPartition (org.apache.kafka.common.TopicPartition)3 AggregatedMetricValues (com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues)2 MetricValues (com.linkedin.cruisecontrol.monitor.sampling.aggregator.MetricValues)2 PreferredLeaderElectionGoal (com.linkedin.kafka.cruisecontrol.analyzer.goals.PreferredLeaderElectionGoal)2 Resource (com.linkedin.kafka.cruisecontrol.common.Resource)2 TestConstants (com.linkedin.kafka.cruisecontrol.common.TestConstants)2 LOGDIR0 (com.linkedin.kafka.cruisecontrol.common.TestConstants.LOGDIR0)2 LOGDIR1 (com.linkedin.kafka.cruisecontrol.common.TestConstants.LOGDIR1)2 TOPIC0 (com.linkedin.kafka.cruisecontrol.common.TestConstants.TOPIC0)2 TOPIC1 (com.linkedin.kafka.cruisecontrol.common.TestConstants.TOPIC1)2 TOPIC2 (com.linkedin.kafka.cruisecontrol.common.TestConstants.TOPIC2)2 BrokerCapacityInfo (com.linkedin.kafka.cruisecontrol.config.BrokerCapacityInfo)2 ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel)2 ReplicaPlacementInfo (com.linkedin.kafka.cruisecontrol.model.ReplicaPlacementInfo)2