use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.
the class IntraBrokerDiskUsageDistributionGoal method rebalanceBySwappingLoadOut.
/**
 * Try to balance the overloaded disk by swapping its replicas with replicas from other disks of the same broker.
 *
 * <p>Candidate disks are the alive disks of the broker whose utilization percentage is below the broker's balance
 * upper threshold; they are tried in ascending order of utilization. A candidate is put back into the queue only
 * if a swap with it actually succeeded and it is still below the upper threshold. A candidate that yielded no swap
 * is dropped, so that the loop moves on to the next candidate instead of retrying the same disk until the per-disk
 * swap timeout expires.
 *
 * <p>The method returns as soon as the utilization of {@code disk} drops below the upper threshold, when the
 * candidates are exhausted, or when {@code remainingPerDiskSwapTimeMs} reports no time left.
 *
 * @param disk The disk to balance.
 * @param clusterModel The current cluster model.
 * @param optimizedGoals Optimized goals.
 * @param optimizationOptions Options to take into account during optimization -- e.g. excluded topics.
 *                            Not read by this method.
 */
private void rebalanceBySwappingLoadOut(Disk disk, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
  long swapStartTimeMs = System.currentTimeMillis();
  Broker broker = disk.broker();
  PriorityQueue<Disk> candidateDiskPQ = new PriorityQueue<>(Comparator.comparingDouble(GoalUtils::diskUtilizationPercentage));
  for (Disk candidateDisk : broker.disks()) {
    // Get candidate disk on broker to try to swap replica with -- sorted in the order of trial (ascending load).
    if (candidateDisk.isAlive() && diskUtilizationPercentage(candidateDisk) < _balanceUpperThresholdByBroker.get(broker)) {
      candidateDiskPQ.add(candidateDisk);
    }
  }
  while (!candidateDiskPQ.isEmpty()) {
    Disk candidateDisk = candidateDiskPQ.poll();
    // Tracks whether this round changed anything; an unchanged candidate would fail again in exactly the same way.
    boolean swapped = false;
    for (Replica sourceReplica : disk.trackedSortedReplicas(replicaSortName(this, true, false)).sortedReplicas(false)) {
      // Try swapping the source with the candidate replicas. Get the swapped in replica if successful, null otherwise.
      Replica swappedIn = maybeSwapReplicaBetweenDisks(clusterModel, sourceReplica, candidateDisk.trackedSortedReplicas(replicaSortName(this, false, false)).sortedReplicas(false), optimizedGoals);
      if (swappedIn != null) {
        if (diskUtilizationPercentage(disk) < _balanceUpperThresholdByBroker.get(broker)) {
          // Successfully balanced this disk by swapping load out.
          return;
        }
        swapped = true;
        // The replica set of the source disk changed; restart with a fresh view of its sorted replicas.
        break;
      }
    }
    if (remainingPerDiskSwapTimeMs(swapStartTimeMs) <= 0) {
      LOG.debug("Swap load out timeout for disk {}.", disk.logDir());
      break;
    }
    // Re-enqueue only after a successful swap. Re-adding a candidate that produced no swap would make the queue
    // hand back the same (least utilized) disk again and again, starving the remaining candidates until timeout.
    if (swapped && diskUtilizationPercentage(candidateDisk) < _balanceUpperThresholdByBroker.get(broker)) {
      candidateDiskPQ.add(candidateDisk);
    }
  }
}
use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.
the class IntraBrokerDiskUsageDistributionGoal method rebalanceByMovingLoadOut.
/**
 * Attempt to relieve an overloaded disk by moving its replicas to less utilized disks of the same broker.
 *
 * <p>Destination candidates are the alive disks of the broker whose utilization percentage is below the broker's
 * average disk utilization percentage. They are tried in ascending order of utilization.
 *
 * @param disk The disk to balance.
 * @param clusterModel The current cluster model.
 * @param optimizedGoals Optimized goals.
 * @param optimizationOptions Options to take into account during optimization -- e.g. excluded topics.
 *                            Not read by this method.
 * @return {@code true} if the disk to balance is still overloaded, {@code false} otherwise.
 */
private boolean rebalanceByMovingLoadOut(Disk disk, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
  Broker owner = disk.broker();
  double averageUtilization = averageDiskUtilizationPercentage(owner);
  PriorityQueue<Disk> destinations = new PriorityQueue<>(Comparator.comparingDouble(GoalUtils::diskUtilizationPercentage));
  // Collect the destination disks; the queue hands them out least utilized first.
  for (Disk other : owner.disks()) {
    if (other.isAlive() && diskUtilizationPercentage(other) < averageUtilization) {
      destinations.add(other);
    }
  }
  while (!destinations.isEmpty()) {
    Disk destination = destinations.poll();
    Iterator<Replica> replicaIterator = disk.trackedSortedReplicas(replicaSortName(this, true, false)).sortedReplicas(true).iterator();
    while (replicaIterator.hasNext()) {
      Replica toMove = replicaIterator.next();
      Disk movedTo = maybeMoveReplicaBetweenDisks(clusterModel, toMove, Collections.singleton(destination), optimizedGoals);
      if (movedTo == null) {
        // No move was made for this replica; the disk state only needs to be re-checked after a move.
        continue;
      }
      if (diskUtilizationPercentage(disk) < _balanceUpperThresholdByBroker.get(owner)) {
        // The source disk is now below the upper threshold.
        return false;
      }
      // Drop the moved replica from the collection being iterated.
      replicaIterator.remove();
      // If the destination is now more utilized than the next candidate in the queue, put it back and
      // continue with that next candidate instead.
      if (!destinations.isEmpty() && diskUtilizationPercentage(destination) > diskUtilizationPercentage(destinations.peek())) {
        destinations.add(destination);
        break;
      }
    }
  }
  return true;
}
use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.
the class PreferredLeaderElectionGoal method optimize.
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
  sanityCheckOptimizationOptions(optimizationOptions);

  // Step 1: for every demoted broker, and for every demoted disk on a non-demoted broker, push the hosted replicas
  // towards the end of their partitions' replica lists and record the partitions whose leadership may change.
  boolean demotionPresent = false;
  Set<TopicPartition> affectedPartitions = new HashSet<>();
  for (Broker broker : clusterModel.aliveBrokers()) {
    if (broker.isDemoted()) {
      demotionPresent = true;
      for (Replica replica : broker.replicas()) {
        maybeMoveReplicaToEndOfReplicaList(replica, clusterModel);
      }
      maybeChangeLeadershipForPartition(broker.leaderReplicas(), affectedPartitions);
      continue;
    }
    for (Disk disk : broker.disks()) {
      if (disk.state() != Disk.State.DEMOTED) {
        continue;
      }
      demotionPresent = true;
      for (Replica replica : disk.replicas()) {
        maybeMoveReplicaToEndOfReplicaList(replica, clusterModel);
      }
      maybeChangeLeadershipForPartition(disk.leaderReplicas(), affectedPartitions);
    }
  }

  // Step 2: hand leadership to the first eligible replica of each relevant partition.
  // Excluded topics are ignored because this goal does not move partitions.
  boolean leadershipMoved = false;
  Set<Integer> leadershipExcludedBrokers = optimizationOptions.excludedBrokersForLeadership();
  for (List<Partition> topicPartitions : clusterModel.getPartitionsByTopic().values()) {
    for (Partition partition : topicPartitions) {
      // With a demotion in progress, only the partitions recorded in step 1 are considered.
      if (demotionPresent && !affectedPartitions.contains(partition.topicPartition())) {
        continue;
      }
      // Without any demotion, only the first replica of the partition is considered as the leader candidate.
      for (int idx = 0; idx < partition.replicas().size() && (demotionPresent || idx == 0); idx++) {
        Replica candidate = partition.replicas().get(idx);
        Broker candidateBroker = candidate.broker();
        if (!candidateBroker.isAlive()) {
          continue;
        }
        if (candidate.isCurrentOffline()) {
          LOG.warn("The preferred replica of partition {} on broker {} is offline.", partition.topicPartition(), candidateBroker);
          continue;
        }
        if (!candidate.isLeader()) {
          // Never transfer leadership to a broker excluded for leadership.
          if (leadershipExcludedBrokers.contains(candidateBroker.id())) {
            LOG.warn("Skipped leadership transfer of partition {} to broker {} because it is among brokers excluded" + " for leadership {}.", partition.topicPartition(), candidateBroker, leadershipExcludedBrokers);
            continue;
          }
          clusterModel.relocateLeadership(candidate.topicPartition(), partition.leader().broker().id(), candidateBroker.id());
          leadershipMoved = true;
        }
        if (clusterModel.demotedBrokers().contains(candidateBroker)) {
          LOG.warn("The leader of partition {} has to be on a demoted broker {} because all the alive " + "replicas are demoted.", partition.topicPartition(), candidateBroker.id());
        }
        if (candidate.disk() != null && candidate.disk().state() == Disk.State.DEMOTED) {
          LOG.warn("The leader of partition {} has to be on a demoted disk {} of broker {} because all the alive " + "replicas are demoted.", partition.topicPartition(), candidate.disk().logDir(), candidateBroker.id());
        }
        // The leader for this partition is settled; stop scanning its replicas.
        break;
      }
    }
  }

  // This goal is optimized in one pass.
  finish();
  // Report whether at least one leadership has been relocated.
  return leadershipMoved;
}
use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.
the class IntraBrokerDiskCapacityGoal method updateGoalState.
/**
 * Update goal state.
 * Sanity check: once balancing is complete, verify that no alive disk of the brokers to balance is over the
 * utilization limit, then mark the goal as finished.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If an alive disk is still over the utilization limit; the exception carries
 *                                      an under-provisioned recommendation.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
  for (Broker broker : brokersToBalance(clusterModel)) {
    for (Disk disk : broker.disks()) {
      if (!disk.isAlive() || !isUtilizationOverLimit(disk)) {
        continue;
      }
      // The utilization of this disk is over the limit: derive the capacity that would be needed from the
      // capacity threshold and fail the optimization with a recommendation.
      double capacityNeeded = disk.utilization() / _balancingConstraint.capacityThreshold(RESOURCE);
      ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
          .numDisks(1)
          .totalCapacity(capacityNeeded)
          .build();
      String message = String.format("[%s] Utilization (%.2f) for disk %s on broker %d is above capacity limit.", name(), disk.utilization(), disk, broker.id());
      throw new OptimizationFailureException(message, recommendation);
    }
  }
  finish();
}
use of com.linkedin.kafka.cruisecontrol.model.Disk in project cruise-control by linkedin.
the class PreferredLeaderElectionGoalTest method testOptimizeWithDemotedDisks.
@Test
public void testOptimizeWithDemotedDisks() {
  ClusterModel model = createClusterModel(true, true).clusterModel();

  // Demote one disk on broker 0 and one on broker 1.
  model.broker(0).disk(LOGDIR0).setState(Disk.State.DEMOTED);
  model.broker(1).disk(LOGDIR1).setState(Disk.State.DEMOTED);

  // Remember which partitions currently have their leader on a demoted disk.
  Set<TopicPartition> demotedLeaderPartitions = new HashSet<>();
  model.broker(0).disk(LOGDIR0).leaderReplicas().forEach(leader -> demotedLeaderPartitions.add(leader.topicPartition()));
  model.broker(1).disk(LOGDIR1).leaderReplicas().forEach(leader -> demotedLeaderPartitions.add(leader.topicPartition()));

  // Snapshot of the leader broker of every partition before running the goal.
  Map<TopicPartition, Integer> leaderBrokerBefore = new HashMap<>();
  model.brokers().forEach(broker -> broker.leaderReplicas().forEach(leader -> leaderBrokerBefore.put(leader.topicPartition(), broker.id())));

  PreferredLeaderElectionGoal goal = new PreferredLeaderElectionGoal(false, false, null);
  // Before the optimization, goals are expected to be undecided wrt their provision status.
  assertEquals(ProvisionStatus.UNDECIDED, goal.provisionResponse().status());
  OptimizationOptions options = new OptimizationOptions(Collections.emptySet(), Collections.emptySet(), Collections.emptySet());
  goal.optimize(model, Collections.emptySet(), options);
  // After the optimization, PreferredLeaderElectionGoal is expected to be undecided wrt its provision status.
  assertEquals(ProvisionStatus.UNDECIDED, goal.provisionResponse().status());

  for (String topic : Arrays.asList(TOPIC0, TOPIC1, TOPIC2)) {
    for (int partitionId = 0; partitionId < 3; partitionId++) {
      TopicPartition tp = new TopicPartition(topic, partitionId);
      if (!demotedLeaderPartitions.contains(tp)) {
        // Partitions whose leader was not on a demoted disk must keep their leader broker.
        int expectedLeaderBroker = leaderBrokerBefore.get(tp);
        assertEquals("Tp " + tp, expectedLeaderBroker, model.partition(tp).leader().broker().id());
        continue;
      }
      List<Replica> replicas = model.partition(tp).replicas();
      for (int position = 0; position < 3; position++) {
        Replica replica = replicas.get(position);
        // only the first replica should be leader.
        assertEquals(position == 0, replica.isLeader());
        boolean onDemotedDisk = model.broker(0).disk(LOGDIR0).replicas().contains(replica)
                                || model.broker(1).disk(LOGDIR1).replicas().contains(replica);
        if (onDemotedDisk) {
          // The demoted replica should be in the last position.
          assertEquals(replica.topicPartition() + " broker " + replica.broker().id(), replicas.size() - 1, position);
        }
      }
    }
  }
}
Aggregations