Use of com.linkedin.kafka.cruisecontrol.common.Resource in the cruise-control project by LinkedIn.
Method refreshCapacity of class ClusterModel.
/**
 * Recompute the cached cluster-wide capacity of every resource as the sum of the
 * capacities reported by each rack in the cluster.
 */
private void refreshCapacity() {
  for (Resource resource : Resource.cachedValues()) {
    double totalCapacity = sumRackCapacityFor(resource);
    _clusterCapacity[resource.id()] = totalCapacity;
  }
}

/**
 * Add up the capacity that each rack reports for the given resource.
 *
 * @param resource Resource whose capacity is summed.
 * @return Sum of the rack capacities for the resource; zero when there are no racks.
 */
private double sumRackCapacityFor(Resource resource) {
  double total = 0;
  for (Rack rack : _racksById.values()) {
    total += rack.capacityFor(resource);
  }
  return total;
}
Use of com.linkedin.kafka.cruisecontrol.common.Resource in the cruise-control project by LinkedIn.
Method sanityCheck of class ClusterModel.
/**
 * Verify that the load bookkeeping of this cluster model is internally consistent.
 * <ol>
 *   <li>Every leadership / rack / host / broker load that has at least one replica, and every
 *       replica load, must contain exactly the number of windows of the cluster load.</li>
 *   <li>For every resource, the utilization at each level must equal the sum of the utilization
 *       one level below it (replicas -> broker -> host -> rack -> cluster), and the cached
 *       leadership loads of each broker must match the values recomputed from its replicas.</li>
 * </ol>
 *
 * @throws IllegalArgumentException if any of the checks above fails; the message names the
 *         offending entity and the mismatching values.
 */
public void sanityCheck() {
  // SANITY CHECK #1: Each load in the cluster must contain exactly the number of windows defined by the Load.
  Map<String, Integer> errorMsgAndNumWindows = new HashMap<>();
  int expectedNumWindows = _load.numWindows();
  // Check leadership loads. Brokers without replicas are exempt from the window count check.
  for (Map.Entry<Integer, Load> entry : _potentialLeadershipLoadByBrokerId.entrySet()) {
    int brokerId = entry.getKey();
    Load load = entry.getValue();
    if (load.numWindows() != expectedNumWindows && broker(brokerId).replicas().size() != 0) {
      errorMsgAndNumWindows.put("Leadership(" + brokerId + ")", load.numWindows());
    }
  }
  for (Rack rack : _racksById.values()) {
    // Check rack loads.
    if (rack.load().numWindows() != expectedNumWindows && rack.replicas().size() != 0) {
      errorMsgAndNumWindows.put("Rack(id:" + rack.id() + ")", rack.load().numWindows());
    }
    // Check the host load.
    for (Host host : rack.hosts()) {
      if (host.load().numWindows() != expectedNumWindows && host.replicas().size() != 0) {
        errorMsgAndNumWindows.put("Host(id:" + host.name() + ")", host.load().numWindows());
      }
    }
    // Check broker loads. This walks the brokers of the rack once, rather than once per host of
    // the rack, which only repeated the very same checks.
    for (Broker broker : rack.brokers()) {
      if (broker.load().numWindows() != expectedNumWindows && broker.replicas().size() != 0) {
        errorMsgAndNumWindows.put("Broker(id:" + broker.id() + ")", broker.load().numWindows());
      }
      // Check replica loads.
      for (Replica replica : broker.replicas()) {
        if (replica.load().numWindows() != expectedNumWindows) {
          errorMsgAndNumWindows.put("Replica(id:" + replica.topicPartition() + "-" + broker.id() + ")",
                                    replica.load().numWindows());
        }
      }
    }
  }
  StringBuilder exceptionMsg = new StringBuilder();
  for (Map.Entry<String, Integer> entry : errorMsgAndNumWindows.entrySet()) {
    exceptionMsg.append(String.format("[%s: %d]%n", entry.getKey(), entry.getValue()));
  }
  if (exceptionMsg.length() > 0) {
    throw new IllegalArgumentException("Loads must have all have " + expectedNumWindows + " windows. Following "
                                       + "loads violate this constraint with specified number of windows: "
                                       + exceptionMsg);
  }

  // SANITY CHECK #2: Sum of loads in the cluster / rack / broker / replica must be consistent with each other.
  String prologueErrorMsg = "Inconsistent load distribution.";
  // Check equality of sum of the replica load to their broker load for each resource.
  for (Broker broker : brokers()) {
    for (Resource resource : Resource.cachedValues()) {
      double sumOfReplicaUtilization = 0.0;
      for (Replica replica : broker.replicas()) {
        sumOfReplicaUtilization += replica.load().expectedUtilizationFor(resource);
      }
      double brokerUtilization = broker.load().expectedUtilizationFor(resource);
      if (AnalyzerUtils.compare(sumOfReplicaUtilization, brokerUtilization, resource) != 0) {
        throw new IllegalArgumentException(prologueErrorMsg + " Broker utilization for " + resource + " is different "
                                           + "from the total replica utilization in the broker with id: " + broker.id()
                                           + ". Sum of the replica utilization: " + sumOfReplicaUtilization
                                           + ", broker utilization: " + brokerUtilization);
      }
    }
  }

  // Check equality of sum of the broker load to their host load, and of the host load to their rack load,
  // for each resource.
  Map<Resource, Double> sumOfRackUtilizationByResource = new HashMap<>();
  for (Rack rack : _racksById.values()) {
    Map<Resource, Double> sumOfHostUtilizationByResource = new HashMap<>();
    for (Host host : rack.hosts()) {
      for (Resource resource : Resource.cachedValues()) {
        double sumOfBrokerUtilization = 0.0;
        for (Broker broker : host.brokers()) {
          sumOfBrokerUtilization += broker.load().expectedUtilizationFor(resource);
        }
        double hostUtilization = host.load().expectedUtilizationFor(resource);
        if (AnalyzerUtils.compare(sumOfBrokerUtilization, hostUtilization, resource) != 0) {
          throw new IllegalArgumentException(prologueErrorMsg + " Host utilization for " + resource + " is different "
                                             + "from the total broker utilization in the host : " + host.name()
                                             + ". Sum of the brokers: " + sumOfBrokerUtilization
                                             + ", host utilization: " + hostUtilization);
        }
        // Accumulate the host utilization into the per-rack running total of this resource.
        sumOfHostUtilizationByResource.merge(resource, hostUtilization, Double::sum);
      }
    }
    // Check equality of sum of the host load to the rack load for each resource.
    for (Map.Entry<Resource, Double> entry : sumOfHostUtilizationByResource.entrySet()) {
      Resource resource = entry.getKey();
      double sumOfHostsUtil = entry.getValue();
      double rackUtilization = rack.load().expectedUtilizationFor(resource);
      if (AnalyzerUtils.compare(rackUtilization, sumOfHostsUtil, resource) != 0) {
        throw new IllegalArgumentException(prologueErrorMsg + " Rack utilization for " + resource + " is different "
                                           + "from the total host utilization in rack" + rack.id()
                                           + " . Sum of the hosts: " + sumOfHostsUtil
                                           + ", rack utilization: " + rackUtilization);
      }
      // Accumulate the rack total (as summed over its hosts) into the cluster-wide running total.
      sumOfRackUtilizationByResource.merge(resource, sumOfHostsUtil, Double::sum);
    }
  }

  // Check equality of sum of the rack load to the cluster load for each resource.
  for (Map.Entry<Resource, Double> entry : sumOfRackUtilizationByResource.entrySet()) {
    Resource resource = entry.getKey();
    double sumOfRackUtil = entry.getValue();
    double clusterUtilization = _load.expectedUtilizationFor(resource);
    if (AnalyzerUtils.compare(clusterUtilization, sumOfRackUtil, resource) != 0) {
      throw new IllegalArgumentException(prologueErrorMsg + " Cluster utilization for " + resource + " is different "
                                         + "from the total rack utilization in the cluster. Sum of the racks: "
                                         + sumOfRackUtil + ", cluster utilization: " + clusterUtilization);
    }
  }

  // Check equality of the sum of the leadership load to the sum of the load of leader at each broker.
  for (Broker broker : brokers()) {
    // Outbound network load of the leaders of all partitions that have a replica on this broker.
    double sumOfLeaderOfReplicaUtilization = 0.0;
    for (Replica replica : broker.replicas()) {
      sumOfLeaderOfReplicaUtilization +=
          partition(replica.topicPartition()).leader().load().expectedUtilizationFor(Resource.NW_OUT);
    }
    double potentialLeadershipNwOut =
        _potentialLeadershipLoadByBrokerId.get(broker.id()).expectedUtilizationFor(Resource.NW_OUT);
    if (AnalyzerUtils.compare(sumOfLeaderOfReplicaUtilization, potentialLeadershipNwOut, Resource.NW_OUT) != 0) {
      throw new IllegalArgumentException(prologueErrorMsg + " Leadership utilization for " + Resource.NW_OUT
                                         + " is different from the total utilization leader of replicas in the broker"
                                         + " with id: " + broker.id() + " Expected: " + sumOfLeaderOfReplicaUtilization
                                         + " Received: " + potentialLeadershipNwOut + ".");
    }
    // Compare the leadership load cached on the broker against the sum recomputed from its
    // leader replicas, for every resource except CPU.
    for (Resource resource : Resource.cachedValues()) {
      if (resource == Resource.CPU) {
        continue;
      }
      double leaderSum =
          broker.leaderReplicas().stream().mapToDouble(r -> r.load().expectedUtilizationFor(resource)).sum();
      double cachedLoad = broker.leadershipLoadForNwResources().expectedUtilizationFor(resource);
      if (AnalyzerUtils.compare(leaderSum, cachedLoad, resource) != 0) {
        throw new IllegalArgumentException(prologueErrorMsg + " Leadership load for resource " + resource + " is "
                                           + cachedLoad + " but recomputed sum is " + leaderSum + ".");
      }
    }
  }
}
Use of com.linkedin.kafka.cruisecontrol.common.Resource in the cruise-control project by LinkedIn.
Method getJsonStructure of class ClusterModelStats.
/**
 * Build a nested map view of these statistics that can be further encoded into JSON.
 *
 * <p>The returned map has two entries:
 * <ul>
 *   <li>{@code "metadata"}: the number of brokers, replicas and topics.</li>
 *   <li>{@code "statistics"}: for each {@link Statistic}, a map holding the utilization of each
 *       {@link Resource} plus the {@code "potentialNwOut"}, {@code "replicas"} and
 *       {@code "topicReplicas"} values of that statistic.</li>
 * </ul>
 *
 * @return An object that can be further used to encode into JSON.
 */
public Map<String, Object> getJsonStructure() {
  Map<String, Integer> basicMap = new HashMap<>();
  basicMap.put("brokers", numBrokers());
  basicMap.put("replicas", numReplicasInCluster());
  basicMap.put("topics", numTopics());

  // List of all statistics AVG, MAX, MIN, STD
  Map<String, Object> allStatMap = new HashMap<>();
  for (Statistic stat : Statistic.values()) {
    Map<String, Object> resourceMap = new HashMap<>();
    for (Resource resource : Resource.values()) {
      resourceMap.put(resource.resource(), resourceUtilizationStats().get(stat).get(resource));
    }
    resourceMap.put("potentialNwOut", potentialNwOutUtilizationStats().get(stat));
    resourceMap.put("replicas", replicaStats().get(stat));
    resourceMap.put("topicReplicas", topicReplicaStats().get(stat));
    allStatMap.put(stat.stat(), resourceMap);
  }

  Map<String, Object> statMap = new HashMap<>();
  statMap.put("metadata", basicMap);
  statMap.put("statistics", allStatMap);
  return statMap;
}
Use of com.linkedin.kafka.cruisecontrol.common.Resource in the cruise-control project by LinkedIn.
Method makeFollower of class Replica.
/**
 * (1) Remove leadership from the replica.
 * (2) Clear and get the outbound network load associated with leadership from the given replica.
 * (3) Replace the CPU load of the replica with its follower CPU load and get the difference.
 *
 * @return Removed leadership load by snapshot time: {@link Resource#NW_OUT} maps to the outbound network load
 *         captured before it was cleared, and {@link Resource#CPU} maps to the per-window CPU load reduction
 *         (leader CPU load minus the new follower CPU load).
 */
Map<Resource, double[]> makeFollower() {
  // Remove leadership from the replica.
  setLeadership(false);

  // Capture the loads of the replica as a leader before any of them is modified.
  MetricValues leaderNwInLoad = _load.loadFor(Resource.NW_IN);
  MetricValues leaderNwOutLoad = _load.loadFor(Resource.NW_OUT);
  double[] leadershipNwOutLoad = leaderNwOutLoad.doubleArray();
  MetricValues leadershipCpuLoad = _load.loadFor(Resource.CPU);

  // Derive the follower CPU load from the leader load. This must happen BEFORE the outbound network load is
  // cleared: computing it afterwards would feed the already-cleared outbound rate into the estimation instead
  // of the rate the replica actually had as a leader.
  double[] followerCpuLoad = new double[_load.numWindows()];
  double[] cpuLoadChange = new double[_load.numWindows()];
  for (int i = 0; i < leadershipCpuLoad.length(); i++) {
    double newCpuLoad = ModelUtils.getFollowerCpuUtilFromLeaderLoad(leaderNwInLoad.get(i),
                                                                   leaderNwOutLoad.get(i),
                                                                   leadershipCpuLoad.get(i));
    followerCpuLoad[i] = newCpuLoad;
    cpuLoadChange[i] = leadershipCpuLoad.get(i) - newCpuLoad;
  }

  // Remove the outbound network leadership load from replica.
  _load.clearLoadFor(Resource.NW_OUT);
  _load.setLoadFor(Resource.CPU, followerCpuLoad);

  // Get the change of the load for upper layer.
  Map<Resource, double[]> leadershipLoad = new HashMap<>();
  leadershipLoad.put(Resource.NW_OUT, leadershipNwOutLoad);
  leadershipLoad.put(Resource.CPU, cpuLoadChange);
  // Return removed leadership load.
  return leadershipLoad;
}
Use of com.linkedin.kafka.cruisecontrol.common.Resource in the cruise-control project by LinkedIn.
Method populateSnapshots of class LoadMonitor.
/**
 * Create the replicas of the given partition in the cluster model and set their load.
 *
 * <p>Nothing is populated when the partition is unknown to the Kafka cluster, or when it has no leader; in the
 * latter case a single warning is logged for the partition.
 *
 * @param kafkaCluster Kafka cluster metadata used to look up the partition.
 * @param clusterModel Cluster model to populate.
 * @param tp Topic partition whose replicas are populated.
 * @param valuesAndExtrapolations Aggregated metric values and windows of the partition; followers receive the
 *                                values converted by {@code MonitorUtils.toFollowerMetricValues}.
 */
private void populateSnapshots(Cluster kafkaCluster, ClusterModel clusterModel, TopicPartition tp, ValuesAndExtrapolations valuesAndExtrapolations) {
  PartitionInfo partitionInfo = kafkaCluster.partition(tp);
  // If partition info does not exist, the topic may have been deleted.
  if (partitionInfo == null) {
    return;
  }
  // The leader is a property of the partition, not of each replica: check it once so that an offline partition
  // produces one warning rather than one per replica.
  Node leader = partitionInfo.leader();
  if (leader == null) {
    LOG.warn("Detected offline partition {}-{}, skipping", partitionInfo.topic(), partitionInfo.partition());
    return;
  }
  Node[] replicas = partitionInfo.replicas();
  for (int index = 0; index < replicas.length; index++) {
    Node replica = replicas[index];
    boolean isLeader = replica.id() == leader.id();
    String rack = getRackHandleNull(replica);
    // Note that we assume the capacity resolver can still return the broker capacity even if the broker
    // is dead. We need this to get the host resource capacity.
    Map<Resource, Double> brokerCapacity =
        _brokerCapacityConfigResolver.capacityForBroker(rack, replica.host(), replica.id());
    clusterModel.createReplicaHandleDeadBroker(rack, replica.id(), tp, index, isLeader, brokerCapacity);
    AggregatedMetricValues aggregatedMetricValues = valuesAndExtrapolations.metricValues();
    clusterModel.setReplicaLoad(rack,
                                replica.id(),
                                tp,
                                isLeader ? aggregatedMetricValues
                                         : MonitorUtils.toFollowerMetricValues(aggregatedMetricValues),
                                valuesAndExtrapolations.windows());
  }
}
Aggregations