
Example 11 with Resource

use of com.linkedin.kafka.cruisecontrol.common.Resource in project cruise-control by linkedin.

the class ClusterModel method refreshCapacity.

private void refreshCapacity() {
    // Recompute each resource's cluster-wide capacity as the sum of the per-rack capacities.
    for (Resource r : Resource.cachedValues()) {
        double capacity = 0;
        for (Rack rack : _racksById.values()) {
            capacity += rack.capacityFor(r);
        }
        _clusterCapacity[r.id()] = capacity;
    }
}
Also used : Resource(com.linkedin.kafka.cruisecontrol.common.Resource)
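For reference, the same aggregation pattern can be written as a standalone helper: totals are kept in an array indexed by Resource.id(). The totalCapacity method and its racks parameter below are hypothetical; only Resource.cachedValues(), Resource.id() and Rack.capacityFor(Resource) are taken from the example above (a java.util.Collection import is assumed).

// Hypothetical helper illustrating the aggregation pattern used by refreshCapacity():
// per-resource totals are stored in an array indexed by Resource.id().
static double[] totalCapacity(Collection<Rack> racks) {
    double[] capacityByResourceId = new double[Resource.cachedValues().size()];
    for (Resource r : Resource.cachedValues()) {
        for (Rack rack : racks) {
            capacityByResourceId[r.id()] += rack.capacityFor(r);
        }
    }
    return capacityByResourceId;
}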

Example 12 with Resource

use of com.linkedin.kafka.cruisecontrol.common.Resource in project cruise-control by linkedin.

the class ClusterModel method sanityCheck.

/**
 * (1) Check whether each load in the cluster contains exactly the number of windows defined by the Load.
 * (2) Check whether the sums of loads at the cluster / rack / broker / replica levels are consistent with each other.
 */
public void sanityCheck() {
    // SANITY CHECK #1: Each load in the cluster must contain exactly the number of windows defined by the Load.
    Map<String, Integer> errorMsgAndNumWindows = new HashMap<>();
    int expectedNumWindows = _load.numWindows();
    // Check leadership loads.
    for (Map.Entry<Integer, Load> entry : _potentialLeadershipLoadByBrokerId.entrySet()) {
        int brokerId = entry.getKey();
        Load load = entry.getValue();
        if (load.numWindows() != expectedNumWindows && broker(brokerId).replicas().size() != 0) {
            errorMsgAndNumWindows.put("Leadership(" + brokerId + ")", load.numWindows());
        }
    }
    // Check rack loads.
    for (Rack rack : _racksById.values()) {
        if (rack.load().numWindows() != expectedNumWindows && rack.replicas().size() != 0) {
            errorMsgAndNumWindows.put("Rack(id:" + rack.id() + ")", rack.load().numWindows());
        }
        // Check the host load.
        for (Host host : rack.hosts()) {
            if (host.load().numWindows() != expectedNumWindows && host.replicas().size() != 0) {
                errorMsgAndNumWindows.put("Host(id:" + host.name() + ")", host.load().numWindows());
            }
            // Check broker loads.
            for (Broker broker : rack.brokers()) {
                if (broker.load().numWindows() != expectedNumWindows && broker.replicas().size() != 0) {
                    errorMsgAndNumWindows.put("Broker(id:" + broker.id() + ")", broker.load().numWindows());
                }
                // Check replica loads.
                for (Replica replica : broker.replicas()) {
                    if (replica.load().numWindows() != expectedNumWindows) {
                        errorMsgAndNumWindows.put("Replica(id:" + replica.topicPartition() + "-" + broker.id() + ")", replica.load().numWindows());
                    }
                }
            }
        }
    }
    StringBuilder exceptionMsg = new StringBuilder();
    for (Map.Entry<String, Integer> entry : errorMsgAndNumWindows.entrySet()) {
        exceptionMsg.append(String.format("[%s: %d]%n", entry.getKey(), entry.getValue()));
    }
    if (exceptionMsg.length() > 0) {
        throw new IllegalArgumentException("Loads must have all have " + expectedNumWindows + " windows. Following " + "loads violate this constraint with specified number of windows: " + exceptionMsg);
    }
    // SANITY CHECK #2: Sum of loads in the cluster / rack / broker / replica must be consistent with each other.
    String prologueErrorMsg = "Inconsistent load distribution.";
    // Check equality of sum of the replica load to their broker load for each resource.
    for (Broker broker : brokers()) {
        for (Resource resource : Resource.values()) {
            double sumOfReplicaUtilization = 0.0;
            for (Replica replica : broker.replicas()) {
                sumOfReplicaUtilization += replica.load().expectedUtilizationFor(resource);
            }
            if (AnalyzerUtils.compare(sumOfReplicaUtilization, broker.load().expectedUtilizationFor(resource), resource) != 0) {
                throw new IllegalArgumentException(prologueErrorMsg + " Broker utilization for " + resource + " is different " + "from the total replica utilization in the broker with id: " + broker.id() + ". Sum of the replica utilization: " + sumOfReplicaUtilization + ", broker utilization: " + broker.load().expectedUtilizationFor(resource));
            }
        }
    }
    // Check equality of sum of the broker load to their rack load for each resource.
    Map<Resource, Double> sumOfRackUtilizationByResource = new HashMap<>();
    for (Rack rack : _racksById.values()) {
        Map<Resource, Double> sumOfHostUtilizationByResource = new HashMap<>();
        for (Host host : rack.hosts()) {
            for (Resource resource : Resource.values()) {
                sumOfHostUtilizationByResource.putIfAbsent(resource, 0.0);
                double sumOfBrokerUtilization = 0.0;
                for (Broker broker : host.brokers()) {
                    sumOfBrokerUtilization += broker.load().expectedUtilizationFor(resource);
                }
                Double hostUtilization = host.load().expectedUtilizationFor(resource);
                if (AnalyzerUtils.compare(sumOfBrokerUtilization, hostUtilization, resource) != 0) {
                    throw new IllegalArgumentException(prologueErrorMsg + " Host utilization for " + resource + " is different " + "from the total broker utilization in the host : " + host.name() + ". Sum of the brokers: " + sumOfBrokerUtilization + ", host utilization: " + hostUtilization);
                }
                sumOfHostUtilizationByResource.put(resource, sumOfHostUtilizationByResource.get(resource) + hostUtilization);
            }
        }
        // Check equality of sum of the host load to the rack load for each resource.
        for (Map.Entry<Resource, Double> entry : sumOfHostUtilizationByResource.entrySet()) {
            Resource resource = entry.getKey();
            double sumOfHostsUtil = entry.getValue();
            sumOfRackUtilizationByResource.putIfAbsent(resource, 0.0);
            Double rackUtilization = rack.load().expectedUtilizationFor(resource);
            if (AnalyzerUtils.compare(rackUtilization, sumOfHostsUtil, resource) != 0) {
                throw new IllegalArgumentException(prologueErrorMsg + " Rack utilization for " + resource + " is different " + "from the total host utilization in rack" + rack.id() + " . Sum of the hosts: " + sumOfHostsUtil + ", rack utilization: " + rack.load().expectedUtilizationFor(resource));
            }
            sumOfRackUtilizationByResource.put(resource, sumOfRackUtilizationByResource.get(resource) + sumOfHostUtilizationByResource.get(resource));
        }
    }
    // Check equality of sum of the rack load to the cluster load for each resource.
    for (Map.Entry<Resource, Double> entry : sumOfRackUtilizationByResource.entrySet()) {
        Resource resource = entry.getKey();
        double sumOfRackUtil = entry.getValue();
        if (AnalyzerUtils.compare(_load.expectedUtilizationFor(resource), sumOfRackUtil, resource) != 0) {
            throw new IllegalArgumentException(prologueErrorMsg + " Cluster utilization for " + resource + " is different " + "from the total rack utilization in the cluster. Sum of the racks: " + sumOfRackUtil + ", cluster utilization: " + _load.expectedUtilizationFor(resource));
        }
    }
    // Check equality of the sum of the leadership load to the sum of the load of leader at each broker.
    for (Broker broker : brokers()) {
        double sumOfLeaderOfReplicaUtilization = 0.0;
        for (Replica replica : broker.replicas()) {
            sumOfLeaderOfReplicaUtilization += partition(replica.topicPartition()).leader().load().expectedUtilizationFor(Resource.NW_OUT);
        }
        if (AnalyzerUtils.compare(sumOfLeaderOfReplicaUtilization, _potentialLeadershipLoadByBrokerId.get(broker.id()).expectedUtilizationFor(Resource.NW_OUT), Resource.NW_OUT) != 0) {
            throw new IllegalArgumentException(prologueErrorMsg + " Leadership utilization for " + Resource.NW_OUT + " is different from the total utilization leader of replicas in the broker" + " with id: " + broker.id() + " Expected: " + sumOfLeaderOfReplicaUtilization + " Received: " + _potentialLeadershipLoadByBrokerId.get(broker.id()).expectedUtilizationFor(Resource.NW_OUT) + ".");
        }
        for (Resource resource : Resource.values()) {
            if (resource == Resource.CPU) {
                continue;
            }
            double leaderSum = broker.leaderReplicas().stream().mapToDouble(r -> r.load().expectedUtilizationFor(resource)).sum();
            double cachedLoad = broker.leadershipLoadForNwResources().expectedUtilizationFor(resource);
            if (AnalyzerUtils.compare(leaderSum, cachedLoad, resource) != 0) {
                throw new IllegalArgumentException(prologueErrorMsg + " Leadership load for resource " + resource + " is " + cachedLoad + " but recomputed sum is " + leaderSum + ".");
            }
        }
    }
}
Also used : SortedSet(java.util.SortedSet) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Gson(com.google.gson.Gson) Map(java.util.Map) AggregatedMetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues) ModelGeneration(com.linkedin.kafka.cruisecontrol.monitor.ModelGeneration) OutputStream(java.io.OutputStream) TopicPartition(org.apache.kafka.common.TopicPartition) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint) Set(java.util.Set) AnalyzerUtils(com.linkedin.kafka.cruisecontrol.analyzer.AnalyzerUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) List(java.util.List) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) TreeMap(java.util.TreeMap) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Collections(java.util.Collections) SortedMap(java.util.SortedMap)
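Throughout the check, AnalyzerUtils.compare is used instead of == so that floating-point drift between a recomputed sum and the cached utilization does not trigger a false failure. A minimal sketch of that idea follows; the helper and its epsilon parameter are hypothetical, and the real per-resource tolerance lives inside AnalyzerUtils.compare.

// Hypothetical tolerance-based comparison, shown only to illustrate why the sanity check
// does not compare doubles with ==. AnalyzerUtils.compare applies its own per-resource epsilon.
static int compareWithTolerance(double d1, double d2, double epsilon) {
    if (d1 > d2 + epsilon) {
        // d1 is significantly larger.
        return 1;
    }
    if (d1 < d2 - epsilon) {
        // d1 is significantly smaller.
        return -1;
    }
    // Equal within the tolerance.
    return 0;
}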

Example 13 with Resource

use of com.linkedin.kafka.cruisecontrol.common.Resource in project cruise-control by linkedin.

the class ClusterModelStats method getJsonStructure.

/**
 * Return an object that can be further used to encode into JSON.
 */
public Map<String, Object> getJsonStructure() {
    Map<String, Object> statMap = new HashMap<>();
    Map<String, Integer> basicMap = new HashMap<>();
    basicMap.put("brokers", numBrokers());
    basicMap.put("replicas", numReplicasInCluster());
    basicMap.put("topics", numTopics());
    // List of all statistics AVG, MAX, MIN, STD
    Map<String, Object> allStatMap = new HashMap<>();
    for (Statistic stat : Statistic.values()) {
        Map<String, Object> resourceMap = new HashMap<>();
        for (Resource resource : Resource.values()) {
            resourceMap.put(resource.resource(), resourceUtilizationStats().get(stat).get(resource));
        }
        resourceMap.put("potentialNwOut", potentialNwOutUtilizationStats().get(stat));
        resourceMap.put("replicas", replicaStats().get(stat));
        resourceMap.put("topicReplicas", topicReplicaStats().get(stat));
        allStatMap.put(stat.stat(), resourceMap);
    }
    statMap.put("metadata", basicMap);
    statMap.put("statistics", allStatMap);
    return statMap;
}
Also used : Statistic(com.linkedin.kafka.cruisecontrol.common.Statistic) HashMap(java.util.HashMap) Resource(com.linkedin.kafka.cruisecontrol.common.Resource)
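The nested map is intended to be encoded into JSON by the caller. A minimal usage sketch with Gson (already among the imports listed for Example 12); the stats variable is assumed to be an already-populated ClusterModelStats instance.

// Usage sketch: encode the statistics map to a JSON string with Gson.
// stats is assumed to be an already-populated ClusterModelStats.
Gson gson = new Gson();
String json = gson.toJson(stats.getJsonStructure());
System.out.println(json);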

Example 14 with Resource

use of com.linkedin.kafka.cruisecontrol.common.Resource in project cruise-control by linkedin.

the class Replica method makeFollower.

/**
 * (1) Remove leadership from the replica.
 * (2) Clear and get the outbound network load associated with leadership from the given replica.
 * (3) Clear and get the CPU leadership load associated with leadership from the given replica.
 *
 * @return Removed leadership load by snapshot time -- i.e. outbound network and fraction of CPU load by snapshot time.
 */
Map<Resource, double[]> makeFollower() {
    // Remove leadership from the replica.
    setLeadership(false);
    // Clear and get the outbound network load associated with leadership from the given replica.
    double[] leadershipNwOutLoad = _load.loadFor(Resource.NW_OUT).doubleArray();
    MetricValues leadershipCpuLoad = _load.loadFor(Resource.CPU);
    // Remove the outbound network leadership load from replica.
    _load.clearLoadFor(Resource.NW_OUT);
    double[] followerCpuLoad = new double[_load.numWindows()];
    double[] cpuLoadChange = new double[_load.numWindows()];
    for (int i = 0; i < leadershipCpuLoad.length(); i++) {
        double newCpuLoad = ModelUtils.getFollowerCpuUtilFromLeaderLoad(_load.loadFor(Resource.NW_IN).get(i), _load.loadFor(Resource.NW_OUT).get(i), leadershipCpuLoad.get(i));
        followerCpuLoad[i] = newCpuLoad;
        cpuLoadChange[i] = leadershipCpuLoad.get(i) - newCpuLoad;
    }
    _load.setLoadFor(Resource.CPU, followerCpuLoad);
    // Get the change of the load for upper layer.
    Map<Resource, double[]> leadershipLoad = new HashMap<>();
    leadershipLoad.put(Resource.NW_OUT, leadershipNwOutLoad);
    leadershipLoad.put(Resource.CPU, cpuLoadChange);
    // Return removed leadership load.
    return leadershipLoad;
}
Also used : HashMap(java.util.HashMap) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) AggregatedMetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues) MetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.MetricValues)
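The returned map is a bookkeeping record: the NW_OUT entry holds the per-window outbound load that was removed, and the CPU entry holds the per-window difference between the leader and follower CPU estimates. A hedged consumer-side sketch follows; makeFollower() is package-private, so the caller is assumed to live in the same model package, and the variable names are hypothetical.

// Hypothetical caller inside the model package, showing the shape of the returned map.
Map<Resource, double[]> removedLeadershipLoad = replica.makeFollower();
// Per-window outbound network load that no longer belongs to this replica.
double[] nwOutPerWindow = removedLeadershipLoad.get(Resource.NW_OUT);
// Per-window CPU load freed by dropping leadership (leader estimate minus follower estimate).
double[] cpuDeltaPerWindow = removedLeadershipLoad.get(Resource.CPU);
// These arrays are what the replica gaining leadership would need to absorb.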

Example 15 with Resource

use of com.linkedin.kafka.cruisecontrol.common.Resource in project cruise-control by linkedin.

the class LoadMonitor method populateSnapshots.

private void populateSnapshots(Cluster kafkaCluster, ClusterModel clusterModel, TopicPartition tp, ValuesAndExtrapolations valuesAndExtrapolations) {
    PartitionInfo partitionInfo = kafkaCluster.partition(tp);
    // If partition info does not exist, the topic may have been deleted.
    if (partitionInfo != null) {
        for (int index = 0; index < partitionInfo.replicas().length; index++) {
            Node replica = partitionInfo.replicas()[index];
            boolean isLeader;
            if (partitionInfo.leader() == null) {
                LOG.warn("Detected offline partition {}-{}, skipping", partitionInfo.topic(), partitionInfo.partition());
                continue;
            } else {
                isLeader = replica.id() == partitionInfo.leader().id();
            }
            String rack = getRackHandleNull(replica);
            // Note that we assume the capacity resolver can still return the broker capacity even if the broker
            // is dead. We need this to get the host resource capacity.
            Map<Resource, Double> brokerCapacity = _brokerCapacityConfigResolver.capacityForBroker(rack, replica.host(), replica.id());
            clusterModel.createReplicaHandleDeadBroker(rack, replica.id(), tp, index, isLeader, brokerCapacity);
            AggregatedMetricValues aggregatedMetricValues = valuesAndExtrapolations.metricValues();
            clusterModel.setReplicaLoad(rack, replica.id(), tp, isLeader ? aggregatedMetricValues : MonitorUtils.toFollowerMetricValues(aggregatedMetricValues), valuesAndExtrapolations.windows());
        }
    }
}
Also used : Node(org.apache.kafka.common.Node) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) PartitionInfo(org.apache.kafka.common.PartitionInfo) AggregatedMetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues)
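The capacity resolver supplies a per-broker Map<Resource, Double>. Below is an illustrative sketch of that map's shape with hypothetical numbers; cruise-control capacity configs typically express DISK in MB, network in KB/s and CPU as a percentage, but the exact values here are invented.

// Illustrative only: the shape of the map returned by the broker capacity resolver.
// The Resource keys are real; the numeric values below are hypothetical.
Map<Resource, Double> brokerCapacity = new HashMap<>();
brokerCapacity.put(Resource.CPU, 100.0);       // CPU capacity as a percentage
brokerCapacity.put(Resource.DISK, 500000.0);   // disk capacity, e.g. in MB
brokerCapacity.put(Resource.NW_IN, 50000.0);   // inbound network, e.g. in KB/s
brokerCapacity.put(Resource.NW_OUT, 50000.0);  // outbound network, e.g. in KB/s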

Aggregations

Resource (com.linkedin.kafka.cruisecontrol.common.Resource) 25
HashMap (java.util.HashMap) 9
ArrayList (java.util.ArrayList) 7
Broker (com.linkedin.kafka.cruisecontrol.model.Broker) 6
AggregatedMetricValues (com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues) 5
Statistic (com.linkedin.kafka.cruisecontrol.common.Statistic) 5
OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) 5
Replica (com.linkedin.kafka.cruisecontrol.model.Replica) 5
List (java.util.List) 5
TopicPartition (org.apache.kafka.common.TopicPartition) 5
BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint) 4
ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel) 4
Collections (java.util.Collections) 4
HashSet (java.util.HashSet) 4
Map (java.util.Map) 4
Set (java.util.Set) 4
SortedSet (java.util.SortedSet) 4
TreeSet (java.util.TreeSet) 4
Collectors (java.util.stream.Collectors) 4
ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats) 3