Use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.
The class NodeSorter, method createClusterSummarizedResources:
private ObjectResourcesSummary createClusterSummarizedResources() {
    ObjectResourcesSummary clusterResourcesSummary = new ObjectResourcesSummary("Cluster");
    // This is the first time, so initialize the resources.
    for (Map.Entry<String, List<String>> entry : networkTopography.entrySet()) {
        String rackId = entry.getKey();
        List<String> nodeHosts = entry.getValue();
        ObjectResourcesItem rack = new ObjectResourcesItem(rackId);
        for (String nodeHost : nodeHosts) {
            for (RasNode node : hostnameToNodes(nodeHost)) {
                rack.availableResources.add(node.getTotalAvailableResources());
                rack.totalResources.add(node.getTotalAvailableResources());
            }
        }
        clusterResourcesSummary.addObjectResourcesItem(rack);
    }
    LOG.debug("Cluster Overall Avail [ {} ] Total [ {} ]",
            clusterResourcesSummary.getAvailableResourcesOverall(),
            clusterResourcesSummary.getTotalResourcesOverall());
    return clusterResourcesSummary;
}
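The method folds each node's resources into its rack's ObjectResourcesItem, then rolls the racks up into a cluster-wide summary. Below is a minimal, self-contained sketch of that two-level aggregation pattern, using simplified stand-in types rather than Storm's NormalizedResourceOffer; all names in it are hypothetical, for illustration only.

import java.util.HashMap;
import java.util.Map;

// Simplified stand-in for Storm's NormalizedResourceOffer (hypothetical).
public final class RackRollupSketch {
    static final class Tally {
        double cpu;
        double memMb;
        void add(Tally other) {
            cpu += other.cpu;
            memMb += other.memMb;
        }
    }

    public static void main(String[] args) {
        // rackId -> hostId -> node resources, mirroring networkTopography + hostnameToNodes.
        Map<String, Map<String, Tally>> topo = new HashMap<>();
        Tally nodeA = new Tally();
        nodeA.cpu = 400.0;      // Storm expresses CPU as a percentage, so 400 = 4 cores
        nodeA.memMb = 8192.0;
        topo.computeIfAbsent("rack-1", k -> new HashMap<>()).put("host-a", nodeA);

        Tally cluster = new Tally();
        topo.forEach((rackId, hosts) -> {
            Tally rack = new Tally();
            hosts.values().forEach(rack::add);   // per-rack roll-up
            cluster.add(rack);                   // cluster-wide roll-up
            System.out.printf("%s: cpu=%.1f memMb=%.1f%n", rackId, rack.cpu, rack.memMb);
        });
        System.out.printf("Cluster: cpu=%.1f memMb=%.1f%n", cluster.cpu, cluster.memMb);
    }
}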
Use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.
The class BaseResourceAwareStrategy, method logClusterInfo:
/**
 * Log cluster, rack, and node details for debugging.
 */
private void logClusterInfo() {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Cluster:");
        for (Map.Entry<String, List<String>> clusterEntry : networkTopography.entrySet()) {
            String rackId = clusterEntry.getKey();
            LOG.debug("Rack: {}", rackId);
            for (String nodeHostname : clusterEntry.getValue()) {
                for (RasNode node : hostnameToNodes(nodeHostname)) {
                    LOG.debug("-> Node: {} {}", node.getHostname(), node.getId());
                    LOG.debug("--> Avail Resources: {Mem {}, CPU {} Slots: {}}",
                            node.getAvailableMemoryResources(),
                            node.getAvailableCpuResources(),
                            node.totalSlotsFree());
                    LOG.debug("--> Total Resources: {Mem {}, CPU {} Slots: {}}",
                            node.getTotalMemoryResources(),
                            node.getTotalCpuResources(),
                            node.totalSlots());
                }
            }
        }
    }
}
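Note the LOG.isDebugEnabled() guard around the whole block: SLF4J-style parameterized messages already defer string formatting, but the guard additionally skips the nested walk over every rack, host, and node when debug logging is off, which matters on large clusters.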
Use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.
The class ConstraintSolverStrategy, method isExecAssignmentToWorkerValid:
/**
 * Check whether any constraints would be violated if exec were scheduled on worker.
 *
 * @return true if scheduling exec on worker violates no constraints, false otherwise.
 */
@Override
protected boolean isExecAssignmentToWorkerValid(ExecutorDetails exec, WorkerSlot worker) {
    if (!super.isExecAssignmentToWorkerValid(exec, worker)) {
        return false;
    }
    // Check whether the executor may share this worker, given component exclusions.
    String execComp = execToComp.get(exec);
    Map<String, Integer> compAssignmentCnts = searcherState.getCompAssignmentCntMapForWorker(worker);
    Set<String> incompatibleComponents;
    if (compAssignmentCnts != null
            && (incompatibleComponents = constraintSolverConfig.getIncompatibleComponentSets().get(execComp)) != null
            && !incompatibleComponents.isEmpty()) {
        for (String otherComp : compAssignmentCnts.keySet()) {
            if (incompatibleComponents.contains(otherComp)) {
                LOG.debug("Topology {}, exec={} with comp={} has constraint violation with comp={} on worker={}",
                        topoName, exec, execComp, otherComp, worker);
                return false;
            }
        }
    }
    // Check whether the executor may be placed on this node, given the component's co-location limit.
    Map<String, Integer> maxNodeCoLocationCnts = constraintSolverConfig.getMaxNodeCoLocationCnts();
    if (maxNodeCoLocationCnts.containsKey(execComp)) {
        int coLocationMaxCnt = maxNodeCoLocationCnts.get(execComp);
        RasNode node = nodes.getNodeById(worker.getNodeId());
        int compCntOnNode = searcherState.getComponentCntOnNode(node, execComp);
        if (compCntOnNode >= coLocationMaxCnt) {
            LOG.debug("Topology {}, exec={} with comp={} has MaxCoLocationCnt violation on node {}, count {} >= colocation count {}",
                    topoName, exec, execComp, node.getId(), compCntOnNode, coLocationMaxCnt);
            return false;
        }
    }
    return true;
}
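Both checks are driven by ConstraintSolverConfig, which is built from the topology's configuration. As a hedged sketch, constraints along these lines could be declared at submission time; the map layout follows Storm's documented topology.ras.constraints format, but treat the key names and component names below as assumptions to verify against your Storm version.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.Config;

public class ConstraintConfigSketch {
    public static void main(String[] args) {
        Config conf = new Config();
        // Hypothetical component names; key names assumed from Storm's RAS constraint docs.
        Map<String, Object> spoutConstraints = new HashMap<>();
        spoutConstraints.put("maxNodeCoLocationCnt", 1);                       // at most one spout-0 executor per node
        spoutConstraints.put("incompatibleComponents", Arrays.asList("bolt-1")); // never share a worker with bolt-1
        Map<String, Object> constraints = new HashMap<>();
        constraints.put("spout-0", spoutConstraints);
        conf.put(Config.TOPOLOGY_RAS_CONSTRAINTS, constraints);
    }
}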
Use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.
The class ConstraintSolverStrategy, method validateSolution:
/**
 * Determine whether a scheduling is valid and all constraints are satisfied (for use in testing).
 * This is done in three steps:
 *
 * <ul>
 * <li>Check that nodeCoLocationCnt constraints are satisfied. Some components may allow only a certain
 * number of executors to exist on the same node {@link ConstraintSolverConfig#getMaxNodeCoLocationCnts()}.
 * </li>
 *
 * <li>Check that incompatibility constraints are satisfied. Incompatible components
 * {@link ConstraintSolverConfig#getIncompatibleComponentSets()} should not be put on the same worker.
 * </li>
 *
 * <li>Check that CPU and memory usage do not exceed availability on any node, and that the totals match
 * what is expected when the topology is fully scheduled.
 * </li>
 * </ul>
 *
 * @param cluster cluster on which scheduling was done.
 * @param topo TopologyDetails being scheduled.
 * @return true if the solution is valid, false otherwise.
 */
@VisibleForTesting
public static boolean validateSolution(Cluster cluster, TopologyDetails topo) {
    assert (cluster.getAssignmentById(topo.getId()) != null);
    LOG.debug("Checking for a valid scheduling for topology {}...", topo.getName());
    ConstraintSolverConfig constraintSolverConfig = new ConstraintSolverConfig(topo);
    // First, check NodeCoLocationCnt constraints.
    Map<ExecutorDetails, String> execToComp = topo.getExecutorToComponent();
    // Per-node, per-component executor counts; this is the critical count.
    Map<String, Map<String, Integer>> nodeCompMap = new HashMap<>();
    Map<WorkerSlot, RasNode> workerToNodes = new HashMap<>();
    RasNodes.getAllNodesFrom(cluster).values()
            .forEach(node -> node.getUsedSlots().forEach(workerSlot -> workerToNodes.put(workerSlot, node)));
    List<String> errors = new ArrayList<>();
    for (Map.Entry<ExecutorDetails, WorkerSlot> entry : cluster.getAssignmentById(topo.getId()).getExecutorToSlot().entrySet()) {
        ExecutorDetails exec = entry.getKey();
        String comp = execToComp.get(exec);
        WorkerSlot worker = entry.getValue();
        RasNode node = workerToNodes.get(worker);
        String nodeId = node.getId();
        if (!constraintSolverConfig.getMaxNodeCoLocationCnts().containsKey(comp)) {
            continue;
        }
        int allowedColocationMaxCnt = constraintSolverConfig.getMaxNodeCoLocationCnts().get(comp);
        Map<String, Integer> oneNodeCompMap = nodeCompMap.computeIfAbsent(nodeId, (k) -> new HashMap<>());
        oneNodeCompMap.put(comp, oneNodeCompMap.getOrDefault(comp, 0) + 1);
        if (allowedColocationMaxCnt < oneNodeCompMap.get(comp)) {
            String err = String.format("MaxNodeCoLocation: Component %s (exec=%s) on node %s, cnt %d > allowed %d",
                    comp, exec, nodeId, oneNodeCompMap.get(comp), allowedColocationMaxCnt);
            errors.add(err);
        }
    }
    // Second, check IncompatibleComponent constraints.
    Map<WorkerSlot, Set<String>> workerCompMap = new HashMap<>();
    cluster.getAssignmentById(topo.getId()).getExecutorToSlot().forEach((exec, worker) -> {
        String comp = execToComp.get(exec);
        workerCompMap.computeIfAbsent(worker, (k) -> new HashSet<>()).add(comp);
    });
    for (Map.Entry<WorkerSlot, Set<String>> entry : workerCompMap.entrySet()) {
        Set<String> comps = entry.getValue();
        for (String comp1 : comps) {
            for (String comp2 : comps) {
                if (!comp1.equals(comp2)
                        && constraintSolverConfig.getIncompatibleComponentSets().containsKey(comp1)
                        && constraintSolverConfig.getIncompatibleComponentSets().get(comp1).contains(comp2)) {
                    String err = String.format("IncompatibleComponents: %s and %s on WorkerSlot: %s",
                            comp1, comp2, entry.getKey());
                    errors.add(err);
                }
            }
        }
    }
    // Third, check resources.
    SchedulerAssignment schedulerAssignment = cluster.getAssignmentById(topo.getId());
    Map<ExecutorDetails, WorkerSlot> execToWorker = new HashMap<>();
    if (schedulerAssignment.getExecutorToSlot() != null) {
        execToWorker.putAll(schedulerAssignment.getExecutorToSlot());
    }
    Map<String, RasNode> nodes = RasNodes.getAllNodesFrom(cluster);
    Map<RasNode, Collection<ExecutorDetails>> nodeToExecs = new HashMap<>();
    for (Map.Entry<ExecutorDetails, WorkerSlot> entry : execToWorker.entrySet()) {
        ExecutorDetails exec = entry.getKey();
        WorkerSlot worker = entry.getValue();
        RasNode node = nodes.get(worker.getNodeId());
        if (node.getAvailableMemoryResources() < 0.0) {
            String err = String.format("Resource Exhausted: Found node %s with negative available memory %,.2f",
                    node.getId(), node.getAvailableMemoryResources());
            errors.add(err);
            continue;
        }
        if (node.getAvailableCpuResources() < 0.0) {
            String err = String.format("Resource Exhausted: Found node %s with negative available CPU %,.2f",
                    node.getId(), node.getAvailableCpuResources());
            errors.add(err);
            continue;
        }
        nodeToExecs.computeIfAbsent(node, (k) -> new HashSet<>()).add(exec);
    }
    for (Map.Entry<RasNode, Collection<ExecutorDetails>> entry : nodeToExecs.entrySet()) {
        RasNode node = entry.getKey();
        Collection<ExecutorDetails> execs = entry.getValue();
        double cpuUsed = 0.0;
        double memoryUsed = 0.0;
        for (ExecutorDetails exec : execs) {
            cpuUsed += topo.getTotalCpuReqTask(exec);
            memoryUsed += topo.getTotalMemReqTask(exec);
        }
        if (node.getAvailableCpuResources() != (node.getTotalCpuResources() - cpuUsed)) {
            String err = String.format("Incorrect CPU Resources: Node %s CPU available is %,.2f, expected %,.2f, "
                            + "Executors scheduled on node: %s",
                    node.getId(), node.getAvailableCpuResources(), (node.getTotalCpuResources() - cpuUsed), execs);
            errors.add(err);
        }
        if (node.getAvailableMemoryResources() != (node.getTotalMemoryResources() - memoryUsed)) {
            String err = String.format("Incorrect Memory Resources: Node %s Memory available is %,.2f, expected %,.2f, "
                            + "Executors scheduled on node: %s",
                    node.getId(), node.getAvailableMemoryResources(), (node.getTotalMemoryResources() - memoryUsed), execs);
            errors.add(err);
        }
    }
    if (!errors.isEmpty()) {
        LOG.error("Topology {} solution is invalid\n\t{}", topo.getName(), String.join("\n\t", errors));
    }
    return errors.isEmpty();
}
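Because validateSolution is @VisibleForTesting, its typical caller is a scheduler test that runs a full scheduling pass and then asserts on the result. A minimal sketch, assuming a JUnit test in which scheduler, cluster, topologies, and topo are already set up:

// Run the resource-aware scheduler, then verify every constraint holds.
scheduler.schedule(topologies, cluster);
assertTrue("Scheduling should satisfy all RAS constraints",
        ConstraintSolverStrategy.validateSolution(cluster, topo));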
Use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.
The class NodeSorterHostProximity, method createClusterSummarizedResources:
private ObjectResourcesSummary createClusterSummarizedResources() {
    ObjectResourcesSummary clusterResourcesSummary = new ObjectResourcesSummary("Cluster");
    rackIdToHosts.forEach((rackId, hostIds) -> {
        if (hostIds == null || hostIds.isEmpty()) {
            LOG.info("Ignoring Rack {} since it has no hosts", rackId);
        } else {
            ObjectResourcesItem rack = new ObjectResourcesItem(rackId);
            for (String hostId : hostIds) {
                for (RasNode node : hostnameToNodes(hostId)) {
                    rack.availableResources.add(node.getTotalAvailableResources());
                    rack.totalResources.add(node.getTotalResources());
                }
            }
            clusterResourcesSummary.addObjectResourcesItem(rack);
        }
    });
    LOG.debug("Cluster Overall Avail [ {} ] Total [ {} ], rackCnt={}, hostCnt={}",
            clusterResourcesSummary.getAvailableResourcesOverall(),
            clusterResourcesSummary.getTotalResourcesOverall(),
            clusterResourcesSummary.getObjectResources().size(),
            rackIdToHosts.values().stream().mapToInt(x -> x.size()).sum());
    return clusterResourcesSummary;
}
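Compared with the NodeSorter version shown earlier, this implementation seeds rack.totalResources from node.getTotalResources() rather than node.getTotalAvailableResources(), skips racks that have no hosts, and adds rack and host counts to the debug output.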