Search in sources :

Example 1 with RasNode

use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.

the class NodeSorter method createClusterSummarizedResources.

/**
 * Builds a cluster-wide resource summary containing one {@link ObjectResourcesItem} per rack
 * listed in {@code networkTopography}.
 *
 * <p>For every node found on a rack's hosts, the node's currently available resources are added
 * to the rack's available figure and the node's total resources are added to the rack's total
 * figure.
 *
 * @return the summary holding one item per rack.
 */
private ObjectResourcesSummary createClusterSummarizedResources() {
    ObjectResourcesSummary clusterResourcesSummary = new ObjectResourcesSummary("Cluster");
    // This is the first time so initialize the resources.
    for (Map.Entry<String, List<String>> entry : networkTopography.entrySet()) {
        String rackId = entry.getKey();
        List<String> nodeHosts = entry.getValue();
        ObjectResourcesItem rack = new ObjectResourcesItem(rackId);
        for (String nodeHost : nodeHosts) {
            for (RasNode node : hostnameToNodes(nodeHost)) {
                rack.availableResources.add(node.getTotalAvailableResources());
                // Totals must come from the node's total resources, not its available resources;
                // otherwise the rack total always equals the rack's available amount.
                rack.totalResources.add(node.getTotalResources());
            }
        }
        clusterResourcesSummary.addObjectResourcesItem(rack);
    }
    LOG.debug("Cluster Overall Avail [ {} ] Total [ {} ]", clusterResourcesSummary.getAvailableResourcesOverall(), clusterResourcesSummary.getTotalResourcesOverall());
    return clusterResourcesSummary;
}
Also used : ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) RasNode(org.apache.storm.scheduler.resource.RasNode) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with RasNode

use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.

the class BaseResourceAwareStrategy method logClusterInfo.

/**
 * Writes the rack / node layout of the cluster to the debug log.
 *
 * <p>For each rack in {@code networkTopography} the rack id is logged, followed by every node
 * resolved from the rack's hostnames together with its available and total memory, CPU and
 * slot figures. Nothing is done when debug logging is disabled.
 */
private void logClusterInfo() {
    // Skip all the lookups when nobody will see the output.
    if (!LOG.isDebugEnabled()) {
        return;
    }
    LOG.debug("Cluster:");
    for (Map.Entry<String, List<String>> rackEntry : networkTopography.entrySet()) {
        LOG.debug("Rack: {}", rackEntry.getKey());
        List<String> hostnames = rackEntry.getValue();
        for (String hostname : hostnames) {
            for (RasNode rasNode : hostnameToNodes(hostname)) {
                LOG.debug("-> Node: {} {}", rasNode.getHostname(), rasNode.getId());
                LOG.debug("--> Avail Resources: {Mem {}, CPU {} Slots: {}}",
                    rasNode.getAvailableMemoryResources(),
                    rasNode.getAvailableCpuResources(),
                    rasNode.totalSlotsFree());
                LOG.debug("--> Total Resources: {Mem {}, CPU {} Slots: {}}",
                    rasNode.getTotalMemoryResources(),
                    rasNode.getTotalCpuResources(),
                    rasNode.totalSlots());
            }
        }
    }
}
Also used : RasNode(org.apache.storm.scheduler.resource.RasNode) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with RasNode

use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.

the class ConstraintSolverStrategy method isExecAssignmentToWorkerValid.

/**
 * Decides whether placing {@code exec} on {@code worker} keeps every constraint intact.
 *
 * <p>The parent class check runs first. After that two constraint kinds are examined: component
 * exclusions on the worker, and the maximum number of executors of the same component allowed
 * on the worker's node.
 *
 * @param exec the executor being considered for placement.
 * @param worker the candidate worker slot.
 * @return true if scheduling exec on worker does not violate any constraints, false if it does
 */
@Override
protected boolean isExecAssignmentToWorkerValid(ExecutorDetails exec, WorkerSlot worker) {
    if (!super.isExecAssignmentToWorkerValid(exec, worker)) {
        return false;
    }
    String execComp = execToComp.get(exec);

    // Exclusion check: none of the components already counted on this worker may appear in the
    // set of components declared incompatible with the executor's component.
    Map<String, Integer> compsOnWorker = searcherState.getCompAssignmentCntMapForWorker(worker);
    if (compsOnWorker != null) {
        Set<String> excludedComps = constraintSolverConfig.getIncompatibleComponentSets().get(execComp);
        if (excludedComps != null && !excludedComps.isEmpty()) {
            for (String otherComp : compsOnWorker.keySet()) {
                if (excludedComps.contains(otherComp)) {
                    LOG.debug("Topology {}, exec={} with comp={} has constraint violation with comp={} on worker={}", topoName, exec, execComp, otherComp, worker);
                    return false;
                }
            }
        }
    }

    // Co-location check: only components that have a configured limit are restricted.
    Map<String, Integer> coLocationLimits = constraintSolverConfig.getMaxNodeCoLocationCnts();
    if (!coLocationLimits.containsKey(execComp)) {
        return true;
    }
    int coLocationLimit = coLocationLimits.get(execComp);
    RasNode targetNode = nodes.getNodeById(worker.getNodeId());
    int alreadyOnNode = searcherState.getComponentCntOnNode(targetNode, execComp);
    if (alreadyOnNode < coLocationLimit) {
        return true;
    }
    LOG.debug("Topology {}, exec={} with comp={} has MaxCoLocationCnt violation on node {}, count {} >= colocation count {}", topoName, exec, execComp, targetNode.getId(), alreadyOnNode, coLocationLimit);
    return false;
}
Also used : RasNode(org.apache.storm.scheduler.resource.RasNode)

Example 4 with RasNode

use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.

the class ConstraintSolverStrategy method validateSolution.

/**
 * Verifies that a finished scheduling satisfies every constraint of the topology (for use in testing).
 * The verification runs in three passes:
 *
 * <ol>
 * <li>
 * Node co-location limits: some components allow only a certain number of executors on the same
 * node, see {@link ConstraintSolverConfig#getMaxNodeCoLocationCnts()}.
 * </li>
 * <li>
 * Incompatibility constraints: components listed as incompatible in
 * {@link ConstraintSolverConfig#getIncompatibleComponentSets()} must not share a worker.
 * </li>
 * <li>
 * Resources: no node may report negative available CPU or memory, and the available amount on
 * each node must equal its total minus what the executors scheduled on it require.
 * </li>
 * </ol>
 *
 * <p>Every violation found is collected; if there are any, they are written to the error log
 * in a single message.
 *
 * @param cluster on which scheduling was done.
 * @param topo TopologyDetails being scheduled.
 * @return true if solution is valid, false otherwise.
 */
@VisibleForTesting
public static boolean validateSolution(Cluster cluster, TopologyDetails topo) {
    assert (cluster.getAssignmentById(topo.getId()) != null);
    LOG.debug("Checking for a valid scheduling for topology {}...", topo.getName());
    ConstraintSolverConfig constraintSolverConfig = new ConstraintSolverConfig(topo);
    Map<ExecutorDetails, String> execToComp = topo.getExecutorToComponent();
    List<String> errors = new ArrayList<>();

    // ---- Pass 1: NodeCoLocationCnt constraints ----
    // Index every used slot by the node that owns it.
    Map<WorkerSlot, RasNode> slotOwner = new HashMap<>();
    for (RasNode rasNode : RasNodes.getAllNodesFrom(cluster).values()) {
        for (WorkerSlot usedSlot : rasNode.getUsedSlots()) {
            slotOwner.put(usedSlot, rasNode);
        }
    }
    // nodeId -> (component -> executors of that component seen so far); this is the critical count
    Map<String, Map<String, Integer>> compCntPerNode = new HashMap<>();
    for (Map.Entry<ExecutorDetails, WorkerSlot> placement : cluster.getAssignmentById(topo.getId()).getExecutorToSlot().entrySet()) {
        ExecutorDetails exec = placement.getKey();
        String comp = execToComp.get(exec);
        RasNode owner = slotOwner.get(placement.getValue());
        String nodeId = owner.getId();
        if (constraintSolverConfig.getMaxNodeCoLocationCnts().containsKey(comp)) {
            int allowedCnt = constraintSolverConfig.getMaxNodeCoLocationCnts().get(comp);
            Map<String, Integer> cntsOnNode = compCntPerNode.computeIfAbsent(nodeId, k -> new HashMap<>());
            int seenCnt = cntsOnNode.merge(comp, 1, Integer::sum);
            if (seenCnt > allowedCnt) {
                errors.add(String.format("MaxNodeCoLocation: Component %s (exec=%s) on node %s, cnt %d > allowed %d", comp, exec, nodeId, seenCnt, allowedCnt));
            }
        }
    }

    // ---- Pass 2: incompatible component constraints ----
    Map<WorkerSlot, Set<String>> compsPerWorker = new HashMap<>();
    for (Map.Entry<ExecutorDetails, WorkerSlot> placement : cluster.getAssignmentById(topo.getId()).getExecutorToSlot().entrySet()) {
        String comp = execToComp.get(placement.getKey());
        compsPerWorker.computeIfAbsent(placement.getValue(), k -> new HashSet<>()).add(comp);
    }
    for (Map.Entry<WorkerSlot, Set<String>> workerComps : compsPerWorker.entrySet()) {
        Set<String> comps = workerComps.getValue();
        for (String first : comps) {
            for (String second : comps) {
                if (first.equals(second)) {
                    continue;
                }
                if (!constraintSolverConfig.getIncompatibleComponentSets().containsKey(first)) {
                    continue;
                }
                if (constraintSolverConfig.getIncompatibleComponentSets().get(first).contains(second)) {
                    errors.add(String.format("IncompatibleComponents: %s and %s on WorkerSlot: %s", first, second, workerComps.getKey()));
                }
            }
        }
    }

    // ---- Pass 3: resources ----
    SchedulerAssignment schedulerAssignment = cluster.getAssignmentById(topo.getId());
    Map<ExecutorDetails, WorkerSlot> execToWorker = new HashMap<>();
    if (schedulerAssignment.getExecutorToSlot() != null) {
        execToWorker.putAll(schedulerAssignment.getExecutorToSlot());
    }
    Map<String, RasNode> nodesById = RasNodes.getAllNodesFrom(cluster);
    Map<RasNode, Collection<ExecutorDetails>> nodeToExecs = new HashMap<>();
    for (Map.Entry<ExecutorDetails, WorkerSlot> placement : execToWorker.entrySet()) {
        RasNode node = nodesById.get(placement.getValue().getNodeId());
        // An executor on an exhausted node is reported and left out of the per-node totals below.
        if (node.getAvailableMemoryResources() < 0.0) {
            errors.add(String.format("Resource Exhausted: Found node %s with negative available memory %,.2f", node.getId(), node.getAvailableMemoryResources()));
        } else if (node.getAvailableCpuResources() < 0.0) {
            errors.add(String.format("Resource Exhausted: Found node %s with negative available CPU %,.2f", node.getId(), node.getAvailableCpuResources()));
        } else {
            nodeToExecs.computeIfAbsent(node, k -> new HashSet<>()).add(placement.getKey());
        }
    }
    for (Map.Entry<RasNode, Collection<ExecutorDetails>> nodeExecs : nodeToExecs.entrySet()) {
        RasNode node = nodeExecs.getKey();
        Collection<ExecutorDetails> execs = nodeExecs.getValue();
        double cpuUsed = 0.0;
        double memoryUsed = 0.0;
        for (ExecutorDetails exec : execs) {
            cpuUsed += topo.getTotalCpuReqTask(exec);
            memoryUsed += topo.getTotalMemReqTask(exec);
        }
        double expectedCpu = node.getTotalCpuResources() - cpuUsed;
        if (node.getAvailableCpuResources() != expectedCpu) {
            errors.add(String.format("Incorrect CPU Resources: Node %s CPU available is %,.2f, expected %,.2f, Executors scheduled on node: %s", node.getId(), node.getAvailableCpuResources(), expectedCpu, execs));
        }
        double expectedMemory = node.getTotalMemoryResources() - memoryUsed;
        if (node.getAvailableMemoryResources() != expectedMemory) {
            errors.add(String.format("Incorrect Memory Resources: Node %s Memory available is %,.2f, expected %,.2f, Executors scheduled on node: %s", node.getId(), node.getAvailableMemoryResources(), expectedMemory, execs));
        }
    }

    boolean valid = errors.isEmpty();
    if (!valid) {
        LOG.error("Topology {} solution is invalid\n\t{}", topo.getName(), String.join("\n\t", errors));
    }
    return valid;
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) Logger(org.slf4j.Logger) RasNode(org.apache.storm.scheduler.resource.RasNode) Collection(java.util.Collection) ExecSorterByConstraintSeverity(org.apache.storm.scheduler.resource.strategies.scheduling.sorter.ExecSorterByConstraintSeverity) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) HashMap(java.util.HashMap) SchedulingStatus(org.apache.storm.scheduler.resource.SchedulingStatus) RasNodes(org.apache.storm.scheduler.resource.RasNodes) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Cluster(org.apache.storm.scheduler.Cluster) SchedulingResult(org.apache.storm.scheduler.resource.SchedulingResult) List(java.util.List) VisibleForTesting(org.apache.storm.shade.com.google.common.annotations.VisibleForTesting) Map(java.util.Map) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) RasNode(org.apache.storm.scheduler.resource.RasNode) HashSet(java.util.HashSet) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) Collection(java.util.Collection) HashMap(java.util.HashMap) Map(java.util.Map) VisibleForTesting(org.apache.storm.shade.com.google.common.annotations.VisibleForTesting)

Example 5 with RasNode

use of org.apache.storm.scheduler.resource.RasNode in project storm by apache.

the class NodeSorterHostProximity method createClusterSummarizedResources.

/**
 * Builds a cluster-wide resource summary with one {@link ObjectResourcesItem} per rack in
 * {@code rackIdToHosts}.
 *
 * <p>Racks whose host collection is null or empty are logged and left out. For the remaining
 * racks, each node resolved from the rack's hosts contributes its available resources to the
 * rack's available figure and its total resources to the rack's total figure.
 *
 * @return the summary holding one item per non-empty rack.
 */
private ObjectResourcesSummary createClusterSummarizedResources() {
    ObjectResourcesSummary summary = new ObjectResourcesSummary("Cluster");
    rackIdToHosts.forEach((rackId, hostIds) -> {
        if (hostIds == null || hostIds.isEmpty()) {
            LOG.info("Ignoring Rack {} since it has no hosts", rackId);
            return;
        }
        ObjectResourcesItem rackItem = new ObjectResourcesItem(rackId);
        for (String hostId : hostIds) {
            for (RasNode rasNode : hostnameToNodes(hostId)) {
                rackItem.availableResources.add(rasNode.getTotalAvailableResources());
                rackItem.totalResources.add(rasNode.getTotalResources());
            }
        }
        summary.addObjectResourcesItem(rackItem);
    });
    // The host count covers every rack in the map, including the ones skipped above.
    int hostCnt = rackIdToHosts.values().stream().mapToInt(hosts -> hosts.size()).sum();
    LOG.debug("Cluster Overall Avail [ {} ] Total [ {} ], rackCnt={}, hostCnt={}",
        summary.getAvailableResourcesOverall(),
        summary.getTotalResourcesOverall(),
        summary.getObjectResources().size(),
        hostCnt);
    return summary;
}
Also used : ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) RasNode(org.apache.storm.scheduler.resource.RasNode)

Aggregations

RasNode (org.apache.storm.scheduler.resource.RasNode)10 ArrayList (java.util.ArrayList)7 HashMap (java.util.HashMap)7 List (java.util.List)7 Map (java.util.Map)7 LinkedList (java.util.LinkedList)5 ExecutorDetails (org.apache.storm.scheduler.ExecutorDetails)5 TopologyDetails (org.apache.storm.scheduler.TopologyDetails)5 WorkerSlot (org.apache.storm.scheduler.WorkerSlot)5 SchedulingResult (org.apache.storm.scheduler.resource.SchedulingResult)5 HashSet (java.util.HashSet)4 Config (org.apache.storm.Config)4 Cluster (org.apache.storm.scheduler.Cluster)4 SchedulerAssignment (org.apache.storm.scheduler.SchedulerAssignment)4 Logger (org.slf4j.Logger)4 LoggerFactory (org.slf4j.LoggerFactory)4 Set (java.util.Set)3 INodeSorter (org.apache.storm.scheduler.resource.strategies.scheduling.sorter.INodeSorter)3 NodeSorterHostProximity (org.apache.storm.scheduler.resource.strategies.scheduling.sorter.NodeSorterHostProximity)3 Collection (java.util.Collection)2