
Example 41 with ExecutorDetails

use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.

the class DefaultResourceAwareStrategy method schedule.

public SchedulingResult schedule(TopologyDetails td) {
    if (_nodes.getNodes().size() <= 0) {
        LOG.warn("No available nodes to schedule tasks on!");
        return SchedulingResult.failure(SchedulingStatus.FAIL_NOT_ENOUGH_RESOURCES, "No available nodes to schedule tasks on!");
    }
    Collection<ExecutorDetails> unassignedExecutors = new HashSet<ExecutorDetails>(_cluster.getUnassignedExecutors(td));
    Map<WorkerSlot, Collection<ExecutorDetails>> schedulerAssignmentMap = new HashMap<>();
    LOG.debug("ExecutorsNeedScheduling: {}", unassignedExecutors);
    Collection<ExecutorDetails> scheduledTasks = new ArrayList<>();
    List<Component> spouts = this.getSpouts(td);
    if (spouts.size() == 0) {
        LOG.error("Cannot find a Spout!");
        return SchedulingResult.failure(SchedulingStatus.FAIL_INVALID_TOPOLOGY, "Cannot find a Spout!");
    }
    //order executors to be scheduled
    List<ExecutorDetails> orderedExecutors = orderExecutors(td, unassignedExecutors);
    Collection<ExecutorDetails> executorsNotScheduled = new HashSet<>(unassignedExecutors);
    for (ExecutorDetails exec : orderedExecutors) {
        LOG.debug("\n\nAttempting to schedule: {} of component {}[ REQ {} ]", exec, td.getExecutorToComponent().get(exec), td.getTaskResourceReqList(exec));
        scheduleExecutor(exec, td, schedulerAssignmentMap, scheduledTasks);
    }
    executorsNotScheduled.removeAll(scheduledTasks);
    LOG.debug("/* Scheduling left over task (most likely sys tasks) */");
    // schedule left over system tasks
    for (ExecutorDetails exec : executorsNotScheduled) {
        scheduleExecutor(exec, td, schedulerAssignmentMap, scheduledTasks);
    }
    SchedulingResult result;
    executorsNotScheduled.removeAll(scheduledTasks);
    if (executorsNotScheduled.size() > 0) {
        LOG.error("Not all executors successfully scheduled: {}", executorsNotScheduled);
        schedulerAssignmentMap = null;
        result = SchedulingResult.failure(SchedulingStatus.FAIL_NOT_ENOUGH_RESOURCES, (td.getExecutors().size() - unassignedExecutors.size()) + "/" + td.getExecutors().size() + " executors scheduled");
    } else {
        LOG.debug("All resources successfully scheduled!");
        result = SchedulingResult.successWithMsg(schedulerAssignmentMap, "Fully Scheduled by DefaultResourceAwareStrategy");
    }
    if (schedulerAssignmentMap == null) {
        LOG.error("Topology {} not successfully scheduled!", td.getId());
    }
    return result;
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) HashMap(java.util.HashMap) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) ArrayList(java.util.ArrayList) Collection(java.util.Collection) Component(org.apache.storm.scheduler.resource.Component) SchedulingResult(org.apache.storm.scheduler.resource.SchedulingResult) HashSet(java.util.HashSet)
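The method above makes two passes: a priority-ordered pass over the output of orderExecutors, then a sweep over whatever was skipped (typically system tasks). Below is a minimal, self-contained sketch of that two-pass pattern; the tryAssign callback is a hypothetical stand-in for scheduleExecutor, not Storm API.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;

class TwoPassScheduleSketch {
    // tryAssign returns true if it managed to place the executor somewhere.
    static <E> Set<E> twoPassSchedule(List<E> ordered, Predicate<E> tryAssign) {
        Set<E> scheduled = new HashSet<>();
        // pass 1: executors in priority order
        for (E exec : ordered) {
            if (tryAssign.test(exec)) {
                scheduled.add(exec);
            }
        }
        // pass 2: retry anything left over (most likely system tasks)
        List<E> leftovers = new ArrayList<>(ordered);
        leftovers.removeAll(scheduled);
        for (E exec : leftovers) {
            if (tryAssign.test(exec)) {
                scheduled.add(exec);
            }
        }
        return scheduled;
    }
}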

Example 42 with ExecutorDetails

use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.

the class DefaultResourceAwareStrategy method sortNodes.

/**
     * Sort the given nodes.
     *
     * @param availNodes            a list of all the nodes we want to sort
     * @param rackId                the id of the rack that availNodes are a part of
     * @param topoId                the topology that we are trying to schedule
     * @param scheduleAssignmentMap calculated assignments so far
     * @return a sorted list of nodes
     * <p>
     * Nodes are sorted by two criteria. 1) the number of executors of the topology already on the node, in descending order.
     * Sorting on criterion 1 means we prefer to schedule the rest of a topology on the same nodes as its existing executors.
     * 2) the subordinate/subservient resource availability percentage of a node, in descending order.
     * We calculate the resource availability percentage by dividing the resource availability on the node by the resource availability of the entire rack.
     * With this calculation, nodes that have exhausted, or have little of, one of the resources will rank after nodes with more balanced resource availability.
     * So we are less likely to pick a node that has a lot of one resource but little of another.
     */
private TreeSet<ObjectResources> sortNodes(List<RAS_Node> availNodes, String rackId, final String topoId, final Map<WorkerSlot, Collection<ExecutorDetails>> scheduleAssignmentMap) {
    AllResources allResources = new AllResources("RACK");
    List<ObjectResources> nodes = allResources.objectResources;
    final Map<String, String> nodeIdToRackId = new HashMap<String, String>();
    for (RAS_Node ras_node : availNodes) {
        String nodeId = ras_node.getId();
        ObjectResources node = new ObjectResources(nodeId);
        double availMem = ras_node.getAvailableMemoryResources();
        double availCpu = ras_node.getAvailableCpuResources();
        int freeSlots = ras_node.totalSlotsFree();
        double totalMem = ras_node.getTotalMemoryResources();
        double totalCpu = ras_node.getTotalCpuResources();
        int totalSlots = ras_node.totalSlots();
        node.availMem = availMem;
        node.totalMem = totalMem;
        node.availCpu = availCpu;
        node.totalCpu = totalCpu;
        nodes.add(node);
        allResources.availMemResourcesOverall += availMem;
        allResources.availCpuResourcesOverall += availCpu;
        allResources.totalMemResourcesOverall += totalMem;
        allResources.totalCpuResourcesOverall += totalCpu;
    }
    LOG.debug("Rack {}: Overall Avail [ CPU {} MEM {} ] Total [ CPU {} MEM {} ]", rackId, allResources.availCpuResourcesOverall, allResources.availMemResourcesOverall, allResources.totalCpuResourcesOverall, allResources.totalMemResourcesOverall);
    return sortObjectResources(allResources, new ExistingScheduleFunc() {

        @Override
        public int getNumExistingSchedule(String objectId) {
            //Get execs already assigned in rack
            Collection<ExecutorDetails> execs = new LinkedList<ExecutorDetails>();
            if (_cluster.getAssignmentById(topoId) != null) {
                for (Map.Entry<ExecutorDetails, WorkerSlot> entry : _cluster.getAssignmentById(topoId).getExecutorToSlot().entrySet()) {
                    WorkerSlot workerSlot = entry.getValue();
                    ExecutorDetails exec = entry.getKey();
                    if (workerSlot.getNodeId().equals(objectId)) {
                        execs.add(exec);
                    }
                }
            }
            // get execs already scheduled in the current scheduling
            for (Map.Entry<WorkerSlot, Collection<ExecutorDetails>> entry : scheduleAssignmentMap.entrySet()) {
                WorkerSlot workerSlot = entry.getKey();
                if (workerSlot.getNodeId().equals(objectId)) {
                    execs.addAll(entry.getValue());
                }
            }
            return execs.size();
        }
    });
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) HashMap(java.util.HashMap) RAS_Node(org.apache.storm.scheduler.resource.RAS_Node) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) Collection(java.util.Collection)
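The "subordinate resource" ranking described in the javadoc can be expressed as a comparator. A minimal sketch, reusing the ObjectResources and AllResources field names from the example above; this comparator is illustrative, not the actual sortObjectResources implementation.

import java.util.Comparator;

// Rank nodes by the smaller ("subordinate") of their per-resource availability
// fractions within the rack, in descending order, so nodes with balanced
// availability sort first.
private Comparator<ObjectResources> bySubordinateResource(final AllResources rack) {
    return Comparator.comparingDouble((ObjectResources node) -> {
        double memPct = rack.availMemResourcesOverall > 0 ? node.availMem / rack.availMemResourcesOverall : 0.0;
        double cpuPct = rack.availCpuResourcesOverall > 0 ? node.availCpu / rack.availCpuResourcesOverall : 0.0;
        // the smaller percentage dominates the ranking, so nodes low on
        // either resource sort after balanced ones
        return Math.min(memPct, cpuPct);
    }).reversed();
}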

Example 43 with ExecutorDetails

use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.

the class DefaultResourceAwareStrategy method scheduleExecutor.

/**
     * Schedule executor exec from topology td
     *
     * @param exec the executor to schedule
     * @param td the topology executor exec is a part of
     * @param schedulerAssignmentMap the assignments already calculated
     * @param scheduledTasks executors that have been scheduled
     */
private void scheduleExecutor(ExecutorDetails exec, TopologyDetails td, Map<WorkerSlot, Collection<ExecutorDetails>> schedulerAssignmentMap, Collection<ExecutorDetails> scheduledTasks) {
    WorkerSlot targetSlot = this.findWorkerForExec(exec, td, schedulerAssignmentMap);
    if (targetSlot != null) {
        RAS_Node targetNode = this.idToNode(targetSlot.getNodeId());
        if (!schedulerAssignmentMap.containsKey(targetSlot)) {
            schedulerAssignmentMap.put(targetSlot, new LinkedList<ExecutorDetails>());
        }
        schedulerAssignmentMap.get(targetSlot).add(exec);
        targetNode.consumeResourcesforTask(exec, td);
        scheduledTasks.add(exec);
        LOG.debug("TASK {} assigned to Node: {} avail [ mem: {} cpu: {} ] total [ mem: {} cpu: {} ] on slot: {} on Rack: {}", exec, targetNode.getHostname(), targetNode.getAvailableMemoryResources(), targetNode.getAvailableCpuResources(), targetNode.getTotalMemoryResources(), targetNode.getTotalCpuResources(), targetSlot, nodeToRack(targetNode));
    } else {
        LOG.error("Not Enough Resources to schedule Task {}", exec);
    }
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) RAS_Node(org.apache.storm.scheduler.resource.RAS_Node)
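One behavior-preserving simplification worth noting: the containsKey/put/get sequence on schedulerAssignmentMap collapses into a single computeIfAbsent call on Java 8+.

// equivalent to the three-line insertion in the method above
schedulerAssignmentMap.computeIfAbsent(targetSlot, slot -> new LinkedList<ExecutorDetails>()).add(exec);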

Example 44 with ExecutorDetails

use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.

the class IsolationScheduler method schedule.

// get host -> all assignable worker slots for non-blacklisted machines (assigned or not assigned)
// will then have a list of machines that need to be assigned (machine -> [topology, list of list of executors])
// match each spec to a machine (one that has the right number of workers), free everything else on that machine and assign those slots (do one topology at a time)
// blacklist all machines that had production slots defined
// log isolated topologies that weren't able to get enough slots / machines
// run default scheduler on isolated topologies that didn't have enough slots + non-isolated topologies on remaining machines
// set blacklist to what it was initially
@Override
public void schedule(Topologies topologies, Cluster cluster) {
    Set<String> origBlacklist = cluster.getBlacklistedHosts();
    List<TopologyDetails> isoTopologies = isolatedTopologies(topologies.getTopologies());
    Set<String> isoIds = extractTopologyIds(isoTopologies);
    Map<String, Set<Set<ExecutorDetails>>> topologyWorkerSpecs = topologyWorkerSpecs(isoTopologies);
    Map<String, Map<Integer, Integer>> topologyMachineDistributions = topologyMachineDistributions(isoTopologies);
    Map<String, List<AssignmentInfo>> hostAssignments = hostAssignments(cluster);
    for (Map.Entry<String, List<AssignmentInfo>> entry : hostAssignments.entrySet()) {
        List<AssignmentInfo> assignments = entry.getValue();
        String topologyId = assignments.get(0).getTopologyId();
        Map<Integer, Integer> distribution = topologyMachineDistributions.get(topologyId);
        Set<Set<ExecutorDetails>> workerSpecs = topologyWorkerSpecs.get(topologyId);
        int numWorkers = assignments.size();
        if (isoIds.contains(topologyId) && checkAssignmentTopology(assignments, topologyId) && distribution.containsKey(numWorkers) && checkAssignmentWorkerSpecs(assignments, workerSpecs)) {
            decrementDistribution(distribution, numWorkers);
            for (AssignmentInfo ass : assignments) {
                workerSpecs.remove(ass.getExecutors());
            }
            cluster.blacklistHost(entry.getKey());
        } else {
            for (AssignmentInfo ass : assignments) {
                if (isoIds.contains(ass.getTopologyId())) {
                    cluster.freeSlot(ass.getWorkerSlot());
                }
            }
        }
    }
    Map<String, Set<WorkerSlot>> hostUsedSlots = hostToUsedSlots(cluster);
    LinkedList<HostAssignableSlots> hss = hostAssignableSlots(cluster);
    for (Map.Entry<String, Set<Set<ExecutorDetails>>> entry : topologyWorkerSpecs.entrySet()) {
        String topologyId = entry.getKey();
        Set<Set<ExecutorDetails>> executorSet = entry.getValue();
        List<Integer> workerNum = distributionToSortedAmounts(topologyMachineDistributions.get(topologyId));
        for (Integer num : workerNum) {
            HostAssignableSlots hostSlots = hss.peek();
            List<WorkerSlot> slot = hostSlots != null ? hostSlots.getWorkerSlots() : null;
            if (slot != null && slot.size() >= num) {
                hss.poll();
                cluster.freeSlots(hostUsedSlots.get(hostSlots.getHostName()));
                for (WorkerSlot tmpSlot : slot.subList(0, num)) {
                    Set<ExecutorDetails> executor = removeElemFromExecutorsSet(executorSet);
                    cluster.assign(tmpSlot, topologyId, executor);
                }
                cluster.blacklistHost(hostSlots.getHostName());
            }
        }
    }
    List<String> failedTopologyIds = extractFailedTopologyIds(topologyWorkerSpecs);
    if (failedTopologyIds.size() > 0) {
        LOG.warn("Unable to isolate topologies " + failedTopologyIds + ". No machine had enough worker slots to run the remaining workers for these topologies. " + "Clearing all other resources and will wait for enough resources for " + "isolated topologies before allocating any other resources.");
        // clear workers off all hosts that are not blacklisted
        Map<String, Set<WorkerSlot>> usedSlots = hostToUsedSlots(cluster);
        Set<Map.Entry<String, Set<WorkerSlot>>> entries = usedSlots.entrySet();
        for (Map.Entry<String, Set<WorkerSlot>> entry : entries) {
            if (!cluster.isBlacklistedHost(entry.getKey())) {
                cluster.freeSlots(entry.getValue());
            }
        }
    } else {
        // run default scheduler on non-isolated topologies
        Set<String> allocatedTopologies = allocatedTopologies(topologyWorkerSpecs);
        Topologies leftOverTopologies = leftoverTopologies(topologies, allocatedTopologies);
        DefaultScheduler.defaultSchedule(leftOverTopologies, cluster);
    }
    cluster.setBlacklistedHosts(origBlacklist);
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) Set(java.util.Set) HashSet(java.util.HashSet) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) Topologies(org.apache.storm.scheduler.Topologies) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Map(java.util.Map)
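The loop above relies on decrementDistribution to maintain the map of (workers per machine -> machines still needed). A plausible sketch of that bookkeeping follows; it is an assumption about the helper's behavior, not the actual Storm implementation.

// distribution maps a worker count to how many machines should run exactly
// that many workers for a topology; once a matching host is found, its
// bucket shrinks, and empty buckets are removed.
static void decrementDistribution(Map<Integer, Integer> distribution, int numWorkers) {
    Integer remaining = distribution.get(numWorkers);
    if (remaining == null) {
        return; // no machines of this size requested
    }
    if (remaining <= 1) {
        distribution.remove(numWorkers); // last machine of this size is satisfied
    } else {
        distribution.put(numWorkers, remaining - 1);
    }
}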

Example 45 with ExecutorDetails

use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.

the class RAS_Node method getMemoryUsedByWorker.

/**
     * Get the amount of memory used by a worker slot.
     */
public double getMemoryUsedByWorker(WorkerSlot ws) {
    TopologyDetails topo = findTopologyUsingWorker(ws);
    if (topo == null) {
        return 0.0;
    }
    Collection<ExecutorDetails> execs = getExecutors(ws, _cluster);
    double totalMemoryUsed = 0.0;
    for (ExecutorDetails exec : execs) {
        totalMemoryUsed += topo.getTotalMemReqTask(exec);
    }
    return totalMemoryUsed;
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) TopologyDetails(org.apache.storm.scheduler.TopologyDetails)
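A hypothetical usage sketch: summing getMemoryUsedByWorker across a node's occupied slots gives the total memory committed on that node. The getUsedSlots() accessor is an assumption about the RAS_Node API; treat this helper as illustrative.

// illustrative aggregation over one node's occupied slots; getUsedSlots()
// is assumed, not a verified RAS_Node signature
public double getMemoryUsedOnNode(RAS_Node node) {
    double total = 0.0;
    for (WorkerSlot ws : node.getUsedSlots()) {
        total += node.getMemoryUsedByWorker(ws);
    }
    return total;
}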

Aggregations

ExecutorDetails (org.apache.storm.scheduler.ExecutorDetails): 72
HashMap (java.util.HashMap): 50
TopologyDetails (org.apache.storm.scheduler.TopologyDetails): 42
WorkerSlot (org.apache.storm.scheduler.WorkerSlot): 41
SchedulerAssignment (org.apache.storm.scheduler.SchedulerAssignment): 36
ArrayList (java.util.ArrayList): 35
Map (java.util.Map): 34
Cluster (org.apache.storm.scheduler.Cluster): 31
Config (org.apache.storm.Config): 29
HashSet (java.util.HashSet): 28
List (java.util.List): 28
SupervisorDetails (org.apache.storm.scheduler.SupervisorDetails): 28
Topologies (org.apache.storm.scheduler.Topologies): 23
LinkedList (java.util.LinkedList): 21
INimbus (org.apache.storm.scheduler.INimbus): 21
Collection (java.util.Collection): 20
StormMetricsRegistry (org.apache.storm.metric.StormMetricsRegistry): 19
StormTopology (org.apache.storm.generated.StormTopology): 18
TestUtilsForResourceAwareScheduler (org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler): 18
ResourceMetrics (org.apache.storm.scheduler.resource.normalization.ResourceMetrics): 18