Use of org.apache.storm.scheduler.WorkerSlot in project storm by apache.
Class IsolationScheduler, method schedule.
// get host -> all assignable worker slots for non-blacklisted machines (assigned or not assigned)
// will then have a list of machines that need to be assigned (machine -> [topology, list of list of executors])
// match each spec to a machine (that has the right number of workers), free everything else on that machine and assign those slots (do one topology at a time)
// blacklist all machines that had production slots defined
// log isolated topologies that weren't able to get enough slots / machines
// run default scheduler on isolated topologies that didn't have enough slots + non-isolated topologies on remaining machines
// set blacklist to what it was initially
@Override
public void schedule(Topologies topologies, Cluster cluster) {
    Set<String> origBlacklist = cluster.getBlacklistedHosts();
    List<TopologyDetails> isoTopologies = isolatedTopologies(topologies.getTopologies());
    Set<String> isoIds = extractTopologyIds(isoTopologies);
    Map<String, Set<Set<ExecutorDetails>>> topologyWorkerSpecs = topologyWorkerSpecs(isoTopologies);
    Map<String, Map<Integer, Integer>> topologyMachineDistributions = topologyMachineDistributions(isoTopologies);
    Map<String, List<AssignmentInfo>> hostAssignments = hostAssignments(cluster);
    for (Map.Entry<String, List<AssignmentInfo>> entry : hostAssignments.entrySet()) {
        List<AssignmentInfo> assignments = entry.getValue();
        String topologyId = assignments.get(0).getTopologyId();
        Map<Integer, Integer> distribution = topologyMachineDistributions.get(topologyId);
        Set<Set<ExecutorDetails>> workerSpecs = topologyWorkerSpecs.get(topologyId);
        int numWorkers = assignments.size();
        if (isoIds.contains(topologyId)
                && checkAssignmentTopology(assignments, topologyId)
                && distribution.containsKey(numWorkers)
                && checkAssignmentWorkerSpecs(assignments, workerSpecs)) {
            decrementDistribution(distribution, numWorkers);
            for (AssignmentInfo ass : assignments) {
                workerSpecs.remove(ass.getExecutors());
            }
            cluster.blacklistHost(entry.getKey());
        } else {
            for (AssignmentInfo ass : assignments) {
                if (isoIds.contains(ass.getTopologyId())) {
                    cluster.freeSlot(ass.getWorkerSlot());
                }
            }
        }
    }
    Map<String, Set<WorkerSlot>> hostUsedSlots = hostToUsedSlots(cluster);
    LinkedList<HostAssignableSlots> hss = hostAssignableSlots(cluster);
    for (Map.Entry<String, Set<Set<ExecutorDetails>>> entry : topologyWorkerSpecs.entrySet()) {
        String topologyId = entry.getKey();
        Set<Set<ExecutorDetails>> executorSet = entry.getValue();
        List<Integer> workerNum = distributionToSortedAmounts(topologyMachineDistributions.get(topologyId));
        for (Integer num : workerNum) {
            HostAssignableSlots hostSlots = hss.peek();
            List<WorkerSlot> slot = hostSlots != null ? hostSlots.getWorkerSlots() : null;
            if (slot != null && slot.size() >= num) {
                hss.poll();
                cluster.freeSlots(hostUsedSlots.get(hostSlots.getHostName()));
                for (WorkerSlot tmpSlot : slot.subList(0, num)) {
                    Set<ExecutorDetails> executor = removeElemFromExecutorsSet(executorSet);
                    cluster.assign(tmpSlot, topologyId, executor);
                }
                cluster.blacklistHost(hostSlots.getHostName());
            }
        }
    }
    List<String> failedTopologyIds = extractFailedTopologyIds(topologyWorkerSpecs);
    if (failedTopologyIds.size() > 0) {
        LOG.warn("Unable to isolate topologies " + failedTopologyIds
                + ". No machine had enough worker slots to run the remaining workers for these topologies. "
                + "Clearing all other resources and will wait for enough resources for "
                + "isolated topologies before allocating any other resources.");
        // clear workers off all hosts that are not blacklisted
        Map<String, Set<WorkerSlot>> usedSlots = hostToUsedSlots(cluster);
        Set<Map.Entry<String, Set<WorkerSlot>>> entries = usedSlots.entrySet();
        for (Map.Entry<String, Set<WorkerSlot>> entry : entries) {
            if (!cluster.isBlacklistedHost(entry.getKey())) {
                cluster.freeSlots(entry.getValue());
            }
        }
    } else {
        // run default scheduler on non-isolated topologies
        Set<String> allocatedTopologies = allocatedTopologies(topologyWorkerSpecs);
        Topologies leftOverTopologies = leftoverTopologies(topologies, allocatedTopologies);
        DefaultScheduler.defaultSchedule(leftOverTopologies, cluster);
    }
    cluster.setBlacklistedHosts(origBlacklist);
}
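The method leans on a small per-slot value holder, AssignmentInfo, through getTopologyId(), getWorkerSlot() and getExecutors(). A minimal sketch of such a holder, inferred only from the getters used above (field names and constructor shape are assumptions, not the verbatim Storm class):

// Sketch of the per-slot assignment holder used by schedule() above.
// Inferred from the getters it calls; fields and constructor are assumptions.
private static class AssignmentInfo {

    private final WorkerSlot slot;
    private final String topologyId;
    private final Set<ExecutorDetails> executors;

    AssignmentInfo(WorkerSlot slot, String topologyId, Set<ExecutorDetails> executors) {
        this.slot = slot;
        this.topologyId = topologyId;
        this.executors = executors;
    }

    public WorkerSlot getWorkerSlot() {
        return slot;
    }

    public String getTopologyId() {
        return topologyId;
    }

    public Set<ExecutorDetails> getExecutors() {
        return executors;
    }
}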
Use of org.apache.storm.scheduler.WorkerSlot in project storm by apache.
Class IsolationScheduler, method hostToUsedSlots.
private Map<String, Set<WorkerSlot>> hostToUsedSlots(Cluster cluster) {
    Collection<WorkerSlot> usedSlots = cluster.getUsedSlots();
    Map<String, Set<WorkerSlot>> hostUsedSlots = new HashMap<String, Set<WorkerSlot>>();
    for (WorkerSlot slot : usedSlots) {
        String host = cluster.getHost(slot.getNodeId());
        Set<WorkerSlot> slots = hostUsedSlots.get(host);
        if (slots == null) {
            slots = new HashSet<WorkerSlot>();
            hostUsedSlots.put(host, slots);
        }
        slots.add(slot);
    }
    return hostUsedSlots;
}
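The get-then-put-if-null grouping above predates Java 8. On newer JDKs the same host-to-slots map can be built with Map.computeIfAbsent; a sketch of that alternative, not the code shipped in Storm:

// Equivalent grouping using Java 8 Map.computeIfAbsent; behavior matches the
// explicit null check in hostToUsedSlots above.
private Map<String, Set<WorkerSlot>> hostToUsedSlots(Cluster cluster) {
    Map<String, Set<WorkerSlot>> hostUsedSlots = new HashMap<>();
    for (WorkerSlot slot : cluster.getUsedSlots()) {
        String host = cluster.getHost(slot.getNodeId());
        hostUsedSlots.computeIfAbsent(host, h -> new HashSet<>()).add(slot);
    }
    return hostUsedSlots;
}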
Use of org.apache.storm.scheduler.WorkerSlot in project storm by apache.
Class IsolatedPool, method canAdd.
@Override
public boolean canAdd(TopologyDetails td) {
    // Only add topologies that are not sharing nodes with other topologies
    String topId = td.getId();
    SchedulerAssignment assignment = _cluster.getAssignmentById(topId);
    if (assignment != null) {
        for (WorkerSlot ws : assignment.getSlots()) {
            Node n = _nodeIdToNode.get(ws.getNodeId());
            if (n.getRunningTopologies().size() > 1) {
                return false;
            }
        }
    }
    return true;
}
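canAdd() answers whether every slot the topology currently holds sits on a node it has to itself, so a caller would consult it before moving the topology into the pool. A hedged sketch of that call pattern (the isolatedPool reference and the addTopology call are assumptions, not taken from the snippet):

// Hypothetical caller: only hand the topology to the isolated pool when it is
// not sharing nodes with other topologies. addTopology is assumed here.
if (isolatedPool.canAdd(td)) {
    isolatedPool.addTopology(td);
} else {
    LOG.debug("Topology {} is sharing nodes with other topologies, leaving it out of the isolated pool", td.getId());
}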
Use of org.apache.storm.scheduler.WorkerSlot in project storm by apache.
Class Node, method getAllNodesFrom.
public static Map<String, Node> getAllNodesFrom(Cluster cluster) {
    Map<String, Node> nodeIdToNode = new HashMap<>();
    for (SupervisorDetails sup : cluster.getSupervisors().values()) {
        // Node ID and supervisor ID are the same.
        String id = sup.getId();
        boolean isAlive = !cluster.isBlackListed(id);
        LOG.debug("Found a {} Node {} {}", isAlive ? "living" : "dead", id, sup.getAllPorts());
        nodeIdToNode.put(id, new Node(id, sup.getAllPorts(), isAlive));
    }
    for (Entry<String, SchedulerAssignment> entry : cluster.getAssignments().entrySet()) {
        String topId = entry.getValue().getTopologyId();
        for (WorkerSlot ws : entry.getValue().getSlots()) {
            String id = ws.getNodeId();
            Node node = nodeIdToNode.get(id);
            if (node == null) {
                LOG.debug("Found an assigned slot on a dead supervisor {}", ws);
                node = new Node(id, null, false);
                nodeIdToNode.put(id, node);
            }
            if (!node.isAlive()) {
                // The supervisor on this node is down, so add an orphaned slot to hold the unsupervised worker.
                node.addOrphanedSlot(ws);
            }
            if (node.assignInternal(ws, topId, true)) {
                LOG.warn("Bad scheduling state for topology [" + topId + "], the slot " + ws + " assigned to multiple workers, un-assigning everything...");
                node.free(ws, cluster, true);
            }
        }
    }
    return nodeIdToNode;
}
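A small usage example, assuming only the APIs visible above (getAllNodesFrom and Node.isAlive()); it simply counts how many discovered nodes are alive:

// Hypothetical usage: build the node map and count the live nodes.
Map<String, Node> nodeIdToNode = Node.getAllNodesFrom(cluster);
int aliveNodes = 0;
for (Node node : nodeIdToNode.values()) {
    if (node.isAlive()) {
        aliveNodes++;
    }
}
LOG.debug("{} of {} nodes are alive", aliveNodes, nodeIdToNode.size());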
Use of org.apache.storm.scheduler.WorkerSlot in project storm by apache.
Class RAS_Nodes, method freeSlots.
/**
 * Free the given worker slots on whichever nodes currently hold them.
 *
 * @param workerSlots the slots to free
 */
public void freeSlots(Collection<WorkerSlot> workerSlots) {
    for (RAS_Node node : nodeMap.values()) {
        for (WorkerSlot ws : node.getUsedSlots()) {
            if (workerSlots.contains(ws)) {
                LOG.debug("freeing ws {} on node {}", ws, node);
                node.free(ws);
            }
        }
    }
}
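A typical caller would collect the slots held by one topology and release them in a single call. A sketch, assuming a RAS_Nodes instance named rasNodes (getAssignmentById and getSlots appear in the snippets above):

// Hypothetical usage: free every slot currently held by one topology.
SchedulerAssignment assignment = cluster.getAssignmentById(topologyId);
if (assignment != null) {
    rasNodes.freeSlots(assignment.getSlots());
}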