Use of org.apache.storm.scheduler.SchedulerAssignment in project storm by apache.
The class IsolatedPool, method canAdd.
@Override
public boolean canAdd(TopologyDetails td) {
    // Only add topologies that are not sharing nodes with other topologies
    String topId = td.getId();
    SchedulerAssignment assignment = _cluster.getAssignmentById(topId);
    if (assignment != null) {
        for (WorkerSlot ws : assignment.getSlots()) {
            Node n = _nodeIdToNode.get(ws.getNodeId());
            if (n.getRunningTopologies().size() > 1) {
                return false;
            }
        }
    }
    return true;
}
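The SchedulerAssignment usage here is two calls: Cluster.getAssignmentById to fetch the topology's current assignment, and getSlots to walk its workers. A minimal standalone sketch of the same node-sharing check, with the pool's _cluster and _nodeIdToNode fields replaced by parameters; nodeIdToTopoCount is a hypothetical precomputed map standing in for the Node lookup:

    import java.util.Map;
    import org.apache.storm.scheduler.Cluster;
    import org.apache.storm.scheduler.SchedulerAssignment;
    import org.apache.storm.scheduler.WorkerSlot;

    public final class NodeSharingCheck {
        // Returns true if the topology has no assignment yet, or runs only
        // on nodes that host no other topology. nodeIdToTopoCount is a
        // hypothetical map of node id -> number of distinct topologies on it.
        public static boolean runsOnDedicatedNodes(Cluster cluster, String topoId,
                                                   Map<String, Integer> nodeIdToTopoCount) {
            SchedulerAssignment assignment = cluster.getAssignmentById(topoId);
            if (assignment == null) {
                return true; // nothing scheduled yet, so nothing is shared
            }
            for (WorkerSlot ws : assignment.getSlots()) {
                Integer count = nodeIdToTopoCount.get(ws.getNodeId());
                if (count != null && count > 1) {
                    return false; // another topology also runs on this node
                }
            }
            return true;
        }
    }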
Use of org.apache.storm.scheduler.SchedulerAssignment in project storm by apache.
The class Node, method getAllNodesFrom.
public static Map<String, Node> getAllNodesFrom(Cluster cluster) {
    Map<String, Node> nodeIdToNode = new HashMap<>();
    for (SupervisorDetails sup : cluster.getSupervisors().values()) {
        // Node ID and supervisor ID are the same.
        String id = sup.getId();
        boolean isAlive = !cluster.isBlackListed(id);
        LOG.debug("Found a {} Node {} {}", isAlive ? "living" : "dead", id, sup.getAllPorts());
        nodeIdToNode.put(id, new Node(id, sup.getAllPorts(), isAlive));
    }
    for (Entry<String, SchedulerAssignment> entry : cluster.getAssignments().entrySet()) {
        String topId = entry.getValue().getTopologyId();
        for (WorkerSlot ws : entry.getValue().getSlots()) {
            String id = ws.getNodeId();
            Node node = nodeIdToNode.get(id);
            if (node == null) {
                LOG.debug("Found an assigned slot on a dead supervisor {}", ws);
                node = new Node(id, null, false);
                nodeIdToNode.put(id, node);
            }
            if (!node.isAlive()) {
                // The supervisor on this node is down, so add an orphaned slot
                // to hold the unsupervised worker.
                node.addOrphanedSlot(ws);
            }
            if (node.assignInternal(ws, topId, true)) {
                LOG.warn("Bad scheduling state for topology [" + topId + "], the slot " + ws
                    + " assigned to multiple workers, un-assigning everything...");
                node.free(ws, cluster, true);
            }
        }
    }
    return nodeIdToNode;
}
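The method reconciles two views of the cluster: supervisor heartbeats (first loop) and existing assignments (second loop), synthesizing a dead Node for any slot whose supervisor has vanished. A minimal usage sketch, assuming this is the multitenant Node class (org.apache.storm.scheduler.multitenant.Node) and that getId() is a plain accessor; isAlive() and getRunningTopologies() are the accessors used elsewhere on this page:

    import java.util.Map;
    import org.apache.storm.scheduler.Cluster;
    import org.apache.storm.scheduler.multitenant.Node;

    public final class DeadNodeReport {
        // Prints every node whose supervisor is gone but which still holds
        // assigned (orphaned) workers, per the reconciliation logic above.
        public static void report(Cluster cluster) {
            Map<String, Node> nodes = Node.getAllNodesFrom(cluster);
            for (Node n : nodes.values()) {
                if (!n.isAlive()) {
                    System.out.println("Dead node " + n.getId()
                        + " still hosts topologies " + n.getRunningTopologies());
                }
            }
        }
    }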
Use of org.apache.storm.scheduler.SchedulerAssignment in project storm by apache.
The class RAS_Nodes, method getAllNodesFrom.
public static Map<String, RAS_Node> getAllNodesFrom(Cluster cluster, Topologies topologies) {
    // A map of node ids to node objects
    Map<String, RAS_Node> nodeIdToNode = new HashMap<String, RAS_Node>();
    // A map of assignments organized by node with the following format:
    // {nodeId -> {topologyId -> {workerId -> {execs}}}}
    Map<String, Map<String, Map<String, Collection<ExecutorDetails>>>> assignmentRelationshipMap =
        new HashMap<String, Map<String, Map<String, Collection<ExecutorDetails>>>>();
    Map<String, Map<String, WorkerSlot>> workerIdToWorker = new HashMap<String, Map<String, WorkerSlot>>();
    for (SchedulerAssignment assignment : cluster.getAssignments().values()) {
        String topId = assignment.getTopologyId();
        for (Map.Entry<WorkerSlot, Collection<ExecutorDetails>> entry : assignment.getSlotToExecutors().entrySet()) {
            WorkerSlot slot = entry.getKey();
            String nodeId = slot.getNodeId();
            Collection<ExecutorDetails> execs = entry.getValue();
            if (!assignmentRelationshipMap.containsKey(nodeId)) {
                assignmentRelationshipMap.put(nodeId, new HashMap<String, Map<String, Collection<ExecutorDetails>>>());
                workerIdToWorker.put(nodeId, new HashMap<String, WorkerSlot>());
            }
            workerIdToWorker.get(nodeId).put(slot.getId(), slot);
            if (!assignmentRelationshipMap.get(nodeId).containsKey(topId)) {
                assignmentRelationshipMap.get(nodeId).put(topId, new HashMap<String, Collection<ExecutorDetails>>());
            }
            if (!assignmentRelationshipMap.get(nodeId).get(topId).containsKey(slot.getId())) {
                assignmentRelationshipMap.get(nodeId).get(topId).put(slot.getId(), new LinkedList<ExecutorDetails>());
            }
            assignmentRelationshipMap.get(nodeId).get(topId).get(slot.getId()).addAll(execs);
        }
    }
    for (SupervisorDetails sup : cluster.getSupervisors().values()) {
        // Initialize a worker slot for every port even if there is no assignment to it
        for (int port : sup.getAllPorts()) {
            WorkerSlot worker = new WorkerSlot(sup.getId(), port);
            if (!workerIdToWorker.containsKey(sup.getId())) {
                workerIdToWorker.put(sup.getId(), new HashMap<String, WorkerSlot>());
            }
            if (!workerIdToWorker.get(sup.getId()).containsKey(worker.getId())) {
                workerIdToWorker.get(sup.getId()).put(worker.getId(), worker);
            }
        }
        nodeIdToNode.put(sup.getId(), new RAS_Node(sup.getId(), sup, cluster, topologies,
            workerIdToWorker.get(sup.getId()), assignmentRelationshipMap.get(sup.getId())));
    }
    // Add in supervisors that might have crashed but whose workers are still alive
    for (Map.Entry<String, Map<String, Map<String, Collection<ExecutorDetails>>>> entry : assignmentRelationshipMap.entrySet()) {
        String nodeId = entry.getKey();
        Map<String, Map<String, Collection<ExecutorDetails>>> assignments = entry.getValue();
        if (!nodeIdToNode.containsKey(nodeId)) {
            LOG.info("Found an assigned slot(s) on a dead supervisor {} with assignments {}", nodeId, assignments);
            nodeIdToNode.put(nodeId, new RAS_Node(nodeId, null, cluster, topologies,
                workerIdToWorker.get(nodeId), assignments));
        }
    }
    return nodeIdToNode;
}
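The repeated containsKey/put/get dance that populates the two nested maps can be collapsed with Map.computeIfAbsent (Java 8+). A behavior-preserving sketch of the inner loop body above, using the same variables (slot, nodeId, topId, execs) and map declarations; it avoids three redundant lookups per slot:

    // Drop-in replacement for the nested-map population in the first loop.
    workerIdToWorker
        .computeIfAbsent(nodeId, k -> new HashMap<>())
        .put(slot.getId(), slot);
    assignmentRelationshipMap
        .computeIfAbsent(nodeId, k -> new HashMap<>())
        .computeIfAbsent(topId, k -> new HashMap<>())
        .computeIfAbsent(slot.getId(), k -> new LinkedList<>())
        .addAll(execs);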
Use of org.apache.storm.scheduler.SchedulerAssignment in project storm by apache.
The class Nimbus, method computeNewTopoToExecToNodePort.
private static Map<String, Map<List<Long>, List<Object>>> computeNewTopoToExecToNodePort(
        Map<String, SchedulerAssignment> schedAssignments, Map<String, Assignment> existingAssignments) {
    Map<String, Map<List<Long>, List<Object>>> ret = computeTopoToExecToNodePort(schedAssignments);
    // Print some useful information
    if (existingAssignments != null && !existingAssignments.isEmpty()) {
        for (Entry<String, Map<List<Long>, List<Object>>> entry : ret.entrySet()) {
            String topoId = entry.getKey();
            Map<List<Long>, List<Object>> execToNodePort = entry.getValue();
            Assignment assignment = existingAssignments.get(topoId);
            if (assignment == null) {
                continue;
            }
            Map<List<Long>, NodeInfo> old = assignment.get_executor_node_port();
            Map<List<Long>, List<Object>> reassigned = new HashMap<>();
            for (Entry<List<Long>, List<Object>> execAndNodePort : execToNodePort.entrySet()) {
                NodeInfo oldAssigned = old.get(execAndNodePort.getKey());
                String node = (String) execAndNodePort.getValue().get(0);
                Long port = (Long) execAndNodePort.getValue().get(1);
                if (oldAssigned == null || !oldAssigned.get_node().equals(node)
                        || !port.equals(oldAssigned.get_port_iterator().next())) {
                    reassigned.put(execAndNodePort.getKey(), execAndNodePort.getValue());
                }
            }
            if (!reassigned.isEmpty()) {
                int count = (new HashSet<>(execToNodePort.values())).size();
                Set<List<Long>> reExecs = reassigned.keySet();
                LOG.info("Reassigning {} to {} slots", topoId, count);
                LOG.info("Reassign executors: {}", reExecs);
            }
        }
    }
    return ret;
}
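The diff logic compares each executor's new [node, port] pair against its old Thrift-generated NodeInfo (org.apache.storm.generated.NodeInfo, which holds a node id and a set of ports; the first port is the slot's port). A hypothetical helper isolating that per-executor comparison, copied directly from the condition above:

    import org.apache.storm.generated.NodeInfo;

    // Returns true if an executor moved: it had no previous assignment, or
    // it is now on a different node, or on a different port than before.
    static boolean movedSlot(NodeInfo oldAssigned, String newNode, Long newPort) {
        return oldAssigned == null
            || !oldAssigned.get_node().equals(newNode)
            || !newPort.equals(oldAssigned.get_port_iterator().next());
    }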
Use of org.apache.storm.scheduler.SchedulerAssignment in project storm by apache.
The class TestResourceAwareScheduler, method testScheduleResilience.
@Test
public void testScheduleResilience() {
    INimbus iNimbus = new TestUtilsForResourceAwareScheduler.INimbusTest();
    Map<String, Number> resourceMap = new HashMap<>();
    resourceMap.put(Config.SUPERVISOR_CPU_CAPACITY, 400.0);
    resourceMap.put(Config.SUPERVISOR_MEMORY_CAPACITY_MB, 2000.0);
    Map<String, SupervisorDetails> supMap = TestUtilsForResourceAwareScheduler.genSupervisors(2, 2, resourceMap);
    TopologyBuilder builder1 = new TopologyBuilder();
    builder1.setSpout("wordSpout1", new TestWordSpout(), 3);
    StormTopology stormTopology1 = builder1.createTopology();
    Config config1 = new Config();
    config1.putAll(defaultTopologyConf);
    Map<ExecutorDetails, String> executorMap1 = TestUtilsForResourceAwareScheduler.genExecsAndComps(stormTopology1);
    TopologyDetails topology1 = new TopologyDetails("topology1", config1, stormTopology1, 3, executorMap1, 0);
    TopologyBuilder builder2 = new TopologyBuilder();
    builder2.setSpout("wordSpout2", new TestWordSpout(), 2);
    StormTopology stormTopology2 = builder2.createTopology();
    Config config2 = new Config();
    config2.putAll(defaultTopologyConf);
    // The memory requirement is large enough that two executors cannot both be fully assigned to one node
    config2.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 1280.0);
    Map<ExecutorDetails, String> executorMap2 = TestUtilsForResourceAwareScheduler.genExecsAndComps(stormTopology2);
    TopologyDetails topology2 = new TopologyDetails("topology2", config2, stormTopology2, 2, executorMap2, 0);
    // Test1: When a worker fails, RAS does not alter existing assignments on healthy workers
    Cluster cluster = new Cluster(iNimbus, supMap, new HashMap<String, SchedulerAssignmentImpl>(), config1);
    ResourceAwareScheduler rs = new ResourceAwareScheduler();
    Map<String, TopologyDetails> topoMap = new HashMap<>();
    topoMap.put(topology2.getId(), topology2);
    Topologies topologies = new Topologies(topoMap);
    rs.prepare(config1);
    rs.schedule(topologies, cluster);
    SchedulerAssignmentImpl assignment = (SchedulerAssignmentImpl) cluster.getAssignmentById(topology2.getId());
    // pick a worker to mock as failed
    WorkerSlot failedWorker = new ArrayList<WorkerSlot>(assignment.getSlots()).get(0);
    Map<ExecutorDetails, WorkerSlot> executorToSlot = assignment.getExecutorToSlot();
    List<ExecutorDetails> failedExecutors = new ArrayList<>();
    for (Map.Entry<ExecutorDetails, WorkerSlot> entry : executorToSlot.entrySet()) {
        if (entry.getValue().equals(failedWorker)) {
            failedExecutors.add(entry.getKey());
        }
    }
    for (ExecutorDetails executor : failedExecutors) {
        // remove executor details assigned to the failed worker
        executorToSlot.remove(executor);
    }
    Map<ExecutorDetails, WorkerSlot> copyOfOldMapping = new HashMap<>(executorToSlot);
    Set<ExecutorDetails> healthyExecutors = copyOfOldMapping.keySet();
    rs.schedule(topologies, cluster);
    SchedulerAssignment newAssignment = cluster.getAssignmentById(topology2.getId());
    Map<ExecutorDetails, WorkerSlot> newExecutorToSlot = newAssignment.getExecutorToSlot();
    for (ExecutorDetails executor : healthyExecutors) {
        Assert.assertEquals(copyOfOldMapping.get(executor), newExecutorToSlot.get(executor));
    }
    Assert.assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy", cluster.getStatusMap().get(topology2.getId()));
    // end of Test1
    // Test2: When a supervisor fails, RAS does not alter existing assignments
    executorToSlot = new HashMap<>();
    executorToSlot.put(new ExecutorDetails(0, 0), new WorkerSlot("sup-0", 0));
    executorToSlot.put(new ExecutorDetails(1, 1), new WorkerSlot("sup-0", 1));
    executorToSlot.put(new ExecutorDetails(2, 2), new WorkerSlot("sup-1", 1));
    Map<String, SchedulerAssignmentImpl> existingAssignments = new HashMap<>();
    assignment = new SchedulerAssignmentImpl(topology1.getId(), executorToSlot);
    existingAssignments.put(topology1.getId(), assignment);
    copyOfOldMapping = new HashMap<>(executorToSlot);
    Set<ExecutorDetails> existingExecutors = copyOfOldMapping.keySet();
    Map<String, SupervisorDetails> supMap1 = new HashMap<>(supMap);
    // mock the supervisor sup-0 as a failed supervisor
    supMap1.remove("sup-0");
    Cluster cluster1 = new Cluster(iNimbus, supMap1, existingAssignments, config1);
    topoMap = new HashMap<>();
    topoMap.put(topology1.getId(), topology1);
    topologies = new Topologies(topoMap);
    rs.schedule(topologies, cluster1);
    newAssignment = cluster1.getAssignmentById(topology1.getId());
    newExecutorToSlot = newAssignment.getExecutorToSlot();
    for (ExecutorDetails executor : existingExecutors) {
        Assert.assertEquals(copyOfOldMapping.get(executor), newExecutorToSlot.get(executor));
    }
    Assert.assertEquals("Fully Scheduled", cluster1.getStatusMap().get(topology1.getId()));
    // end of Test2
    // Test3: When a supervisor and a worker on it fail, RAS does not alter existing assignments
    executorToSlot = new HashMap<>();
    // the worker to orphan
    executorToSlot.put(new ExecutorDetails(0, 0), new WorkerSlot("sup-0", 1));
    // the worker that fails
    executorToSlot.put(new ExecutorDetails(1, 1), new WorkerSlot("sup-0", 2));
    // the healthy worker
    executorToSlot.put(new ExecutorDetails(2, 2), new WorkerSlot("sup-1", 1));
    existingAssignments = new HashMap<>();
    assignment = new SchedulerAssignmentImpl(topology1.getId(), executorToSlot);
    existingAssignments.put(topology1.getId(), assignment);
    // delete the failed worker of sup-0 from topology1's assignment so an actual reschedule happens
    executorToSlot.remove(new ExecutorDetails(1, 1));
    copyOfOldMapping = new HashMap<>(executorToSlot);
    // namely the two executors on the orphaned worker and the healthy worker
    existingExecutors = copyOfOldMapping.keySet();
    supMap1 = new HashMap<>(supMap);
    // mock the supervisor sup-0 as a failed supervisor
    supMap1.remove("sup-0");
    cluster1 = new Cluster(iNimbus, supMap1, existingAssignments, config1);
    topoMap = new HashMap<>();
    topoMap.put(topology1.getId(), topology1);
    topologies = new Topologies(topoMap);
    rs.schedule(topologies, cluster1);
    newAssignment = cluster1.getAssignmentById(topology1.getId());
    newExecutorToSlot = newAssignment.getExecutorToSlot();
    for (ExecutorDetails executor : existingExecutors) {
        Assert.assertEquals(copyOfOldMapping.get(executor), newExecutorToSlot.get(executor));
    }
    Assert.assertEquals("Fully Scheduled", cluster1.getStatusMap().get(topology1.getId()));
    // end of Test3
    // Test4: Scheduling a new topology does not disturb other assignments unnecessarily
    cluster1 = new Cluster(iNimbus, supMap, new HashMap<String, SchedulerAssignmentImpl>(), config1);
    topoMap = new HashMap<>();
    topoMap.put(topology1.getId(), topology1);
    topologies = new Topologies(topoMap);
    rs.schedule(topologies, cluster1);
    assignment = (SchedulerAssignmentImpl) cluster1.getAssignmentById(topology1.getId());
    executorToSlot = assignment.getExecutorToSlot();
    copyOfOldMapping = new HashMap<>(executorToSlot);
    topoMap.put(topology2.getId(), topology2);
    topologies = new Topologies(topoMap);
    rs.schedule(topologies, cluster1);
    newAssignment = (SchedulerAssignmentImpl) cluster1.getAssignmentById(topology1.getId());
    newExecutorToSlot = newAssignment.getExecutorToSlot();
    for (ExecutorDetails executor : copyOfOldMapping.keySet()) {
        Assert.assertEquals(copyOfOldMapping.get(executor), newExecutorToSlot.get(executor));
    }
    Assert.assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy", cluster1.getStatusMap().get(topology1.getId()));
    Assert.assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy", cluster1.getStatusMap().get(topology2.getId()));
}
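All four sub-tests share one pattern: snapshot getExecutorToSlot() before a reschedule, then assert that every surviving executor kept its slot. A hypothetical helper capturing that pattern (not part of Storm's test utilities; assertAssignmentsPreserved is a name introduced here for illustration):

    import java.util.Map;
    import org.apache.storm.scheduler.ExecutorDetails;
    import org.apache.storm.scheduler.SchedulerAssignment;
    import org.apache.storm.scheduler.WorkerSlot;
    import org.junit.Assert;

    final class SchedulerTestUtil {
        // Asserts every executor present before the reschedule still
        // occupies the same WorkerSlot afterwards.
        static void assertAssignmentsPreserved(Map<ExecutorDetails, WorkerSlot> before,
                                               SchedulerAssignment after) {
            Map<ExecutorDetails, WorkerSlot> newMapping = after.getExecutorToSlot();
            for (Map.Entry<ExecutorDetails, WorkerSlot> e : before.entrySet()) {
                Assert.assertEquals("executor " + e.getKey() + " should not move",
                    e.getValue(), newMapping.get(e.getKey()));
            }
        }
    }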