Use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.
The class TestResourceAwareScheduler, method TestFaultTolerance.
/**
 * Test correct behaviour when a supervisor dies: check that the scheduler handles the failure and evicts the
 * correct topology when rescheduling the executors from the dead supervisor.
 */
@Test
public void TestFaultTolerance() {
INimbus iNimbus = new TestUtilsForResourceAwareScheduler.INimbusTest();
Map<String, Number> resourceMap = new HashMap<String, Number>();
resourceMap.put(Config.SUPERVISOR_CPU_CAPACITY, 100.0);
resourceMap.put(Config.SUPERVISOR_MEMORY_CAPACITY_MB, 1000.0);
Map<String, SupervisorDetails> supMap = TestUtilsForResourceAwareScheduler.genSupervisors(6, 4, resourceMap);
Config config = new Config();
config.putAll(Utils.readDefaultConfig());
config.put(Config.RESOURCE_AWARE_SCHEDULER_EVICTION_STRATEGY, org.apache.storm.scheduler.resource.strategies.eviction.DefaultEvictionStrategy.class.getName());
config.put(Config.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY, org.apache.storm.scheduler.resource.strategies.priority.DefaultSchedulingPriorityStrategy.class.getName());
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy.class.getName());
config.put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 100.0);
config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB, 500);
config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 500);
Map<String, Map<String, Number>> resourceUserPool = new HashMap<String, Map<String, Number>>();
resourceUserPool.put("jerry", new HashMap<String, Number>());
resourceUserPool.get("jerry").put("cpu", 50.0);
resourceUserPool.get("jerry").put("memory", 500.0);
resourceUserPool.put("bobby", new HashMap<String, Number>());
resourceUserPool.get("bobby").put("cpu", 200.0);
resourceUserPool.get("bobby").put("memory", 2000.0);
resourceUserPool.put("derek", new HashMap<String, Number>());
resourceUserPool.get("derek").put("cpu", 100.0);
resourceUserPool.get("derek").put("memory", 1000.0);
config.put(Config.RESOURCE_AWARE_SCHEDULER_USER_POOLS, resourceUserPool);
Cluster cluster = new Cluster(iNimbus, supMap, new HashMap<String, SchedulerAssignmentImpl>(), config);
config.put(Config.TOPOLOGY_SUBMITTER_USER, "jerry");
TopologyDetails topo1 = TestUtilsForResourceAwareScheduler.getTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 20);
TopologyDetails topo2 = TestUtilsForResourceAwareScheduler.getTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20);
config.put(Config.TOPOLOGY_SUBMITTER_USER, "bobby");
TopologyDetails topo3 = TestUtilsForResourceAwareScheduler.getTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 10);
TopologyDetails topo4 = TestUtilsForResourceAwareScheduler.getTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10);
config.put(Config.TOPOLOGY_SUBMITTER_USER, "derek");
TopologyDetails topo5 = TestUtilsForResourceAwareScheduler.getTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29);
TopologyDetails topo6 = TestUtilsForResourceAwareScheduler.getTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 10);
Map<String, TopologyDetails> topoMap = new HashMap<String, TopologyDetails>();
topoMap.put(topo1.getId(), topo1);
topoMap.put(topo2.getId(), topo2);
topoMap.put(topo3.getId(), topo3);
topoMap.put(topo4.getId(), topo4);
topoMap.put(topo5.getId(), topo5);
topoMap.put(topo6.getId(), topo6);
Topologies topologies = new Topologies(topoMap);
ResourceAwareScheduler rs = new ResourceAwareScheduler();
rs.prepare(config);
rs.schedule(topologies, cluster);
for (TopologyDetails topo : rs.getUser("jerry").getTopologiesRunning()) {
Assert.assertTrue("Assert scheduling topology success", TestUtilsForResourceAwareScheduler.assertStatusSuccess(cluster.getStatusMap().get(topo.getId())));
}
Assert.assertEquals("# of running topologies", 2, rs.getUser("jerry").getTopologiesRunning().size());
Assert.assertEquals("# of pending topologies", 0, rs.getUser("jerry").getTopologiesPending().size());
Assert.assertEquals("# of attempted topologies", 0, rs.getUser("jerry").getTopologiesAttempted().size());
Assert.assertEquals("# of invalid topologies", 0, rs.getUser("jerry").getTopologiesInvalid().size());
for (TopologyDetails topo : rs.getUser("derek").getTopologiesRunning()) {
Assert.assertTrue("Assert scheduling topology success", TestUtilsForResourceAwareScheduler.assertStatusSuccess(cluster.getStatusMap().get(topo.getId())));
}
Assert.assertEquals("# of running topologies", 2, rs.getUser("derek").getTopologiesRunning().size());
Assert.assertEquals("# of pending topologies", 0, rs.getUser("derek").getTopologiesPending().size());
Assert.assertEquals("# of attempted topologies", 0, rs.getUser("derek").getTopologiesAttempted().size());
Assert.assertEquals("# of invalid topologies", 0, rs.getUser("derek").getTopologiesInvalid().size());
for (TopologyDetails topo : rs.getUser("bobby").getTopologiesRunning()) {
Assert.assertTrue("Assert scheduling topology success", TestUtilsForResourceAwareScheduler.assertStatusSuccess(cluster.getStatusMap().get(topo.getId())));
}
Assert.assertEquals("# of running topologies", 2, rs.getUser("bobby").getTopologiesRunning().size());
Assert.assertEquals("# of pending topologies", 0, rs.getUser("bobby").getTopologiesPending().size());
Assert.assertEquals("# of invalid topologies", 0, rs.getUser("bobby").getTopologiesInvalid().size());
Assert.assertEquals("# of attempted topologies", 0, rs.getUser("bobby").getTopologiesAttempted().size());
    // Fail a supervisor: remove it from the supervisor map and rebuild every assignment
    // so that no executor remains pinned to a slot on the dead node.
    SupervisorDetails supFailed = cluster.getSupervisors().values().iterator().next();
    LOG.info("/***** failing supervisor: {} ****/", supFailed.getHost());
    supMap.remove(supFailed.getId());
    Map<String, SchedulerAssignmentImpl> newAssignments = new HashMap<String, SchedulerAssignmentImpl>();
    for (Map.Entry<String, SchedulerAssignment> topoToAssignment : cluster.getAssignments().entrySet()) {
        String topoId = topoToAssignment.getKey();
        SchedulerAssignment assignment = topoToAssignment.getValue();
        Map<ExecutorDetails, WorkerSlot> executorToSlots = new HashMap<ExecutorDetails, WorkerSlot>();
        for (Map.Entry<ExecutorDetails, WorkerSlot> execToWorker : assignment.getExecutorToSlot().entrySet()) {
            ExecutorDetails exec = execToWorker.getKey();
            WorkerSlot ws = execToWorker.getValue();
            if (!ws.getNodeId().equals(supFailed.getId())) {
                executorToSlots.put(exec, ws);
            }
        }
        newAssignments.put(topoId, new SchedulerAssignmentImpl(topoId, executorToSlots));
    }
    Map<String, String> statusMap = cluster.getStatusMap();
    cluster = new Cluster(iNimbus, supMap, newAssignments, config);
    cluster.setStatusMap(statusMap);
    rs.schedule(topologies, cluster);
    // The failed supervisor hosted an executor from topo-6 of user derek. The scheduler should evict a topology
    // from user jerry instead, since jerry would exceed his resource guarantee by more than derek would.
for (TopologyDetails topo : rs.getUser("jerry").getTopologiesRunning()) {
Assert.assertTrue("Assert scheduling topology success", TestUtilsForResourceAwareScheduler.assertStatusSuccess(cluster.getStatusMap().get(topo.getId())));
}
Assert.assertEquals("# of running topologies", 1, rs.getUser("jerry").getTopologiesRunning().size());
Assert.assertEquals("# of pending topologies", 0, rs.getUser("jerry").getTopologiesPending().size());
Assert.assertEquals("# of attempted topologies", 1, rs.getUser("jerry").getTopologiesAttempted().size());
Assert.assertEquals("# of invalid topologies", 0, rs.getUser("jerry").getTopologiesInvalid().size());
for (TopologyDetails topo : rs.getUser("derek").getTopologiesRunning()) {
Assert.assertTrue("Assert scheduling topology success", TestUtilsForResourceAwareScheduler.assertStatusSuccess(cluster.getStatusMap().get(topo.getId())));
}
Assert.assertEquals("# of running topologies", 2, rs.getUser("derek").getTopologiesRunning().size());
Assert.assertEquals("# of pending topologies", 0, rs.getUser("derek").getTopologiesPending().size());
Assert.assertEquals("# of attempted topologies", 0, rs.getUser("derek").getTopologiesAttempted().size());
Assert.assertEquals("# of invalid topologies", 0, rs.getUser("derek").getTopologiesInvalid().size());
for (TopologyDetails topo : rs.getUser("bobby").getTopologiesRunning()) {
Assert.assertTrue("Assert scheduling topology success", TestUtilsForResourceAwareScheduler.assertStatusSuccess(cluster.getStatusMap().get(topo.getId())));
}
Assert.assertEquals("# of running topologies", 2, rs.getUser("bobby").getTopologiesRunning().size());
Assert.assertEquals("# of pending topologies", 0, rs.getUser("bobby").getTopologiesPending().size());
Assert.assertEquals("# of invalid topologies", 0, rs.getUser("bobby").getTopologiesInvalid().size());
Assert.assertEquals("# of attempted topologies", 0, rs.getUser("bobby").getTopologiesAttempted().size());
}
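
The failure simulation above follows a reusable pattern: drop the dead supervisor from supMap, then rebuild every assignment so no executor remains pinned to a slot on that node. A minimal sketch of a hypothetical helper that factors this out; the name withoutSupervisor is ours and is not part of TestUtilsForResourceAwareScheduler. It would live inside the test class, where these scheduler types are already imported:

// Hypothetical helper: rebuilds the assignment map so that no executor
// remains assigned to a worker slot on the failed supervisor.
private static Map<String, SchedulerAssignmentImpl> withoutSupervisor(
        Map<String, SchedulerAssignment> assignments, String failedSupId) {
    Map<String, SchedulerAssignmentImpl> pruned = new HashMap<String, SchedulerAssignmentImpl>();
    for (Map.Entry<String, SchedulerAssignment> entry : assignments.entrySet()) {
        Map<ExecutorDetails, WorkerSlot> kept = new HashMap<ExecutorDetails, WorkerSlot>();
        for (Map.Entry<ExecutorDetails, WorkerSlot> execToWorker : entry.getValue().getExecutorToSlot().entrySet()) {
            // Keep only executors whose worker slot lives on a surviving node.
            if (!execToWorker.getValue().getNodeId().equals(failedSupId)) {
                kept.put(execToWorker.getKey(), execToWorker.getValue());
            }
        }
        pruned.put(entry.getKey(), new SchedulerAssignmentImpl(entry.getKey(), kept));
    }
    return pruned;
}

With such a helper, the block above reduces to supMap.remove(supFailed.getId()) followed by withoutSupervisor(cluster.getAssignments(), supFailed.getId()).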
Use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.
The class TestUtilsForResourceAwareScheduler, method getSupervisorToCpuUsage.
public static Map<SupervisorDetails, Double> getSupervisorToCpuUsage(Cluster cluster, Topologies topologies) {
    Map<SupervisorDetails, Double> superToCpu = new HashMap<>();
    Collection<SchedulerAssignment> assignments = cluster.getAssignments().values();
    Collection<SupervisorDetails> supervisors = cluster.getSupervisors().values();
    for (SupervisorDetails supervisor : supervisors) {
        superToCpu.put(supervisor, 0.0);
    }
    for (SchedulerAssignment assignment : assignments) {
        Map<ExecutorDetails, SupervisorDetails> executorToSupervisor = new HashMap<>();
        Map<SupervisorDetails, List<ExecutorDetails>> supervisorToExecutors = new HashMap<>();
        TopologyDetails topology = topologies.getById(assignment.getTopologyId());
        for (Map.Entry<ExecutorDetails, WorkerSlot> entry : assignment.getExecutorToSlot().entrySet()) {
            executorToSupervisor.put(entry.getKey(), cluster.getSupervisorById(entry.getValue().getNodeId()));
        }
        for (Map.Entry<ExecutorDetails, SupervisorDetails> entry : executorToSupervisor.entrySet()) {
            List<ExecutorDetails> executorsOnSupervisor = supervisorToExecutors.get(entry.getValue());
            if (executorsOnSupervisor == null) {
                executorsOnSupervisor = new ArrayList<>();
                supervisorToExecutors.put(entry.getValue(), executorsOnSupervisor);
            }
            executorsOnSupervisor.add(entry.getKey());
        }
        for (Map.Entry<SupervisorDetails, List<ExecutorDetails>> entry : supervisorToExecutors.entrySet()) {
            Double supervisorUsedCpu = 0.0;
            for (ExecutorDetails executor : entry.getValue()) {
                supervisorUsedCpu += topology.getTotalCpuReqTask(executor);
            }
            superToCpu.put(entry.getKey(), superToCpu.get(entry.getKey()) + supervisorUsedCpu);
        }
    }
    return superToCpu;
}
Use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.
The class TestUtilsForResourceAwareScheduler, method getSupervisorToMemoryUsage.
public static Map<SupervisorDetails, Double> getSupervisorToMemoryUsage(Cluster cluster, Topologies topologies) {
    Map<SupervisorDetails, Double> superToMem = new HashMap<>();
    Collection<SchedulerAssignment> assignments = cluster.getAssignments().values();
    Collection<SupervisorDetails> supervisors = cluster.getSupervisors().values();
    for (SupervisorDetails supervisor : supervisors) {
        superToMem.put(supervisor, 0.0);
    }
    for (SchedulerAssignment assignment : assignments) {
        Map<ExecutorDetails, SupervisorDetails> executorToSupervisor = new HashMap<>();
        Map<SupervisorDetails, List<ExecutorDetails>> supervisorToExecutors = new HashMap<>();
        TopologyDetails topology = topologies.getById(assignment.getTopologyId());
        for (Map.Entry<ExecutorDetails, WorkerSlot> entry : assignment.getExecutorToSlot().entrySet()) {
            executorToSupervisor.put(entry.getKey(), cluster.getSupervisorById(entry.getValue().getNodeId()));
        }
        for (Map.Entry<ExecutorDetails, SupervisorDetails> entry : executorToSupervisor.entrySet()) {
            List<ExecutorDetails> executorsOnSupervisor = supervisorToExecutors.get(entry.getValue());
            if (executorsOnSupervisor == null) {
                executorsOnSupervisor = new ArrayList<>();
                supervisorToExecutors.put(entry.getValue(), executorsOnSupervisor);
            }
            executorsOnSupervisor.add(entry.getKey());
        }
        for (Map.Entry<SupervisorDetails, List<ExecutorDetails>> entry : supervisorToExecutors.entrySet()) {
            Double supervisorUsedMemory = 0.0;
            for (ExecutorDetails executor : entry.getValue()) {
                supervisorUsedMemory += topology.getTotalMemReqTask(executor);
            }
            superToMem.put(entry.getKey(), superToMem.get(entry.getKey()) + supervisorUsedMemory);
        }
    }
    return superToMem;
}
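
getSupervisorToCpuUsage and getSupervisorToMemoryUsage are identical except for the per-executor resource getter. A possible consolidation, as a sketch of ours rather than anything in the Storm test utilities, assuming java.util.function.ToDoubleBiFunction is imported and that getTotalCpuReqTask/getTotalMemReqTask each take an ExecutorDetails and return a numeric requirement:

// Hypothetical generified version: both methods differ only in which resource is
// summed, so the accessor can be passed in as a ToDoubleBiFunction.
public static Map<SupervisorDetails, Double> getSupervisorToUsage(
        Cluster cluster, Topologies topologies,
        ToDoubleBiFunction<TopologyDetails, ExecutorDetails> resourceOfExecutor) {
    Map<SupervisorDetails, Double> superToUsage = new HashMap<>();
    // Start every supervisor at zero so idle supervisors still appear in the result.
    for (SupervisorDetails supervisor : cluster.getSupervisors().values()) {
        superToUsage.put(supervisor, 0.0);
    }
    for (SchedulerAssignment assignment : cluster.getAssignments().values()) {
        TopologyDetails topology = topologies.getById(assignment.getTopologyId());
        for (Map.Entry<ExecutorDetails, WorkerSlot> entry : assignment.getExecutorToSlot().entrySet()) {
            SupervisorDetails supervisor = cluster.getSupervisorById(entry.getValue().getNodeId());
            // Accumulate this executor's requirement onto its supervisor's running total.
            superToUsage.merge(supervisor, resourceOfExecutor.applyAsDouble(topology, entry.getKey()), Double::sum);
        }
    }
    return superToUsage;
}

// Usage: the two original methods become one-liners.
// getSupervisorToUsage(cluster, topologies, TopologyDetails::getTotalCpuReqTask);
// getSupervisorToUsage(cluster, topologies, TopologyDetails::getTotalMemReqTask);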
Use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.
The class DefaultResourceAwareStrategy, method orderExecutors.
/**
 * Order executors based on how many in and out connections each will potentially need to make.
 * First, order components by their number of in and out connections. Then iterate through the sorted list of
 * components; for each component, sort its neighbors by how many connections they will have to make with that
 * component. Add an executor from this component and then one from each neighboring component in sorted order.
 * Repeat until there is nothing left to schedule.
 *
 * @param td the topology the executors belong to
 * @param unassignedExecutors a collection of unassigned executors that still need to be assigned; executors should only be assigned from this list
 * @return a list of executors in sorted order
 */
private List<ExecutorDetails> orderExecutors(TopologyDetails td, Collection<ExecutorDetails> unassignedExecutors) {
    Map<String, Component> componentMap = td.getComponents();
    List<ExecutorDetails> execsScheduled = new LinkedList<>();
    // Queue up, per component, only the executors that are still unassigned.
    Map<String, Queue<ExecutorDetails>> compToExecsToSchedule = new HashMap<>();
    for (Component component : componentMap.values()) {
        compToExecsToSchedule.put(component.id, new LinkedList<ExecutorDetails>());
        for (ExecutorDetails exec : component.execs) {
            if (unassignedExecutors.contains(exec)) {
                compToExecsToSchedule.get(component.id).add(exec);
            }
        }
    }
    Set<Component> sortedComponents = sortComponents(componentMap);
    sortedComponents.addAll(componentMap.values());
    for (Component currComp : sortedComponents) {
        Map<String, Component> neighbors = new HashMap<String, Component>();
        for (String compId : (List<String>) ListUtils.union(currComp.children, currComp.parents)) {
            neighbors.put(compId, componentMap.get(compId));
        }
        Set<Component> sortedNeighbors = sortNeighbors(currComp, neighbors);
        Queue<ExecutorDetails> currCompExesToSched = compToExecsToSchedule.get(currComp.id);
        // Round-robin interleave: take one executor from the current component, then one
        // from each sorted neighbor, and repeat until no queue yields anything.
        boolean flag = false;
        do {
            flag = false;
            if (!currCompExesToSched.isEmpty()) {
                execsScheduled.add(currCompExesToSched.poll());
                flag = true;
            }
            for (Component neighborComp : sortedNeighbors) {
                Queue<ExecutorDetails> neighborCompExesToSched = compToExecsToSchedule.get(neighborComp.id);
                if (!neighborCompExesToSched.isEmpty()) {
                    execsScheduled.add(neighborCompExesToSched.poll());
                    flag = true;
                }
            }
        } while (flag);
    }
    return execsScheduled;
}
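
The do/while loop above implements a round-robin interleave over executor queues. The same pattern, reduced to a self-contained sketch over plain strings (all names here are illustrative only, not part of the Storm codebase):

import java.util.*;

public class InterleaveDemo {
    // Round-robin interleave: repeatedly take one element from each queue, in order,
    // until all queues are empty. Mirrors the do/while loop in orderExecutors.
    static List<String> interleave(List<Queue<String>> queues) {
        List<String> out = new ArrayList<>();
        boolean tookAny;
        do {
            tookAny = false;
            for (Queue<String> q : queues) {
                if (!q.isEmpty()) {
                    out.add(q.poll());
                    tookAny = true;
                }
            }
        } while (tookAny);
        return out;
    }

    public static void main(String[] args) {
        Queue<String> spout = new LinkedList<>(Arrays.asList("spout-0", "spout-1"));
        Queue<String> bolt = new LinkedList<>(Arrays.asList("bolt-0", "bolt-1", "bolt-2"));
        // Prints [spout-0, bolt-0, spout-1, bolt-1, bolt-2]
        System.out.println(interleave(Arrays.asList(spout, bolt)));
    }
}

Interleaving this way spreads each component's executors across the schedule so that heavily connected components end up co-located more often, which is the stated goal of the ordering.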
Use of org.apache.storm.scheduler.ExecutorDetails in project storm by apache.
The class IsolationScheduler, method hostAssignments.
private Map<String, List<AssignmentInfo>> hostAssignments(Cluster cluster) {
    Collection<SchedulerAssignment> assignmentValues = cluster.getAssignments().values();
    Map<String, List<AssignmentInfo>> hostAssignments = new HashMap<String, List<AssignmentInfo>>();
    for (SchedulerAssignment sa : assignmentValues) {
        Map<WorkerSlot, List<ExecutorDetails>> slotExecutors = Utils.reverseMap(sa.getExecutorToSlot());
        Set<Map.Entry<WorkerSlot, List<ExecutorDetails>>> entries = slotExecutors.entrySet();
        for (Map.Entry<WorkerSlot, List<ExecutorDetails>> entry : entries) {
            WorkerSlot slot = entry.getKey();
            List<ExecutorDetails> executors = entry.getValue();
            String host = cluster.getHost(slot.getNodeId());
            AssignmentInfo ass = new AssignmentInfo(slot, sa.getTopologyId(), new HashSet<ExecutorDetails>(executors));
            List<AssignmentInfo> executorList = hostAssignments.get(host);
            if (executorList == null) {
                executorList = new ArrayList<AssignmentInfo>();
                hostAssignments.put(host, executorList);
            }
            executorList.add(ass);
        }
    }
    return hostAssignments;
}
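
The key step here is Utils.reverseMap, which this code relies on to turn the executor-to-slot map into a slot-to-executors map. A minimal sketch of that inversion, under the assumption (consistent with how it is used above) that reverseMap groups keys by their shared value:

// Sketch of the inversion Utils.reverseMap performs here (our assumption from usage):
// a Map<K, V> becomes a Map<V, List<K>>, grouping together keys that map to the same value.
static <K, V> Map<V, List<K>> reverse(Map<K, V> map) {
    Map<V, List<K>> reversed = new HashMap<>();
    for (Map.Entry<K, V> entry : map.entrySet()) {
        reversed.computeIfAbsent(entry.getValue(), v -> new ArrayList<>()).add(entry.getKey());
    }
    return reversed;
}

Since several executors typically share one worker slot, the inversion yields exactly the per-slot executor lists that hostAssignments then groups by host.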