Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
From the class TestNodeSorterHostProximity, method testWithBlackListedHosts.
/**
* Blacklist all nodes for a rack before sorting nodes.
* Confirm that {@link NodeSorterHostProximity#sortAllNodes()} still works.
*/
@Test
void testWithBlackListedHosts() {
INimbus iNimbus = new INimbusTest();
double compPcore = 100;
double compOnHeap = 775;
double compOffHeap = 25;
int topo1NumSpouts = 1;
int topo1NumBolts = 5;
int topo1SpoutParallelism = 100;
int topo1BoltParallelism = 200;
final int numSupersPerRack = 10;
final int numPortsPerSuper = 66;
long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism + 10);
long compPerSuper = compPerRack / numSupersPerRack;
double cpuPerSuper = compPcore * compPerSuper;
double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
double topo1MaxHeapSize = memPerSuper;
final String topoName1 = "topology1";
int numRacks = 3;
Map<String, SupervisorDetails> supMap = genSupervisorsWithRacks(numRacks, numSupersPerRack, numPortsPerSuper, 0, 0, cpuPerSuper, memPerSuper, new HashMap<>());
TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
Config config = new Config();
config.putAll(createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, GenericResourceAwareStrategy.class.getName());
IScheduler scheduler = new ResourceAwareScheduler();
scheduler.prepare(config, new StormMetricsRegistry());
TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
Topologies topologies = new Topologies(td1);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
Map<String, List<String>> networkTopography = cluster.getNetworkTopography();
assertEquals("Expecting " + numRacks + " racks found " + networkTopography.size(), numRacks, networkTopography.size());
assertTrue("Expecting racks count to be >= 3, found " + networkTopography.size(), networkTopography.size() >= 3);
Set<String> blackListedHosts = new HashSet<>();
List<SupervisorDetails> supArray = new ArrayList<>(supMap.values());
for (int i = 0; i < numSupersPerRack; i++) {
blackListedHosts.add(supArray.get(i).getHost());
}
blacklistHostsAndSortNodes(blackListedHosts, supMap.values(), cluster, td1);
String rackToClear = cluster.getNetworkTopography().keySet().stream().findFirst().get();
blackListedHosts = new HashSet<>(cluster.getNetworkTopography().get(rackToClear));
blacklistHostsAndSortNodes(blackListedHosts, supMap.values(), cluster, td1);
}
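The blacklistHostsAndSortNodes(...) helper called twice above is not included in this snippet. Below is a minimal, hypothetical sketch of what such a helper can look like, built only from the APIs already used in this test (Cluster#setBlacklistedHosts, NodeSorterHostProximity#prepare and #sortAllNodes); the assertions in the real TestNodeSorterHostProximity helper may differ.
private void blacklistHostsAndSortNodes(Set<String> blackListedHosts, Collection<SupervisorDetails> supervisors, Cluster cluster, TopologyDetails td) {
    // hypothetical sketch, not the original helper: blacklist the hosts on the cluster before sorting
    cluster.setBlacklistedHosts(blackListedHosts);
    NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, td);
    nodeSorter.prepare(null);
    Set<String> sortedNodeIds = new HashSet<>();
    nodeSorter.sortAllNodes().forEach(sortedNodeIds::add);
    // sorting should still return every supervisor, blacklisted or not
    Set<String> allSupervisorIds = supervisors.stream().map(SupervisorDetails::getId).collect(Collectors.toSet());
    assertEquals("Expecting sortAllNodes() to return every supervisor", allSupervisorIds, sortedNodeIds);
}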
Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
From the class TestNodeSorterHostProximity, method testWithImpairedClusterNetworkTopography.
/**
* Assign and then clear out a rack to host list mapping in cluster.networkTopography.
* Expected behavior is that:
* <li>the rack without hosts does not show up in {@link NodeSorterHostProximity#getSortedRacks()}</li>
* <li>all the supervisor nodes still get returned in {@link NodeSorterHostProximity#sortAllNodes()}</li>
* <li>supervisors on cleared rack show up under {@link DNSToSwitchMapping#DEFAULT_RACK}</li>
*
* <p>
* Force an unusual condition, where one of the racks is still passed to LazyNodeSortingIterator with
* an empty list, and then ensure that the code is resilient.
* </p>
*/
@Test
void testWithImpairedClusterNetworkTopography() {
INimbus iNimbus = new INimbusTest();
double compPcore = 100;
double compOnHeap = 775;
double compOffHeap = 25;
int topo1NumSpouts = 1;
int topo1NumBolts = 5;
int topo1SpoutParallelism = 100;
int topo1BoltParallelism = 200;
final int numSupersPerRack = 10;
final int numPortsPerSuper = 66;
long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism + 10);
long compPerSuper = compPerRack / numSupersPerRack;
double cpuPerSuper = compPcore * compPerSuper;
double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
double topo1MaxHeapSize = memPerSuper;
final String topoName1 = "topology1";
int numRacks = 3;
Map<String, SupervisorDetails> supMap = genSupervisorsWithRacks(numRacks, numSupersPerRack, numPortsPerSuper, 0, 0, cpuPerSuper, memPerSuper, new HashMap<>());
TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
Config config = new Config();
config.putAll(createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, GenericResourceAwareStrategy.class.getName());
IScheduler scheduler = new ResourceAwareScheduler();
scheduler.prepare(config, new StormMetricsRegistry());
TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
Topologies topologies = new Topologies(td1);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
Map<String, List<String>> networkTopography = cluster.getNetworkTopography();
assertEquals("Expecting " + numRacks + " racks found " + networkTopography.size(), numRacks, networkTopography.size());
assertTrue("Expecting racks count to be >= 3, found " + networkTopography.size(), networkTopography.size() >= 3);
// Impair cluster.networkTopography by setting one rack to have zero hosts; getSortedRacks() should exclude this rack.
// Keep the supervisorDetails unchanged - confirm that these nodes are not lost even with an incomplete networkTopography
String rackIdToZero = networkTopography.keySet().stream().findFirst().get();
impairClusterRack(cluster, rackIdToZero, true, false);
NodeSorterHostProximity nodeSorterHostProximity = new NodeSorterHostProximity(cluster, td1);
nodeSorterHostProximity.getSortedRacks().forEach(x -> assertNotEquals(x.id, rackIdToZero));
// confirm that the above action has not lost the hosts and that they appear under the DEFAULT rack
{
Set<String> seenRacks = new HashSet<>();
nodeSorterHostProximity.getSortedRacks().forEach(x -> seenRacks.add(x.id));
assertEquals("Expecting rack cnt to be still " + numRacks, numRacks, seenRacks.size());
assertTrue("Expecting to see default-rack=" + DNSToSwitchMapping.DEFAULT_RACK + " in sortedRacks", seenRacks.contains(DNSToSwitchMapping.DEFAULT_RACK));
}
// now check that no node/supervisor is missing when sorting all nodes
Set<String> expectedNodes = supMap.keySet();
Set<String> seenNodes = new HashSet<>();
nodeSorterHostProximity.prepare(null);
nodeSorterHostProximity.sortAllNodes().forEach(n -> seenNodes.add(n));
assertEquals("Expecting see all supervisors ", expectedNodes, seenNodes);
// Now fully impair the cluster - confirm no default rack
{
cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
cluster.setNetworkTopography(new TestDNSToSwitchMapping(supMap.values()).getRackToHosts());
impairClusterRack(cluster, rackIdToZero, true, true);
Set<String> seenRacks = new HashSet<>();
NodeSorterHostProximity nodeSorterHostProximity2 = new NodeSorterHostProximity(cluster, td1);
nodeSorterHostProximity2.getSortedRacks().forEach(x -> seenRacks.add(x.id));
Map<String, Set<String>> rackIdToHosts = nodeSorterHostProximity2.getRackIdToHosts();
String dumpOfRacks = rackIdToHosts.entrySet().stream().map(x -> String.format("rack %s -> hosts [%s]", x.getKey(), String.join(",", x.getValue()))).collect(Collectors.joining("\n\t"));
assertEquals("Expecting rack cnt to be " + (numRacks - 1) + " but found " + seenRacks.size() + "\n\t" + dumpOfRacks, numRacks - 1, seenRacks.size());
assertFalse("Found default-rack=" + DNSToSwitchMapping.DEFAULT_RACK + " in \n\t" + dumpOfRacks, seenRacks.contains(DNSToSwitchMapping.DEFAULT_RACK));
}
}
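The impairClusterRack(...) helper invoked above is also not part of this snippet. As a rough, hypothetical sketch of what the first call does (empty a rack's host list while keeping its supervisors registered), using only the Cluster#getNetworkTopography()/#setNetworkTopography() API already seen in this test; the method name clearRackHosts is an assumption, and the real helper additionally supports removing the rack's supervisors for the fully-impaired case:
private void clearRackHosts(Cluster cluster, String rackId) {
    // hypothetical helper: shows only the "zero hosts, supervisors kept" impairment
    Map<String, List<String>> topography = cluster.getNetworkTopography();
    topography.put(rackId, new ArrayList<>()); // rack stays in the map but now lists no hosts
    cluster.setNetworkTopography(topography);  // re-apply in case the getter returns a copy
}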
Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
From the class Nimbus, method makeSupervisorSummary.
private SupervisorSummary makeSupervisorSummary(String supervisorId, SupervisorInfo info) {
Set<String> blacklistedSupervisorIds = Collections.emptySet();
if (scheduler instanceof BlacklistScheduler) {
BlacklistScheduler bs = (BlacklistScheduler) scheduler;
blacklistedSupervisorIds = bs.getBlacklistSupervisorIds();
}
LOG.debug("INFO: {} ID: {}", info, supervisorId);
int numPorts = 0;
if (info.is_set_meta()) {
numPorts = info.get_meta_size();
}
int numUsedPorts = 0;
if (info.is_set_used_ports()) {
numUsedPorts = info.get_used_ports_size();
}
LOG.debug("NUM PORTS: {}", numPorts);
SupervisorSummary ret = new SupervisorSummary(info.get_hostname(), (int) info.get_uptime_secs(), numPorts, numUsedPorts, supervisorId);
ret.set_total_resources(info.get_resources_map());
SupervisorResources resources = nodeIdToResources.get().get(supervisorId);
if (resources != null && underlyingScheduler instanceof ResourceAwareScheduler) {
ret.set_used_mem(resources.getUsedMem());
ret.set_used_cpu(resources.getUsedCpu());
ret.set_used_generic_resources(resources.getUsedGenericResources());
if (isFragmented(resources)) {
final double availableCpu = resources.getAvailableCpu();
if (availableCpu < 0) {
LOG.warn("Negative fragmented CPU on {}", supervisorId);
}
ret.set_fragmented_cpu(availableCpu);
final double availableMem = resources.getAvailableMem();
if (availableMem < 0) {
LOG.warn("Negative fragmented Mem on {}", supervisorId);
}
ret.set_fragmented_mem(availableMem);
}
}
if (info.is_set_version()) {
ret.set_version(info.get_version());
}
if (blacklistedSupervisorIds.contains(supervisorId)) {
ret.set_blacklisted(true);
} else {
ret.set_blacklisted(false);
}
return ret;
}
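The isFragmented(resources) check used above is defined elsewhere in Nimbus. As a hedged sketch of the underlying idea only (a supervisor counts as fragmented when its leftover CPU/memory is too small to host another worker); the method name and the threshold parameters below are illustrative placeholders, not Storm's actual configuration keys:
// Hypothetical sketch; the real Nimbus#isFragmented derives its thresholds from cluster configuration.
private static boolean looksFragmented(SupervisorResources resources, double minWorkerCpu, double minWorkerMemMb) {
    // the remaining capacity is smaller than what a minimally sized worker would need
    return resources.getAvailableCpu() < minWorkerCpu || resources.getAvailableMem() < minWorkerMemMb;
}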
Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
From the class Nimbus, method getOwnerResourceSummaries.
@Override
public List<OwnerResourceSummary> getOwnerResourceSummaries(String owner) throws AuthorizationException, TException {
try {
getOwnerResourceSummariesCalls.mark();
checkAuthorization(null, null, "getOwnerResourceSummaries");
IStormClusterState state = stormClusterState;
Map<String, Assignment> topoIdToAssignments = state.assignmentsInfo();
Map<String, StormBase> topoIdToBases = state.topologyBases();
Map<String, Number> clusterSchedulerConfig = scheduler.config();
// put the [owner -> StormBase-list] mapping into ownerToBasesMap
// if this owner (the input parameter) is null, add all the owners with a stormbase and guarantees
// else, add only this owner (the input parameter) to the map
Map<String, List<StormBase>> ownerToBasesMap = new HashMap<>();
if (owner == null) {
// add all the owners to the map
for (StormBase base : topoIdToBases.values()) {
String baseOwner = base.get_owner();
if (!ownerToBasesMap.containsKey(baseOwner)) {
List<StormBase> stormbases = new ArrayList<>();
stormbases.add(base);
ownerToBasesMap.put(baseOwner, stormbases);
} else {
ownerToBasesMap.get(baseOwner).add(base);
}
}
// in addition, add all the owners with guarantees
List<String> ownersWithGuarantees = new ArrayList<>(clusterSchedulerConfig.keySet());
for (String ownerWithGuarantees : ownersWithGuarantees) {
if (!ownerToBasesMap.containsKey(ownerWithGuarantees)) {
ownerToBasesMap.put(ownerWithGuarantees, new ArrayList<>());
}
}
} else {
// only put this owner to the map
List<StormBase> stormbases = new ArrayList<>();
for (StormBase base : topoIdToBases.values()) {
if (owner.equals(base.get_owner())) {
stormbases.add(base);
}
}
ownerToBasesMap.put(owner, stormbases);
}
List<OwnerResourceSummary> ret = new ArrayList<>();
// for each owner, get resources, configs, and aggregate
for (Entry<String, List<StormBase>> ownerToBasesEntry : ownerToBasesMap.entrySet()) {
String theOwner = ownerToBasesEntry.getKey();
TopologyResources totalResourcesAggregate = new TopologyResources();
int totalExecutors = 0;
int totalWorkers = 0;
int totalTasks = 0;
for (StormBase base : ownerToBasesEntry.getValue()) {
try {
String topoId = toTopoId(base.get_name());
TopologyResources resources = getResourcesForTopology(topoId, base);
totalResourcesAggregate = totalResourcesAggregate.add(resources);
Assignment ownerAssignment = topoIdToAssignments.get(topoId);
if (ownerAssignment != null && ownerAssignment.get_executor_node_port() != null) {
totalExecutors += ownerAssignment.get_executor_node_port().keySet().size();
totalWorkers += new HashSet<>(ownerAssignment.get_executor_node_port().values()).size();
for (List<Long> executorId : ownerAssignment.get_executor_node_port().keySet()) {
totalTasks += StormCommon.executorIdToTasks(executorId).size();
}
}
} catch (NotAliveException e) {
LOG.warn("{} is not alive.", base.get_name());
}
}
double requestedTotalMemory = totalResourcesAggregate.getRequestedMemOnHeap() + totalResourcesAggregate.getRequestedMemOffHeap();
double assignedTotalMemory = totalResourcesAggregate.getAssignedMemOnHeap() + totalResourcesAggregate.getAssignedMemOffHeap();
OwnerResourceSummary ownerResourceSummary = new OwnerResourceSummary(theOwner);
ownerResourceSummary.set_total_topologies(ownerToBasesEntry.getValue().size());
ownerResourceSummary.set_total_executors(totalExecutors);
ownerResourceSummary.set_total_workers(totalWorkers);
ownerResourceSummary.set_total_tasks(totalTasks);
ownerResourceSummary.set_memory_usage(assignedTotalMemory);
ownerResourceSummary.set_cpu_usage(totalResourcesAggregate.getAssignedCpu());
ownerResourceSummary.set_requested_on_heap_memory(totalResourcesAggregate.getRequestedMemOnHeap());
ownerResourceSummary.set_requested_off_heap_memory(totalResourcesAggregate.getRequestedMemOffHeap());
ownerResourceSummary.set_requested_total_memory(requestedTotalMemory);
ownerResourceSummary.set_requested_cpu(totalResourcesAggregate.getRequestedCpu());
ownerResourceSummary.set_assigned_on_heap_memory(totalResourcesAggregate.getAssignedMemOnHeap());
ownerResourceSummary.set_assigned_off_heap_memory(totalResourcesAggregate.getAssignedMemOffHeap());
if (clusterSchedulerConfig.containsKey(theOwner)) {
if (underlyingScheduler instanceof ResourceAwareScheduler) {
Map<String, Object> schedulerConfig = (Map) clusterSchedulerConfig.get(theOwner);
if (schedulerConfig != null) {
ownerResourceSummary.set_memory_guarantee((double) schedulerConfig.getOrDefault("memory", 0));
ownerResourceSummary.set_cpu_guarantee((double) schedulerConfig.getOrDefault("cpu", 0));
ownerResourceSummary.set_memory_guarantee_remaining(ownerResourceSummary.get_memory_guarantee() - ownerResourceSummary.get_memory_usage());
ownerResourceSummary.set_cpu_guarantee_remaining(ownerResourceSummary.get_cpu_guarantee() - ownerResourceSummary.get_cpu_usage());
}
} else if (underlyingScheduler instanceof MultitenantScheduler) {
ownerResourceSummary.set_isolated_node_guarantee((int) clusterSchedulerConfig.getOrDefault(theOwner, 0));
}
}
LOG.debug("{}", ownerResourceSummary.toString());
ret.add(ownerResourceSummary);
}
return ret;
} catch (Exception e) {
LOG.warn("Get owner resource summaries exception. (owner = '{}')", owner);
if (e instanceof TException) {
throw (TException) e;
}
throw new RuntimeException(e);
}
}
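As a brief usage sketch (not part of the Nimbus source above): calling this RPC from a client through the generated Thrift interface, assuming the standard NimbusClient and Utils helpers, with error handling omitted.
// sketch: fetch and print owner resource summaries via the Nimbus Thrift client
Map<String, Object> conf = Utils.readStormConfig();
try (NimbusClient nimbusClient = NimbusClient.getConfiguredClient(conf)) {
    // pass null to aggregate over all owners, or a specific owner name to restrict the result
    List<OwnerResourceSummary> summaries = nimbusClient.getClient().getOwnerResourceSummaries(null);
    for (OwnerResourceSummary summary : summaries) {
        System.out.printf("owner=%s topologies=%d memoryMB=%.1f cpu=%.1f%n",
            summary.get_owner(), summary.get_total_topologies(),
            summary.get_memory_usage(), summary.get_cpu_usage());
    }
}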