Use of org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer in the Apache Storm project.
Class RasBlacklistStrategy, method releaseBlacklistWhenNeeded.
/**
 * Picks the blacklisted supervisors that should be released so that topologies still
 * waiting to be scheduled have enough slots and resources.
 *
 * <p>The slot and resource demand of every topology for which
 * {@code cluster.needsSchedulingRas(td)} is true is summed up and compared with what the
 * non-blacklisted part of the cluster still has available. If there is a shortage,
 * blacklisted supervisors are released host by host (all supervisors of a host together)
 * until the shortage is covered or no blacklisted hosts are left.
 *
 * @param cluster the cluster state used to look up topologies, supervisors and free capacity
 * @param blacklistedNodeIds the ids of the nodes that are currently blacklisted
 * @return the ids of the supervisors to release from the blacklist; empty when nothing is
 *     blacklisted or when there is no shortage
 */
@Override
protected Set<String> releaseBlacklistWhenNeeded(Cluster cluster, final List<String> blacklistedNodeIds) {
    LOG.info("RAS We have {} nodes blacklisted...", blacklistedNodeIds.size());
    Set<String> readyToRemove = new HashSet<>();
    if (blacklistedNodeIds.isEmpty()) {
        // Nothing is blacklisted, so there is nothing that could be released.
        return readyToRemove;
    }

    int availableSlots = cluster.getNonBlacklistedAvailableSlots(blacklistedNodeIds).size();
    int neededSlots = 0;
    NormalizedResourceOffer available = cluster.getNonBlacklistedClusterAvailableResources(blacklistedNodeIds);
    NormalizedResourceOffer needed = new NormalizedResourceOffer();

    for (TopologyDetails td : cluster.getTopologies()) {
        if (!cluster.needsSchedulingRas(td)) {
            continue;
        }
        int slots = 0;
        try {
            slots = ServerUtils.getEstimatedWorkerCountForRasTopo(td.getConf(), td.getTopology());
        } catch (InvalidTopologyException e) {
            // Best effort: keep going with an estimate of 0 slots for this topology.
            LOG.warn("Could not guess the number of slots needed for {}", td.getName(), e);
        }
        int assignedSlots = cluster.getAssignedNumWorkers(td);
        // Clamp at zero: when the estimate is missing (0) or lower than what is already
        // assigned, the difference is negative, and a negative value would cancel out the
        // slots that other pending topologies really need.
        int tdSlotsNeeded = Math.max(0, slots - assignedSlots);
        neededSlots += tdSlotsNeeded;

        NormalizedResourceRequest resources = td.getApproximateTotalResources();
        needed.add(resources);

        LOG.warn("{} needs to be scheduled with {} and {} slots", td.getName(), resources, tdSlotsNeeded);
    }

    // Now we need to free up some resources...
    Map<String, SupervisorDetails> availableSupervisors = cluster.getSupervisors();
    NormalizedResourceOffer shortage = new NormalizedResourceOffer(needed);
    shortage.remove(available, cluster.getResourceMetrics());
    int shortageSlots = neededSlots - availableSlots;
    LOG.debug("Need {} and {} slots.", needed, neededSlots);
    LOG.debug("Available {} and {} slots.", available, availableSlots);
    LOG.debug("Shortage {} and {} slots.", shortage, shortageSlots);

    if (shortage.areAnyOverZero() || shortageSlots > 0) {
        LOG.info("Need {} and {} slots more. Releasing some blacklisted nodes to cover it.", shortage, shortageSlots);

        // release earliest blacklist - but release all supervisors on a given blacklisted host.
        // NOTE(review): the release order is whatever createHostToSupervisorMap yields - confirm there.
        Map<String, Set<String>> hostToSupervisorIds = createHostToSupervisorMap(blacklistedNodeIds, cluster);
        for (Set<String> supervisorIds : hostToSupervisorIds.values()) {
            for (String supervisorId : supervisorIds) {
                SupervisorDetails sd = availableSupervisors.get(supervisorId);
                if (sd != null) {
                    NormalizedResourcesWithMemory sdAvailable = cluster.getAvailableResources(sd);
                    int sdAvailableSlots = cluster.getAvailablePorts(sd).size();
                    readyToRemove.add(supervisorId);
                    shortage.remove(sdAvailable, cluster.getResourceMetrics());
                    shortageSlots -= sdAvailableSlots;
                    LOG.info("Releasing {} with {} and {} slots leaving {} and {} slots to go", supervisorId,
                             sdAvailable, sdAvailableSlots, shortage, shortageSlots);
                }
            }
            // make sure we've handled all supervisors on the host before we break
            if (!shortage.areAnyOverZero() && shortageSlots <= 0) {
                // we have enough resources now...
                break;
            }
        }
    }
    return readyToRemove;
}
Aggregations