Use of org.apache.storm.scheduler.Cluster in project storm by apache.
In class TestBlacklistScheduler, method TestReleaseBlacklist.
@Test
public void TestReleaseBlacklist() {
    INimbus iNimbus = new TestUtilsForBlacklistScheduler.INimbusTest();
    Map<String, SupervisorDetails> supMap = TestUtilsForBlacklistScheduler.genSupervisors(3, 4);
    Config config = new Config();
    config.putAll(Utils.readDefaultConfig());
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_TOLERANCE_TIME, 200);
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_TOLERANCE_COUNT, 2);
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_RESUME_TIME, 300);
    Map<String, TopologyDetails> topoMap = new HashMap<String, TopologyDetails>();
    TopologyDetails topo1 = TestUtilsForBlacklistScheduler.getTopology("topo-1", config, 5, 15, 1, 1, currentTime - 2, true);
    TopologyDetails topo2 = TestUtilsForBlacklistScheduler.getTopology("topo-2", config, 5, 15, 1, 1, currentTime - 8, true);
    TopologyDetails topo3 = TestUtilsForBlacklistScheduler.getTopology("topo-3", config, 5, 15, 1, 1, currentTime - 16, true);
    TopologyDetails topo4 = TestUtilsForBlacklistScheduler.getTopology("topo-4", config, 5, 15, 1, 1, currentTime - 32, true);
    topoMap.put(topo1.getId(), topo1);
    Topologies topologies = new Topologies(topoMap);
    StormMetricsRegistry metricsRegistry = new StormMetricsRegistry();
    ResourceMetrics resourceMetrics = new ResourceMetrics(metricsRegistry);
    Cluster cluster = new Cluster(iNimbus, resourceMetrics, supMap, new HashMap<String, SchedulerAssignmentImpl>(), topologies, config);
    scheduler = new BlacklistScheduler(new DefaultScheduler());
    scheduler.prepare(config, metricsRegistry);
    scheduler.schedule(topologies, cluster);
    // sup-0 disappears for two consecutive rounds, hitting the tolerance count of 2.
    cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"),
            TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config);
    scheduler.schedule(topologies, cluster);
    cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"),
            TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config);
    scheduler.schedule(topologies, cluster);
    // sup-0 comes back, but its host stays blacklisted for now.
    cluster = new Cluster(iNimbus, resourceMetrics, supMap,
            TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config);
    scheduler.schedule(topologies, cluster);
    Assert.assertEquals("blacklist", Collections.singleton("host-0"), cluster.getBlacklistedHosts());
    // Adding three more topologies exhausts the remaining capacity, so the
    // scheduler releases the blacklisted host to free its slots.
    topoMap.put(topo2.getId(), topo2);
    topoMap.put(topo3.getId(), topo3);
    topoMap.put(topo4.getId(), topo4);
    topologies = new Topologies(topoMap);
    cluster = new Cluster(iNimbus, resourceMetrics, supMap,
            TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config);
    scheduler.schedule(topologies, cluster);
    Assert.assertEquals("blacklist", Collections.emptySet(), cluster.getBlacklistedHosts());
}
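
Each round above rebuilds the Cluster by hand, carrying the previous round's assignments forward through TestUtilsForBlacklistScheduler.assignmentMapToImpl. A minimal sketch of a helper capturing that repeated pattern follows; the helper name is hypothetical and it reuses the test's own types, while the TestList example further down uses FaultGenerateUtils.nextCluster for the same job.

private static Cluster nextRound(Cluster prev, INimbus iNimbus, ResourceMetrics resourceMetrics,
                                 Map<String, SupervisorDetails> supMap,
                                 Topologies topologies, Config config) {
    // Build a fresh Cluster over (possibly changed) supervisors while keeping
    // the assignments the scheduler produced in the previous round.
    return new Cluster(iNimbus, resourceMetrics, supMap,
            TestUtilsForBlacklistScheduler.assignmentMapToImpl(prev.getAssignments()),
            topologies, config);
}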
Use of org.apache.storm.scheduler.Cluster in project storm by apache.
In class TestBlacklistScheduler, method TestResumeBlacklist.
@Test
public void TestResumeBlacklist() {
    INimbus iNimbus = new TestUtilsForBlacklistScheduler.INimbusTest();
    Map<String, SupervisorDetails> supMap = TestUtilsForBlacklistScheduler.genSupervisors(3, 4);
    Config config = new Config();
    config.putAll(Utils.readDefaultConfig());
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_TOLERANCE_TIME, 200);
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_TOLERANCE_COUNT, 2);
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_RESUME_TIME, 300);
    Map<String, TopologyDetails> topoMap = new HashMap<String, TopologyDetails>();
    TopologyDetails topo1 = TestUtilsForBlacklistScheduler.getTopology("topo-1", config, 5, 15, 1, 1, currentTime - 2, true);
    topoMap.put(topo1.getId(), topo1);
    Topologies topologies = new Topologies(topoMap);
    StormMetricsRegistry metricsRegistry = new StormMetricsRegistry();
    ResourceMetrics resourceMetrics = new ResourceMetrics(metricsRegistry);
    Cluster cluster = new Cluster(iNimbus, resourceMetrics, supMap, new HashMap<String, SchedulerAssignmentImpl>(), topologies, config);
    scheduler = new BlacklistScheduler(new DefaultScheduler());
    scheduler.prepare(config, metricsRegistry);
    scheduler.schedule(topologies, cluster);
    // Two consecutive rounds without sup-0 blacklist its host.
    cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"),
            TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config);
    scheduler.schedule(topologies, cluster);
    cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"),
            TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config);
    scheduler.schedule(topologies, cluster);
    cluster = new Cluster(iNimbus, resourceMetrics, supMap, new HashMap<String, SchedulerAssignmentImpl>(), topologies, config);
    scheduler.schedule(topologies, cluster);
    Assert.assertEquals("blacklist", Collections.singleton("host-0"), cluster.getBlacklistedHosts());
    // Keep scheduling inside the resume window; host-0 must stay blacklisted.
    for (int i = 0; i < 300 / 10 - 2; i++) {
        scheduler.schedule(topologies, cluster);
    }
    Assert.assertEquals("blacklist", Collections.singleton("host-0"), cluster.getBlacklistedHosts());
    // One more round crosses BLACKLIST_SCHEDULER_RESUME_TIME and host-0 is released.
    scheduler.schedule(topologies, cluster);
    Assert.assertEquals("blacklist", Collections.emptySet(), cluster.getBlacklistedHosts());
}
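
The loop bound 300 / 10 - 2 is the resume window expressed in scheduling rounds. Assuming each schedule() call stands for one 10-second scheduling step (an assumption consistent with the loop bound, not something the excerpt states), the arithmetic works out as below; the - 2 plausibly accounts for rounds already run since host-0 was blacklisted.

// Illustrative arithmetic only; the 10-second step is an assumption.
int resumeTimeSecs = 300;                       // BLACKLIST_SCHEDULER_RESUME_TIME
int roundSecs = 10;                             // assumed time per schedule() call
int resumeRounds = resumeTimeSecs / roundSecs;  // 30 rounds until release
int remaining = resumeRounds - 2;               // 28 loop iterations, still blacklisted
// One extra round past the loop crosses the window and empties the blacklist.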
Use of org.apache.storm.scheduler.Cluster in project storm by apache.
In class TestBlacklistScheduler, method TestList.
@Test
public void TestList() {
    INimbus iNimbus = new TestUtilsForBlacklistScheduler.INimbusTest();
    Config config = new Config();
    config.putAll(Utils.readDefaultConfig());
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_TOLERANCE_TIME, 200);
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_TOLERANCE_COUNT, 2);
    config.put(DaemonConfig.BLACKLIST_SCHEDULER_RESUME_TIME, 300);
    Map<String, TopologyDetails> topoMap = new HashMap<String, TopologyDetails>();
    TopologyDetails topo1 = TestUtilsForBlacklistScheduler.getTopology("topo-1", config, 5, 15, 1, 1, currentTime - 2, true);
    TopologyDetails topo2 = TestUtilsForBlacklistScheduler.getTopology("topo-2", config, 5, 15, 1, 1, currentTime - 2, true);
    topoMap.put(topo1.getId(), topo1);
    topoMap.put(topo2.getId(), topo2);
    Topologies topologies = new Topologies(topoMap);
    scheduler = new BlacklistScheduler(new DefaultScheduler());
    scheduler.prepare(config, new StormMetricsRegistry());
    // One fault description per scheduling round: supervisor index -> affected slots.
    List<Map<Integer, List<Integer>>> faultList = new ArrayList<>();
    faultList.add(new HashMap<>());
    faultList.add(ImmutableMap.of(0, ImmutableList.of(0, 1)));
    faultList.add(ImmutableMap.of(0, new ArrayList<>()));
    for (int i = 0; i < 17; i++) {
        faultList.add(new HashMap<>());
    }
    faultList.add(ImmutableMap.of(0, ImmutableList.of(0, 1)));
    faultList.add(ImmutableMap.of(1, ImmutableList.of(1)));
    for (int i = 0; i < 8; i++) {
        faultList.add(new HashMap<>());
    }
    faultList.add(ImmutableMap.of(0, ImmutableList.of(1)));
    faultList.add(ImmutableMap.of(1, ImmutableList.of(1)));
    for (int i = 0; i < 30; i++) {
        faultList.add(new HashMap<>());
    }
    List<Map<String, SupervisorDetails>> supervisorsList = FaultGenerateUtils.getSupervisorsList(3, 4, faultList);
    Cluster cluster = null;
    int count = 0;
    for (Map<String, SupervisorDetails> supervisors : supervisorsList) {
        cluster = FaultGenerateUtils.nextCluster(cluster, supervisors, iNimbus, config, topologies);
        scheduler.schedule(topologies, cluster);
        if (count == 0 || count == 2) {
            // No host has accumulated enough faults yet.
            Assert.assertEquals("blacklist", new HashSet<>(), cluster.getBlacklistedHosts());
        } else if (count == 3 || count == 30) {
            // host-0 crossed the tolerance count and is blacklisted.
            Assert.assertEquals("blacklist", Collections.singleton("host-0"), cluster.getBlacklistedHosts());
        } else if (count == 31 || count == 32 || count == 60) {
            // host-1 joins host-0 on the blacklist after its own faults.
            Set<String> hosts = new HashSet<>();
            hosts.add("host-0");
            hosts.add("host-1");
            Assert.assertEquals("blacklist", hosts, cluster.getBlacklistedHosts());
        } else if (count == 61) {
            // host-1 has been released; only host-0 remains blacklisted.
            Assert.assertEquals("blacklist", Collections.singleton("host-0"), cluster.getBlacklistedHosts());
        } else if (count == 62) {
            // host-0 is released as well, emptying the blacklist.
            Assert.assertEquals("blacklist", new HashSet<>(), cluster.getBlacklistedHosts());
        }
        count++;
    }
}
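
Each faultList entry drives one scheduling round: keys are supervisor indices and values are the slot indices that go down on that supervisor in that round, with an empty map meaning a fault-free round. That reading is inferred from how the list is built above, not from the FaultGenerateUtils source. The sup-0 fault entry used twice above decomposes as:

// One fault round: supervisor 0 loses slots 0 and 1, everything else stays healthy.
Map<Integer, List<Integer>> round = ImmutableMap.of(0, ImmutableList.of(0, 1));
faultList.add(round);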
Use of org.apache.storm.scheduler.Cluster in project storm by apache.
In class Nimbus, method normalizeConf.
/**
 * Create a normalized topology conf.
 *
 * @param conf the nimbus conf
 * @param topoConf initial topology conf
 * @param topology the Storm topology
 */
@SuppressWarnings("unchecked")
private static Map<String, Object> normalizeConf(Map<String, Object> conf, Map<String, Object> topoConf, StormTopology topology) {
    // Ensure that serializations are the same for all tasks no matter what's on
    // the supervisors. This also allows you to declare the serializations as a sequence.
    List<Map<String, Object>> allConfs = new ArrayList<>();
    for (Object comp : StormCommon.allComponents(topology).values()) {
        allConfs.add(StormCommon.componentConf(comp));
    }
    Set<String> decorators = new HashSet<>();
    // Yes, we are putting in a config that is not the same type we pulled out.
    Map<String, String> serializers = new HashMap<>();
    for (Map<String, Object> c : allConfs) {
        addToDecorators(decorators, (List<String>) c.get(Config.TOPOLOGY_KRYO_DECORATORS));
        addToSerializers(serializers, (List<Object>) c.get(Config.TOPOLOGY_KRYO_REGISTER));
    }
    addToDecorators(decorators, (List<String>) topoConf.getOrDefault(Config.TOPOLOGY_KRYO_DECORATORS,
            conf.get(Config.TOPOLOGY_KRYO_DECORATORS)));
    addToSerializers(serializers, (List<Object>) topoConf.getOrDefault(Config.TOPOLOGY_KRYO_REGISTER,
            conf.get(Config.TOPOLOGY_KRYO_REGISTER)));
    Map<String, Object> mergedConf = Utils.merge(conf, topoConf);
    Map<String, Object> ret = new HashMap<>(topoConf);
    ret.put(Config.TOPOLOGY_KRYO_REGISTER, serializers);
    ret.put(Config.TOPOLOGY_KRYO_DECORATORS, new ArrayList<>(decorators));
    ret.put(Config.TOPOLOGY_ACKER_EXECUTORS, mergedConf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
    ret.put(Config.TOPOLOGY_EVENTLOGGER_EXECUTORS, mergedConf.get(Config.TOPOLOGY_EVENTLOGGER_EXECUTORS));
    ret.put(Config.TOPOLOGY_MAX_TASK_PARALLELISM, mergedConf.get(Config.TOPOLOGY_MAX_TASK_PARALLELISM));
    // storm.messaging.netty.authentication is about inter-worker communication;
    // enforce netty authentication when either the topology or the daemon sets it to true.
    boolean enforceNettyAuth;
    if (!topoConf.containsKey(Config.STORM_MESSAGING_NETTY_AUTHENTICATION)) {
        enforceNettyAuth = (Boolean) conf.get(Config.STORM_MESSAGING_NETTY_AUTHENTICATION);
    } else {
        enforceNettyAuth = (Boolean) topoConf.get(Config.STORM_MESSAGING_NETTY_AUTHENTICATION)
                || (Boolean) conf.get(Config.STORM_MESSAGING_NETTY_AUTHENTICATION);
    }
    LOG.debug("For netty authentication, topo conf is: {}, cluster conf is: {}, Enforce netty auth: {}",
            topoConf.get(Config.STORM_MESSAGING_NETTY_AUTHENTICATION),
            conf.get(Config.STORM_MESSAGING_NETTY_AUTHENTICATION), enforceNettyAuth);
    ret.put(Config.STORM_MESSAGING_NETTY_AUTHENTICATION, enforceNettyAuth);
    if (!mergedConf.containsKey(Config.TOPOLOGY_METRICS_REPORTERS) && mergedConf.containsKey(Config.STORM_METRICS_REPORTERS)) {
        ret.put(Config.TOPOLOGY_METRICS_REPORTERS, mergedConf.get(Config.STORM_METRICS_REPORTERS));
    }
    // Add any system metrics reporters to the topology metrics reporters.
    if (conf.containsKey(Config.STORM_TOPOLOGY_METRICS_SYSTEM_REPORTERS)) {
        List<Map<String, Object>> reporters = (List<Map<String, Object>>) ret.computeIfAbsent(Config.TOPOLOGY_METRICS_REPORTERS, (key) -> new ArrayList<>());
        List<Map<String, Object>> systemReporters = (List<Map<String, Object>>) conf.get(Config.STORM_TOPOLOGY_METRICS_SYSTEM_REPORTERS);
        reporters.addAll(systemReporters);
    }
    // Don't allow topoConf to override various cluster-specific properties.
    // Specifically, adding the cluster settings to the topoConf here will make sure these settings
    // also override the subsequently generated conf picked up locally on the classpath.
    //
    // We will be dealing with 3 confs:
    // 1) the submitted topoConf created here
    // 2) the combined classpath conf with the topoConf added on top
    // 3) the nimbus conf with conf 2 above added on top.
    //
    // By first forcing the topology conf to contain the nimbus settings, we guarantee all three confs
    // will have the correct settings that cannot be overridden by the submitter.
    ret.put(Config.STORM_CGROUP_HIERARCHY_DIR, conf.get(Config.STORM_CGROUP_HIERARCHY_DIR));
    ret.put(Config.WORKER_METRICS, conf.get(Config.WORKER_METRICS));
    if (mergedConf.containsKey(Config.TOPOLOGY_WORKER_TIMEOUT_SECS)) {
        int workerTimeoutSecs = ObjectReader.getInt(mergedConf.get(Config.TOPOLOGY_WORKER_TIMEOUT_SECS));
        int workerMaxTimeoutSecs = ObjectReader.getInt(mergedConf.get(Config.WORKER_MAX_TIMEOUT_SECS));
        if (workerTimeoutSecs > workerMaxTimeoutSecs) {
            ret.put(Config.TOPOLOGY_WORKER_TIMEOUT_SECS, workerMaxTimeoutSecs);
            String topoId = (String) mergedConf.get(Config.STORM_ID);
            LOG.warn("Topology {} topology.worker.timeout.secs is too large. Reducing from {} to {}",
                    topoId, workerTimeoutSecs, workerMaxTimeoutSecs);
        }
    }
    return ret;
}
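
The "config that is not the same type we pulled out" comment refers to TOPOLOGY_KRYO_REGISTER: it is read as a list whose entries are either bare class names or single-entry maps, and written back as one class-to-serializer map. Below is a sketch of that normalization inferred from the surrounding code rather than copied from Nimbus; addToSerializersSketch is a hypothetical stand-in for the private addToSerializers.

static void addToSerializersSketch(Map<String, String> serializers, List<Object> entries) {
    if (entries == null) {
        return;
    }
    for (Object entry : entries) {
        if (entry instanceof Map) {
            // {className: serializerClassName} pairs override earlier entries.
            for (Map.Entry<?, ?> e : ((Map<?, ?>) entry).entrySet()) {
                serializers.put((String) e.getKey(), (String) e.getValue());
            }
        } else {
            // A bare class name registers the class with Kryo's default serializer.
            serializers.put((String) entry, null);
        }
    }
}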
Use of org.apache.storm.scheduler.Cluster in project storm by apache.
In class Nimbus, method computeNewSchedulerAssignments.
private Map<String, SchedulerAssignment> computeNewSchedulerAssignments(Map<String, Assignment> existingAssignments,
        Topologies topologies, Map<String, StormBase> bases, String scratchTopologyId)
        throws KeyNotFoundException, AuthorizationException, InvalidTopologyException, IOException {
    Map<String, Set<List<Integer>>> topoToExec = computeTopologyToExecutors(bases);
    Set<String> zkHeartbeatTopologies = topologies.getTopologies().stream()
            .filter(topo -> !supportRpcHeartbeat(topo))
            .map(TopologyDetails::getId)
            .collect(Collectors.toSet());
    updateAllHeartbeats(existingAssignments, topoToExec, zkHeartbeatTopologies);
    Map<String, Set<List<Integer>>> topoToAliveExecutors = computeTopologyToAliveExecutors(existingAssignments, topoToExec, scratchTopologyId);
    Map<String, Set<Long>> supervisorToDeadPorts = computeSupervisorToDeadPorts(existingAssignments, topoToExec, topoToAliveExecutors);
    Map<String, SchedulerAssignmentImpl> topoToSchedAssignment = computeTopologyToSchedulerAssignment(existingAssignments, topoToAliveExecutors);
    Set<String> missingAssignmentTopologies = new HashSet<>();
    for (TopologyDetails topo : topologies.getTopologies()) {
        String id = topo.getId();
        Set<List<Integer>> allExecs = topoToExec.get(id);
        Set<List<Integer>> aliveExecs = topoToAliveExecutors.get(id);
        int numDesiredWorkers = topo.getNumWorkers();
        int numAssignedWorkers = numUsedWorkers(topoToSchedAssignment.get(id));
        if (allExecs == null || allExecs.isEmpty() || !allExecs.equals(aliveExecs) || numDesiredWorkers > numAssignedWorkers) {
            // We have something to schedule...
            missingAssignmentTopologies.add(id);
        }
    }
    Map<String, SupervisorDetails> supervisors = readAllSupervisorDetails(supervisorToDeadPorts, topologies, missingAssignmentTopologies);
    Cluster cluster = new Cluster(inimbus, resourceMetrics, supervisors, topoToSchedAssignment, topologies, conf);
    cluster.setStatusMap(idToSchedStatus.get());
    schedulingStartTimeNs.set(Time.nanoTime());
    scheduler.schedule(topologies, cluster);
    // Get and set the start time before getting current time in order to avoid
    // a potential race with the longest-scheduling-time-ms gauge.
    final Long startTime = schedulingStartTimeNs.getAndSet(null);
    long elapsedNs = Time.nanoTime() - startTime;
    longestSchedulingTime.accumulateAndGet(elapsedNs, Math::max);
    schedulingDuration.update(elapsedNs, TimeUnit.NANOSECONDS);
    LOG.debug("Scheduling took {} ms for {} topologies", TimeUnit.NANOSECONDS.toMillis(elapsedNs), topologies.getTopologies().size());
    // Merge with existing statuses.
    idToSchedStatus.set(Utils.merge(idToSchedStatus.get(), cluster.getStatusMap()));
    nodeIdToResources.set(cluster.getSupervisorsResourcesMap());
    // This is a hack for non-RAS scheduler topology and worker resources.
    Map<String, TopologyResources> resources = cluster.getTopologyResourcesMap();
    idToResources.getAndAccumulate(resources, (orig, update) -> Utils.merge(orig, update));
    Map<String, Map<WorkerSlot, WorkerResources>> workerResources = new HashMap<>();
    for (Entry<String, Map<WorkerSlot, WorkerResources>> uglyWorkerResources : cluster.getWorkerResourcesMap().entrySet()) {
        Map<WorkerSlot, WorkerResources> slotToResources = new HashMap<>();
        for (Entry<WorkerSlot, WorkerResources> uglySlotToResources : uglyWorkerResources.getValue().entrySet()) {
            WorkerResources wr = uglySlotToResources.getValue();
            slotToResources.put(uglySlotToResources.getKey(), wr);
        }
        workerResources.put(uglyWorkerResources.getKey(), slotToResources);
    }
    idToWorkerResources.getAndAccumulate(workerResources, (orig, update) -> Utils.merge(orig, update));
    return cluster.getAssignments();
}
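
The resource maps at the end are published with an atomic merge idiom: getAndAccumulate folds the freshly computed map over the previously published one, with Utils.merge letting newer entries win. A self-contained rendition of that idiom with plain JDK types (all names here are hypothetical):

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

public class MergePublish {
    private final AtomicReference<Map<String, Integer>> published =
            new AtomicReference<>(new HashMap<>());

    public void publish(Map<String, Integer> update) {
        // Overlay `update` on top of the previously published map; later
        // entries win, mirroring the Utils.merge semantics used above.
        published.getAndAccumulate(update, (orig, upd) -> {
            Map<String, Integer> merged = new HashMap<>(orig);
            merged.putAll(upd);
            return merged;
        });
    }

    public Map<String, Integer> snapshot() {
        return published.get();
    }
}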