Use of org.apache.storm.scheduler.SupervisorResources in project storm by apache.
In class TestGenericResourceAwareStrategy, method testGenericResourceAwareStrategySharedMemory.
/**
 * Test that the GenericResourceAwareStrategy schedules shared memory and generic resources (GPUs) correctly.
 */
@Test
public void testGenericResourceAwareStrategySharedMemory() {
int spoutParallelism = 2;
int boltParallelism = 2;
int numBolts = 3;
double cpuPercent = 10;
double memoryOnHeap = 10;
double memoryOffHeap = 10;
double sharedOnHeap = 500;
double sharedOffHeapNode = 700;
double sharedOffHeapWorker = 500;
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout", new TestSpout(), spoutParallelism).addResource("gpu.count", 1.0);
builder.setBolt("bolt-1", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWorker, "bolt-1 shared off heap worker")).shuffleGrouping("spout");
builder.setBolt("bolt-2", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapNode, "bolt-2 shared node")).shuffleGrouping("bolt-1");
builder.setBolt("bolt-3", new TestBolt(), boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeap, "bolt-3 shared worker")).shuffleGrouping("bolt-2");
StormTopology stormToplogy = builder.createTopology();
INimbus iNimbus = new INimbusTest();
Config conf = createGrasClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null, Collections.emptyMap());
Map<String, Double> genericResourcesMap = new HashMap<>();
genericResourcesMap.put("gpu.count", 1.0);
Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 500, 2000, genericResourcesMap);
conf.put(Config.TOPOLOGY_PRIORITY, 0);
conf.put(Config.TOPOLOGY_NAME, "testTopology");
conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000);
TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormToplogy, 0, genExecsAndComps(stormToplogy), currentTime, "user");
Topologies topologies = new Topologies(topo);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf);
scheduler = new ResourceAwareScheduler();
scheduler.prepare(conf, new StormMetricsRegistry());
scheduler.schedule(topologies, cluster);
for (Entry<String, SupervisorResources> entry : cluster.getSupervisorsResourcesMap().entrySet()) {
String supervisorId = entry.getKey();
SupervisorResources resources = entry.getValue();
assertTrue(supervisorId, resources.getTotalCpu() >= resources.getUsedCpu());
assertTrue(supervisorId, resources.getTotalMem() >= resources.getUsedMem());
}
// If we didn't take GPUs into account, everything would fit in a single slot.
// But because there is only 1 GPU per node and each of the 2 spouts needs a GPU,
// the topology has to be scheduled on at least 2 nodes, and hence 2 slots.
// Because of this, all of the bolts are scheduled in a single slot together with one of
// the spouts, and the other spout gets its own slot. So everything that can be shared is
// shared.
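// Concretely (an illustrative breakdown consistent with the assertions below, not output from the test):
//   slot 1: one spout executor plus all six bolt executors, so bolt-3's 500 MB shared on-heap and
//           bolt-1's 500 MB worker-scoped shared off-heap are each charged once, and bolt-2's 700 MB
//           node-scoped shared off-heap is charged to this slot's node;
//   slot 2: the remaining spout executor, with no shared memory requests.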
int totalNumberOfTasks = (spoutParallelism + (boltParallelism * numBolts));
double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeap;
double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWorker;
SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId());
Set<WorkerSlot> slots = assignment.getSlots();
Map<String, Double> nodeToTotalShared = assignment.getNodeIdToTotalSharedOffHeapNodeMemory();
LOG.info("NODE TO SHARED OFF HEAP {}", nodeToTotalShared);
Map<WorkerSlot, WorkerResources> scheduledResources = assignment.getScheduledResources();
assertEquals(2, slots.size());
assertEquals(2, nodeToTotalShared.size());
assertEquals(2, scheduledResources.size());
double totalFoundCPU = 0.0;
double totalFoundOnHeap = 0.0;
double totalFoundWorkerOffHeap = 0.0;
for (WorkerSlot ws : slots) {
WorkerResources resources = scheduledResources.get(ws);
totalFoundCPU += resources.get_cpu();
totalFoundOnHeap += resources.get_mem_on_heap();
totalFoundWorkerOffHeap += resources.get_mem_off_heap();
}
assertEquals(totalExpectedCPU, totalFoundCPU, 0.01);
assertEquals(totalExpectedOnHeap, totalFoundOnHeap, 0.01);
assertEquals(totalExpectedWorkerOffHeap, totalFoundWorkerOffHeap, 0.01);
assertEquals(sharedOffHeapNode, nodeToTotalShared.values().stream().mapToDouble((d) -> d).sum(), 0.01);
assertEquals(sharedOnHeap, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_on_heap).sum(), 0.01);
assertEquals(sharedOffHeapWorker, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_off_heap).sum(), 0.01);
}
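For reference, here is a minimal standalone sketch of the arithmetic behind the expected totals asserted above. The class below is illustrative only (it is not part of Storm or of the test) and simply restates the constants defined at the top of the test.
public class GrasSharedMemoryExpectations {

    public static void main(String[] args) {
        // 2 spout executors + 3 bolts * parallelism 2 = 8 executors in total.
        int totalTasks = 2 + (2 * 3);
        // 80.0 (percent of a core), summed over both slots.
        double expectedCpu = totalTasks * 10.0;
        // 580.0 MB: per-executor on-heap memory plus bolt-3's 500 MB shared on-heap request.
        double expectedOnHeap = totalTasks * 10.0 + 500.0;
        // 580.0 MB: per-executor off-heap memory plus bolt-1's 500 MB worker-scoped shared off-heap request.
        double expectedWorkerOffHeap = totalTasks * 10.0 + 500.0;
        // bolt-2's 700 MB SharedOffHeapWithinNode is tracked per node rather than per worker, which is why
        // the test asserts it against getNodeIdToTotalSharedOffHeapNodeMemory() instead of WorkerResources.
        System.out.printf("cpu=%.1f onHeap=%.1f workerOffHeap=%.1f%n", expectedCpu, expectedOnHeap, expectedWorkerOffHeap);
    }
}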
Use of org.apache.storm.scheduler.SupervisorResources in project storm by apache.
In class TestDefaultResourceAwareStrategy, method testDefaultResourceAwareStrategySharedMemory.
/**
 * Test that shared memory is scheduled correctly under each WorkerRestrictionType
 * (no restriction, one executor per worker, one component per worker).
 */
@ParameterizedTest
@EnumSource(WorkerRestrictionType.class)
public void testDefaultResourceAwareStrategySharedMemory(WorkerRestrictionType schedulingLimitation) {
int spoutParallelism = 2;
int boltParallelism = 2;
int numBolts = 3;
double cpuPercent = 10;
double memoryOnHeap = 10;
double memoryOffHeap = 10;
double sharedOnHeapWithinWorker = 400;
double sharedOffHeapWithinNode = 700;
double sharedOffHeapWithinWorker = 600;
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout", new TestSpout(), spoutParallelism);
builder.setBolt("bolt-1", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWithinWorker, "bolt-1 shared off heap within worker")).shuffleGrouping("spout");
builder.setBolt("bolt-2", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "bolt-2 shared off heap within node")).shuffleGrouping("bolt-1");
builder.setBolt("bolt-3", new TestBolt(), boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeapWithinWorker, "bolt-3 shared on heap within worker")).shuffleGrouping("bolt-2");
StormTopology stormToplogy = builder.createTopology();
INimbus iNimbus = new INimbusTest();
Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 500, 2000);
Config conf = createClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null);
conf.put(Config.TOPOLOGY_PRIORITY, 0);
conf.put(Config.TOPOLOGY_NAME, "testTopology");
conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000);
switch(schedulingLimitation) {
case WORKER_RESTRICTION_ONE_EXECUTOR:
conf.put(Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER, true);
break;
case WORKER_RESTRICTION_ONE_COMPONENT:
conf.put(Config.TOPOLOGY_RAS_ONE_COMPONENT_PER_WORKER, true);
break;
}
TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormToplogy, 0, genExecsAndComps(stormToplogy), CURRENT_TIME, "user");
Topologies topologies = new Topologies(topo);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf);
scheduler = new ResourceAwareScheduler();
scheduler.prepare(conf, new StormMetricsRegistry());
scheduler.schedule(topologies, cluster);
for (Entry<String, SupervisorResources> entry : cluster.getSupervisorsResourcesMap().entrySet()) {
String supervisorId = entry.getKey();
SupervisorResources resources = entry.getValue();
assertTrue(supervisorId, resources.getTotalCpu() >= resources.getUsedCpu());
assertTrue(supervisorId, resources.getTotalMem() >= resources.getUsedMem());
}
int totalNumberOfTasks = spoutParallelism + boltParallelism * numBolts;
SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId());
TopologyResources topologyResources = cluster.getTopologyResourcesMap().get(topo.getId());
long numNodes = assignment.getSlotToExecutors().keySet().stream().map(WorkerSlot::getNodeId).distinct().count();
String assignmentString = "Assignments:\n\t" + assignment.getSlotToExecutors().entrySet().stream().map(x -> String.format("Node=%s, components=%s", x.getKey().getNodeId(), x.getValue().stream().map(y -> topo.getComponentFromExecutor(y)).collect(Collectors.joining(",")))).collect(Collectors.joining("\n\t"));
if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_NONE) {
// Everything should fit in a single slot
double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeapWithinWorker;
double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWithinWorker;
assertThat(assignment.getSlots().size(), is(1));
WorkerSlot ws = assignment.getSlots().iterator().next();
String nodeId = ws.getNodeId();
assertThat(assignment.getNodeIdToTotalSharedOffHeapNodeMemory().size(), is(1));
assertThat(assignment.getNodeIdToTotalSharedOffHeapNodeMemory().get(nodeId), closeTo(sharedOffHeapWithinNode, 0.01));
assertThat(assignment.getScheduledResources().size(), is(1));
WorkerResources resources = assignment.getScheduledResources().get(ws);
assertThat(resources.get_cpu(), closeTo(totalExpectedCPU, 0.01));
assertThat(resources.get_mem_on_heap(), closeTo(totalExpectedOnHeap, 0.01));
assertThat(resources.get_mem_off_heap(), closeTo(totalExpectedWorkerOffHeap, 0.01));
assertThat(resources.get_shared_mem_on_heap(), closeTo(sharedOnHeapWithinWorker, 0.01));
assertThat(resources.get_shared_mem_off_heap(), closeTo(sharedOffHeapWithinWorker, 0.01));
} else if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_ONE_EXECUTOR) {
double expectedMemOnHeap = (totalNumberOfTasks * memoryOnHeap) + 2 * sharedOnHeapWithinWorker;
double expectedMemOffHeap = (totalNumberOfTasks * memoryOffHeap) + 2 * sharedOffHeapWithinWorker + 2 * sharedOffHeapWithinNode;
double expectedMemSharedOnHeap = 2 * sharedOnHeapWithinWorker;
double expectedMemSharedOffHeap = 2 * sharedOffHeapWithinWorker + 2 * sharedOffHeapWithinNode;
double expectedMemNonSharedOnHeap = totalNumberOfTasks * memoryOnHeap;
double expectedMemNonSharedOffHeap = totalNumberOfTasks * memoryOffHeap;
assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(expectedMemOnHeap, 0.01));
assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(expectedMemOffHeap, 0.01));
assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(expectedMemSharedOnHeap, 0.01));
assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(expectedMemSharedOffHeap, 0.01));
assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(expectedMemNonSharedOnHeap, 0.01));
assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(expectedMemNonSharedOffHeap, 0.01));
double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
assertThat(topologyResources.getAssignedCpu(), closeTo(totalExpectedCPU, 0.01));
int numAssignedWorkers = cluster.getAssignedNumWorkers(topo);
assertThat(numAssignedWorkers, is(8));
assertThat(assignment.getSlots().size(), is(8));
assertThat(assignmentString, numNodes, is(2L));
} else if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_ONE_COMPONENT) {
double expectedMemOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeapWithinWorker;
double expectedMemOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWithinWorker + sharedOffHeapWithinNode;
double expectedMemSharedOnHeap = sharedOnHeapWithinWorker;
double expectedMemSharedOffHeap = sharedOffHeapWithinWorker + sharedOffHeapWithinNode;
double expectedMemNonSharedOnHeap = totalNumberOfTasks * memoryOnHeap;
double expectedMemNonSharedOffHeap = totalNumberOfTasks * memoryOffHeap;
assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(expectedMemOnHeap, 0.01));
assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(expectedMemOffHeap, 0.01));
assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(expectedMemSharedOnHeap, 0.01));
assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(expectedMemSharedOffHeap, 0.01));
assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(expectedMemNonSharedOnHeap, 0.01));
assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(expectedMemNonSharedOffHeap, 0.01));
double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
assertThat(topologyResources.getAssignedCpu(), closeTo(totalExpectedCPU, 0.01));
int numAssignedWorkers = cluster.getAssignedNumWorkers(topo);
assertThat(numAssignedWorkers, is(4));
assertThat(assignment.getSlots().size(), is(4));
assertThat(numNodes, is(1L));
}
}
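To summarize the three parameterized cases, here is an illustrative sketch of the expected worker and shared-memory bookkeeping. The class and numbers below simply restate the test's constants and assertions; nothing in it is Storm API.
public class WorkerRestrictionExpectations {

    public static void main(String[] args) {
        // 2 spout executors + 3 bolts * parallelism 2 = 8 executors across 4 components.
        int executors = 2 + (2 * 3);
        int components = 4;
        // WORKER_RESTRICTION_NONE: all 8 executors pack into 1 worker on 1 node, so every shared
        // memory request is charged exactly once.
        // WORKER_RESTRICTION_ONE_EXECUTOR: 8 workers across 2 nodes; the per-worker shared requests of
        // bolt-1 and bolt-3 are charged once per bolt executor (2 * 600 off-heap, 2 * 400 on-heap), and
        // the node-scoped 700 MB request is charged twice because the assignment spans 2 nodes.
        // WORKER_RESTRICTION_ONE_COMPONENT: 4 workers on 1 node; each shared request is charged once
        // because both executors of a component land in the same worker (and on the same node).
        System.out.println("executors=" + executors + ", components=" + components);
    }
}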
Use of org.apache.storm.scheduler.SupervisorResources in project storm by apache.
In class Nimbus, method makeSupervisorSummary.
private SupervisorSummary makeSupervisorSummary(String supervisorId, SupervisorInfo info) {
Set<String> blacklistedSupervisorIds = Collections.emptySet();
if (scheduler instanceof BlacklistScheduler) {
BlacklistScheduler bs = (BlacklistScheduler) scheduler;
blacklistedSupervisorIds = bs.getBlacklistSupervisorIds();
}
LOG.debug("INFO: {} ID: {}", info, supervisorId);
int numPorts = 0;
if (info.is_set_meta()) {
numPorts = info.get_meta_size();
}
int numUsedPorts = 0;
if (info.is_set_used_ports()) {
numUsedPorts = info.get_used_ports_size();
}
LOG.debug("NUM PORTS: {}", numPorts);
SupervisorSummary ret = new SupervisorSummary(info.get_hostname(), (int) info.get_uptime_secs(), numPorts, numUsedPorts, supervisorId);
ret.set_total_resources(info.get_resources_map());
SupervisorResources resources = nodeIdToResources.get().get(supervisorId);
if (resources != null && underlyingScheduler instanceof ResourceAwareScheduler) {
ret.set_used_mem(resources.getUsedMem());
ret.set_used_cpu(resources.getUsedCpu());
ret.set_used_generic_resources(resources.getUsedGenericResources());
if (isFragmented(resources)) {
final double availableCpu = resources.getAvailableCpu();
if (availableCpu < 0) {
LOG.warn("Negative fragmented CPU on {}", supervisorId);
}
ret.set_fragmented_cpu(availableCpu);
final double availableMem = resources.getAvailableMem();
if (availableMem < 0) {
LOG.warn("Negative fragmented Mem on {}", supervisorId);
}
ret.set_fragmented_mem(availableMem);
}
}
if (info.is_set_version()) {
ret.set_version(info.get_version());
}
if (blacklistedSupervisorIds.contains(supervisorId)) {
ret.set_blacklisted(true);
} else {
ret.set_blacklisted(false);
}
return ret;
}
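The fragmentation branch above reports capacity left stranded on a supervisor the scheduler considers fragmented. Below is a minimal illustrative sketch of the relationship the negative-value warnings imply, assuming available capacity behaves like total minus used; the helper class is not Storm API.
final class FragmentationSketch {

    // Assumption: SupervisorResources.getAvailableMem() behaves like total minus used, so a negative
    // value means more memory was assigned to the node than it advertises.
    static double fragmentedMem(double totalMem, double usedMem) {
        return totalMem - usedMem;
    }

    public static void main(String[] args) {
        System.out.println(fragmentedMem(2000.0, 1500.0)); // 500.0 MB left over on this supervisor
    }
}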
Use of org.apache.storm.scheduler.SupervisorResources in project storm by apache.
In class Nimbus, method launchServer.
@VisibleForTesting
public void launchServer() throws Exception {
try {
IStormClusterState state = stormClusterState;
NimbusInfo hpi = nimbusHostPortInfo;
LOG.info("Starting Nimbus with conf {}", ConfigUtils.maskPasswords(conf));
validator.prepare(conf);
// add to nimbuses
state.addNimbusHost(hpi.getHost(), new NimbusSummary(hpi.getHost(), hpi.getPort(), Time.currentTimeSecs(), false, STORM_VERSION));
leaderElector.addToLeaderLockQueue();
this.blobStore.startSyncBlobs();
for (ClusterMetricsConsumerExecutor exec : clusterConsumerExceutors) {
exec.prepare();
}
// Leadership coordination may be incomplete when launchServer is called. The previous behavior did a one-time
// check, which could cause Nimbus to miss TopologyActions.GAIN_LEADERSHIP transitions. A similar problem exists
// for HA Nimbus when it is newly elected as leader. Changing to a recurring check addresses both problems.
timer.scheduleRecurring(3, 5, () -> {
try {
boolean isLeader = isLeader();
if (isLeader && !wasLeader) {
for (String topoId : state.activeStorms()) {
transition(topoId, TopologyActions.GAIN_LEADERSHIP, null);
}
clusterMetricSet.setActive(true);
}
wasLeader = isLeader;
} catch (Exception e) {
throw new RuntimeException(e);
}
});
final boolean doNotReassign = (Boolean) conf.getOrDefault(ServerConfigUtils.NIMBUS_DO_NOT_REASSIGN, false);
timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_MONITOR_FREQ_SECS)), () -> {
try {
if (!doNotReassign) {
mkAssignments();
}
doCleanup();
} catch (Exception e) {
throw new RuntimeException(e);
}
});
// Schedule Nimbus inbox cleaner
final int jarExpSecs = ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_INBOX_JAR_EXPIRATION_SECS));
timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_CLEANUP_INBOX_FREQ_SECS)), () -> {
try {
cleanInbox(getInbox(), jarExpSecs);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
// Schedule topology history cleaner
Integer interval = ObjectReader.getInt(conf.get(DaemonConfig.LOGVIEWER_CLEANUP_INTERVAL_SECS), null);
if (interval != null) {
final int lvCleanupAgeMins = ObjectReader.getInt(conf.get(DaemonConfig.LOGVIEWER_CLEANUP_AGE_MINS));
timer.scheduleRecurring(0, interval, () -> {
try {
cleanTopologyHistory(lvCleanupAgeMins);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
}
timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_CREDENTIAL_RENEW_FREQ_SECS)), () -> {
try {
renewCredentials();
} catch (Exception e) {
throw new RuntimeException(e);
}
});
// Periodically make sure the blobstore update time is current. It can fall behind if Nimbus hit an exception
// while updating it, or if a bug caused the blobstore modification time to be skipped on a blob update.
timer.scheduleRecurring(30, ServerConfigUtils.getLocalizerUpdateBlobInterval(conf) * 5, () -> {
try {
blobStore.validateBlobUpdateTime();
} catch (IOException e) {
throw new RuntimeException(e);
}
});
metricsRegistry.registerGauge("nimbus:total-available-memory-non-negative", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(supervisorResources -> Math.max(supervisorResources.getAvailableMem(), 0)).sum());
metricsRegistry.registerGauge("nimbus:available-cpu-non-negative", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(supervisorResources -> Math.max(supervisorResources.getAvailableCpu(), 0)).sum());
metricsRegistry.registerGauge("nimbus:total-memory", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(SupervisorResources::getTotalMem).sum());
metricsRegistry.registerGauge("nimbus:total-cpu", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(SupervisorResources::getTotalCpu).sum());
metricsRegistry.registerGauge("nimbus:longest-scheduling-time-ms", () -> {
// We want to update the longest scheduling time in real time in case the scheduler gets stuck.
// Read the current time before startTime to avoid a potential race with the scheduler's Timer.
Long currTime = Time.nanoTime();
Long startTime = schedulingStartTimeNs.get();
return TimeUnit.NANOSECONDS.toMillis(startTime == null ? longestSchedulingTime.get() : Math.max(currTime - startTime, longestSchedulingTime.get()));
});
metricsRegistry.registerMeter("nimbus:num-launched").mark();
timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.STORM_CLUSTER_METRICS_CONSUMER_PUBLISH_INTERVAL_SECS)), () -> {
try {
if (isLeader()) {
sendClusterMetricsToExecutors();
}
} catch (Exception e) {
throw new RuntimeException(e);
}
});
timer.scheduleRecurring(5, 5, clusterMetricSet);
} catch (Exception e) {
if (Utils.exceptionCauseIsInstanceOf(InterruptedException.class, e)) {
throw e;
}
if (Utils.exceptionCauseIsInstanceOf(InterruptedIOException.class, e)) {
throw e;
}
LOG.error("Error on initialization of nimbus", e);
Utils.exitProcess(13, "Error on initialization of nimbus");
}
}
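The resource gauges registered above fold the per-supervisor SupervisorResources snapshot held in nodeIdToResources into cluster-wide totals. Below is a small standalone sketch of the same aggregation; the NodeResources class is an illustrative stand-in carrying only values analogous to the getters the gauges use.
import java.util.Map;

class ResourceGaugeSketch {

    // Stand-in for SupervisorResources, carrying only the values the gauges above rely on.
    static class NodeResources {
        final double totalMem;
        final double availableMem;

        NodeResources(double totalMem, double availableMem) {
            this.totalMem = totalMem;
            this.availableMem = availableMem;
        }
    }

    static double totalMem(Map<String, NodeResources> byNode) {
        return byNode.values().stream().mapToDouble(r -> r.totalMem).sum();
    }

    static double availableMemNonNegative(Map<String, NodeResources> byNode) {
        // Clamp each node at zero so over-assigned nodes do not drag the cluster-wide gauge negative.
        return byNode.values().stream().mapToDouble(r -> Math.max(r.availableMem, 0)).sum();
    }

    public static void main(String[] args) {
        Map<String, NodeResources> byNode = Map.of(
                "node-1", new NodeResources(2000, 500),
                "node-2", new NodeResources(2000, -100)); // an over-assigned node
        System.out.println(totalMem(byNode));                // 4000.0
        System.out.println(availableMemNonNegative(byNode)); // 500.0, the negative node is clamped to 0
    }
}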