Example 1 with SupervisorResources

Use of org.apache.storm.scheduler.SupervisorResources in project storm by apache.

From the class TestGenericResourceAwareStrategy, the method testGenericResourceAwareStrategySharedMemory.

/**
 * Test that the GenericResourceAwareStrategy schedules shared memory correctly when generic resources (GPUs) constrain placement.
 */
@Test
public void testGenericResourceAwareStrategySharedMemory() {
    int spoutParallelism = 2;
    int boltParallelism = 2;
    int numBolts = 3;
    double cpuPercent = 10;
    double memoryOnHeap = 10;
    double memoryOffHeap = 10;
    double sharedOnHeap = 500;
    double sharedOffHeapNode = 700;
    double sharedOffHeapWorker = 500;
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("spout", new TestSpout(), spoutParallelism).addResource("gpu.count", 1.0);
    builder.setBolt("bolt-1", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWorker, "bolt-1 shared off heap worker")).shuffleGrouping("spout");
    builder.setBolt("bolt-2", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapNode, "bolt-2 shared node")).shuffleGrouping("bolt-1");
    builder.setBolt("bolt-3", new TestBolt(), boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeap, "bolt-3 shared worker")).shuffleGrouping("bolt-2");
    StormTopology stormTopology = builder.createTopology();
    INimbus iNimbus = new INimbusTest();
    Config conf = createGrasClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null, Collections.emptyMap());
    Map<String, Double> genericResourcesMap = new HashMap<>();
    genericResourcesMap.put("gpu.count", 1.0);
    Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 500, 2000, genericResourcesMap);
    conf.put(Config.TOPOLOGY_PRIORITY, 0);
    conf.put(Config.TOPOLOGY_NAME, "testTopology");
    conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000);
    TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, genExecsAndComps(stormTopology), currentTime, "user");
    Topologies topologies = new Topologies(topo);
    Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf);
    scheduler = new ResourceAwareScheduler();
    scheduler.prepare(conf, new StormMetricsRegistry());
    scheduler.schedule(topologies, cluster);
    for (Entry<String, SupervisorResources> entry : cluster.getSupervisorsResourcesMap().entrySet()) {
        String supervisorId = entry.getKey();
        SupervisorResources resources = entry.getValue();
        assertTrue(supervisorId, resources.getTotalCpu() >= resources.getUsedCpu());
        assertTrue(supervisorId, resources.getTotalMem() >= resources.getUsedMem());
    }
    // If we didn't take GPUs into account, everything would fit in a single slot.
    // But because there is only 1 GPU per node, and each of the 2 spouts needs a GPU,
    // the topology has to be scheduled on at least 2 nodes, and hence 2 slots.
    // Because of this, all of the bolts are scheduled on a single slot with one of
    // the spouts, and the other spout is on its own slot. So everything that can be
    // shared is shared.
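    // Worked numbers for the expectations below, from the constants at the top:
    //   totalNumberOfTasks         = 2 + (2 * 3)    = 8
    //   totalExpectedCPU           = 8 * 10         = 80
    //   totalExpectedOnHeap        = (8 * 10) + 500 = 580
    //   totalExpectedWorkerOffHeap = (8 * 10) + 500 = 580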
    int totalNumberOfTasks = (spoutParallelism + (boltParallelism * numBolts));
    double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
    double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeap;
    double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWorker;
    SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId());
    Set<WorkerSlot> slots = assignment.getSlots();
    Map<String, Double> nodeToTotalShared = assignment.getNodeIdToTotalSharedOffHeapNodeMemory();
    LOG.info("NODE TO SHARED OFF HEAP {}", nodeToTotalShared);
    Map<WorkerSlot, WorkerResources> scheduledResources = assignment.getScheduledResources();
    assertEquals(2, slots.size());
    assertEquals(2, nodeToTotalShared.size());
    assertEquals(2, scheduledResources.size());
    double totalFoundCPU = 0.0;
    double totalFoundOnHeap = 0.0;
    double totalFoundWorkerOffHeap = 0.0;
    for (WorkerSlot ws : slots) {
        WorkerResources resources = scheduledResources.get(ws);
        totalFoundCPU += resources.get_cpu();
        totalFoundOnHeap += resources.get_mem_on_heap();
        totalFoundWorkerOffHeap += resources.get_mem_off_heap();
    }
    assertEquals(totalExpectedCPU, totalFoundCPU, 0.01);
    assertEquals(totalExpectedOnHeap, totalFoundOnHeap, 0.01);
    assertEquals(totalExpectedWorkerOffHeap, totalFoundWorkerOffHeap, 0.01);
    assertEquals(sharedOffHeapNode, nodeToTotalShared.values().stream().mapToDouble((d) -> d).sum(), 0.01);
    assertEquals(sharedOnHeap, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_on_heap).sum(), 0.01);
    assertEquals(sharedOffHeapWorker, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_off_heap).sum(), 0.01);
}
Also used : SharedOnHeap(org.apache.storm.topology.SharedOnHeap) SharedOffHeapWithinWorker(org.apache.storm.topology.SharedOffHeapWithinWorker) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) HashMap(java.util.HashMap) DaemonConfig(org.apache.storm.DaemonConfig) Config(org.apache.storm.Config) StormTopology(org.apache.storm.generated.StormTopology) StormMetricsRegistry(org.apache.storm.metric.StormMetricsRegistry) ResourceAwareScheduler(org.apache.storm.scheduler.resource.ResourceAwareScheduler) TestUtilsForResourceAwareScheduler(org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler) ResourceMetrics(org.apache.storm.scheduler.resource.normalization.ResourceMetrics) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) Topologies(org.apache.storm.scheduler.Topologies) SupervisorDetails(org.apache.storm.scheduler.SupervisorDetails) SharedOffHeapWithinNode(org.apache.storm.topology.SharedOffHeapWithinNode) SupervisorResources(org.apache.storm.scheduler.SupervisorResources) WorkerResources(org.apache.storm.generated.WorkerResources) Cluster(org.apache.storm.scheduler.Cluster) INimbus(org.apache.storm.scheduler.INimbus) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) Test(org.junit.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
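
In this test, genSupervisors attaches gpu.count to every supervisor directly. On a real cluster the equivalent is the supervisor resources map in the supervisor's configuration; below is a minimal sketch, assuming the standard Config.SUPERVISOR_RESOURCES_MAP key ("supervisor.resources.map") and one GPU per node as in the test.

import java.util.Collections;

import org.apache.storm.Config;

public class SupervisorGpuConfigSketch {
    public static void main(String[] args) {
        // Each supervisor advertises its generic resources (here, one GPU) so the
        // GenericResourceAwareStrategy can match them against component requests
        // such as addResource("gpu.count", 1.0) above.
        Config supervisorConf = new Config();
        supervisorConf.put(Config.SUPERVISOR_RESOURCES_MAP,
                Collections.singletonMap("gpu.count", 1.0));
        System.out.println(supervisorConf.get(Config.SUPERVISOR_RESOURCES_MAP));
    }
}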

Example 2 with SupervisorResources

Use of org.apache.storm.scheduler.SupervisorResources in project storm by apache.

From the class TestDefaultResourceAwareStrategy, the method testDefaultResourceAwareStrategySharedMemory.

/**
 * Test that shared memory is scheduled correctly under each WorkerRestrictionType: no restriction, one executor per worker, and one component per worker.
 */
@ParameterizedTest
@EnumSource(WorkerRestrictionType.class)
public void testDefaultResourceAwareStrategySharedMemory(WorkerRestrictionType schedulingLimitation) {
    int spoutParallelism = 2;
    int boltParallelism = 2;
    int numBolts = 3;
    double cpuPercent = 10;
    double memoryOnHeap = 10;
    double memoryOffHeap = 10;
    double sharedOnHeapWithinWorker = 400;
    double sharedOffHeapWithinNode = 700;
    double sharedOffHeapWithinWorker = 600;
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("spout", new TestSpout(), spoutParallelism);
    builder.setBolt("bolt-1", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWithinWorker, "bolt-1 shared off heap within worker")).shuffleGrouping("spout");
    builder.setBolt("bolt-2", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "bolt-2 shared off heap within node")).shuffleGrouping("bolt-1");
    builder.setBolt("bolt-3", new TestBolt(), boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeapWithinWorker, "bolt-3 shared on heap within worker")).shuffleGrouping("bolt-2");
    StormTopology stormTopology = builder.createTopology();
    INimbus iNimbus = new INimbusTest();
    Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 500, 2000);
    Config conf = createClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null);
    conf.put(Config.TOPOLOGY_PRIORITY, 0);
    conf.put(Config.TOPOLOGY_NAME, "testTopology");
    conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000);
    switch (schedulingLimitation) {
        case WORKER_RESTRICTION_ONE_EXECUTOR:
            conf.put(Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER, true);
            break;
        case WORKER_RESTRICTION_ONE_COMPONENT:
            conf.put(Config.TOPOLOGY_RAS_ONE_COMPONENT_PER_WORKER, true);
            break;
    }
    TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, genExecsAndComps(stormTopology), CURRENT_TIME, "user");
    Topologies topologies = new Topologies(topo);
    Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf);
    scheduler = new ResourceAwareScheduler();
    scheduler.prepare(conf, new StormMetricsRegistry());
    scheduler.schedule(topologies, cluster);
    for (Entry<String, SupervisorResources> entry : cluster.getSupervisorsResourcesMap().entrySet()) {
        String supervisorId = entry.getKey();
        SupervisorResources resources = entry.getValue();
        assertTrue(supervisorId, resources.getTotalCpu() >= resources.getUsedCpu());
        assertTrue(supervisorId, resources.getTotalMem() >= resources.getUsedMem());
    }
    int totalNumberOfTasks = spoutParallelism + boltParallelism * numBolts;
    SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId());
    TopologyResources topologyResources = cluster.getTopologyResourcesMap().get(topo.getId());
    long numNodes = assignment.getSlotToExecutors().keySet().stream().map(WorkerSlot::getNodeId).distinct().count();
    String assignmentString = "Assignments:\n\t"
            + assignment.getSlotToExecutors().entrySet().stream()
                    .map(x -> String.format("Node=%s, components=%s",
                            x.getKey().getNodeId(),
                            x.getValue().stream()
                                    .map(y -> topo.getComponentFromExecutor(y))
                                    .collect(Collectors.joining(","))))
                    .collect(Collectors.joining("\n\t"));
    if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_NONE) {
        // Everything should fit in a single slot
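        // Worked numbers: totalNumberOfTasks = 2 + (2 * 3) = 8, so
        //   totalExpectedCPU           = 8 * 10         = 80
        //   totalExpectedOnHeap        = (8 * 10) + 400 = 480
        //   totalExpectedWorkerOffHeap = (8 * 10) + 600 = 680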
        double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
        double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeapWithinWorker;
        double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWithinWorker;
        assertThat(assignment.getSlots().size(), is(1));
        WorkerSlot ws = assignment.getSlots().iterator().next();
        String nodeId = ws.getNodeId();
        assertThat(assignment.getNodeIdToTotalSharedOffHeapNodeMemory().size(), is(1));
        assertThat(assignment.getNodeIdToTotalSharedOffHeapNodeMemory().get(nodeId), closeTo(sharedOffHeapWithinNode, 0.01));
        assertThat(assignment.getScheduledResources().size(), is(1));
        WorkerResources resources = assignment.getScheduledResources().get(ws);
        assertThat(resources.get_cpu(), closeTo(totalExpectedCPU, 0.01));
        assertThat(resources.get_mem_on_heap(), closeTo(totalExpectedOnHeap, 0.01));
        assertThat(resources.get_mem_off_heap(), closeTo(totalExpectedWorkerOffHeap, 0.01));
        assertThat(resources.get_shared_mem_on_heap(), closeTo(sharedOnHeapWithinWorker, 0.01));
        assertThat(resources.get_shared_mem_off_heap(), closeTo(sharedOffHeapWithinWorker, 0.01));
    } else if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_ONE_EXECUTOR) {
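        // With one executor per worker, the 8 executors get 8 workers across 2 nodes
        // (asserted below). The two bolt-1 executors land in two separate workers, so
        // sharedOffHeapWithinWorker is allocated twice; likewise sharedOnHeapWithinWorker
        // for bolt-3. The per-node region for bolt-2 is allocated once per node, so it
        // is also counted twice.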
        double expectedMemOnHeap = (totalNumberOfTasks * memoryOnHeap) + 2 * sharedOnHeapWithinWorker;
        double expectedMemOffHeap = (totalNumberOfTasks * memoryOffHeap) + 2 * sharedOffHeapWithinWorker + 2 * sharedOffHeapWithinNode;
        double expectedMemSharedOnHeap = 2 * sharedOnHeapWithinWorker;
        double expectedMemSharedOffHeap = 2 * sharedOffHeapWithinWorker + 2 * sharedOffHeapWithinNode;
        double expectedMemNonSharedOnHeap = totalNumberOfTasks * memoryOnHeap;
        double expectedMemNonSharedOffHeap = totalNumberOfTasks * memoryOffHeap;
        assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(expectedMemOnHeap, 0.01));
        assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(expectedMemOffHeap, 0.01));
        assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(expectedMemSharedOnHeap, 0.01));
        assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(expectedMemSharedOffHeap, 0.01));
        assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(expectedMemNonSharedOnHeap, 0.01));
        assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(expectedMemNonSharedOffHeap, 0.01));
        double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
        assertThat(topologyResources.getAssignedCpu(), closeTo(totalExpectedCPU, 0.01));
        int numAssignedWorkers = cluster.getAssignedNumWorkers(topo);
        assertThat(numAssignedWorkers, is(8));
        assertThat(assignment.getSlots().size(), is(8));
        assertThat(assignmentString, numNodes, is(2L));
    } else if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_ONE_COMPONENT) {
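        // With one component per worker, the 4 components (spout, bolt-1, bolt-2, bolt-3)
        // get 4 workers on a single node (asserted below), so each shared memory region
        // is allocated exactly once.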
        double expectedMemOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeapWithinWorker;
        double expectedMemOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWithinWorker + sharedOffHeapWithinNode;
        double expectedMemSharedOnHeap = sharedOnHeapWithinWorker;
        double expectedMemSharedOffHeap = sharedOffHeapWithinWorker + sharedOffHeapWithinNode;
        double expectedMemNonSharedOnHeap = totalNumberOfTasks * memoryOnHeap;
        double expectedMemNonSharedOffHeap = totalNumberOfTasks * memoryOffHeap;
        assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(expectedMemOnHeap, 0.01));
        assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(expectedMemOffHeap, 0.01));
        assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(expectedMemSharedOnHeap, 0.01));
        assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(expectedMemSharedOffHeap, 0.01));
        assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(expectedMemNonSharedOnHeap, 0.01));
        assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(expectedMemNonSharedOffHeap, 0.01));
        double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
        assertThat(topologyResources.getAssignedCpu(), closeTo(totalExpectedCPU, 0.01));
        int numAssignedWorkers = cluster.getAssignedNumWorkers(topo);
        assertThat(numAssignedWorkers, is(4));
        assertThat(assignment.getSlots().size(), is(4));
        assertThat(numNodes, is(1L));
    }
}
Also used : Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) INimbus(org.apache.storm.scheduler.INimbus) SupervisorResources(org.apache.storm.scheduler.SupervisorResources) ExtendWith(org.junit.jupiter.api.extension.ExtendWith) Matchers.closeTo(org.hamcrest.Matchers.closeTo) ResourceMetrics(org.apache.storm.scheduler.resource.normalization.ResourceMetrics) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) Map(java.util.Map) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) NodeSorterHostProximity(org.apache.storm.scheduler.resource.strategies.scheduling.sorter.NodeSorterHostProximity) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) DNSToSwitchMapping(org.apache.storm.networktopography.DNSToSwitchMapping) Collection(java.util.Collection) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) Collectors(java.util.stream.Collectors) SharedOnHeap(org.apache.storm.topology.SharedOnHeap) Test(org.junit.jupiter.api.Test) WorkerResources(org.apache.storm.generated.WorkerResources) List(java.util.List) TestUtilsForResourceAwareScheduler(org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler) Entry(java.util.Map.Entry) Config(org.apache.storm.Config) Matchers.is(org.hamcrest.Matchers.is) InvalidTopologyException(org.apache.storm.generated.InvalidTopologyException) StormCommon(org.apache.storm.daemon.StormCommon) ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) IScheduler(org.apache.storm.scheduler.IScheduler) RasNode(org.apache.storm.scheduler.resource.RasNode) SharedOffHeapWithinNode(org.apache.storm.topology.SharedOffHeapWithinNode) NodeSorter(org.apache.storm.scheduler.resource.strategies.scheduling.sorter.NodeSorter) EnumSource(org.junit.jupiter.params.provider.EnumSource) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Topologies(org.apache.storm.scheduler.Topologies) ServerUtils(org.apache.storm.utils.ServerUtils) StormTopology(org.apache.storm.generated.StormTopology) NormalizedResourcesExtension(org.apache.storm.scheduler.resource.normalization.NormalizedResourcesExtension) LinkedList(java.util.LinkedList) StormMetricsRegistry(org.apache.storm.metric.StormMetricsRegistry) ValueSource(org.junit.jupiter.params.provider.ValueSource) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) SharedOffHeapWithinWorker(org.apache.storm.topology.SharedOffHeapWithinWorker) INodeSorter(org.apache.storm.scheduler.resource.strategies.scheduling.sorter.INodeSorter) SupervisorDetails(org.apache.storm.scheduler.SupervisorDetails) TopologyResources(org.apache.storm.daemon.nimbus.TopologyResources) Cluster(org.apache.storm.scheduler.Cluster) ResourceAwareScheduler(org.apache.storm.scheduler.resource.ResourceAwareScheduler) SchedulingResult(org.apache.storm.scheduler.resource.SchedulingResult) Nimbus(org.apache.storm.daemon.nimbus.Nimbus) AfterEach(org.junit.jupiter.api.AfterEach) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Assert(org.junit.Assert) Collections(java.util.Collections)

Example 3 with SupervisorResources

Use of org.apache.storm.scheduler.SupervisorResources in project storm by apache.

From the class Nimbus, the method makeSupervisorSummary.

private SupervisorSummary makeSupervisorSummary(String supervisorId, SupervisorInfo info) {
    Set<String> blacklistedSupervisorIds = Collections.emptySet();
    if (scheduler instanceof BlacklistScheduler) {
        BlacklistScheduler bs = (BlacklistScheduler) scheduler;
        blacklistedSupervisorIds = bs.getBlacklistSupervisorIds();
    }
    LOG.debug("INFO: {} ID: {}", info, supervisorId);
    int numPorts = 0;
    if (info.is_set_meta()) {
        numPorts = info.get_meta_size();
    }
    int numUsedPorts = 0;
    if (info.is_set_used_ports()) {
        numUsedPorts = info.get_used_ports_size();
    }
    LOG.debug("NUM PORTS: {}", numPorts);
    SupervisorSummary ret = new SupervisorSummary(info.get_hostname(), (int) info.get_uptime_secs(), numPorts, numUsedPorts, supervisorId);
    ret.set_total_resources(info.get_resources_map());
    SupervisorResources resources = nodeIdToResources.get().get(supervisorId);
    if (resources != null && underlyingScheduler instanceof ResourceAwareScheduler) {
        ret.set_used_mem(resources.getUsedMem());
        ret.set_used_cpu(resources.getUsedCpu());
        ret.set_used_generic_resources(resources.getUsedGenericResources());
        if (isFragmented(resources)) {
            final double availableCpu = resources.getAvailableCpu();
            if (availableCpu < 0) {
                LOG.warn("Negative fragmented CPU on {}", supervisorId);
            }
            ret.set_fragmented_cpu(availableCpu);
            final double availableMem = resources.getAvailableMem();
            if (availableMem < 0) {
                LOG.warn("Negative fragmented Mem on {}", supervisorId);
            }
            ret.set_fragmented_mem(availableMem);
        }
    }
    if (info.is_set_version()) {
        ret.set_version(info.get_version());
    }
    if (blacklistedSupervisorIds.contains(supervisorId)) {
        ret.set_blacklisted(true);
    } else {
        ret.set_blacklisted(false);
    }
    return ret;
}
Also used : SupervisorResources(org.apache.storm.scheduler.SupervisorResources) BlacklistScheduler(org.apache.storm.scheduler.blacklist.BlacklistScheduler) SupervisorSummary(org.apache.storm.generated.SupervisorSummary) WorkerMetricPoint(org.apache.storm.generated.WorkerMetricPoint) DataPoint(org.apache.storm.metric.api.DataPoint) ResourceAwareScheduler(org.apache.storm.scheduler.resource.ResourceAwareScheduler)
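
As a usage note, here is a minimal sketch of reading back the fields populated above. The getter names follow the Thrift-generated naming that mirrors the setters used in makeSupervisorSummary; the describe helper itself is hypothetical.

import org.apache.storm.generated.SupervisorSummary;

public final class SupervisorSummaryFormat {
    // Hypothetical helper: formats the fields that makeSupervisorSummary fills in.
    // num_workers/num_used_workers hold the supervisor's total and used ports.
    public static String describe(SupervisorSummary s) {
        return String.format("%s: %d/%d slots used, mem=%.1f, cpu=%.1f, blacklisted=%b",
                s.get_host(), s.get_num_used_workers(), s.get_num_workers(),
                s.get_used_mem(), s.get_used_cpu(), s.is_blacklisted());
    }
}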

Example 4 with SupervisorResources

Use of org.apache.storm.scheduler.SupervisorResources in project storm by apache.

From the class Nimbus, the method launchServer.

@VisibleForTesting
public void launchServer() throws Exception {
    try {
        IStormClusterState state = stormClusterState;
        NimbusInfo hpi = nimbusHostPortInfo;
        LOG.info("Starting Nimbus with conf {}", ConfigUtils.maskPasswords(conf));
        validator.prepare(conf);
        // add to nimbuses
        state.addNimbusHost(hpi.getHost(), new NimbusSummary(hpi.getHost(), hpi.getPort(), Time.currentTimeSecs(), false, STORM_VERSION));
        leaderElector.addToLeaderLockQueue();
        this.blobStore.startSyncBlobs();
        for (ClusterMetricsConsumerExecutor exec : clusterConsumerExceutors) {
            exec.prepare();
        }
        // Leadership coordination may be incomplete when launchServer is called. The previous
        // behavior did a one-time check, which could cause Nimbus to miss TopologyActions.GAIN_LEADERSHIP
        // transitions. A similar problem exists for HA Nimbus when it is newly elected as leader.
        // Checking on a recurring schedule addresses both problems.
        timer.scheduleRecurring(3, 5, () -> {
            try {
                boolean isLeader = isLeader();
                if (isLeader && !wasLeader) {
                    for (String topoId : state.activeStorms()) {
                        transition(topoId, TopologyActions.GAIN_LEADERSHIP, null);
                    }
                    clusterMetricSet.setActive(true);
                }
                wasLeader = isLeader;
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
        final boolean doNotReassign = (Boolean) conf.getOrDefault(ServerConfigUtils.NIMBUS_DO_NOT_REASSIGN, false);
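        // Main scheduling loop: recompute assignments every NIMBUS_MONITOR_FREQ_SECS,
        // unless reassignment has been disabled (NIMBUS_DO_NOT_REASSIGN, a testing hook).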
        timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_MONITOR_FREQ_SECS)), () -> {
            try {
                if (!doNotReassign) {
                    mkAssignments();
                }
                doCleanup();
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
        // Schedule Nimbus inbox cleaner
        final int jarExpSecs = ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_INBOX_JAR_EXPIRATION_SECS));
        timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_CLEANUP_INBOX_FREQ_SECS)), () -> {
            try {
                cleanInbox(getInbox(), jarExpSecs);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
        // Schedule topology history cleaner
        Integer interval = ObjectReader.getInt(conf.get(DaemonConfig.LOGVIEWER_CLEANUP_INTERVAL_SECS), null);
        if (interval != null) {
            final int lvCleanupAgeMins = ObjectReader.getInt(conf.get(DaemonConfig.LOGVIEWER_CLEANUP_AGE_MINS));
            timer.scheduleRecurring(0, interval, () -> {
                try {
                    cleanTopologyHistory(lvCleanupAgeMins);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            });
        }
        timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.NIMBUS_CREDENTIAL_RENEW_FREQ_SECS)), () -> {
            try {
                renewCredentials();
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
        // Periodically make sure the blobstore update time is current. It could be stale if Nimbus
        // hit an exception while updating it, or if a bug caused a missed update of the blobstore
        // mod time when a blob was updated.
        timer.scheduleRecurring(30, ServerConfigUtils.getLocalizerUpdateBlobInterval(conf) * 5, () -> {
            try {
                blobStore.validateBlobUpdateTime();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        metricsRegistry.registerGauge("nimbus:total-available-memory-non-negative", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(supervisorResources -> Math.max(supervisorResources.getAvailableMem(), 0)).sum());
        metricsRegistry.registerGauge("nimbus:available-cpu-non-negative", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(supervisorResources -> Math.max(supervisorResources.getAvailableCpu(), 0)).sum());
        metricsRegistry.registerGauge("nimbus:total-memory", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(SupervisorResources::getTotalMem).sum());
        metricsRegistry.registerGauge("nimbus:total-cpu", () -> nodeIdToResources.get().values().parallelStream().mapToDouble(SupervisorResources::getTotalCpu).sum());
        metricsRegistry.registerGauge("nimbus:longest-scheduling-time-ms", () -> {
            // We want to update the longest scheduling time in real time, in case the scheduler gets stuck.
            // Read the current time before startTime to avoid a potential race with the scheduler's Timer.
            Long currTime = Time.nanoTime();
            Long startTime = schedulingStartTimeNs.get();
            return TimeUnit.NANOSECONDS.toMillis(startTime == null ? longestSchedulingTime.get() : Math.max(currTime - startTime, longestSchedulingTime.get()));
        });
        metricsRegistry.registerMeter("nimbus:num-launched").mark();
        timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.STORM_CLUSTER_METRICS_CONSUMER_PUBLISH_INTERVAL_SECS)), () -> {
            try {
                if (isLeader()) {
                    sendClusterMetricsToExecutors();
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
        timer.scheduleRecurring(5, 5, clusterMetricSet);
    } catch (Exception e) {
        if (Utils.exceptionCauseIsInstanceOf(InterruptedException.class, e)) {
            throw e;
        }
        if (Utils.exceptionCauseIsInstanceOf(InterruptedIOException.class, e)) {
            throw e;
        }
        LOG.error("Error on initialization of nimbus", e);
        Utils.exitProcess(13, "Error on initialization of nimbus");
    }
}
Also used : InterruptedIOException(java.io.InterruptedIOException) SupervisorResources(org.apache.storm.scheduler.SupervisorResources) ClusterMetricsConsumerExecutor(org.apache.storm.metric.ClusterMetricsConsumerExecutor) NimbusSummary(org.apache.storm.generated.NimbusSummary) IOException(java.io.IOException) InterruptedIOException(java.io.InterruptedIOException) WrappedAuthorizationException(org.apache.storm.utils.WrappedAuthorizationException) IOException(java.io.IOException) IllegalStateException(org.apache.storm.generated.IllegalStateException) AlreadyAliveException(org.apache.storm.generated.AlreadyAliveException) WrappedNotAliveException(org.apache.storm.utils.WrappedNotAliveException) WrappedInvalidTopologyException(org.apache.storm.utils.WrappedInvalidTopologyException) AuthorizationException(org.apache.storm.generated.AuthorizationException) NotAliveException(org.apache.storm.generated.NotAliveException) WrappedAlreadyAliveException(org.apache.storm.utils.WrappedAlreadyAliveException) InterruptedIOException(java.io.InterruptedIOException) KeyAlreadyExistsException(org.apache.storm.generated.KeyAlreadyExistsException) TException(org.apache.storm.thrift.TException) WrappedIllegalStateException(org.apache.storm.utils.WrappedIllegalStateException) KeyNotFoundException(org.apache.storm.generated.KeyNotFoundException) InvalidTopologyException(org.apache.storm.generated.InvalidTopologyException) BindException(java.net.BindException) WorkerMetricPoint(org.apache.storm.generated.WorkerMetricPoint) DataPoint(org.apache.storm.metric.api.DataPoint) NimbusInfo(org.apache.storm.nimbus.NimbusInfo) AtomicLong(java.util.concurrent.atomic.AtomicLong) IStormClusterState(org.apache.storm.cluster.IStormClusterState) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) VisibleForTesting(org.apache.storm.shade.com.google.common.annotations.VisibleForTesting)
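
The four resource gauges above all reduce the node-to-SupervisorResources map to a cluster-wide scalar. Below is a standalone sketch of that reduction, using only the getters that appear in the gauges; ClusterResourceTotals is a hypothetical helper, not part of Storm.

import java.util.Map;

import org.apache.storm.scheduler.SupervisorResources;

public final class ClusterResourceTotals {
    // Total memory across all supervisors, as in the "nimbus:total-memory" gauge.
    public static double totalMem(Map<String, SupervisorResources> nodeIdToResources) {
        return nodeIdToResources.values().stream()
                .mapToDouble(SupervisorResources::getTotalMem).sum();
    }

    // Available memory, clamped at zero per node so over-subscribed supervisors do not
    // drag the sum negative, as in "nimbus:total-available-memory-non-negative".
    public static double availableMemNonNegative(Map<String, SupervisorResources> nodeIdToResources) {
        return nodeIdToResources.values().stream()
                .mapToDouble(r -> Math.max(r.getAvailableMem(), 0)).sum();
    }
}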

Aggregations

SupervisorResources (org.apache.storm.scheduler.SupervisorResources) 4
ResourceAwareScheduler (org.apache.storm.scheduler.resource.ResourceAwareScheduler) 3
HashMap (java.util.HashMap) 2
Config (org.apache.storm.Config) 2
InvalidTopologyException (org.apache.storm.generated.InvalidTopologyException) 2
StormTopology (org.apache.storm.generated.StormTopology) 2
WorkerMetricPoint (org.apache.storm.generated.WorkerMetricPoint) 2
WorkerResources (org.apache.storm.generated.WorkerResources) 2
StormMetricsRegistry (org.apache.storm.metric.StormMetricsRegistry) 2
DataPoint (org.apache.storm.metric.api.DataPoint) 2
Cluster (org.apache.storm.scheduler.Cluster) 2
INimbus (org.apache.storm.scheduler.INimbus) 2
SchedulerAssignment (org.apache.storm.scheduler.SchedulerAssignment) 2
SupervisorDetails (org.apache.storm.scheduler.SupervisorDetails) 2
Topologies (org.apache.storm.scheduler.Topologies) 2
TopologyDetails (org.apache.storm.scheduler.TopologyDetails) 2
WorkerSlot (org.apache.storm.scheduler.WorkerSlot) 2
TestUtilsForResourceAwareScheduler (org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler) 2
ResourceMetrics (org.apache.storm.scheduler.resource.normalization.ResourceMetrics) 2
SharedOffHeapWithinNode (org.apache.storm.topology.SharedOffHeapWithinNode) 2