Search in sources :

Example 61 with StormTopology

use of org.apache.storm.generated.StormTopology in project storm by apache.

In class CaptureLoad, method captureTopology.

/**
 * Captures a running topology's configuration, component layout and observed statistics
 * from Nimbus and packages them as a {@link TopologyLoadConf}.
 *
 * <p>The method queries the topology info, page info, user topology and topology conf, keeps only the
 * keys listed in {@code TopologyLoadConf.IMPORTANT_CONF_KEYS}, records every bolt/spout with its
 * parallelism hint and (when available) CPU and on-heap memory load, and derives per-stream
 * latency and emit-rate statistics from the executor summaries.
 *
 * @param client Nimbus client used for all topology queries
 * @param topologySummary summary of the topology to capture; its name and id are read
 * @return the captured load configuration for the topology
 * @throws Exception if any of the Nimbus calls or the processing of their results fails
 */
static TopologyLoadConf captureTopology(Nimbus.Iface client, TopologySummary topologySummary) throws Exception {
    String topologyName = topologySummary.get_name();
    LOG.info("Capturing {}...", topologyName);
    String topologyId = topologySummary.get_id();
    TopologyInfo info = client.getTopologyInfo(topologyId);
    TopologyPageInfo tpinfo = client.getTopologyPageInfo(topologyId, ":all-time", false);
    @SuppressWarnings("checkstyle:VariableDeclarationUsageDistance") StormTopology topo = client.getUserTopology(topologyId);
    // Done capturing topology information...
    Map<String, Object> savedTopoConf = new HashMap<>();
    // The topology conf arrives as a JSON string; the parsed value is cast to a String-keyed map.
    @SuppressWarnings("unchecked") Map<String, Object> topoConf = (Map<String, Object>) JSONValue.parse(client.getTopologyConf(topologyId));
    for (String key : TopologyLoadConf.IMPORTANT_CONF_KEYS) {
        Object o = topoConf.get(key);
        if (o != null) {
            savedTopoConf.put(key, o);
            LOG.info("with config {}: {}", key, o);
        }
    }
    // Lets use the number of actually scheduled workers as a way to bridge RAS and non-RAS
    int numWorkers = tpinfo.get_num_workers();
    if (savedTopoConf.containsKey(Config.TOPOLOGY_WORKERS)) {
        numWorkers = Math.max(numWorkers, ((Number) savedTopoConf.get(Config.TOPOLOGY_WORKERS)).intValue());
    }
    savedTopoConf.put(Config.TOPOLOGY_WORKERS, numWorkers);
    Map<String, LoadCompConf.Builder> boltBuilders = new HashMap<>();
    Map<String, LoadCompConf.Builder> spoutBuilders = new HashMap<>();
    List<InputStream.Builder> inputStreams = new ArrayList<>();
    Map<GlobalStreamId, OutputStream.Builder> outStreams = new HashMap<>();
    // Bolts
    if (topo.get_bolts() != null) {
        for (Map.Entry<String, Bolt> boltSpec : topo.get_bolts().entrySet()) {
            String boltComp = boltSpec.getKey();
            LOG.info("Found bolt {}...", boltComp);
            Bolt bolt = boltSpec.getValue();
            ComponentCommon common = bolt.get_common();
            Map<GlobalStreamId, Grouping> inputs = common.get_inputs();
            if (inputs != null) {
                for (Map.Entry<GlobalStreamId, Grouping> input : inputs.entrySet()) {
                    GlobalStreamId id = input.getKey();
                    LOG.info("with input {}...", id);
                    Grouping grouping = input.getValue();
                    InputStream.Builder builder = new InputStream.Builder().withId(id.get_streamId()).withFromComponent(id.get_componentId()).withToComponent(boltComp).withGroupingType(grouping);
                    inputStreams.add(builder);
                }
            }
            Map<String, StreamInfo> outputs = common.get_streams();
            if (outputs != null) {
                for (String name : outputs.keySet()) {
                    GlobalStreamId id = new GlobalStreamId(boltComp, name);
                    LOG.info("and output {}...", id);
                    OutputStream.Builder builder = new OutputStream.Builder().withId(name);
                    outStreams.put(id, builder);
                }
            }
            LoadCompConf.Builder builder = new LoadCompConf.Builder().withParallelism(common.get_parallelism_hint()).withId(boltComp);
            boltBuilders.put(boltComp, builder);
        }
        Map<String, Map<String, Double>> boltResources = getBoltsResources(topo, topoConf);
        for (Map.Entry<String, Map<String, Double>> entry : boltResources.entrySet()) {
            LoadCompConf.Builder bd = boltBuilders.get(entry.getKey());
            if (bd != null) {
                Map<String, Double> resources = entry.getValue();
                Double cpu = resources.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
                if (cpu != null) {
                    bd.withCpuLoad(cpu);
                }
                Double mem = resources.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
                if (mem != null) {
                    bd.withMemoryLoad(mem);
                }
            }
        }
    }
    // Spouts
    if (topo.get_spouts() != null) {
        for (Map.Entry<String, SpoutSpec> spoutSpec : topo.get_spouts().entrySet()) {
            String spoutComp = spoutSpec.getKey();
            LOG.info("Found Spout {}...", spoutComp);
            SpoutSpec spout = spoutSpec.getValue();
            ComponentCommon common = spout.get_common();
            Map<String, StreamInfo> outputs = common.get_streams();
            if (outputs != null) {
                for (String name : outputs.keySet()) {
                    GlobalStreamId id = new GlobalStreamId(spoutComp, name);
                    LOG.info("with output {}...", id);
                    OutputStream.Builder builder = new OutputStream.Builder().withId(name);
                    outStreams.put(id, builder);
                }
            }
            LoadCompConf.Builder builder = new LoadCompConf.Builder().withParallelism(common.get_parallelism_hint()).withId(spoutComp);
            spoutBuilders.put(spoutComp, builder);
        }
        Map<String, Map<String, Double>> spoutResources = getSpoutsResources(topo, topoConf);
        for (Map.Entry<String, Map<String, Double>> entry : spoutResources.entrySet()) {
            LoadCompConf.Builder sd = spoutBuilders.get(entry.getKey());
            if (sd != null) {
                Map<String, Double> resources = entry.getValue();
                Double cpu = resources.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
                if (cpu != null) {
                    sd.withCpuLoad(cpu);
                }
                Double mem = resources.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
                if (mem != null) {
                    sd.withMemoryLoad(mem);
                }
            }
        }
    }
    // Stats... group the executor summaries by the component they belong to.
    Map<String, List<ExecutorSummary>> byComponent = new HashMap<>();
    for (ExecutorSummary executor : info.get_executors()) {
        byComponent.computeIfAbsent(executor.get_component_id(), k -> new ArrayList<>()).add(executor);
    }
    List<InputStream> streams = new ArrayList<>(inputStreams.size());
    // Compute the stats for the different input streams
    for (InputStream.Builder builder : inputStreams) {
        GlobalStreamId streamId = new GlobalStreamId(builder.getFromComponent(), builder.getId());
        // May be null when no executor was reported for the receiving component.
        List<ExecutorSummary> summaries = byComponent.get(builder.getToComponent());
        // Execute and process latency...
        builder.withProcessTime(new NormalDistStats(extractBoltValues(summaries, streamId, BoltStats::get_process_ms_avg)));
        builder.withExecTime(new NormalDistStats(extractBoltValues(summaries, streamId, BoltStats::get_execute_ms_avg)));
        // InputStream is done
        streams.add(builder.build());
    }
    // There is a bug in some versions that returns 0 for the uptime.
    // To work around it we should get it an alternative (working) way.
    // NOTE(review): this map is keyed by supervisor id, while the lookup below builds its key from
    // the executor's host - confirm both yield the same string, otherwise the fallback never matches.
    Map<String, Integer> workerToUptime = new HashMap<>();
    for (WorkerSummary ws : tpinfo.get_workers()) {
        workerToUptime.put(ws.get_supervisor_id() + ":" + ws.get_port(), ws.get_uptime_secs());
    }
    LOG.debug("WORKER TO UPTIME {}", workerToUptime);
    for (Map.Entry<GlobalStreamId, OutputStream.Builder> entry : outStreams.entrySet()) {
        OutputStream.Builder builder = entry.getValue();
        GlobalStreamId id = entry.getKey();
        List<Double> emittedRate = new ArrayList<>();
        List<ExecutorSummary> summaries = byComponent.get(id.get_componentId());
        if (summaries != null) {
            for (ExecutorSummary summary : summaries) {
                if (summary.is_set_stats()) {
                    int uptime = summary.get_uptime_secs();
                    LOG.debug("UPTIME {}", uptime);
                    if (uptime <= 0) {
                        // Likely it is because of a bug, so try to get it another way
                        String key = summary.get_host() + ":" + summary.get_port();
                        uptime = workerToUptime.getOrDefault(key, 1);
                        LOG.debug("Getting uptime for worker {}, {}", key, uptime);
                    }
                    for (Map.Entry<String, Map<String, Long>> statEntry : summary.get_stats().get_emitted().entrySet()) {
                        String timeWindow = statEntry.getKey();
                        long timeSecs = uptime;
                        try {
                            timeSecs = Long.parseLong(timeWindow);
                        } catch (NumberFormatException ignored) {
                            // The window name is not a number of seconds; keep the uptime as the duration.
                        }
                        // The worker-reported uptime can itself be 0, so clamp the duration to at least
                        // one second to avoid dividing by zero (which would yield Infinity/NaN rates).
                        timeSecs = Math.max(1L, Math.min(timeSecs, uptime));
                        Long count = statEntry.getValue().get(id.get_streamId());
                        if (count != null) {
                            LOG.debug("{} emitted {} for {} secs or {} tuples/sec", id, count, timeSecs, count.doubleValue() / timeSecs);
                            emittedRate.add(count.doubleValue() / timeSecs);
                        }
                    }
                }
            }
        }
        builder.withRate(new NormalDistStats(emittedRate));
        // The OutputStream is done
        // Every output stream id was registered together with a bolt or spout builder above,
        // so one of the two lookups finds the owning component.
        LoadCompConf.Builder comp = boltBuilders.get(id.get_componentId());
        if (comp == null) {
            comp = spoutBuilders.get(id.get_componentId());
        }
        comp.withStream(builder.build());
    }
    List<LoadCompConf> spouts = spoutBuilders.values().stream().map(LoadCompConf.Builder::build).collect(Collectors.toList());
    List<LoadCompConf> bolts = boltBuilders.values().stream().map(LoadCompConf.Builder::build).collect(Collectors.toList());
    return new TopologyLoadConf(topologyName, savedTopoConf, spouts, bolts, streams);
}
Also used : Options(org.apache.commons.cli.Options) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) HelpFormatter(org.apache.commons.cli.HelpFormatter) Function(java.util.function.Function) ComponentCommon(org.apache.storm.generated.ComponentCommon) ArrayList(java.util.ArrayList) DefaultParser(org.apache.commons.cli.DefaultParser) TopologyPageInfo(org.apache.storm.generated.TopologyPageInfo) Bolt(org.apache.storm.generated.Bolt) NimbusClient(org.apache.storm.utils.NimbusClient) Nimbus(org.apache.storm.generated.Nimbus) StormTopology(org.apache.storm.generated.StormTopology) JSONValue(org.json.simple.JSONValue) Map(java.util.Map) CommandLine(org.apache.commons.cli.CommandLine) Option(org.apache.commons.cli.Option) Logger(org.slf4j.Logger) CommandLineParser(org.apache.commons.cli.CommandLineParser) JSONParser(org.json.simple.parser.JSONParser) WorkerSummary(org.apache.storm.generated.WorkerSummary) Grouping(org.apache.storm.generated.Grouping) StreamInfo(org.apache.storm.generated.StreamInfo) TopologyInfo(org.apache.storm.generated.TopologyInfo) Collectors(java.util.stream.Collectors) File(java.io.File) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) ExecutorSummary(org.apache.storm.generated.ExecutorSummary) List(java.util.List) ObjectReader(org.apache.storm.utils.ObjectReader) JSONObject(org.json.simple.JSONObject) ParseException(org.apache.commons.cli.ParseException) SpoutSpec(org.apache.storm.generated.SpoutSpec) Config(org.apache.storm.Config) TopologySummary(org.apache.storm.generated.TopologySummary) BoltStats(org.apache.storm.generated.BoltStats) ClusterSummary(org.apache.storm.generated.ClusterSummary) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ExecutorSummary(org.apache.storm.generated.ExecutorSummary) ArrayList(java.util.ArrayList) List(java.util.List) ComponentCommon(org.apache.storm.generated.ComponentCommon) Bolt(org.apache.storm.generated.Bolt) Grouping(org.apache.storm.generated.Grouping) 
SpoutSpec(org.apache.storm.generated.SpoutSpec) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) JSONObject(org.json.simple.JSONObject) HashMap(java.util.HashMap) Map(java.util.Map) StormTopology(org.apache.storm.generated.StormTopology) BoltStats(org.apache.storm.generated.BoltStats) TopologyPageInfo(org.apache.storm.generated.TopologyPageInfo) WorkerSummary(org.apache.storm.generated.WorkerSummary) StreamInfo(org.apache.storm.generated.StreamInfo) TopologyInfo(org.apache.storm.generated.TopologyInfo)

Example 62 with StormTopology

use of org.apache.storm.generated.StormTopology in project storm by apache.

In class StormSqlLocalClusterImpl, method runLocal.

/**
 * Executes the given SQL statements one by one against a local cluster.
 *
 * <p>CREATE TABLE and CREATE FUNCTION statements are handed to the SQL context. Any other
 * statement is compiled into a topology, submitted under the name {@code "storm-sql"} and
 * awaited via {@code waitForCompletion}. After each submitted topology the method blocks until
 * the cluster reports no topology summaries, then resets the deserialization class loader.
 *
 * @param localCluster cluster the compiled topologies are submitted to
 * @param statements SQL statements to run, in iteration order
 * @param waitCondition condition passed on to {@code waitForCompletion}
 * @param waitTimeoutMs timeout passed on to {@code waitForCompletion}
 * @throws Exception if parsing, compiling, submitting or waiting fails
 */
public void runLocal(LocalCluster localCluster, Iterable<String> statements, Predicate<Void> waitCondition, long waitTimeoutMs) throws Exception {
    final Config topologyConf = new Config();
    topologyConf.setMaxSpoutPending(20);
    for (String statement : statements) {
        SqlNode parsed = new StormParser(statement).impl().parseSqlStmtEof();
        if (parsed instanceof SqlCreateTable) {
            sqlContext.interpretCreateTable((SqlCreateTable) parsed);
            continue;
        }
        if (parsed instanceof SqlCreateFunction) {
            sqlContext.interpretCreateFunction((SqlCreateFunction) parsed);
            continue;
        }
        // Anything else is compiled into a topology and run on the local cluster.
        AbstractStreamsProcessor compiled = sqlContext.compileSql(statement);
        StormTopology topology = compiled.build();
        if (processorHasClassLoaders(compiled)) {
            // The most recently added class loader is the one used for Java deserialization.
            int lastIndex = compiled.getClassLoaders().size() - 1;
            CompilingClassLoader newest = compiled.getClassLoaders().get(lastIndex);
            Utils.setClassLoaderForJavaDeSerialize(newest);
        }
        try (LocalCluster.LocalTopology running = localCluster.submitTopology("storm-sql", topologyConf, topology)) {
            waitForCompletion(waitTimeoutMs, waitCondition);
        } finally {
            // Poll until the cluster no longer lists any topology before restoring the class loader.
            while (!localCluster.getTopologySummaries().isEmpty()) {
                Thread.sleep(10);
            }
            Utils.resetClassLoaderForJavaDeSerialize();
        }
    }
}

/** Returns whether the processor exposes at least one class loader. */
private static boolean processorHasClassLoaders(AbstractStreamsProcessor processor) {
    return processor.getClassLoaders() != null && !processor.getClassLoaders().isEmpty();
}
Also used : CompilingClassLoader(org.apache.storm.sql.javac.CompilingClassLoader) Config(org.apache.storm.Config) StormTopology(org.apache.storm.generated.StormTopology) SqlCreateFunction(org.apache.storm.sql.parser.SqlCreateFunction) StormParser(org.apache.storm.sql.parser.StormParser) SqlCreateTable(org.apache.storm.sql.parser.SqlCreateTable) SqlNode(org.apache.calcite.sql.SqlNode)

Example 63 with StormTopology

use of org.apache.storm.generated.StormTopology in project storm by apache.

In class StreamBuilderTest, method testMultiPartitionByKeyWithRepartition.

/**
 * Builds a windowed {@code reduceByKey}, followed by {@code repartition(10)} and a second
 * {@code reduceByKey}, and checks the resulting topology: it must contain three bolts, and
 * {@code bolt3} must read stream {@code s3} from {@code bolt2} with a fields grouping on
 * {@code "key"} plus the matching punctuation stream with an all grouping.
 */
@Test
public void testMultiPartitionByKeyWithRepartition() {
    Map<GlobalStreamId, Grouping> expected = new HashMap<>();
    expected.put(new GlobalStreamId("bolt2", "s3"), Grouping.fields(Collections.singletonList("key")));
    expected.put(new GlobalStreamId("bolt2", "s3__punctuation"), Grouping.all(new NullStruct()));
    Stream<Integer> stream = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID), new ValueMapper<>(0));
    stream.mapToPair(x -> Pair.of(x, x))
        .window(TumblingWindows.of(BaseWindowedBolt.Count.of(10)))
        .reduceByKey((x, y) -> x + y)
        .repartition(10)
        .reduceByKey((x, y) -> 0)
        .print();
    StormTopology topology = streamBuilder.build();
    assertEquals(3, topology.get_bolts_size());
    assertEquals(expected, topology.get_bolts().get("bolt3").get_common().get_inputs());
}
Also used : OutputFieldsDeclarer(org.apache.storm.topology.OutputFieldsDeclarer) BaseRichSpout(org.apache.storm.topology.base.BaseRichSpout) IRichSpout(org.apache.storm.topology.IRichSpout) BaseWindowedBolt(org.apache.storm.topology.base.BaseWindowedBolt) TopologyContext(org.apache.storm.task.TopologyContext) HashMap(java.util.HashMap) Count(org.apache.storm.streams.operations.aggregators.Count) Bolt(org.apache.storm.generated.Bolt) Tuple(org.apache.storm.tuple.Tuple) OutputCollector(org.apache.storm.task.OutputCollector) StormTopology(org.apache.storm.generated.StormTopology) Map(java.util.Map) BranchProcessor(org.apache.storm.streams.processors.BranchProcessor) ValueMapper(org.apache.storm.streams.operations.mappers.ValueMapper) Before(org.junit.Before) BaseRichBolt(org.apache.storm.topology.base.BaseRichBolt) PairValueMapper(org.apache.storm.streams.operations.mappers.PairValueMapper) Grouping(org.apache.storm.generated.Grouping) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Fields(org.apache.storm.tuple.Fields) Utils(org.apache.storm.utils.Utils) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) Mockito(org.mockito.Mockito) TumblingWindows(org.apache.storm.streams.windowing.TumblingWindows) SpoutSpec(org.apache.storm.generated.SpoutSpec) IRichBolt(org.apache.storm.topology.IRichBolt) NullStruct(org.apache.storm.generated.NullStruct) SpoutOutputCollector(org.apache.storm.spout.SpoutOutputCollector) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) OutputCollector(org.apache.storm.task.OutputCollector) SpoutOutputCollector(org.apache.storm.spout.SpoutOutputCollector) HashMap(java.util.HashMap) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) StormTopology(org.apache.storm.generated.StormTopology) Grouping(org.apache.storm.generated.Grouping) TopologyContext(org.apache.storm.task.TopologyContext) NullStruct(org.apache.storm.generated.NullStruct) Test(org.junit.Test)

Example 64 with StormTopology

use of org.apache.storm.generated.StormTopology in project storm by apache.

In class StreamBuilderTest, method testBranch.

/**
 * Branches a spout stream with a single always-true predicate and checks both the built
 * topology (one spout, one bolt, shuffle-grouped on the spout's {@code "default"} stream) and
 * the node graph behind the branch: its parent on the {@code "-branch"} stream is a
 * {@link ProcessorNode} wrapping a {@link BranchProcessor}, whose own parent is a
 * {@link SpoutNode}.
 */
@Test
public void testBranch() throws Exception {
    Stream<Tuple> source = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID));
    Stream<Tuple>[] streams = source.branch(x -> true);
    StormTopology topology = streamBuilder.build();

    // Topology shape.
    assertEquals(1, topology.get_spouts_size());
    assertEquals(1, topology.get_bolts_size());
    String spoutId = topology.get_spouts().keySet().iterator().next();
    Map<GlobalStreamId, Grouping> expectedInputs = new HashMap<>();
    expectedInputs.put(new GlobalStreamId(spoutId, "default"), Grouping.shuffle(new NullStruct()));
    assertEquals(expectedInputs, topology.get_bolts().values().iterator().next().get_common().get_inputs());

    // Node graph behind the single branch.
    assertEquals(1, streams.length);
    Node branchNode = streams[0].node;
    assertEquals(1, branchNode.getOutputStreams().size());
    String parentStream = branchNode.getOutputStreams().iterator().next() + "-branch";
    assertEquals(1, branchNode.getParents(parentStream).size());
    Node processorNode = branchNode.getParents(parentStream).iterator().next();
    assertTrue(processorNode instanceof ProcessorNode);
    assertTrue(((ProcessorNode) processorNode).getProcessor() instanceof BranchProcessor);
    assertTrue(processorNode.getParents("default").iterator().next() instanceof SpoutNode);
}
Also used : HashMap(java.util.HashMap) StormTopology(org.apache.storm.generated.StormTopology) Grouping(org.apache.storm.generated.Grouping) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) BranchProcessor(org.apache.storm.streams.processors.BranchProcessor) NullStruct(org.apache.storm.generated.NullStruct) Tuple(org.apache.storm.tuple.Tuple) Test(org.junit.Test)

Example 65 with StormTopology

use of org.apache.storm.generated.StormTopology in project storm by apache.

In class StreamBuilderTest, method testBranchAndJoin.

/**
 * Splits an integer stream into even and odd branches, joins the two branches after mapping
 * each value to a pair, and checks that the join result is backed by a {@link ProcessorNode}
 * and that the built topology contains two bolts.
 */
@Test
public void testBranchAndJoin() throws Exception {
    Stream<Integer> stream = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID), new ValueMapper<>(0), 2);
    Stream<Integer>[] streams = stream.branch(x -> x % 2 == 0, x -> x % 2 == 1);
    PairStream<Integer, Integer> evens = streams[0].mapToPair(x -> Pair.of(x, 1));
    PairStream<Integer, Integer> odds = streams[1].mapToPair(x -> Pair.of(x, 1));
    PairStream<Integer, Pair<Integer, Integer>> joined = evens.join(odds);
    assertTrue(joined.getNode() instanceof ProcessorNode);
    StormTopology topology = streamBuilder.build();
    assertEquals(2, topology.get_bolts_size());
}
Also used : OutputFieldsDeclarer(org.apache.storm.topology.OutputFieldsDeclarer) BaseRichSpout(org.apache.storm.topology.base.BaseRichSpout) IRichSpout(org.apache.storm.topology.IRichSpout) BaseWindowedBolt(org.apache.storm.topology.base.BaseWindowedBolt) TopologyContext(org.apache.storm.task.TopologyContext) HashMap(java.util.HashMap) Count(org.apache.storm.streams.operations.aggregators.Count) Bolt(org.apache.storm.generated.Bolt) Tuple(org.apache.storm.tuple.Tuple) OutputCollector(org.apache.storm.task.OutputCollector) StormTopology(org.apache.storm.generated.StormTopology) Map(java.util.Map) BranchProcessor(org.apache.storm.streams.processors.BranchProcessor) ValueMapper(org.apache.storm.streams.operations.mappers.ValueMapper) Before(org.junit.Before) BaseRichBolt(org.apache.storm.topology.base.BaseRichBolt) PairValueMapper(org.apache.storm.streams.operations.mappers.PairValueMapper) Grouping(org.apache.storm.generated.Grouping) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Fields(org.apache.storm.tuple.Fields) Utils(org.apache.storm.utils.Utils) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) Mockito(org.mockito.Mockito) TumblingWindows(org.apache.storm.streams.windowing.TumblingWindows) SpoutSpec(org.apache.storm.generated.SpoutSpec) IRichBolt(org.apache.storm.topology.IRichBolt) NullStruct(org.apache.storm.generated.NullStruct) SpoutOutputCollector(org.apache.storm.spout.SpoutOutputCollector) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) OutputCollector(org.apache.storm.task.OutputCollector) SpoutOutputCollector(org.apache.storm.spout.SpoutOutputCollector) StormTopology(org.apache.storm.generated.StormTopology) TopologyContext(org.apache.storm.task.TopologyContext) Test(org.junit.Test)

Aggregations

StormTopology (org.apache.storm.generated.StormTopology)162 Config (org.apache.storm.Config)72 HashMap (java.util.HashMap)67 Test (org.junit.Test)59 TopologyBuilder (org.apache.storm.topology.TopologyBuilder)44 Map (java.util.Map)35 ArrayList (java.util.ArrayList)29 TopologyDetails (org.apache.storm.scheduler.TopologyDetails)27 Test (org.junit.jupiter.api.Test)26 List (java.util.List)24 Bolt (org.apache.storm.generated.Bolt)23 Values (org.apache.storm.tuple.Values)23 StormMetricsRegistry (org.apache.storm.metric.StormMetricsRegistry)22 Cluster (org.apache.storm.scheduler.Cluster)22 SupervisorDetails (org.apache.storm.scheduler.SupervisorDetails)22 Topologies (org.apache.storm.scheduler.Topologies)22 Fields (org.apache.storm.tuple.Fields)22 INimbus (org.apache.storm.scheduler.INimbus)21 TopologyDef (org.apache.storm.flux.model.TopologyDef)20 TestUtilsForResourceAwareScheduler (org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler)20