use of org.apache.storm.generated.ComponentCommon in project flink by apache.
the class FlinkTopology method translateTopology.
/**
* Creates a Flink program that uses the specified spouts and bolts.
*
* <p>The translation runs in two phases. First, every spout is wrapped in a {@code SpoutWrapper} and added
* to the environment as a source; its declared output streams are registered in {@code availableInputs}.
* Second, the bolts are connected in repeated passes: a bolt is translated (and removed from {@code bolts})
* only once all of its input streams are present in {@code availableInputs}.
*
* <p>Side effects visible in this method: the four bookkeeping collections are cleared, the environment
* parallelism is set to 1, {@code bolts} is emptied, and a parallelism hint of 1 is written into every
* {@code ComponentCommon} that had none.
*
* @throws RuntimeException
*             if a full pass over the remaining bolts connects none of them
*/
private void translateTopology() {
// Reset the bookkeeping state before translating.
unprocessdInputsPerBolt.clear();
outputStreams.clear();
declarers.clear();
availableInputs.clear();
// Storm defaults to parallelism 1
env.setParallelism(1);
// Phase 1: translate each spout into a Flink source.
for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
final String spoutId = spout.getKey();
final IRichSpout userSpout = spout.getValue();
// Let the spout declare its output streams so the stream IDs and their fields are known.
final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
userSpout.declareOutputFields(declarer);
final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
this.outputStreams.put(spoutId, sourceStreams);
declarers.put(spoutId, declarer);
// NOTE: this local map (stream ID -> Flink stream) shadows the field "outputStreams" for the rest of
// the loop body; the field is only reachable via "this.outputStreams" from here on.
final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>();
final DataStreamSource<?> source;
if (sourceStreams.size() == 1) {
// Exactly one declared stream: the source emits plain tuples and is used as that stream directly.
final SpoutWrapper<Tuple> spoutWrapperSingleOutput = new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
spoutWrapperSingleOutput.setStormTopology(stormTopology);
final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];
DataStreamSource<Tuple> src = env.addSource(spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId));
outputStreams.put(outputStreamId, src);
source = src;
} else {
// Any other number of declared streams (this branch is also taken for zero streams): the source
// emits SplitStreamType elements, which are split per stream ID and mapped back to tuples.
final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs = new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
spoutWrapperMultipleOutputs.setStormTopology(stormTopology);
@SuppressWarnings({ "unchecked", "rawtypes" }) DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource(spoutWrapperMultipleOutputs, spoutId, (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));
SplitStream<SplitStreamType<Tuple>> splitSource = multiSource.split(new StormStreamSelector<Tuple>());
for (String streamId : sourceStreams.keySet()) {
SingleOutputStreamOperator<Tuple> outStream = splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
// Override the mapper's output type with the type declared for this stream.
outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
outputStreams.put(streamId, outStream);
}
source = multiSource;
}
// Publish the spout's streams so that bolts can consume them.
availableInputs.put(spoutId, outputStreams);
final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
if (common.is_set_parallelism_hint()) {
// Apply the user-provided hint to the source operator only.
int dop = common.get_parallelism_hint();
source.setParallelism(dop);
} else {
// No hint given: record 1 in the topology; the source keeps the environment parallelism set above.
common.set_parallelism_hint(1);
}
}
/**
* Phase 2:
* 1. Connect all spout streams with the bolt streams.
* 2. Then proceed with the bolt streams that are already connected.
*
* Because we do not know the order in which an iterator steps over a set, we might process a consumer
* before its producer. Thus, we might need to repeat the pass multiple times.
*/
boolean makeProgress = true;
while (bolts.size() > 0) {
if (!makeProgress) {
// The previous pass connected no bolt, so the remaining ones can never be connected: fail with a
// message that names every remaining bolt.
StringBuilder strBld = new StringBuilder();
strBld.append("Unable to build Topology. Could not connect the following bolts:");
for (String boltId : bolts.keySet()) {
strBld.append("\n ");
strBld.append(boltId);
strBld.append(": missing input streams [");
// NOTE(review): nothing in this method removes entries from this set after it is filled, so this
// appears to list all inputs of the bolt rather than only the missing ones -- confirm that
// processInput/createOutput do not prune it.
for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
strBld.append("'");
strBld.append(streams.getKey().get_streamId());
strBld.append("' from '");
strBld.append(streams.getKey().get_componentId());
strBld.append("'; ");
}
strBld.append("]");
}
throw new RuntimeException(strBld.toString());
}
makeProgress = false;
// An explicit iterator is used so that translated bolts can be removed during the pass.
final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
while (boltsIterator.hasNext()) {
final Entry<String, IRichBolt> bolt = boltsIterator.next();
final String boltId = bolt.getKey();
// NOTE(review): the copy is made before the availability check below, so a bolt that is skipped in
// this pass is copied again in the next one -- confirm whether copyObject is cheap enough.
final IRichBolt userBolt = copyObject(bolt.getValue());
final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();
// Snapshot the bolt's declared inputs on its first visit and reuse the snapshot in later passes.
Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId);
if (unprocessedBoltInputs == null) {
unprocessedBoltInputs = new HashSet<>();
unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
}
// check if all inputs are available
final int numberOfInputs = unprocessedBoltInputs.size();
int inputsAvailable = 0;
for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
final String producerId = entry.getKey().get_componentId();
final String streamId = entry.getKey().get_streamId();
final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
if (streams != null && streams.get(streamId) != null) {
inputsAvailable++;
}
}
if (inputsAvailable != numberOfInputs) {
// traverse other bolts first until inputs are available
continue;
} else {
// All inputs are present: this bolt is translated now and will not be visited again.
makeProgress = true;
boltsIterator.remove();
}
// Build one Flink input stream per (producer, stream) pair consumed by the bolt.
final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);
for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
final GlobalStreamId streamId = input.getKey();
final Grouping grouping = input.getValue();
final String producerId = streamId.get_componentId();
final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);
inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
}
final SingleOutputStreamOperator<?> outputStream = createOutput(boltId, userBolt, inputStreams);
// Same parallelism handling as for spouts: apply the hint if present, otherwise record 1.
if (common.is_set_parallelism_hint()) {
int dop = common.get_parallelism_hint();
outputStream.setParallelism(dop);
} else {
common.set_parallelism_hint(1);
}
}
}
}
use of org.apache.storm.generated.ComponentCommon in project flink by apache.
the class WrapperSetupHelper method createTopologyContext.
/**
 * Builds the {@link TopologyContext} that is handed to a single Spout or Bolt instance (ie, one Flink task /
 * Storm executor).
 *
 * <p>When a Storm topology is supplied, the task and stream mappings are collected from all of its spouts,
 * bolts, and state spouts. When {@code stormTopology} is {@code null} (embedded mode), a minimal topology
 * that contains only the given operator is created instead, with task IDs {@code 1..dop} assigned to it.
 *
 * <p>If the configuration has no entry for {@code Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS}, the value 30 is
 * written into the passed-in {@code stormConfig} map.
 *
 * @param context
 *            The Flink runtime context.
 * @param spoutOrBolt
 *            The Spout or Bolt this context is created for.
 * @param operatorName
 *            The name of the operator; used as the component ID in embedded mode and handed to
 *            {@code processSingleOperator} otherwise.
 * @param stormTopology
 *            The original Storm topology, or {@code null} for embedded mode.
 * @param stormConfig
 *            The user provided configuration.
 * @return The created {@link TopologyContext}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
static synchronized TopologyContext createTopologyContext(final StreamingRuntimeContext context, final IComponent spoutOrBolt, final String operatorName, StormTopology stormTopology, final Map stormConfig) {
	final int parallelism = context.getNumberOfParallelSubtasks();

	// Mappings that are populated by one of the two branches below.
	final Map<Integer, String> taskToComponents = new HashMap<>();
	final Map<String, List<Integer>> componentToSortedTasks = new HashMap<>();
	final Map<String, Map<String, Fields>> componentToStreamToFields = new HashMap<>();

	final String stormId = (String) stormConfig.get(TOPOLOGY_NAME);

	// Not supported: these are always passed on as null.
	final String codeDir = null;
	final String pidDir = null;
	final Integer workerPort = null;

	// Remaining constructor arguments start out empty / unset.
	final List<Integer> workerTasks = new ArrayList<>();
	final Map<String, Object> defaultResources = new HashMap<>();
	final Map<String, Object> userResources = new HashMap<>();
	final Map<String, Object> executorData = new HashMap<>();
	final Map registeredMetrics = new HashMap();
	final Atom openOrPrepareWasCalled = null;

	// Stays at -1 unless processSingleOperator reports a task ID (never updated in embedded mode).
	Integer taskId = -1;

	if (stormTopology != null) {
		// whole topology is built (i.e. FlinkTopology is used)
		final Map<String, SpoutSpec> allSpouts = stormTopology.get_spouts();
		final Map<String, Bolt> allBolts = stormTopology.get_bolts();
		final Map<String, StateSpoutSpec> allStateSpouts = stormTopology.get_state_spouts();

		// Reset the shared counter before walking the components.
		tid = 1;

		for (final Entry<String, SpoutSpec> spoutEntry : allSpouts.entrySet()) {
			final Integer assigned = processSingleOperator(spoutEntry.getKey(), spoutEntry.getValue().get_common(), operatorName, context.getIndexOfThisSubtask(), parallelism, taskToComponents, componentToSortedTasks, componentToStreamToFields);
			if (assigned != null) {
				taskId = assigned;
			}
		}

		for (final Entry<String, Bolt> boltEntry : allBolts.entrySet()) {
			final Integer assigned = processSingleOperator(boltEntry.getKey(), boltEntry.getValue().get_common(), operatorName, context.getIndexOfThisSubtask(), parallelism, taskToComponents, componentToSortedTasks, componentToStreamToFields);
			if (assigned != null) {
				taskId = assigned;
			}
		}

		for (final Entry<String, StateSpoutSpec> stateSpoutEntry : allStateSpouts.entrySet()) {
			final Integer assigned = processSingleOperator(stateSpoutEntry.getKey(), stateSpoutEntry.getValue().get_common(), operatorName, context.getIndexOfThisSubtask(), parallelism, taskToComponents, componentToSortedTasks, componentToStreamToFields);
			if (assigned != null) {
				taskId = assigned;
			}
		}

		// NOTE(review): taskId is initialised to -1, so this assertion cannot fail as written.
		assert (taskId != null);
	} else {
		// embedded mode
		final ComponentCommon embeddedCommon = new ComponentCommon();
		embeddedCommon.set_parallelism_hint(parallelism);

		final HashMap<String, SpoutSpec> singleSpout = new HashMap<>();
		final HashMap<String, Bolt> singleBolt = new HashMap<>();
		if (spoutOrBolt instanceof IRichSpout) {
			singleSpout.put(operatorName, new SpoutSpec(null, embeddedCommon));
		} else {
			assert (spoutOrBolt instanceof IRichBolt);
			singleBolt.put(operatorName, new Bolt(null, embeddedCommon));
		}
		stormTopology = new StormTopology(singleSpout, singleBolt, new HashMap<String, StateSpoutSpec>());

		// Task IDs 1..parallelism all belong to the single operator.
		final List<Integer> operatorTasks = new ArrayList<>(parallelism);
		for (int task = 1; task <= parallelism; task++) {
			taskToComponents.put(task, operatorName);
			operatorTasks.add(task);
		}
		componentToSortedTasks.put(operatorName, operatorTasks);

		// Ask the operator itself for its output streams.
		final SetupOutputFieldsDeclarer fieldsDeclarer = new SetupOutputFieldsDeclarer();
		spoutOrBolt.declareOutputFields(fieldsDeclarer);
		componentToStreamToFields.put(operatorName, fieldsDeclarer.outputStreams);
	}

	if (!stormConfig.containsKey(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS)) {
		// Storm default value
		stormConfig.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, 30);
	}

	return new FlinkTopologyContext(stormTopology, stormConfig, taskToComponents, componentToSortedTasks, componentToStreamToFields, stormId, codeDir, pidDir, taskId, workerPort, workerTasks, defaultResources, userResources, executorData, registeredMetrics, openOrPrepareWasCalled);
}
use of org.apache.storm.generated.ComponentCommon in project flink by apache.
the class WrapperSetupInLocalClusterTest method testCreateTopologyContext.
/**
* Runs a small topology (two spouts, two bolts, one sink) in a Storm {@code LocalCluster}, collects the
* {@code TopologyContext} objects found in {@code TestSink.result}, and checks that
* {@code WrapperSetupHelper.createTopologyContext} produces an equivalent context for each of them when
* driven by a mocked {@code StreamingRuntimeContext}.
*/
@Test
public void testCreateTopologyContext() {
// Parallelism per component; the values sum to 8.
HashMap<String, Integer> dops = new HashMap<String, Integer>();
dops.put("spout1", 1);
dops.put("spout2", 3);
dops.put("bolt1", 1);
dops.put("bolt2", 2);
dops.put("sink", 1);
// Next subtask index to hand out per component when mocking the Flink runtime context below.
HashMap<String, Integer> taskCounter = new HashMap<String, Integer>();
taskCounter.put("spout1", 0);
taskCounter.put("spout2", 0);
taskCounter.put("bolt1", 0);
taskCounter.put("bolt2", 0);
taskCounter.put("sink", 0);
HashMap<String, IComponent> operators = new HashMap<String, IComponent>();
operators.put("spout1", new TestDummySpout());
operators.put("spout2", new TestDummySpout());
operators.put("bolt1", new TestDummyBolt());
operators.put("bolt2", new TestDummyBolt());
operators.put("sink", new TestSink());
// Wire the topology: spout1 -> bolt1, spout2 -> bolt2, and both bolts -> sink on two streams each.
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout1", (IRichSpout) operators.get("spout1"), dops.get("spout1"));
builder.setSpout("spout2", (IRichSpout) operators.get("spout2"), dops.get("spout2"));
builder.setBolt("bolt1", (IRichBolt) operators.get("bolt1"), dops.get("bolt1")).shuffleGrouping("spout1");
builder.setBolt("bolt2", (IRichBolt) operators.get("bolt2"), dops.get("bolt2")).allGrouping("spout2");
builder.setBolt("sink", (IRichBolt) operators.get("sink"), dops.get("sink")).shuffleGrouping("bolt1", TestDummyBolt.groupingStreamId).shuffleGrouping("bolt1", TestDummyBolt.shuffleStreamId).shuffleGrouping("bolt2", TestDummyBolt.groupingStreamId).shuffleGrouping("bolt2", TestDummyBolt.shuffleStreamId);
// Run the topology in Storm to obtain the reference ("expected") contexts.
LocalCluster cluster = new LocalCluster();
Config c = new Config();
c.setNumAckers(0);
cluster.submitTopology("test", c, builder.createTopology());
// Wait for 8 results, which matches the sum of the parallelism values above -- presumably one context
// per task; verify against TestSink and the dummy components.
// NOTE(review): this polling loop has no timeout, and the cluster is not shut down if it never ends.
while (TestSink.result.size() != 8) {
Utils.sleep(100);
}
cluster.shutdown();
// Translate the same builder with Flink and take its Storm topology as input for the helper under test.
final FlinkTopology flinkBuilder = FlinkTopology.createTopology(builder);
StormTopology stormTopology = flinkBuilder.getStormTopology();
// Task IDs returned so far; used at the end of each iteration to check that they are unique.
Set<Integer> taskIds = new HashSet<Integer>();
for (TopologyContext expectedContext : TestSink.result) {
final String thisComponentId = expectedContext.getThisComponentId();
int index = taskCounter.get(thisComponentId);
// Mock the Flink runtime context for the next subtask of this component.
StreamingRuntimeContext context = mock(StreamingRuntimeContext.class);
when(context.getTaskName()).thenReturn(thisComponentId);
when(context.getNumberOfParallelSubtasks()).thenReturn(dops.get(thisComponentId));
when(context.getIndexOfThisSubtask()).thenReturn(index);
taskCounter.put(thisComponentId, ++index);
Config stormConfig = new Config();
stormConfig.put(WrapperSetupHelper.TOPOLOGY_NAME, "test");
TopologyContext topologyContext = WrapperSetupHelper.createTopologyContext(context, operators.get(thisComponentId), thisComponentId, stormTopology, stormConfig);
ComponentCommon expcetedCommon = expectedContext.getComponentCommon(thisComponentId);
ComponentCommon common = topologyContext.getComponentCommon(thisComponentId);
// Values that are expected to be null on the created context.
Assert.assertNull(topologyContext.getCodeDir());
Assert.assertNull(common.get_json_conf());
Assert.assertNull(topologyContext.getExecutorData(null));
Assert.assertNull(topologyContext.getPIDDir());
Assert.assertNull(topologyContext.getResource(null));
Assert.assertNull(topologyContext.getSharedExecutor());
// NOTE(review): this asserts on expectedContext while the neighbouring checks use topologyContext --
// confirm whether that is intentional.
Assert.assertNull(expectedContext.getTaskData(null));
Assert.assertNull(topologyContext.getThisWorkerPort());
// Only a prefix match is required for the Storm ID.
Assert.assertTrue(expectedContext.getStormId().startsWith(topologyContext.getStormId()));
// Component-level data must match the Storm reference.
Assert.assertEquals(expcetedCommon.get_inputs(), common.get_inputs());
Assert.assertEquals(expcetedCommon.get_parallelism_hint(), common.get_parallelism_hint());
Assert.assertEquals(expcetedCommon.get_streams(), common.get_streams());
Assert.assertEquals(expectedContext.getComponentIds(), topologyContext.getComponentIds());
Assert.assertEquals(expectedContext.getComponentStreams(thisComponentId), topologyContext.getComponentStreams(thisComponentId));
Assert.assertEquals(thisComponentId, topologyContext.getThisComponentId());
Assert.assertEquals(expectedContext.getThisSources(), topologyContext.getThisSources());
Assert.assertEquals(expectedContext.getThisStreams(), topologyContext.getThisStreams());
Assert.assertEquals(expectedContext.getThisTargets(), topologyContext.getThisTargets());
Assert.assertEquals(0, topologyContext.getThisWorkerTasks().size());
// Every task listed for this component must map back to this component.
for (int taskId : topologyContext.getComponentTasks(thisComponentId)) {
Assert.assertEquals(thisComponentId, topologyContext.getComponentId(taskId));
}
// Sources, targets, and output fields must match for every component of the topology.
for (String componentId : expectedContext.getComponentIds()) {
Assert.assertEquals(expectedContext.getSources(componentId), topologyContext.getSources(componentId));
Assert.assertEquals(expectedContext.getTargets(componentId), topologyContext.getTargets(componentId));
for (String streamId : expectedContext.getComponentStreams(componentId)) {
Assert.assertEquals(expectedContext.getComponentOutputFields(componentId, streamId).toList(), topologyContext.getComponentOutputFields(componentId, streamId).toList());
}
}
for (String streamId : expectedContext.getThisStreams()) {
Assert.assertEquals(expectedContext.getThisOutputFields(streamId).toList(), topologyContext.getThisOutputFields(streamId).toList());
}
// Task IDs are not compared by value: each component must have the same number of tasks in both
// contexts, and no task ID may occur twice within either context.
HashMap<Integer, String> taskToComponents = new HashMap<Integer, String>();
Set<Integer> allTaskIds = new HashSet<Integer>();
for (String componentId : expectedContext.getComponentIds()) {
List<Integer> possibleTasks = expectedContext.getComponentTasks(componentId);
List<Integer> tasks = topologyContext.getComponentTasks(componentId);
Iterator<Integer> p_it = possibleTasks.iterator();
Iterator<Integer> t_it = tasks.iterator();
while (p_it.hasNext()) {
Assert.assertTrue(t_it.hasNext());
Assert.assertNull(taskToComponents.put(p_it.next(), componentId));
Assert.assertTrue(allTaskIds.add(t_it.next()));
}
Assert.assertFalse(t_it.hasNext());
}
Assert.assertEquals(taskToComponents, expectedContext.getTaskToComponent());
// The task ID of this context must not have been returned for any earlier context.
Assert.assertTrue(taskIds.add(topologyContext.getThisTaskId()));
// These two calls are expected to throw UnsupportedOperationException on the created context.
try {
topologyContext.getHooks();
Assert.fail();
} catch (UnsupportedOperationException e) {
/* expected */
}
try {
topologyContext.getRegisteredMetricByName(null);
Assert.fail();
} catch (UnsupportedOperationException e) {
/* expected */
}
}
}
use of org.apache.storm.generated.ComponentCommon in project storm by apache.
the class StormCommon method addMetricStreams.
/**
 * Registers the metrics stream on every component of the given topology.
 *
 * <p>Each component's {@link ComponentCommon} receives an entry for {@code Constants.METRICS_STREAM_ID}
 * whose output fields are {@code "task-info"} and {@code "data-points"}.
 *
 * @param topology the topology whose components are modified in place
 */
public static void addMetricStreams(StormTopology topology) {
    for (Object component : allComponents(topology).values()) {
        // A separate stream description is created for every component instead of sharing one instance.
        getComponentCommon(component).put_to_streams(
                Constants.METRICS_STREAM_ID,
                Thrift.outputFields(Arrays.asList("task-info", "data-points")));
    }
}
use of org.apache.storm.generated.ComponentCommon in project storm by apache.
the class StormCommon method addEventLogger.
/**
 * Adds the event logger bolt to the topology and declares the event logger stream on every component.
 *
 * <p>The executor count is read from {@code Config.TOPOLOGY_EVENTLOGGER_EXECUTORS}, with the value of
 * {@code Config.TOPOLOGY_WORKERS} passed to {@code Utils.getInt} as the second argument. The bolt's
 * component configuration sets {@code Config.TOPOLOGY_TASKS} to that count and
 * {@code Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS} to the configured message timeout.
 *
 * @param conf     the topology configuration to read from
 * @param topology the topology that is modified in place
 */
public static void addEventLogger(Map conf, StormTopology topology) {
    Object configuredExecutors = conf.get(Config.TOPOLOGY_EVENTLOGGER_EXECUTORS);
    Integer workerCount = Utils.getInt(conf.get(Config.TOPOLOGY_WORKERS));
    Integer numExecutors = Utils.getInt(configuredExecutors, workerCount);

    Integer messageTimeoutSecs = Utils.getInt(conf.get(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS));
    HashMap<String, Object> boltConf = new HashMap<>();
    boltConf.put(Config.TOPOLOGY_TASKS, numExecutors);
    boltConf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, messageTimeoutSecs);

    // The bolt (including its inputs) is built before any component's streams are touched below.
    Bolt loggerBolt = Thrift.prepareSerializedBoltDetails(eventLoggerInputs(topology), new EventLoggerBolt(), null, numExecutors, boltConf);

    for (Object component : allComponents(topology).values()) {
        getComponentCommon(component).put_to_streams(EVENTLOGGER_STREAM_ID, Thrift.outputFields(eventLoggerBoltFields()));
    }

    // Registered last, after the loop over the existing components has finished.
    topology.put_to_bolts(EVENTLOGGER_COMPONENT_ID, loggerBolt);
}
Aggregations