Example 1 with IRichSpout

Use of org.apache.storm.topology.IRichSpout in project flink by apache.

The class FlinkTopology, method translateTopology.

/**
 * Creates a Flink program that uses the specified spouts and bolts.
 */
private void translateTopology() {
    unprocessdInputsPerBolt.clear();
    outputStreams.clear();
    declarers.clear();
    availableInputs.clear();
    // Storm defaults to parallelism 1
    env.setParallelism(1);
    for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
        final String spoutId = spout.getKey();
        final IRichSpout userSpout = spout.getValue();
        final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
        userSpout.declareOutputFields(declarer);
        final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
        this.outputStreams.put(spoutId, sourceStreams);
        declarers.put(spoutId, declarer);
        final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>();
        final DataStreamSource<?> source;
        if (sourceStreams.size() == 1) {
            final SpoutWrapper<Tuple> spoutWrapperSingleOutput = new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
            spoutWrapperSingleOutput.setStormTopology(stormTopology);
            final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];
            DataStreamSource<Tuple> src = env.addSource(spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId));
            outputStreams.put(outputStreamId, src);
            source = src;
        } else {
            final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs = new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
            spoutWrapperMultipleOutputs.setStormTopology(stormTopology);
            @SuppressWarnings({ "unchecked", "rawtypes" }) DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource(spoutWrapperMultipleOutputs, spoutId, (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));
            SplitStream<SplitStreamType<Tuple>> splitSource = multiSource.split(new StormStreamSelector<Tuple>());
            for (String streamId : sourceStreams.keySet()) {
                SingleOutputStreamOperator<Tuple> outStream = splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
                outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
                outputStreams.put(streamId, outStream);
            }
            source = multiSource;
        }
        availableInputs.put(spoutId, outputStreams);
        final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
        if (common.is_set_parallelism_hint()) {
            int dop = common.get_parallelism_hint();
            source.setParallelism(dop);
        } else {
            common.set_parallelism_hint(1);
        }
    }
    /*
     * 1. Connect all spout streams with bolt streams.
     * 2. Then proceed with the bolt streams that are already connected.
     *
     * Because we do not know the order in which an iterator steps over a set, we might process a
     * consumer before its producer; thus, we might need to repeat multiple times.
     */
    boolean makeProgress = true;
    while (bolts.size() > 0) {
        if (!makeProgress) {
            StringBuilder strBld = new StringBuilder();
            strBld.append("Unable to build Topology. Could not connect the following bolts:");
            for (String boltId : bolts.keySet()) {
                strBld.append("\n  ");
                strBld.append(boltId);
                strBld.append(": missing input streams [");
                for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
                    strBld.append("'");
                    strBld.append(streams.getKey().get_streamId());
                    strBld.append("' from '");
                    strBld.append(streams.getKey().get_componentId());
                    strBld.append("'; ");
                }
                strBld.append("]");
            }
            throw new RuntimeException(strBld.toString());
        }
        makeProgress = false;
        final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
        while (boltsIterator.hasNext()) {
            final Entry<String, IRichBolt> bolt = boltsIterator.next();
            final String boltId = bolt.getKey();
            final IRichBolt userBolt = copyObject(bolt.getValue());
            final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();
            Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId);
            if (unprocessedBoltInputs == null) {
                unprocessedBoltInputs = new HashSet<>();
                unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
                unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
            }
            // check if all inputs are available
            final int numberOfInputs = unprocessedBoltInputs.size();
            int inputsAvailable = 0;
            for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
                final String producerId = entry.getKey().get_componentId();
                final String streamId = entry.getKey().get_streamId();
                final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
                if (streams != null && streams.get(streamId) != null) {
                    inputsAvailable++;
                }
            }
            if (inputsAvailable != numberOfInputs) {
                // traverse other bolts first until inputs are available
                continue;
            } else {
                makeProgress = true;
                boltsIterator.remove();
            }
            final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);
            for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
                final GlobalStreamId streamId = input.getKey();
                final Grouping grouping = input.getValue();
                final String producerId = streamId.get_componentId();
                final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);
                inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
            }
            final SingleOutputStreamOperator<?> outputStream = createOutput(boltId, userBolt, inputStreams);
            if (common.is_set_parallelism_hint()) {
                int dop = common.get_parallelism_hint();
                outputStream.setParallelism(dop);
            } else {
                common.set_parallelism_hint(1);
            }
        }
    }
}
Also used : SpoutWrapper(org.apache.flink.storm.wrappers.SpoutWrapper) HashMap(java.util.HashMap) DataStream(org.apache.flink.streaming.api.datastream.DataStream) Entry(java.util.Map.Entry) ComponentCommon(org.apache.storm.generated.ComponentCommon) IRichBolt(org.apache.storm.topology.IRichBolt) Grouping(org.apache.storm.generated.Grouping) IRichSpout(org.apache.storm.topology.IRichSpout) Fields(org.apache.storm.tuple.Fields) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) StormTuple(org.apache.flink.storm.wrappers.StormTuple) Tuple(org.apache.flink.api.java.tuple.Tuple) SplitStreamType(org.apache.flink.storm.util.SplitStreamType)
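
For orientation, here is a minimal sketch of how this translation is typically triggered from user code, assuming the flink-storm API of the same Flink version (FlinkTopology.createTopology wrapping a Storm TopologyBuilder, submitted via FlinkLocalCluster); MySpout and MyBolt are hypothetical user classes.

import org.apache.flink.storm.api.FlinkLocalCluster;
import org.apache.flink.storm.api.FlinkTopology;
import org.apache.storm.Config;
import org.apache.storm.topology.TopologyBuilder;

public class StormToFlinkExample {

    public static void main(String[] args) throws Exception {
        // Build a regular Storm topology; MySpout and MyBolt are hypothetical user classes.
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("source", new MySpout(), 2);
        builder.setBolt("counter", new MyBolt(), 2).shuffleGrouping("source");

        // createTopology() invokes translateTopology() internally, turning every
        // IRichSpout into a DataStreamSource and every IRichBolt into an operator.
        FlinkLocalCluster.getLocalCluster().submitTopology(
                "storm-on-flink", new Config(), FlinkTopology.createTopology(builder));
    }
}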

Example 2 with IRichSpout

Use of org.apache.storm.topology.IRichSpout in project flink by apache.

The class SpoutWrapperTest, method testRunExecuteFixedNumber.

@SuppressWarnings("unchecked")
@Test
public void testRunExecuteFixedNumber() throws Exception {
    final SetupOutputFieldsDeclarer declarer = new SetupOutputFieldsDeclarer();
    declarer.declare(new Fields("dummy"));
    PowerMockito.whenNew(SetupOutputFieldsDeclarer.class).withNoArguments().thenReturn(declarer);
    final StreamingRuntimeContext taskContext = mock(StreamingRuntimeContext.class);
    when(taskContext.getExecutionConfig()).thenReturn(mock(ExecutionConfig.class));
    when(taskContext.getTaskName()).thenReturn("name");
    final IRichSpout spout = mock(IRichSpout.class);
    final int numberOfCalls = this.r.nextInt(50);
    final SpoutWrapper<?> spoutWrapper = new SpoutWrapper<Object>(spout, numberOfCalls);
    spoutWrapper.setRuntimeContext(taskContext);
    spoutWrapper.run(mock(SourceContext.class));
    verify(spout, times(numberOfCalls)).nextTuple();
}
Also used : Fields(org.apache.storm.tuple.Fields) IRichSpout(org.apache.storm.topology.IRichSpout) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SourceContext(org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext) AbstractTest(org.apache.flink.storm.util.AbstractTest) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)
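
The numberOfCalls constructor argument is what makes the wrapped spout finite: the wrapper stops invoking nextTuple() after that many calls, as the verify(...) above checks. A minimal usage sketch under the same assumption; MySpout is a hypothetical IRichSpout that emits a single String field.

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.storm.wrappers.SpoutWrapper;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FiniteSpoutExample {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Call nextTuple() at most 1000 times, then shut the source down.
        env.addSource(new SpoutWrapper<Tuple1<String>>(new MySpout(), 1000))
                .returns(new TypeHint<Tuple1<String>>() {})  // generic type is erased, so declare it
                .print();
        env.execute("finite-spout");
    }
}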

Example 3 with IRichSpout

Use of org.apache.storm.topology.IRichSpout in project flink by apache.

The class SpoutWrapperTest, method testRunPrepare.

@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testRunPrepare() throws Exception {
    final StormConfig stormConfig = new StormConfig();
    stormConfig.put(this.r.nextInt(), this.r.nextInt());
    final Configuration flinkConfig = new Configuration();
    flinkConfig.setInteger("testKey", this.r.nextInt());
    final ExecutionConfig taskConfig = mock(ExecutionConfig.class);
    when(taskConfig.getGlobalJobParameters()).thenReturn(null).thenReturn(stormConfig).thenReturn(flinkConfig);
    final StreamingRuntimeContext taskContext = mock(StreamingRuntimeContext.class);
    when(taskContext.getExecutionConfig()).thenReturn(taskConfig);
    when(taskContext.getTaskName()).thenReturn("name");
    final IRichSpout spout = mock(IRichSpout.class);
    SpoutWrapper spoutWrapper = new SpoutWrapper(spout);
    spoutWrapper.setRuntimeContext(taskContext);
    spoutWrapper.cancel();
    // test without configuration
    spoutWrapper.run(mock(SourceContext.class));
    verify(spout).open(any(Map.class), any(TopologyContext.class), any(SpoutOutputCollector.class));
    // test with StormConfig
    spoutWrapper.run(mock(SourceContext.class));
    verify(spout).open(eq(stormConfig), any(TopologyContext.class), any(SpoutOutputCollector.class));
    // test with Configuration
    final TestDummySpout testSpout = new TestDummySpout();
    spoutWrapper = new SpoutWrapper(testSpout);
    spoutWrapper.setRuntimeContext(taskContext);
    spoutWrapper.cancel();
    spoutWrapper.run(mock(SourceContext.class));
    for (Entry<String, String> entry : flinkConfig.toMap().entrySet()) {
        Assert.assertEquals(entry.getValue(), testSpout.config.get(entry.getKey()));
    }
}
Also used : StormConfig(org.apache.flink.storm.util.StormConfig) Configuration(org.apache.flink.configuration.Configuration) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SourceContext(org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext) TestDummySpout(org.apache.flink.storm.util.TestDummySpout) IRichSpout(org.apache.storm.topology.IRichSpout) SpoutOutputCollector(org.apache.storm.spout.SpoutOutputCollector) TopologyContext(org.apache.storm.task.TopologyContext) Map(java.util.Map) AbstractTest(org.apache.flink.storm.util.AbstractTest) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)
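
The three chained thenReturn(...) stubs walk run() through its three configuration paths: no global job parameters, a StormConfig, and a plain Flink Configuration (converted to a Map for the spout). In a real job, the Storm-side configuration is registered as global job parameters; a brief sketch, relying on StormConfig being a GlobalJobParameters implementation as the test above implies:

import org.apache.flink.storm.util.StormConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.storm.Config;

public class StormConfigSetup {

    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // StormConfig is a Map, so Storm keys can be set directly; registering it as
        // global job parameters makes it the conf handed to IRichSpout.open(...).
        StormConfig stormConfig = new StormConfig();
        stormConfig.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, 30);
        env.getConfig().setGlobalJobParameters(stormConfig);

        // ... add SpoutWrapper / BoltWrapper operators and call env.execute() ...
    }
}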

Example 4 with IRichSpout

Use of org.apache.storm.topology.IRichSpout in project flink by apache.

The class SpoutWrapperTest, method testCancel.

@Test
public void testCancel() throws Exception {
    final int numberOfCalls = 5 + this.r.nextInt(5);
    final StreamingRuntimeContext taskContext = mock(StreamingRuntimeContext.class);
    when(taskContext.getExecutionConfig()).thenReturn(mock(ExecutionConfig.class));
    when(taskContext.getTaskName()).thenReturn("name");
    final IRichSpout spout = new FiniteTestSpout(numberOfCalls);
    final SpoutWrapper<Tuple1<Integer>> spoutWrapper = new SpoutWrapper<Tuple1<Integer>>(spout);
    spoutWrapper.setRuntimeContext(taskContext);
    spoutWrapper.cancel();
    final TestContext collector = new TestContext();
    spoutWrapper.run(collector);
    Assert.assertEquals(new LinkedList<Tuple1<Integer>>(), collector.result);
}
Also used : IRichSpout(org.apache.storm.topology.IRichSpout) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) Tuple1(org.apache.flink.api.java.tuple.Tuple1) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) FiniteTestSpout(org.apache.flink.storm.util.FiniteTestSpout) AbstractTest(org.apache.flink.storm.util.AbstractTest) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)
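
The assertion holds because cancel() is called before run(): the wrapper's running flag is already cleared, so the emit loop never executes and the collector stays empty. A minimal sketch of that standard volatile-flag pattern (simplified; not the actual SpoutWrapper implementation):

import org.apache.flink.streaming.api.functions.source.SourceFunction;

public class CancellableSource implements SourceFunction<Integer> {

    private volatile boolean isRunning = true;
    private int counter = 0;

    @Override
    public void run(SourceContext<Integer> ctx) {
        // If cancel() already ran, the condition is false on entry and nothing is emitted.
        while (isRunning) {
            ctx.collect(counter++);
        }
    }

    @Override
    public void cancel() {
        isRunning = false;
    }
}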

Example 5 with IRichSpout

Use of org.apache.storm.topology.IRichSpout in project flink by apache.

The class WrapperSetupHelper, method createTopologyContext.

/**
 * Creates a {@link TopologyContext} for a Spout or Bolt instance (i.e., a Flink task / Storm executor).
 *
 * @param context
 *            The Flink runtime context.
 * @param spoutOrBolt
 *            The Spout or Bolt this context is created for.
 * @param operatorName
 *            The name of the operator (used as component ID).
 * @param stormTopology
 *            The original Storm topology.
 * @param stormConfig
 *            The user-provided configuration.
 * @return The created {@link TopologyContext}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
static synchronized TopologyContext createTopologyContext(final StreamingRuntimeContext context, final IComponent spoutOrBolt, final String operatorName, StormTopology stormTopology, final Map stormConfig) {
    final int dop = context.getNumberOfParallelSubtasks();
    final Map<Integer, String> taskToComponents = new HashMap<Integer, String>();
    final Map<String, List<Integer>> componentToSortedTasks = new HashMap<String, List<Integer>>();
    final Map<String, Map<String, Fields>> componentToStreamToFields = new HashMap<String, Map<String, Fields>>();
    String stormId = (String) stormConfig.get(TOPOLOGY_NAME);
    // not supported
    String codeDir = null;
    // not supported
    String pidDir = null;
    Integer taskId = -1;
    // not supported
    Integer workerPort = null;
    List<Integer> workerTasks = new ArrayList<Integer>();
    final Map<String, Object> defaultResources = new HashMap<String, Object>();
    final Map<String, Object> userResources = new HashMap<String, Object>();
    final Map<String, Object> executorData = new HashMap<String, Object>();
    final Map registeredMetrics = new HashMap();
    Atom openOrPrepareWasCalled = null;
    if (stormTopology == null) {
        // embedded mode
        ComponentCommon common = new ComponentCommon();
        common.set_parallelism_hint(dop);
        HashMap<String, SpoutSpec> spouts = new HashMap<String, SpoutSpec>();
        HashMap<String, Bolt> bolts = new HashMap<String, Bolt>();
        if (spoutOrBolt instanceof IRichSpout) {
            spouts.put(operatorName, new SpoutSpec(null, common));
        } else {
            assert (spoutOrBolt instanceof IRichBolt);
            bolts.put(operatorName, new Bolt(null, common));
        }
        stormTopology = new StormTopology(spouts, bolts, new HashMap<String, StateSpoutSpec>());
        List<Integer> sortedTasks = new ArrayList<Integer>(dop);
        for (int i = 1; i <= dop; ++i) {
            taskToComponents.put(i, operatorName);
            sortedTasks.add(i);
        }
        componentToSortedTasks.put(operatorName, sortedTasks);
        SetupOutputFieldsDeclarer declarer = new SetupOutputFieldsDeclarer();
        spoutOrBolt.declareOutputFields(declarer);
        componentToStreamToFields.put(operatorName, declarer.outputStreams);
    } else {
        // whole topology is built (i.e. FlinkTopology is used)
        Map<String, SpoutSpec> spouts = stormTopology.get_spouts();
        Map<String, Bolt> bolts = stormTopology.get_bolts();
        Map<String, StateSpoutSpec> stateSpouts = stormTopology.get_state_spouts();
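        // "tid" is a field of WrapperSetupHelper (declaration not shown in this excerpt);
        // it serves as a running task-ID counter consumed by processSingleOperator() below.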
        tid = 1;
        for (Entry<String, SpoutSpec> spout : spouts.entrySet()) {
            Integer rc = processSingleOperator(spout.getKey(), spout.getValue().get_common(), operatorName, context.getIndexOfThisSubtask(), dop, taskToComponents, componentToSortedTasks, componentToStreamToFields);
            if (rc != null) {
                taskId = rc;
            }
        }
        for (Entry<String, Bolt> bolt : bolts.entrySet()) {
            Integer rc = processSingleOperator(bolt.getKey(), bolt.getValue().get_common(), operatorName, context.getIndexOfThisSubtask(), dop, taskToComponents, componentToSortedTasks, componentToStreamToFields);
            if (rc != null) {
                taskId = rc;
            }
        }
        for (Entry<String, StateSpoutSpec> stateSpout : stateSpouts.entrySet()) {
            Integer rc = processSingleOperator(stateSpout.getKey(), stateSpout.getValue().get_common(), operatorName, context.getIndexOfThisSubtask(), dop, taskToComponents, componentToSortedTasks, componentToStreamToFields);
            if (rc != null) {
                taskId = rc;
            }
        }
        assert (taskId != null);
    }
    if (!stormConfig.containsKey(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS)) {
        // Storm default value
        stormConfig.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, 30);
    }
    return new FlinkTopologyContext(stormTopology, stormConfig, taskToComponents, componentToSortedTasks, componentToStreamToFields, stormId, codeDir, pidDir, taskId, workerPort, workerTasks, defaultResources, userResources, executorData, registeredMetrics, openOrPrepareWasCalled);
}
Also used : HashMap(java.util.HashMap) StormTopology(org.apache.storm.generated.StormTopology) ArrayList(java.util.ArrayList) StateSpoutSpec(org.apache.storm.generated.StateSpoutSpec) ArrayList(java.util.ArrayList) List(java.util.List) ComponentCommon(org.apache.storm.generated.ComponentCommon) IRichBolt(org.apache.storm.topology.IRichBolt) Bolt(org.apache.storm.generated.Bolt) IRichBolt(org.apache.storm.topology.IRichBolt) Atom(clojure.lang.Atom) Fields(org.apache.storm.tuple.Fields) IRichSpout(org.apache.storm.topology.IRichSpout) StateSpoutSpec(org.apache.storm.generated.StateSpoutSpec) SpoutSpec(org.apache.storm.generated.SpoutSpec) HashMap(java.util.HashMap) Map(java.util.Map)
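
The context assembled here is what a wrapped component receives in open() / prepare(). For illustration, a small spout consuming it through the standard Storm API (the emit logic is hypothetical):

import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class ContextAwareSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;
    private int taskIndex;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        // Served by the taskToComponents / componentToSortedTasks maps populated above.
        this.taskIndex = context.getThisTaskIndex();
    }

    @Override
    public void nextTuple() {
        collector.emit(new Values("subtask-" + taskIndex));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("id"));
    }
}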

Aggregations

IRichSpout (org.apache.storm.topology.IRichSpout) 9
Test (org.junit.Test) 5
HashMap (java.util.HashMap) 4
AbstractTest (org.apache.flink.storm.util.AbstractTest) 4
Map (java.util.Map) 3
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 3
StreamingRuntimeContext (org.apache.flink.streaming.api.operators.StreamingRuntimeContext) 3
GlobalStreamId (org.apache.storm.generated.GlobalStreamId) 3
PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest) 3
ArrayList (java.util.ArrayList) 2
List (java.util.List) 2
Tuple1 (org.apache.flink.api.java.tuple.Tuple1) 2
SourceContext (org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext) 2
ComponentCommon (org.apache.storm.generated.ComponentCommon) 2
SpoutOutputCollector (org.apache.storm.spout.SpoutOutputCollector) 2
TopologyContext (org.apache.storm.task.TopologyContext) 2
BoltDeclarer (org.apache.storm.topology.BoltDeclarer) 2
IRichBolt (org.apache.storm.topology.IRichBolt) 2
SpoutDeclarer (org.apache.storm.topology.SpoutDeclarer) 2
ITridentSpout (org.apache.storm.trident.spout.ITridentSpout) 2