
Example 66 with Fields

Use of org.apache.storm.tuple.Fields in project storm by apache.

The class PartialKeyGroupingTest, method testChooseTasksFields:

@Test
public void testChooseTasksFields() {
    PartialKeyGrouping pkg = new PartialKeyGrouping(new Fields("test"));
    WorkerTopologyContext context = mock(WorkerTopologyContext.class);
    when(context.getComponentOutputFields(any(GlobalStreamId.class))).thenReturn(new Fields("test"));
    pkg.prepare(context, null, Lists.newArrayList(0, 1, 2, 3, 4, 5));
    Values message = new Values("key1");
    // Partial key grouping picks two candidate tasks per key and alternates
    // between them, so consecutive choices for the same key differ ...
    List<Integer> choice1 = pkg.chooseTasks(0, message);
    assertThat(choice1.size(), is(1));
    List<Integer> choice2 = pkg.chooseTasks(0, message);
    assertThat(choice2, is(not(choice1)));
    List<Integer> choice3 = pkg.chooseTasks(0, message);
    assertThat(choice3, is(not(choice2)));
    // ... and the third choice cycles back to the first candidate.
    assertThat(choice3, is(choice1));
}
Also used: Fields(org.apache.storm.tuple.Fields) WorkerTopologyContext(org.apache.storm.task.WorkerTopologyContext) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) Values(org.apache.storm.tuple.Values) Test(org.junit.Test)
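
For context, a grouping like this is attached when wiring a topology. A minimal sketch, assuming hypothetical WordSpout and CountBolt classes (component names and the grouped field are illustrative; only PartialKeyGrouping and customGrouping come from the example above):

// Hypothetical wiring; the spout/bolt classes are stand-ins.
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("words", new WordSpout());
builder.setBolt("counter", new CountBolt(), 6)
       // split each hot key across two candidate tasks to balance load
       .customGrouping("words", new PartialKeyGrouping(new Fields("word")));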

Example 67 with Fields

Use of org.apache.storm.tuple.Fields in project storm by apache.

The class Node, method getOutputFields:

static Fields getOutputFields(IComponent component, String streamId) {
    // Have the component declare its streams, then look up the requested one.
    OutputFieldsGetter getter = new OutputFieldsGetter();
    component.declareOutputFields(getter);
    Map<String, StreamInfo> fieldsDeclaration = getter.getFieldsDeclaration();
    if ((fieldsDeclaration != null) && fieldsDeclaration.containsKey(streamId)) {
        return new Fields(fieldsDeclaration.get(streamId).get_output_fields());
    }
    // Undeclared stream: fall back to an empty field list.
    return new Fields();
}
Also used: Fields(org.apache.storm.tuple.Fields) OutputFieldsGetter(org.apache.storm.topology.OutputFieldsGetter) StreamInfo(org.apache.storm.generated.StreamInfo)
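
The helper simply asks the component to declare its streams and then looks up the requested id. A minimal sketch of a component it could inspect (class, stream, and field names are illustrative):

// Illustrative component: getOutputFields(new SquaringBolt(), "numbers") would
// return new Fields("value", "squared"); any other stream id yields empty Fields.
public class SquaringBolt extends BaseRichBolt {

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream("numbers", new Fields("value", "squared"));
    }

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
    }

    @Override
    public void execute(Tuple input) {
    }
}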

Example 68 with Fields

Use of org.apache.storm.tuple.Fields in project storm by apache.

The class ProcessorBoltTest, method setUpProcessorBolt:

private void setUpProcessorBolt(Processor<?> processor, Set<String> windowedParentStreams, boolean isWindowed, String tsFieldName) {
    ProcessorNode node = new ProcessorNode(processor, "outputstream", new Fields("value"));
    node.setWindowedParentStreams(windowedParentStreams);
    node.setWindowed(isWindowed);
    Mockito.when(mockStreamToProcessors.get(Mockito.anyString())).thenReturn(Collections.singletonList(node));
    Mockito.when(mockStreamToProcessors.keySet()).thenReturn(Collections.singleton("inputstream"));
    // Wire a single source: stream "inputstream" coming from component "bolt0".
    Map mockSources = Mockito.mock(Map.class);
    GlobalStreamId mockGlobalStreamId = Mockito.mock(GlobalStreamId.class);
    Mockito.when(mockTopologyContext.getThisSources()).thenReturn(mockSources);
    Mockito.when(mockSources.keySet()).thenReturn(Collections.singleton(mockGlobalStreamId));
    Mockito.when(mockGlobalStreamId.get_streamId()).thenReturn("inputstream");
    Mockito.when(mockGlobalStreamId.get_componentId()).thenReturn("bolt0");
    Mockito.when(mockTopologyContext.getComponentTasks(Mockito.anyString())).thenReturn(Collections.singletonList(1));
    // Register the node and prepare the bolt under test.
    graph.addVertex(node);
    bolt = new ProcessorBolt("bolt1", graph, Collections.singletonList(node));
    if (tsFieldName != null && !tsFieldName.isEmpty()) {
        bolt.setTimestampField(tsFieldName);
    }
    bolt.setStreamToInitialProcessors(mockStreamToProcessors);
    bolt.prepare(new HashMap<>(), mockTopologyContext, mockOutputCollector);
}
Also used: Fields(org.apache.storm.tuple.Fields) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) HashMap(java.util.HashMap) Map(java.util.Map)
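
A hedged sketch of a test that could use this helper, assuming the mocks and graph above are fields initialized in a @Before method; the Processor is mocked because only the wiring is exercised, and "ts" is an illustrative timestamp field name:

@Test
public void testSetupWithTimestampField() throws Exception {
    setUpProcessorBolt(Mockito.mock(Processor.class), Collections.emptySet(), true, "ts");
    // the bolt is now prepared, windowed, and configured with "ts" as its timestamp field
}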

Example 69 with Fields

Use of org.apache.storm.tuple.Fields in project storm by apache.

The class TestTridentTopology, method buildTopology:

private StormTopology buildTopology() {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"));
    spout.setCycle(true);
    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .partitionBy(new Fields("word"))
            .name("abc")
            .each(new Fields("word"), new StringLength(), new Fields("length"))
            .partitionBy(new Fields("length"))
            .name("def")
            .aggregate(new Fields("length"), new Count(), new Fields("count"))
            .partitionBy(new Fields("count"))
            .name("ghi")
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    return topology.build();
}
Also used: FixedBatchSpout(org.apache.storm.trident.testing.FixedBatchSpout) StringLength(org.apache.storm.trident.testing.StringLength) Fields(org.apache.storm.tuple.Fields) Values(org.apache.storm.tuple.Values) Sum(org.apache.storm.trident.operation.builtin.Sum) Count(org.apache.storm.trident.operation.builtin.Count) Split(org.apache.storm.trident.testing.Split)
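
To run the pipeline, the built topology is typically submitted to a local cluster. A hedged sketch using the Storm 1.x API, assuming a release where LocalCluster and LocalTopology are AutoCloseable (topology name and sleep duration are illustrative):

Config conf = new Config();
conf.setMaxSpoutPending(20);
try (LocalCluster cluster = new LocalCluster();
     LocalCluster.LocalTopology topo = cluster.submitTopology("wordLengths", conf, buildTopology())) {
    Utils.sleep(10_000);  // let the cycling spout feed the pipeline for a while
}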

Example 70 with Fields

Use of org.apache.storm.tuple.Fields in project flink by apache.

The class FlinkTopology, method translateTopology:

/**
 * Creates a Flink program that uses the specified spouts and bolts.
 */
private void translateTopology() {
    unprocessdInputsPerBolt.clear();
    outputStreams.clear();
    declarers.clear();
    availableInputs.clear();
    // Storm defaults to parallelism 1
    env.setParallelism(1);
    for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
        final String spoutId = spout.getKey();
        final IRichSpout userSpout = spout.getValue();
        final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
        userSpout.declareOutputFields(declarer);
        final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
        this.outputStreams.put(spoutId, sourceStreams);
        declarers.put(spoutId, declarer);
        final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>();
        final DataStreamSource<?> source;
        if (sourceStreams.size() == 1) {
            final SpoutWrapper<Tuple> spoutWrapperSingleOutput = new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
            spoutWrapperSingleOutput.setStormTopology(stormTopology);
            final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];
            DataStreamSource<Tuple> src = env.addSource(spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId));
            outputStreams.put(outputStreamId, src);
            source = src;
        } else {
            final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs = new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
            spoutWrapperMultipleOutputs.setStormTopology(stormTopology);
            @SuppressWarnings({ "unchecked", "rawtypes" }) DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource(spoutWrapperMultipleOutputs, spoutId, (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));
            SplitStream<SplitStreamType<Tuple>> splitSource = multiSource.split(new StormStreamSelector<Tuple>());
            for (String streamId : sourceStreams.keySet()) {
                SingleOutputStreamOperator<Tuple> outStream = splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
                outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
                outputStreams.put(streamId, outStream);
            }
            source = multiSource;
        }
        availableInputs.put(spoutId, outputStreams);
        final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
        if (common.is_set_parallelism_hint()) {
            int dop = common.get_parallelism_hint();
            source.setParallelism(dop);
        } else {
            common.set_parallelism_hint(1);
        }
    }
    /*
     * 1. Connect all spout streams with bolt streams.
     * 2. Then proceed with the bolt streams that are already connected.
     *
     * Because we do not know the order in which an iterator steps over a set,
     * we might process a consumer before its producer; thus, we might need to
     * repeat multiple times.
     */
    boolean makeProgress = true;
    while (bolts.size() > 0) {
        if (!makeProgress) {
            StringBuilder strBld = new StringBuilder();
            strBld.append("Unable to build Topology. Could not connect the following bolts:");
            for (String boltId : bolts.keySet()) {
                strBld.append("\n  ");
                strBld.append(boltId);
                strBld.append(": missing input streams [");
                for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
                    strBld.append("'");
                    strBld.append(streams.getKey().get_streamId());
                    strBld.append("' from '");
                    strBld.append(streams.getKey().get_componentId());
                    strBld.append("'; ");
                }
                strBld.append("]");
            }
            throw new RuntimeException(strBld.toString());
        }
        makeProgress = false;
        final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
        while (boltsIterator.hasNext()) {
            final Entry<String, IRichBolt> bolt = boltsIterator.next();
            final String boltId = bolt.getKey();
            final IRichBolt userBolt = copyObject(bolt.getValue());
            final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();
            Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId);
            if (unprocessedBoltInputs == null) {
                unprocessedBoltInputs = new HashSet<>();
                unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
                unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
            }
            // check if all inputs are available
            final int numberOfInputs = unprocessedBoltInputs.size();
            int inputsAvailable = 0;
            for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
                final String producerId = entry.getKey().get_componentId();
                final String streamId = entry.getKey().get_streamId();
                final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
                if (streams != null && streams.get(streamId) != null) {
                    inputsAvailable++;
                }
            }
            if (inputsAvailable != numberOfInputs) {
                // traverse other bolts first until inputs are available
                continue;
            } else {
                makeProgress = true;
                boltsIterator.remove();
            }
            final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);
            for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
                final GlobalStreamId streamId = input.getKey();
                final Grouping grouping = input.getValue();
                final String producerId = streamId.get_componentId();
                final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);
                inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
            }
            final SingleOutputStreamOperator<?> outputStream = createOutput(boltId, userBolt, inputStreams);
            if (common.is_set_parallelism_hint()) {
                int dop = common.get_parallelism_hint();
                outputStream.setParallelism(dop);
            } else {
                common.set_parallelism_hint(1);
            }
        }
    }
}
Also used: SpoutWrapper(org.apache.flink.storm.wrappers.SpoutWrapper) HashMap(java.util.HashMap) DataStream(org.apache.flink.streaming.api.datastream.DataStream) Entry(java.util.Map.Entry) ComponentCommon(org.apache.storm.generated.ComponentCommon) IRichBolt(org.apache.storm.topology.IRichBolt) Grouping(org.apache.storm.generated.Grouping) IRichSpout(org.apache.storm.topology.IRichSpout) Fields(org.apache.storm.tuple.Fields) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) StormTuple(org.apache.flink.storm.wrappers.StormTuple) Tuple(org.apache.flink.api.java.tuple.Tuple) SplitStreamType(org.apache.flink.storm.util.SplitStreamType)
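
For orientation, this translation is normally triggered through the flink-storm compatibility layer rather than called directly. A hedged sketch of the usual entry point, assuming the flink-storm API of the same era (the spout and bolt classes are illustrative stand-ins):

TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("source", new SentenceSpout());  // hypothetical spout
builder.setBolt("split", new SplitBolt()).shuffleGrouping("source");  // hypothetical bolt

// createTopology() runs translateTopology() internally while building the Flink job
FlinkLocalCluster cluster = FlinkLocalCluster.getLocalCluster();
cluster.submitTopology("storm-on-flink", new Config(), FlinkTopology.createTopology(builder));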

Aggregations

Fields (org.apache.storm.tuple.Fields): 170
Test (org.junit.Test): 44
Values (org.apache.storm.tuple.Values): 38
TopologyBuilder (org.apache.storm.topology.TopologyBuilder): 36
TridentTopology (org.apache.storm.trident.TridentTopology): 32
HashMap (java.util.HashMap): 31
Config (org.apache.storm.Config): 31
Stream (org.apache.storm.trident.Stream): 25
LocalCluster (org.apache.storm.LocalCluster): 19
LocalTopology (org.apache.storm.LocalCluster.LocalTopology): 17
TridentState (org.apache.storm.trident.TridentState): 17
FixedBatchSpout (org.apache.storm.trident.testing.FixedBatchSpout): 16
ArrayList (java.util.ArrayList): 14
Map (java.util.Map): 14
HiveOptions (org.apache.storm.hive.common.HiveOptions): 14
AbstractTest (org.apache.flink.storm.util.AbstractTest): 13
DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper): 12
IRichBolt (org.apache.storm.topology.IRichBolt): 12
StateFactory (org.apache.storm.trident.state.StateFactory): 12
Tuple (org.apache.storm.tuple.Tuple): 12