
Example 6 with FlatMapFunction

Use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.

From the class SocketWindowWordCount, method main:

public static void main(String[] args) throws Exception {
    // the host and the port to connect to
    final String hostname;
    final int port;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        hostname = params.has("hostname") ? params.get("hostname") : "localhost";
        port = params.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount " + "--hostname <hostname> --port <port>', where hostname (localhost by default) " + "and port is the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and " + "type the input text into the command line");
        return;
    }
    // get the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // get input data by connecting to the socket
    DataStream<String> text = env.socketTextStream(hostname, port, "\n");
    // parse the data, group it, window it, and aggregate the counts
    DataStream<WordWithCount> windowCounts = text.flatMap(new FlatMapFunction<String, WordWithCount>() {

        @Override
        public void flatMap(String value, Collector<WordWithCount> out) {
            for (String word : value.split("\\s")) {
                out.collect(new WordWithCount(word, 1L));
            }
        }
    }).keyBy("word").timeWindow(Time.seconds(5)).reduce(new ReduceFunction<WordWithCount>() {

        @Override
        public WordWithCount reduce(WordWithCount a, WordWithCount b) {
            return new WordWithCount(a.word, a.count + b.count);
        }
    });
    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1);
    env.execute("Socket Window WordCount");
}
Also used: ParameterTool (org.apache.flink.api.java.utils.ParameterTool), FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction), Collector (org.apache.flink.util.Collector), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
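The program above also relies on a WordWithCount data type that is not shown in this excerpt. A minimal sketch of such a type (the field names come from the code above; the constructors and toString are assumptions following Flink's usual POJO conventions):

public static class WordWithCount {

    // public fields and a no-argument constructor so that Flink can treat the type as a POJO
    public String word;
    public long count;

    public WordWithCount() {
    }

    public WordWithCount(String word, long count) {
        this.word = word;
        this.count = count;
    }

    @Override
    public String toString() {
        return word + " : " + count;
    }
}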

Example 7 with FlatMapFunction

Use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.

From the class SideOutputITCase, method testAllWindowLateArrivingEvents:

/**
 * Tests that late-arriving elements of the all-window stream are emitted to the late-data side output.
 */
@Test
public void testAllWindowLateArrivingEvents() throws Exception {
    TestListResultSink<String> sideOutputResultSink = new TestListResultSink<>();
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    see.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    DataStream<Integer> dataStream = see.fromCollection(elements);
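    // the OutputTag is created as an anonymous subclass so that its generic type
    // information (Integer) is preserved for the late-data side output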
    OutputTag<Integer> lateDataTag = new OutputTag<Integer>("late") {
    };
    SingleOutputStreamOperator<Integer> windowOperator = dataStream.assignTimestampsAndWatermarks(new TestWatermarkAssigner()).timeWindowAll(Time.milliseconds(1), Time.milliseconds(1)).sideOutputLateData(lateDataTag).apply(new AllWindowFunction<Integer, Integer, TimeWindow>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void apply(TimeWindow window, Iterable<Integer> values, Collector<Integer> out) throws Exception {
            for (Integer val : values) {
                out.collect(val);
            }
        }
    });
    windowOperator.getSideOutput(lateDataTag).flatMap(new FlatMapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Integer value, Collector<String> out) throws Exception {
            out.collect("late-" + String.valueOf(value));
        }
    }).addSink(sideOutputResultSink);
    see.execute();
    assertEquals(Arrays.asList("late-3", "late-4"), sideOutputResultSink.getSortedResult());
}
Also used: TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow), ExpectedException (org.junit.rules.ExpectedException), TestListResultSink (org.apache.flink.test.streaming.runtime.util.TestListResultSink), FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction), Collector (org.apache.flink.util.Collector), OutputTag (org.apache.flink.util.OutputTag), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Test (org.junit.Test)
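The test above references an elements collection and a TestWatermarkAssigner that are defined elsewhere in SideOutputITCase and are not part of this excerpt. Purely as a hypothetical sketch (not the actual fixture from the test class), a punctuated assigner that treats each integer as its own event-time timestamp, so that out-of-order smaller values arrive behind the watermark and become late for the 1 ms windows above, might look like this:

private static class TestWatermarkAssigner implements AssignerWithPunctuatedWatermarks<Integer> {

    private static final long serialVersionUID = 1L;

    @Override
    public long extractTimestamp(Integer element, long previousElementTimestamp) {
        // treat the element's value as its event-time timestamp
        return element.longValue();
    }

    @Override
    public Watermark checkAndGetNextWatermark(Integer lastElement, long extractedTimestamp) {
        // advance the watermark to the element's timestamp; any later element with a
        // smaller value then falls behind the watermark and is routed to the side output
        return new Watermark(extractedTimestamp);
    }
}

(AssignerWithPunctuatedWatermarks is org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks, and Watermark is org.apache.flink.streaming.api.watermark.Watermark.)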

Example 8 with FlatMapFunction

Use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.

From the class StreamingJobGraphGeneratorTest, method testResourcesForIteration:

/**
 * Verifies that resources are merged correctly for chained operators (covering the middle-chaining and iteration cases)
 * when generating the job graph.
 */
@Test
public void testResourcesForIteration() throws Exception {
    ResourceSpec resource1 = new ResourceSpec(0.1, 100);
    ResourceSpec resource2 = new ResourceSpec(0.2, 200);
    ResourceSpec resource3 = new ResourceSpec(0.3, 300);
    ResourceSpec resource4 = new ResourceSpec(0.4, 400);
    ResourceSpec resource5 = new ResourceSpec(0.5, 500);
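    // the setResources(...) methods are not publicly accessible here, which is why the test invokes them via reflection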
    Method opMethod = SingleOutputStreamOperator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    opMethod.setAccessible(true);
    Method sinkMethod = DataStreamSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    sinkMethod.setAccessible(true);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    }).name("test_source");
    opMethod.invoke(source, resource1);
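    // open an iteration whose head waits at most 3000 ms for feedback data before finishing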
    IterativeStream<Integer> iteration = source.iterate(3000);
    opMethod.invoke(iteration, resource2);
    DataStream<Integer> flatMap = iteration.flatMap(new FlatMapFunction<Integer, Integer>() {

        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value);
        }
    }).name("test_flatMap");
    opMethod.invoke(flatMap, resource3);
    // CHAIN(flatMap -> Filter)
    DataStream<Integer> increment = flatMap.filter(new FilterFunction<Integer>() {

        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    }).name("test_filter");
    opMethod.invoke(increment, resource4);
    DataStreamSink<Integer> sink = iteration.closeWith(increment).addSink(new SinkFunction<Integer>() {

        @Override
        public void invoke(Integer value) throws Exception {
        }
    }).disableChaining().name("test_sink");
    sinkMethod.invoke(sink, resource5);
    JobGraph jobGraph = new StreamingJobGraphGenerator(env.getStreamGraph(), 1).createJobGraph();
    for (JobVertex jobVertex : jobGraph.getVertices()) {
        if (jobVertex.getName().contains("test_source")) {
            assertTrue(jobVertex.getMinResources().equals(resource1));
        } else if (jobVertex.getName().contains("Iteration_Source")) {
            assertTrue(jobVertex.getPreferredResources().equals(resource2));
        } else if (jobVertex.getName().contains("test_flatMap")) {
            assertTrue(jobVertex.getMinResources().equals(resource3.merge(resource4)));
        } else if (jobVertex.getName().contains("Iteration_Tail")) {
            assertTrue(jobVertex.getPreferredResources().equals(ResourceSpec.DEFAULT));
        } else if (jobVertex.getName().contains("test_sink")) {
            assertTrue(jobVertex.getMinResources().equals(resource5));
        }
    }
}
Also used: FilterFunction (org.apache.flink.api.common.functions.FilterFunction), ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec), Method (java.lang.reflect.Method), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction), Collector (org.apache.flink.util.Collector), ParallelSourceFunction (org.apache.flink.streaming.api.functions.source.ParallelSourceFunction), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Test (org.junit.Test)
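Because the flatMap and filter operators are chained into a single JobVertex, the test_flatMap assertion compares that vertex's minimum resources against resource3.merge(resource4). As an illustration only, assuming ResourceSpec.merge accumulates the requirements of both chained operators (which is what the assertion relies on):

// not part of the test, just spelling out the expectation for the chained flatMap -> filter vertex:
// the merged spec combines 0.3 CPU cores / 300 MB heap with 0.4 CPU cores / 400 MB heap
ResourceSpec expectedChainedResources = resource3.merge(resource4);
assertTrue(jobVertex.getMinResources().equals(expectedChainedResources));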

Aggregations

FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 8
Collector (org.apache.flink.util.Collector): 8
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 7
Test (org.junit.Test): 5
MapFunction (org.apache.flink.api.common.functions.MapFunction): 3
FilterFunction (org.apache.flink.api.common.functions.FilterFunction): 2
ParameterTool (org.apache.flink.api.java.utils.ParameterTool): 2
CoMapFunction (org.apache.flink.streaming.api.functions.co.CoMapFunction): 2
TestListResultSink (org.apache.flink.test.streaming.runtime.util.TestListResultSink): 2
IOException (java.io.IOException): 1
Method (java.lang.reflect.Method): 1
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
Properties (java.util.Properties): 1
RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction): 1
ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec): 1
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 1
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 1
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1