Example usage of org.apache.flink.api.common.functions.FlatMapFunction in the Apache Flink project.
From class SocketWindowWordCount, method main.
/**
 * Entry point of the socket window word-count example.
 *
 * <p>Reads newline-delimited text from a socket, splits every line into tokens, and prints
 * the per-word counts aggregated over time windows of five seconds.
 *
 * @param args command-line arguments: {@code --port <port>} is required,
 *     {@code --hostname <hostname>} is optional and falls back to {@code localhost}
 * @throws Exception if setting up or executing the streaming job fails
 */
public static void main(String[] args) throws Exception {
    // Address of the text server, resolved from the command line.
    final String hostname;
    final int port;
    try {
        final ParameterTool arguments = ParameterTool.fromArgs(args);
        if (arguments.has("hostname")) {
            hostname = arguments.get("hostname");
        } else {
            hostname = "localhost";
        }
        port = arguments.getInt("port");
    } catch (Exception e) {
        // Any failure while reading the arguments ends the program with a usage hint.
        System.err.println("No port specified. Please run 'SocketWindowWordCount "
                + "--hostname <hostname> --port <port>', where hostname (localhost by default) "
                + "and port is the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and "
                + "type the input text into the command line");
        return;
    }

    // Emits one WordWithCount with count 1 for every token of a line. The line is split on
    // single whitespace characters, so a run of whitespace yields empty tokens as well.
    final FlatMapFunction<String, WordWithCount> tokenizer = new FlatMapFunction<String, WordWithCount>() {
        @Override
        public void flatMap(String value, Collector<WordWithCount> out) {
            final String[] tokens = value.split("\\s");
            for (int i = 0; i < tokens.length; i++) {
                out.collect(new WordWithCount(tokens[i], 1L));
            }
        }
    };

    // Combines two partial results by adding their counts; the word is taken from the first.
    final ReduceFunction<WordWithCount> countSummer = new ReduceFunction<WordWithCount>() {
        @Override
        public WordWithCount reduce(WordWithCount a, WordWithCount b) {
            final long total = a.count + b.count;
            return new WordWithCount(a.word, total);
        }
    };

    // Obtain the execution environment and connect to the socket.
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> lines = env.socketTextStream(hostname, port, "\n");

    // Tokenize, key by the "word" field, window for five seconds, and sum the counts.
    DataStream<WordWithCount> windowCounts = lines
            .flatMap(tokenizer)
            .keyBy("word")
            .timeWindow(Time.seconds(5))
            .reduce(countSummer);

    // Print with a single thread instead of in parallel.
    windowCounts.print().setParallelism(1);

    env.execute("Socket Window WordCount");
}
Example usage of org.apache.flink.api.common.functions.FlatMapFunction in the Apache Flink project.
From class SideOutputITCase, method testAllWindowLateArrivingEvents.
/**
 * Tests that late-arriving events of an all-window operation are emitted to the late-data
 * side output.
 *
 * <p>The side output is mapped to strings with a {@code late-} prefix and collected in a
 * list sink; the test expects exactly {@code late-3} and {@code late-4}.
 *
 * @throws Exception if executing the streaming job fails
 */
@Test
public void testAllWindowLateArrivingEvents() throws Exception {
    TestListResultSink<String> sideOutputResultSink = new TestListResultSink<>();

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    see.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Integer> dataStream = see.fromCollection(elements);

    // Created as an anonymous subclass (presumably so the generic element type is retained).
    OutputTag<Integer> lateDataTag = new OutputTag<Integer>("late") {
    };

    // The window function forwards every element unchanged; late elements go to lateDataTag.
    SingleOutputStreamOperator<Integer> windowOperator = dataStream
            .assignTimestampsAndWatermarks(new TestWatermarkAssigner())
            .timeWindowAll(Time.milliseconds(1), Time.milliseconds(1))
            .sideOutputLateData(lateDataTag)
            .apply(new AllWindowFunction<Integer, Integer, TimeWindow>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void apply(TimeWindow window, Iterable<Integer> values, Collector<Integer> out) throws Exception {
                    for (Integer val : values) {
                        out.collect(val);
                    }
                }
            });

    // Tag each late element so it is recognizable in the sink.
    windowOperator
            .getSideOutput(lateDataTag)
            .flatMap(new FlatMapFunction<Integer, String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(Integer value, Collector<String> out) throws Exception {
                    out.collect("late-" + value);
                }
            })
            .addSink(sideOutputResultSink);

    see.execute();

    // Expected value first, actual value second, so a failure message labels them correctly.
    assertEquals(Arrays.asList("late-3", "late-4"), sideOutputResultSink.getSortedResult());
}
Example usage of org.apache.flink.api.common.functions.FlatMapFunction in the Apache Flink project.
From class StreamingJobGraphGeneratorTest, method testResourcesForIteration.
/**
 * Checks that resources of chained operators are merged correctly when the job graph is
 * generated; this covers chaining in the middle of a pipeline and the iteration case.
 *
 * @throws Exception if a reflective call or the job graph generation fails
 */
@Test
public void testResourcesForIteration() throws Exception {
    // The setResources methods are looked up and made accessible reflectively.
    Method setOperatorResources =
            SingleOutputStreamOperator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    setOperatorResources.setAccessible(true);
    Method setSinkResources =
            DataStreamSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    setSinkResources.setAccessible(true);

    // One distinct resource spec per operator of the pipeline.
    ResourceSpec sourceSpec = new ResourceSpec(0.1, 100);
    ResourceSpec iterationSpec = new ResourceSpec(0.2, 200);
    ResourceSpec flatMapSpec = new ResourceSpec(0.3, 300);
    ResourceSpec filterSpec = new ResourceSpec(0.4, 400);
    ResourceSpec sinkSpec = new ResourceSpec(0.5, 500);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Source that never emits anything; only the topology matters for this test.
    DataStream<Integer> sourceStream = env.addSource(new ParallelSourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    }).name("test_source");
    setOperatorResources.invoke(sourceStream, sourceSpec);

    IterativeStream<Integer> iteration = sourceStream.iterate(3000);
    setOperatorResources.invoke(iteration, iterationSpec);

    DataStream<Integer> flatMapped = iteration.flatMap(new FlatMapFunction<Integer, Integer>() {
        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value);
        }
    }).name("test_flatMap");
    setOperatorResources.invoke(flatMapped, flatMapSpec);

    // CHAIN(flatMap -> Filter)
    DataStream<Integer> filtered = flatMapped.filter(new FilterFunction<Integer>() {
        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    }).name("test_filter");
    setOperatorResources.invoke(filtered, filterSpec);

    DataStreamSink<Integer> sink = iteration
            .closeWith(filtered)
            .addSink(new SinkFunction<Integer>() {
                @Override
                public void invoke(Integer value) throws Exception {
                }
            })
            .disableChaining()
            .name("test_sink");
    setSinkResources.invoke(sink, sinkSpec);

    JobGraph jobGraph = new StreamingJobGraphGenerator(env.getStreamGraph(), 1).createJobGraph();

    // Identify every vertex by its name and compare its resources with the expectation;
    // vertices with other names are not checked.
    for (JobVertex vertex : jobGraph.getVertices()) {
        final String vertexName = vertex.getName();
        if (vertexName.contains("test_source")) {
            assertTrue(vertex.getMinResources().equals(sourceSpec));
            continue;
        }
        if (vertexName.contains("Iteration_Source")) {
            assertTrue(vertex.getPreferredResources().equals(iterationSpec));
            continue;
        }
        if (vertexName.contains("test_flatMap")) {
            // flatMap and filter are chained, so their specs are expected to be merged.
            assertTrue(vertex.getMinResources().equals(flatMapSpec.merge(filterSpec)));
            continue;
        }
        if (vertexName.contains("Iteration_Tail")) {
            assertTrue(vertex.getPreferredResources().equals(ResourceSpec.DEFAULT));
            continue;
        }
        if (vertexName.contains("test_sink")) {
            assertTrue(vertex.getMinResources().equals(sinkSpec));
        }
    }
}
Aggregations