Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
Class WordCount, method main:
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }
    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // get input data
    DataStream<String> text = getTextDataStream(env);
    DataStream<Tuple2<String, Integer>> counts =
            // normalize and split each line
            text.map(line -> line.toLowerCase().split("\\W+"))
                    // convert each split line into (word, 1) pairs
                    .flatMap((String[] tokens, Collector<Tuple2<String, Integer>> out) -> {
                        // emit the pairs with non-zero-length words
                        Arrays.stream(tokens)
                                .filter(t -> t.length() > 0)
                                .forEach(t -> out.collect(new Tuple2<>(t, 1)));
                    })
                    // group by the word (tuple field 0) and sum the counts (field 1)
                    .keyBy(0)
                    .sum(1);
    // emit result
    if (fileOutput) {
        counts.writeAsCsv(outputPath);
    } else {
        counts.print();
    }
    // execute program
    env.execute("Streaming WordCount Example");
}
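The excerpt calls two helpers, parseParameters and getTextDataStream, that the listing omits. A minimal sketch of what they might look like is below; the field names fileOutput, textPath, and outputPath follow the snippet, but the bodies are illustrative assumptions, not the project's actual code.

// Illustrative only: the real example ships its own versions of these helpers.
private static boolean fileOutput = false;
private static String textPath;
private static String outputPath;

private static boolean parseParameters(String[] args) {
    if (args.length == 0) {
        return true; // no arguments: fall back to built-in sample data
    }
    if (args.length == 2) {
        fileOutput = true;
        textPath = args[0];
        outputPath = args[1];
        return true;
    }
    System.err.println("Usage: WordCount <text path> <result path>");
    return false;
}

private static DataStream<String> getTextDataStream(StreamExecutionEnvironment env) {
    // read the given file, or use a small built-in sample when no path was given
    return fileOutput ? env.readTextFile(textPath)
            : env.fromElements("to be or not to be", "that is the question");
}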
Use of org.apache.flink.streaming.api.datastream.DataStream in project camel by apache.
Class DataStreamFlinkProducer, method collectResults:
protected void collectResults(Exchange exchange, Object result) {
    if (result instanceof DataStream) {
        DataStream dsResults = (DataStream) result;
        if (getEndpoint().isCollect()) {
            // DataStreams are unbounded, so collecting one into the exchange body is not supported
            throw new IllegalArgumentException("collect mode not supported for Flink DataStreams.");
        } else {
            // pass the resulting DataStream through as the message body and as a header
            exchange.getIn().setBody(result);
            exchange.getIn().setHeader(FlinkConstants.FLINK_DATASTREAM_HEADER, result);
        }
    } else {
        exchange.getIn().setBody(result);
    }
}
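For context, a route that exercises this producer might look like the sketch below. The flink:dataStream URI and the dataStream/dataStreamCallback options follow the camel-flink component's documented format, but the registry names are assumptions made for this example.

import org.apache.camel.builder.RouteBuilder;

public class FlinkDataStreamRoute extends RouteBuilder {
    @Override
    public void configure() {
        // "flink:dataStream" selects the DataStream producer; "#myDataStream" and
        // "#myDataStreamCallback" are assumed registry bindings for this sketch.
        from("direct:start")
                .to("flink:dataStream?dataStream=#myDataStream&dataStreamCallback=#myDataStreamCallback");
    }
}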
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
Class SavepointWriterWindowITCase, method testSlideWindow:
@Test
public void testSlideWindow() throws Exception {
    final String savepointPath = getTempDirPath(new AbstractID().toHexString());
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
    // bootstrap data: one (word, 1) tuple per word, every record stamped at timestamp 2
    DataStream<Tuple2<String, Integer>> bootstrapData =
            env.fromCollection(WORDS)
                    .map(word -> Tuple2.of(word, 1), TUPLE_TYPE_INFO)
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>noWatermarks()
                                    .withTimestampAssigner((record, ts) -> 2L));
    // key by word and assign sliding event-time windows (size 5 ms, slide 1 ms)
    WindowedStateTransformation<Tuple2<String, Integer>, String, TimeWindow> transformation =
            OperatorTransformation.bootstrapWith(bootstrapData)
                    .keyBy(tuple -> tuple.f0, Types.STRING)
                    .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)));
    // write the bootstrapped window state into a new savepoint
    SavepointWriter.newSavepoint(stateBackend, 128)
            .withOperator(UID, windowBootstrap.bootstrap(transformation))
            .write(savepointPath);
    env.execute("write state");
    // restoring job: same keying, same windowing, same operator UID
    WindowedStream<Tuple2<String, Integer>, String, TimeWindow> stream =
            env.addSource(new MaxWatermarkSource<Tuple2<String, Integer>>())
                    .returns(TUPLE_TYPE_INFO)
                    .keyBy(tuple -> tuple.f0)
                    .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)));
    DataStream<Tuple2<String, Integer>> windowed = windowStream.window(stream).uid(UID);
    CompletableFuture<Collection<Tuple2<String, Integer>>> future = collector.collect(windowed);
    submitJob(savepointPath, env);
    Collection<Tuple2<String, Integer>> results =
            future.get().stream().distinct().collect(Collectors.toList());
    Assert.assertThat("Incorrect results from bootstrapped windows", results, STANDARD_MATCHER);
}
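The windowBootstrap, windowStream, and collector identifiers are fixtures of the parameterized test class and do not appear in the excerpt. A minimal sketch of the reduce-based pairing, assuming the WindowedStateTransformation#reduce and WindowedStream#reduce signatures from the state-processor-api and DataStream API:

// Illustrative reduce-based pairing; the real ITCase parameterizes over
// reduce/aggregate/apply/process variants on both the bootstrap and restore side.
// (ReduceFunction is org.apache.flink.api.common.functions.ReduceFunction.)
ReduceFunction<Tuple2<String, Integer>> sum = (a, b) -> Tuple2.of(a.f0, a.f1 + b.f1);

// bootstrap side: turns the windowed transformation into writable operator state
StateBootstrapTransformation<Tuple2<String, Integer>> bootstrap = transformation.reduce(sum);

// restore side: the runtime window operator that reads that state back
DataStream<Tuple2<String, Integer>> restored = stream.reduce(sum).uid(UID);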
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
Class WritableSavepointWindowITCase, method testSlideWindow:
@Test
public void testSlideWindow() throws Exception {
    final String savepointPath = getTempDirPath(new AbstractID().toHexString());
    // legacy batch-based bootstrap: build the input as a DataSet
    ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<String, Integer>> bootstrapData =
            bEnv.fromCollection(WORDS)
                    .map(word -> Tuple2.of(word, 1))
                    .returns(TUPLE_TYPE_INFO);
    // stamp every record at timestamp 2, key by word, and window (size 5 ms, slide 1 ms)
    WindowedOperatorTransformation<Tuple2<String, Integer>, String, TimeWindow> transformation =
            OperatorTransformation.bootstrapWith(bootstrapData)
                    .assignTimestamps(record -> 2L)
                    .keyBy(tuple -> tuple.f0, Types.STRING)
                    .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)));
    // write the bootstrapped window state into a new savepoint
    Savepoint.create(new MemoryStateBackend(), 128)
            .withOperator(UID, windowBootstrap.bootstrap(transformation))
            .write(savepointPath);
    bEnv.execute("write state");
    // restoring job: same keying, same windowing, same operator UID
    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    WindowedStream<Tuple2<String, Integer>, String, TimeWindow> stream =
            sEnv.addSource(new MaxWatermarkSource<Tuple2<String, Integer>>())
                    .returns(TUPLE_TYPE_INFO)
                    .keyBy(tuple -> tuple.f0)
                    .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)));
    DataStream<Tuple2<String, Integer>> windowed = windowStream.window(stream).uid(UID);
    CompletableFuture<Collection<Tuple2<String, Integer>>> future = collector.collect(windowed);
    submitJob(savepointPath, sEnv);
    Collection<Tuple2<String, Integer>> results = future.get();
    Assert.assertEquals("Incorrect number of results", 15, results.size());
    Assert.assertThat("Incorrect bootstrap state", new HashSet<>(results), STANDARD_MATCHER);
}
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
Class WritableSavepointWindowITCase, method testSlideWindowWithEvictor:
@Test
public void testSlideWindowWithEvictor() throws Exception {
    final String savepointPath = getTempDirPath(new AbstractID().toHexString());
    ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<String, Integer>> bootstrapData =
            bEnv.fromCollection(WORDS)
                    .map(word -> Tuple2.of(word, 1))
                    .returns(TUPLE_TYPE_INFO);
    // same setup as testSlideWindow, plus a CountEvictor that keeps one element per window
    WindowedOperatorTransformation<Tuple2<String, Integer>, String, TimeWindow> transformation =
            OperatorTransformation.bootstrapWith(bootstrapData)
                    .assignTimestamps(record -> 2L)
                    .keyBy(tuple -> tuple.f0, Types.STRING)
                    .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)))
                    .evictor(CountEvictor.of(1));
    Savepoint.create(new MemoryStateBackend(), 128)
            .withOperator(UID, windowBootstrap.bootstrap(transformation))
            .write(savepointPath);
    bEnv.execute("write state");
    // restoring job: the evictor configuration must match the bootstrapped state
    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    WindowedStream<Tuple2<String, Integer>, String, TimeWindow> stream =
            sEnv.addSource(new MaxWatermarkSource<Tuple2<String, Integer>>())
                    .returns(TUPLE_TYPE_INFO)
                    .keyBy(tuple -> tuple.f0)
                    .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)))
                    .evictor(CountEvictor.of(1));
    DataStream<Tuple2<String, Integer>> windowed = windowStream.window(stream).uid(UID);
    CompletableFuture<Collection<Tuple2<String, Integer>>> future = collector.collect(windowed);
    submitJob(savepointPath, sEnv);
    Collection<Tuple2<String, Integer>> results = future.get();
    Assert.assertEquals("Incorrect number of results", 15, results.size());
    Assert.assertThat("Incorrect bootstrap state", new HashSet<>(results), EVICTOR_MATCHER);
}
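submitJob is another shared helper the listing omits. A plausible sketch, assuming the test resubmits the job with the written savepoint as its restore path via SavepointRestoreSettings:

// Illustrative only: resubmit the job, restoring from the savepoint just written.
// Uses org.apache.flink.runtime.jobgraph.SavepointRestoreSettings and
// org.apache.flink.streaming.api.graph.StreamGraph.
private void submitJob(String savepointPath, StreamExecutionEnvironment sEnv) throws Exception {
    StreamGraph streamGraph = sEnv.getStreamGraph();
    // false: do not allow non-restored state, so every operator must find its state
    streamGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, false));
    sEnv.execute(streamGraph);
}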