Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in the Apache Flink project.
From the class TimestampITCase, method testWatermarkPropagation.
/**
 * Checks that custom timestamp/watermark emission works at sources and that watermarks
 * arrive at operators throughout a topology.
 *
 * <p>
 * This also checks whether watermarks keep propagating if a source closes early: the second
 * source is created with only half as many watermarks as the first one.
 *
 * <p>
 * This only uses map to test the workings of watermarks in a complete, running topology. All
 * tasks and stream operators have dedicated tests that test the watermark propagation
 * behaviour.
 *
 * @throws Exception if submitting or running the job fails
 */
@Test
public void testWatermarkPropagation() throws Exception {
    final int NUM_WATERMARKS = 10;
    long initialTime = 0L;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // source2 is configured with half the count of source1
    // (presumably the number of watermarks it emits -- confirm against MyTimestampSource)
    DataStream<Integer> source1 = env.addSource(new MyTimestampSource(initialTime, NUM_WATERMARKS));
    DataStream<Integer> source2 = env.addSource(new MyTimestampSource(initialTime, NUM_WATERMARKS / 2));

    source1.union(source2)
            .map(new IdentityMap())
            .connect(source2)
            .map(new IdentityCoMap())
            .transform("Custom Operator", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
            .addSink(new DiscardingSink<Integer>());

    env.execute();

    // verify that all the watermarks arrived at the final custom operator
    for (int i = 0; i < PARALLELISM; i++) {
        // other source stops emitting after that
        for (int j = 0; j < NUM_WATERMARKS / 2; j++) {
            // A too-short list is reported as a wrong watermark instead of surfacing as an
            // IndexOutOfBoundsException that would hide the real cause.
            if (j >= CustomOperator.finalWatermarks[i].size()
                    || !CustomOperator.finalWatermarks[i].get(j).equals(new Watermark(initialTime + j))) {
                System.err.println("All Watermarks: ");
                // dump exactly what was recorded, never indexing past the end of the list
                for (int k = 0; k < CustomOperator.finalWatermarks[i].size(); k++) {
                    System.err.println(CustomOperator.finalWatermarks[i].get(k));
                }
                fail("Wrong watermark.");
            }
        }

        // the last recorded watermark must be the MAX watermark
        assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[i].get(CustomOperator.finalWatermarks[i].size() - 1));
    }
}
Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in the Apache Flink project.
From the class TimestampITCase, method testTimestampExtractorWithAutoInterval.
/**
 * Tests that the timestamp extractor assigns timestamps properly and that the watermarks it
 * generates are correctly forwarded downstream when the auto watermark interval is used.
 *
 * @throws Exception if submitting or running the job fails
 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
    final int NUM_ELEMENTS = 10;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(10);
    env.setParallelism(1);
    env.getConfig().disableSysoutLogging();

    // Emits 1..NUM_ELEMENTS and waits on the shared latch after every element.
    SourceFunction<Integer> countingSource = new SourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            for (int next = 1; next <= NUM_ELEMENTS; next++) {
                ctx.collect(next);
                latch.await();
            }
        }

        @Override
        public void cancel() {
        }
    };

    // The element value itself serves as its (ascending) timestamp.
    AscendingTimestampExtractor<Integer> valueAsTimestamp = new AscendingTimestampExtractor<Integer>() {
        @Override
        public long extractAscendingTimestamp(Integer element) {
            return element;
        }
    };

    DataStream<Integer> source1 = env.addSource(countingSource);
    DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(valueAsTimestamp);

    extractOp
            .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
            .transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator());

    // verify that extractor picks up source parallelism
    Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism());

    env.execute();

    // verify that we get NUM_ELEMENTS watermarks
    for (int j = 0; j < NUM_ELEMENTS; j++) {
        Watermark seen = CustomOperator.finalWatermarks[0].get(j);
        if (seen.equals(new Watermark(j))) {
            continue;
        }
        Assert.fail("Wrong watermark. Expected: " + j + " Found: " + seen.getTimestamp() + " All: " + CustomOperator.finalWatermarks[0]);
    }

    // the input is finite, so it should have a MAX Watermark
    int lastIndex = CustomOperator.finalWatermarks[0].size() - 1;
    assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[0].get(lastIndex));
}
Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in the Apache Flink project.
From the class WindowFoldITCase, method testFoldProcessWindow.
/**
 * Runs a keyed tumbling event-time window (3 ms) that folds its elements and then passes the
 * fold result through a {@code ProcessWindowFunction}, and compares the collected sink output
 * (order-insensitively) against the expected per-window results.
 *
 * @throws Exception if running the job fails
 */
@Test
public void testFoldProcessWindow() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    // Finite source: three "a" records, three "b" records, three more "a" records.
    SourceFunction<Tuple2<String, Integer>> finiteSource = new SourceFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("a", 2));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("b", 4));
            ctx.collect(Tuple2.of("b", 5));
            ctx.collect(Tuple2.of("a", 6));
            ctx.collect(Tuple2.of("a", 7));
            ctx.collect(Tuple2.of("a", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    };

    // Appends each key to the accumulator string and sums the integer field.
    FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>> concatAndSum = new FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>>() {
        @Override
        public Tuple2<Integer, String> fold(Tuple2<Integer, String> accumulator, Tuple2<String, Integer> value) throws Exception {
            accumulator.f1 += value.f0;
            accumulator.f0 += value.f1;
            return accumulator;
        }
    };

    // Emits (string, sum, position) for every folded element handed to the window function.
    ProcessWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, Tuple, TimeWindow> emitWithPosition = new ProcessWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, Tuple, TimeWindow>() {
        @Override
        public void process(Tuple tuple, Context context, Iterable<Tuple2<Integer, String>> elements, Collector<Tuple3<String, Integer, Integer>> out) throws Exception {
            int position = 0;
            for (Tuple2<Integer, String> folded : elements) {
                out.collect(new Tuple3<>(folded.f1, folded.f0, position));
                position++;
            }
        }
    };

    // Records the string form of each result in the shared testResults list.
    SinkFunction<Tuple3<String, Integer, Integer>> collectingSink = new SinkFunction<Tuple3<String, Integer, Integer>>() {
        @Override
        public void invoke(Tuple3<String, Integer, Integer> value) throws Exception {
            testResults.add(value.toString());
        }
    };

    DataStream<Tuple2<String, Integer>> source1 = env
            .addSource(finiteSource)
            .assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    source1
            .keyBy(0)
            .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .fold(Tuple2.of(0, "R:"), concatAndSum, emitWithPosition)
            .addSink(collectingSink);

    env.execute("Fold Process Window Test");

    List<String> expectedResult = Arrays.asList("(R:aaa,3,0)", "(R:aaa,21,0)", "(R:bbb,12,0)");

    // sort both sides so the comparison does not depend on emission order
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in the Apache Flink project.
From the class WindowFoldITCase, method testFoldProcessAllWindow.
/**
 * Runs a non-keyed ({@code windowAll}) tumbling event-time window (3 ms) that folds its
 * elements and then passes the fold result through a {@code ProcessAllWindowFunction}, and
 * compares the collected sink output (order-insensitively) against the expected per-window
 * results.
 *
 * @throws Exception if running the job fails
 */
@Test
public void testFoldProcessAllWindow() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> source1 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("a", 2));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("b", 4));
            ctx.collect(Tuple2.of("b", 5));
            ctx.collect(Tuple2.of("a", 6));
            ctx.collect(Tuple2.of("a", 7));
            ctx.collect(Tuple2.of("a", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    source1
            .windowAll(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .fold(Tuple2.of(0, "R:"), new FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>>() {
                // appends each record's string field and sums its integer field
                @Override
                public Tuple2<Integer, String> fold(Tuple2<Integer, String> accumulator, Tuple2<String, Integer> value) throws Exception {
                    accumulator.f1 += value.f0;
                    accumulator.f0 += value.f1;
                    return accumulator;
                }
            }, new ProcessAllWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, TimeWindow>() {
                // emits (string, sum, position) for every folded element of the window
                @Override
                public void process(Context context, Iterable<Tuple2<Integer, String>> elements, Collector<Tuple3<String, Integer, Integer>> out) throws Exception {
                    int i = 0;
                    for (Tuple2<Integer, String> in : elements) {
                        out.collect(new Tuple3<>(in.f1, in.f0, i++));
                    }
                }
            })
            .addSink(new SinkFunction<Tuple3<String, Integer, Integer>>() {
                // records the string form of each result in the shared testResults list
                @Override
                public void invoke(Tuple3<String, Integer, Integer> value) throws Exception {
                    testResults.add(value.toString());
                }
            });

    // Distinct job name so this job is not confused with the keyed-window variant of the test.
    env.execute("Fold Process All Window Test");

    List<String> expectedResult = Arrays.asList("(R:aaa,3,0)", "(R:aaa,21,0)", "(R:bbb,12,0)");

    // sort both sides so the comparison does not depend on emission order
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in the Apache Flink project.
From the class ManualWindowSpeedITCase, method testTumblingIngestionTimeWindowsWithFsBackend.
/**
 * Runs a keyed 3-second time window with ingestion time on top of an {@code FsStateBackend}
 * whose checkpoint directory is a fresh temporary folder. Each window is reduced by summing
 * the integer field per key, and only results whose key starts with "Tuple 0" are printed.
 *
 * @throws Exception if creating the temporary folder or running the job fails
 */
@Test
public void testTumblingIngestionTimeWindowsWithFsBackend() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.setParallelism(1);

    String checkpoints = tempFolder.newFolder().toURI().toString();
    env.setStateBackend(new FsStateBackend(checkpoints));

    // Keeps the key of the first value and adds up the integer fields.
    ReduceFunction<Tuple2<String, Integer>> sumPerKey = new ReduceFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
            int total = value1.f1 + value2.f1;
            return Tuple2.of(value1.f0, total);
        }
    };

    // Lets through only records whose key begins with "Tuple 0".
    FilterFunction<Tuple2<String, Integer>> onlyTupleZero = new FilterFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(Tuple2<String, Integer> value) throws Exception {
            return value.f0.startsWith("Tuple 0");
        }
    };

    // NOTE(review): the meaning of the 10_000 argument is not visible here -- confirm against InfiniteTupleSource.
    env.addSource(new InfiniteTupleSource(10_000))
            .keyBy(0)
            .timeWindow(Time.seconds(3))
            .reduce(sumPerKey)
            .filter(onlyTupleZero)
            .print();

    env.execute();
}
Aggregations