
Example 76 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class TimestampITCase method testWatermarkPropagation.

/**
	 * These tests check whether custom timestamp emission works at sources and also whether
	 * timestamps arrive at operators throughout a topology.
	 *
	 * <p>
	 * This also checks whether watermarks keep propagating if a source closes early.
	 *
	 * <p>
	 * This only uses map to test the workings of watermarks in a complete, running topology. All
	 * tasks and stream operators have dedicated tests that test the watermark propagation
	 * behaviour.
	 */
@Test
public void testWatermarkPropagation() throws Exception {
    final int NUM_WATERMARKS = 10;
    long initialTime = 0L;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();
    DataStream<Integer> source1 = env.addSource(new MyTimestampSource(initialTime, NUM_WATERMARKS));
    DataStream<Integer> source2 = env.addSource(new MyTimestampSource(initialTime, NUM_WATERMARKS / 2));
    source1.union(source2)
            .map(new IdentityMap())
            .connect(source2)
            .map(new IdentityCoMap())
            .transform("Custom Operator", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
            .addSink(new DiscardingSink<Integer>());
    env.execute();
    // verify that all the watermarks arrived at the final custom operator
    for (int i = 0; i < PARALLELISM; i++) {
        // we are only guaranteed to see NUM_WATERMARKS / 2 watermarks here, because the
        // second source stops emitting after that
        for (int j = 0; j < NUM_WATERMARKS / 2; j++) {
            if (!CustomOperator.finalWatermarks[i].get(j).equals(new Watermark(initialTime + j))) {
                System.err.println("All Watermarks: ");
                for (int k = 0; k <= NUM_WATERMARKS / 2; k++) {
                    System.err.println(CustomOperator.finalWatermarks[i].get(k));
                }
                fail("Wrong watermark.");
            }
        }
        assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[i].get(CustomOperator.finalWatermarks[i].size() - 1));
    }
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)
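
The test above relies on helper classes (MyTimestampSource, IdentityMap, IdentityCoMap, CustomOperator) that are defined elsewhere in TimestampITCase and not reproduced on this page. As a minimal sketch of how a source can emit custom timestamps and watermarks through the SourceContext API, something along these lines would serve the same role; the class name and loop structure here are illustrative, not the actual test helper.

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.watermark.Watermark;

// Illustrative sketch only: emits one record per timestamp tick, followed by a watermark.
// The real MyTimestampSource in TimestampITCase may differ in its details.
public class TimestampedSource implements SourceFunction<Integer> {

    private final long initialTime;
    private final int numWatermarks;

    public TimestampedSource(long initialTime, int numWatermarks) {
        this.initialTime = initialTime;
        this.numWatermarks = numWatermarks;
    }

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        for (int i = 0; i < numWatermarks; i++) {
            // attach an explicit event timestamp to the record
            ctx.collectWithTimestamp(i, initialTime + i);
            // then advance event time for downstream operators
            ctx.emitWatermark(new Watermark(initialTime + i));
        }
    }

    @Override
    public void cancel() {
    }
}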

Example 77 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class TimestampITCase method testTimestampExtractorWithAutoInterval.

/**
	 * This tests whether timestamps are properly extracted in the timestamp
	 * extractor and whether watermarks are correctly forwarded from it with the
	 * automatic watermark interval.
	 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
    final int NUM_ELEMENTS = 10;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(10);
    env.setParallelism(1);
    env.getConfig().disableSysoutLogging();
    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= NUM_ELEMENTS) {
                ctx.collect(index);
                latch.await();
                index++;
            }
        }

        @Override
        public void cancel() {
        }
    });
    DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Integer>() {

        @Override
        public long extractAscendingTimestamp(Integer element) {
            return element;
        }
    });
    extractOp
            .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
            .transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator());
    // verify that extractor picks up source parallelism
    Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism());
    env.execute();
    // verify that we get NUM_ELEMENTS watermarks
    for (int j = 0; j < NUM_ELEMENTS; j++) {
        if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
            long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp();
            Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]);
        }
    }
    // the input is finite, so it should have a MAX Watermark
    assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)
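
AscendingTimestampExtractor is a periodic watermark assigner, so the setAutoWatermarkInterval(10) call above controls how often the runtime asks the assigner for a new watermark. For context, a hand-written periodic assigner has the two callbacks sketched below; the class name and the "watermark trails the maximum timestamp" policy are chosen here for illustration and are not part of the test.

import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

// Illustrative periodic assigner: the element value is the event timestamp,
// and the watermark tracks the highest timestamp seen so far.
public class ElementValueWatermarks implements AssignerWithPeriodicWatermarks<Integer> {

    private long maxTimestamp = Long.MIN_VALUE;

    @Override
    public long extractTimestamp(Integer element, long previousElementTimestamp) {
        maxTimestamp = Math.max(maxTimestamp, element);
        return element;
    }

    @Override
    public Watermark getCurrentWatermark() {
        // invoked by the runtime every autoWatermarkInterval milliseconds
        return maxTimestamp == Long.MIN_VALUE ? null : new Watermark(maxTimestamp);
    }
}

It would be wired in the same way as the extractor in the example: source1.assignTimestampsAndWatermarks(new ElementValueWatermarks()).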

Example 78 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class WindowFoldITCase method testFoldProcessWindow.

@Test
public void testFoldProcessWindow() throws Exception {
    testResults = new ArrayList<>();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);
    DataStream<Tuple2<String, Integer>> source1 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("a", 2));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("b", 4));
            ctx.collect(Tuple2.of("b", 5));
            ctx.collect(Tuple2.of("a", 6));
            ctx.collect(Tuple2.of("a", 7));
            ctx.collect(Tuple2.of("a", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());
    source1
            .keyBy(0)
            .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .fold(Tuple2.of(0, "R:"), new FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>>() {

        @Override
        public Tuple2<Integer, String> fold(Tuple2<Integer, String> accumulator, Tuple2<String, Integer> value) throws Exception {
            accumulator.f1 += value.f0;
            accumulator.f0 += value.f1;
            return accumulator;
        }
    }, new ProcessWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, Tuple, TimeWindow>() {

        @Override
        public void process(Tuple tuple, Context context, Iterable<Tuple2<Integer, String>> elements, Collector<Tuple3<String, Integer, Integer>> out) throws Exception {
            int i = 0;
            for (Tuple2<Integer, String> in : elements) {
                out.collect(new Tuple3<>(in.f1, in.f0, i++));
            }
        }
    }).addSink(new SinkFunction<Tuple3<String, Integer, Integer>>() {

        @Override
        public void invoke(Tuple3<String, Integer, Integer> value) throws Exception {
            testResults.add(value.toString());
        }
    });
    env.execute("Fold Process Window Test");
    List<String> expectedResult = Arrays.asList("(R:aaa,3,0)", "(R:aaa,21,0)", "(R:bbb,12,0)");
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) FoldFunction(org.apache.flink.api.common.functions.FoldFunction) ProcessWindowFunction(org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)
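
Both WindowFoldITCase examples assign event time via a Tuple2TimestampExtractor that is not reproduced on this page. Assuming the Integer field carries the event timestamp and that a watermark may follow each element, a punctuated assigner along the following lines would fit; this is a sketch, not necessarily the exact class from the test.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

// Sketch of a punctuated assigner: the Integer field is the event timestamp,
// and every element is followed by a watermark just below that timestamp.
public class Tuple2TimestampExtractor
        implements AssignerWithPunctuatedWatermarks<Tuple2<String, Integer>> {

    @Override
    public long extractTimestamp(Tuple2<String, Integer> element, long previousTimestamp) {
        return element.f1;
    }

    @Override
    public Watermark checkAndGetNextWatermark(Tuple2<String, Integer> lastElement, long extractedTimestamp) {
        return new Watermark(extractedTimestamp - 1);
    }
}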

Example 79 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class WindowFoldITCase method testFoldProcessAllWindow.

@Test
public void testFoldProcessAllWindow() throws Exception {
    testResults = new ArrayList<>();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);
    DataStream<Tuple2<String, Integer>> source1 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("a", 2));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("b", 4));
            ctx.collect(Tuple2.of("b", 5));
            ctx.collect(Tuple2.of("a", 6));
            ctx.collect(Tuple2.of("a", 7));
            ctx.collect(Tuple2.of("a", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());
    source1
            .windowAll(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .fold(Tuple2.of(0, "R:"), new FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>>() {

        @Override
        public Tuple2<Integer, String> fold(Tuple2<Integer, String> accumulator, Tuple2<String, Integer> value) throws Exception {
            accumulator.f1 += value.f0;
            accumulator.f0 += value.f1;
            return accumulator;
        }
    }, new ProcessAllWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, TimeWindow>() {

        @Override
        public void process(Context context, Iterable<Tuple2<Integer, String>> elements, Collector<Tuple3<String, Integer, Integer>> out) throws Exception {
            int i = 0;
            for (Tuple2<Integer, String> in : elements) {
                out.collect(new Tuple3<>(in.f1, in.f0, i++));
            }
        }
    }).addSink(new SinkFunction<Tuple3<String, Integer, Integer>>() {

        @Override
        public void invoke(Tuple3<String, Integer, Integer> value) throws Exception {
            testResults.add(value.toString());
        }
    });
    env.execute("Fold Process Window Test");
    List<String> expectedResult = Arrays.asList("(R:aaa,3,0)", "(R:aaa,21,0)", "(R:bbb,12,0)");
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) FoldFunction(org.apache.flink.api.common.functions.FoldFunction) ProcessAllWindowFunction(org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 80 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class ManualWindowSpeedITCase method testTumblingIngestionTimeWindowsWithFsBackend.

@Test
public void testTumblingIngestionTimeWindowsWithFsBackend() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.setParallelism(1);
    String checkpoints = tempFolder.newFolder().toURI().toString();
    env.setStateBackend(new FsStateBackend(checkpoints));
    env.addSource(new InfiniteTupleSource(10_000))
            .keyBy(0)
            .timeWindow(Time.seconds(3))
            .reduce(new ReduceFunction<Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
            return Tuple2.of(value1.f0, value1.f1 + value2.f1);
        }
    }).filter(new FilterFunction<Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(Tuple2<String, Integer> value) throws Exception {
            return value.f0.startsWith("Tuple 0");
        }
    }).print();
    env.execute();
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) FsStateBackend(org.apache.flink.runtime.state.filesystem.FsStateBackend) Test(org.junit.Test)
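
The InfiniteTupleSource used above is another helper from ManualWindowSpeedITCase that is not shown here. A plausible sketch, assuming the constructor argument is the number of distinct keys and that the source runs until cancelled, is given below; the field names and emission pattern are assumptions for illustration, and the real implementation may differ.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

// Hypothetical sketch: emits ("Tuple <i>", 1) pairs round-robin over numKeys keys until cancelled.
public class InfiniteTupleSource implements SourceFunction<Tuple2<String, Integer>> {

    private final int numKeys;
    private volatile boolean running = true;

    public InfiniteTupleSource(int numKeys) {
        this.numKeys = numKeys;
    }

    @Override
    public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
        long counter = 0;
        while (running) {
            // hold the checkpoint lock while emitting, as recommended for SourceFunction
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(Tuple2.of("Tuple " + (counter % numKeys), 1));
                counter++;
            }
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

A key pattern like "Tuple <i>" is consistent with the filter in the test, which keeps only tuples whose key starts with "Tuple 0".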

Aggregations

StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 383
Test (org.junit.Test): 286
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 192
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 81
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 75
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 48
EventTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger): 42
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 34
Properties (java.util.Properties): 32
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 31
TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows): 30
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 27
SuccessException (org.apache.flink.test.util.SuccessException): 27
IOException (java.io.IOException): 24
Configuration (org.apache.flink.configuration.Configuration): 24
SlidingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows): 24
KeySelector (org.apache.flink.api.java.functions.KeySelector): 22
ProcessingTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.ProcessingTimeTrigger): 21
ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor): 20
MapFunction (org.apache.flink.api.common.functions.MapFunction): 19