Example 21 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class TimestampITCase method testTimestampExtractorWithLongMaxWatermarkFromSource2.

/**
 * This test verifies that the timestamp extractor forwards Long.MAX_VALUE watermarks.
 *
 * Same test as before, but using a different timestamp extractor.
 */
@Test
public void testTimestampExtractorWithLongMaxWatermarkFromSource2() throws Exception {
    final int NUM_ELEMENTS = 10;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(10);
    env.setParallelism(2);
    env.getConfig().disableSysoutLogging();
    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= NUM_ELEMENTS) {
                ctx.collectWithTimestamp(index, index);
                ctx.collectWithTimestamp(index - 1, index - 1);
                index++;
                ctx.emitWatermark(new Watermark(index - 2));
            }
            // emit the final Long.MAX_VALUE watermark, do it twice and verify that
            // we only see one in the result
            ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
            ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
        }

        @Override
        public void cancel() {
        }
    });
    source1.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Integer>() {

        @Override
        public long extractTimestamp(Integer element, long currentTimestamp) {
            return element;
        }

        @Override
        public Watermark getCurrentWatermark() {
            return null;
        }
    }).transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true));
    env.execute();
    Assert.assertTrue(CustomOperator.finalWatermarks[0].size() == 1);
    Assert.assertTrue(CustomOperator.finalWatermarks[0].get(0).getTimestamp() == Long.MAX_VALUE);
}
Also used : AssignerWithPeriodicWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)
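
For comparison, the assigner above returns null from getCurrentWatermark, so no periodic watermarks are produced and only the forwarded Long.MAX_VALUE mark reaches the check. Below is a minimal sketch of a periodic assigner that does emit watermarks, trailing the highest timestamp seen so far (illustrative only, not part of the test above; the name laggingAssigner is hypothetical):

AssignerWithPeriodicWatermarks<Integer> laggingAssigner = new AssignerWithPeriodicWatermarks<Integer>() {

    // highest timestamp seen so far; stays at Long.MIN_VALUE until the first element arrives
    private long maxTimestamp = Long.MIN_VALUE;

    @Override
    public long extractTimestamp(Integer element, long previousElementTimestamp) {
        maxTimestamp = Math.max(maxTimestamp, element);
        return element;
    }

    @Override
    public Watermark getCurrentWatermark() {
        // invoked on every auto-watermark interval; report a watermark one unit
        // behind the highest timestamp, or none before the first element
        return maxTimestamp == Long.MIN_VALUE ? null : new Watermark(maxTimestamp - 1);
    }
};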

Example 22 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class TimestampITCase method testTimestampExtractorWithCustomWatermarkEmit.

/**
 * This tests whether timestamps are properly extracted in the timestamp
 * extractor and whether watermarks are correctly forwarded from the custom
 * watermark emit function.
 */
@Test
public void testTimestampExtractorWithCustomWatermarkEmit() throws Exception {
    final int NUM_ELEMENTS = 10;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(10);
    env.setParallelism(1);
    env.getConfig().disableSysoutLogging();
    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= NUM_ELEMENTS) {
                ctx.collect(index);
                latch.await();
                index++;
            }
        }

        @Override
        public void cancel() {
        }
    });
    source1.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Integer>() {

        @Override
        public long extractTimestamp(Integer element, long currentTimestamp) {
            return element;
        }

        @Override
        public Watermark checkAndGetNextWatermark(Integer element, long extractedTimestamp) {
            return new Watermark(extractedTimestamp - 1);
        }
    }).transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true)).transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator());
    env.execute();
    // verify that we get NUM_ELEMENTS watermarks
    for (int j = 0; j < NUM_ELEMENTS; j++) {
        if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
            Assert.fail("Wrong watermark.");
        }
    }
    // the input is finite, so it should have a MAX Watermark
    assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)
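
With a punctuated assigner such as the one above, checkAndGetNextWatermark is consulted for every element, and returning null suppresses watermark emission for that element. A small hypothetical sketch (not part of the test) that emits a watermark only for even elements:

AssignerWithPunctuatedWatermarks<Integer> selectiveAssigner = new AssignerWithPunctuatedWatermarks<Integer>() {

    @Override
    public long extractTimestamp(Integer element, long previousElementTimestamp) {
        return element;
    }

    @Override
    public Watermark checkAndGetNextWatermark(Integer element, long extractedTimestamp) {
        // emit a punctuated watermark only for even elements; null means "no watermark here"
        return element % 2 == 0 ? new Watermark(extractedTimestamp - 1) : null;
    }
};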

Example 23 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class TimestampITCase method testWatermarkPropagation.

/**
 * This checks whether custom timestamp emission works at sources and whether timestamps
 * arrive at operators throughout a topology.
 *
 * <p>
 * It also checks whether watermarks keep propagating if a source closes early.
 *
 * <p>
 * This only uses map to test the workings of watermarks in a complete, running topology. All
 * tasks and stream operators have dedicated tests that test the watermark propagation
 * behaviour.
 */
@Test
public void testWatermarkPropagation() throws Exception {
    final int NUM_WATERMARKS = 10;
    long initialTime = 0L;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();
    DataStream<Integer> source1 = env.addSource(new MyTimestampSource(initialTime, NUM_WATERMARKS));
    DataStream<Integer> source2 = env.addSource(new MyTimestampSource(initialTime, NUM_WATERMARKS / 2));
    source1.union(source2).map(new IdentityMap()).connect(source2).map(new IdentityCoMap()).transform("Custom Operator", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true)).addSink(new DiscardingSink<Integer>());
    env.execute();
    // verify that all the watermarks arrived at the final custom operator
    for (int i = 0; i < PARALLELISM; i++) {
        // other source stops emitting after that
        for (int j = 0; j < NUM_WATERMARKS / 2; j++) {
            if (!CustomOperator.finalWatermarks[i].get(j).equals(new Watermark(initialTime + j))) {
                System.err.println("All Watermarks: ");
                for (int k = 0; k <= NUM_WATERMARKS / 2; k++) {
                    System.err.println(CustomOperator.finalWatermarks[i].get(k));
                }
                fail("Wrong watermark.");
            }
        }
        assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[i].get(CustomOperator.finalWatermarks[i].size() - 1));
    }
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)
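
MyTimestampSource is not reproduced in this listing. A source of that general shape assigns timestamps to the records it emits and advances event time itself; a minimal sketch (the class name TimestampedSource and its exact emission pattern are assumptions, not the test's actual source):

private static class TimestampedSource implements SourceFunction<Integer> {

    private final long initialTime;
    private final int numWatermarks;

    TimestampedSource(long initialTime, int numWatermarks) {
        this.initialTime = initialTime;
        this.numWatermarks = numWatermarks;
    }

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        for (int i = 0; i < numWatermarks; i++) {
            // attach the timestamp to the element, then advance event time past it
            ctx.collectWithTimestamp(i, initialTime + i);
            ctx.emitWatermark(new Watermark(initialTime + i));
        }
    }

    @Override
    public void cancel() {
    }
}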

Example 24 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class TimestampITCase method testTimestampExtractorWithAutoInterval.

/**
 * This tests whether timestamps are properly extracted in the timestamp
 * extractor and whether watermarks are also correctly forwarded with the auto
 * watermark interval.
 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
    final int NUM_ELEMENTS = 10;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(10);
    env.setParallelism(1);
    env.getConfig().disableSysoutLogging();
    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= NUM_ELEMENTS) {
                ctx.collect(index);
                latch.await();
                index++;
            }
        }

        @Override
        public void cancel() {
        }
    });
    DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Integer>() {

        @Override
        public long extractAscendingTimestamp(Integer element) {
            return element;
        }
    });
    extractOp.transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true)).transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator());
    // verify that extractor picks up source parallelism
    Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism());
    env.execute();
    // verify that we get NUM_ELEMENTS watermarks
    for (int j = 0; j < NUM_ELEMENTS; j++) {
        if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
            long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp();
            Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]);
        }
    }
    // the input is finite, so it should have a MAX Watermark
    assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)
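
The ascending extractor periodically reports a watermark of the last extracted timestamp minus one, which is why the j-th recorded watermark is expected to equal Watermark(j). If elements may occasionally violate the ascending order, the org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor variant is assumed (in Flink 1.x) to accept a violation handler; a hedged sketch, not used by this test:

// Assumes the timestamps.AscendingTimestampExtractor API with withViolationHandler;
// logs monotony violations instead of failing the extraction.
DataStream<Integer> lenientlyExtracted = source1.assignTimestampsAndWatermarks(
        new AscendingTimestampExtractor<Integer>() {

            @Override
            public long extractAscendingTimestamp(Integer element) {
                return element;
            }
        }.withViolationHandler(new AscendingTimestampExtractor.LoggingHandler()));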

Example 25 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class AbstractStreamOperator method processWatermark2.

public void processWatermark2(Watermark mark) throws Exception {
    input2Watermark = mark.getTimestamp();
    long newMin = Math.min(input1Watermark, input2Watermark);
    if (newMin > combinedWatermark) {
        combinedWatermark = newMin;
        processWatermark(new Watermark(combinedWatermark));
    }
}
Also used : Watermark(org.apache.flink.streaming.api.watermark.Watermark)
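
The first input presumably has a mirrored handler that updates input1Watermark and applies the same minimum rule, so the operator only forwards a watermark when the minimum across both inputs advances. A sketch of that mirrored method (not shown in the listing above):

public void processWatermark1(Watermark mark) throws Exception {
    input1Watermark = mark.getTimestamp();
    // forward a new watermark only when the minimum of both input watermarks advances
    long newMin = Math.min(input1Watermark, input2Watermark);
    if (newMin > combinedWatermark) {
        combinedWatermark = newMin;
        processWatermark(new Watermark(combinedWatermark));
    }
}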

Aggregations

Watermark (org.apache.flink.streaming.api.watermark.Watermark) 117
Test (org.junit.Test) 92
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue) 52
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 36
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) 36
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 31
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow) 29
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 21
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord) 17
ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor) 16
OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) 16
Tuple3 (org.apache.flink.api.java.tuple.Tuple3) 15
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 13
ArrayList (java.util.ArrayList) 12
Map (java.util.Map) 10
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) 10
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor) 9
Event (org.apache.flink.cep.Event) 9
SubEvent (org.apache.flink.cep.SubEvent) 9
HashMap (java.util.HashMap) 8