Search in sources:

Example 16 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in the Apache Flink project.

From class CEPITCase, method testSimplePatternEventTime.

// Verifies that a simple start -> middle -> end pattern is matched correctly under
// event time: elements arrive out of timestamp order and are re-ordered by the
// punctuated watermarks before the CEP operator sees them.
@Test
public void testSimplePatternEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // Input is (Event, timestamp) pairs, deliberately out of timestamp order.
    DataStream<Event> events = env.fromElements(
            Tuple2.of(new Event(1, "start", 1.0), 5L),
            Tuple2.of(new Event(2, "middle", 2.0), 1L),
            Tuple2.of(new Event(3, "end", 3.0), 3L),
            Tuple2.of(new Event(4, "end", 4.0), 10L),
            Tuple2.of(new Event(5, "middle", 5.0), 7L),
            // last element for high final watermark
            Tuple2.of(new Event(5, "middle", 5.0), 100L))
        .assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

            @Override
            public long extractTimestamp(Tuple2<Event, Long> element, long previousTimestamp) {
                return element.f1;
            }

            @Override
            public Watermark checkAndGetNextWatermark(Tuple2<Event, Long> lastElement, long extractedTimestamp) {
                // Watermark trails the latest element's timestamp by five units.
                return new Watermark(lastElement.f1 - 5);
            }
        })
        .map(new MapFunction<Tuple2<Event, Long>, Event>() {

            @Override
            public Event map(Tuple2<Event, Long> value) throws Exception {
                return value.f0;
            }
        });

    // Pattern: an event named "start", followed by "middle", followed by "end".
    Pattern<Event, ?> eventPattern = Pattern.<Event>begin("start").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
        }
    }).followedBy("middle").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("middle");
        }
    }).followedBy("end").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
        }
    });

    DataStream<String> matches = CEP.pattern(events, eventPattern).select(new PatternSelectFunction<Event, String>() {

        @Override
        public String select(Map<String, Event> pattern) {
            // Emit the matching event ids as "start,middle,end".
            return pattern.get("start").getId() + "," + pattern.get("middle").getId() + "," + pattern.get("end").getId();
        }
    });

    matches.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequence of matching event ids
    expected = "1,5,4";
    env.execute();
}
Also used : AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 17 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in the Apache Flink project.

From class CEPMigration11to13Test, method testNonKeyedCEPFunctionMigration.

// Verifies that a non-keyed CEP operator snapshot taken on Flink 1.1 can be restored
// by the Flink 1.3 keyed operator and that pattern matching then resumes correctly.
// The commented-out block below is the code that originally produced the snapshot file.
@Test
public void testNonKeyedCEPFunctionMigration() throws Exception {
    final Event startEvent = new Event(42, "start", 1.0);
    final SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    final Event endEvent = new Event(42, "end", 1.0);
    // uncomment these lines for regenerating the snapshot on Flink 1.1
    /*
		OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = new OneInputStreamOperatorTestHarness<>(
				new CEPPatternOperator<>(
						Event.createTypeSerializer(),
						false,
						new NFAFactory()));
		harness.open();
		harness.processElement(new StreamRecord<Event>(startEvent, 1));
		harness.processElement(new StreamRecord<Event>(new Event(42, "foobar", 1.0), 2));
		harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3));
		harness.processWatermark(new Watermark(2));

		// simulate snapshot/restore with empty element queue but NFA state
		StreamTaskState snapshot = harness.snapshot(1, 1);
		FileOutputStream out = new FileOutputStream(
				"src/test/resources/cep-non-keyed-snapshot-1.1");
		ObjectOutputStream oos = new ObjectOutputStream(out);
		oos.writeObject(snapshot);
		out.close();
		harness.close();
		*/
    // The 1.3 operator is always keyed; a NullByteKeySelector maps every record to the
    // same (byte) key, emulating the old non-keyed operator.
    NullByteKeySelector keySelector = new NullByteKeySelector();
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = new KeyedOneInputStreamOperatorTestHarness<>(new KeyedCEPPatternOperator<>(Event.createTypeSerializer(), false, keySelector, ByteSerializer.INSTANCE, new NFAFactory(), false), keySelector, BasicTypeInfo.BYTE_TYPE_INFO);
    harness.setup();
    // Restore state from the snapshot produced on Flink 1.1 (see generation code above).
    harness.initializeStateFromLegacyCheckpoint(getResourceFilename("cep-non-keyed-snapshot-1.1"));
    harness.open();
    // Feed the remaining events; per the generation code above, startEvent was already
    // processed before the legacy snapshot was taken.
    harness.processElement(new StreamRecord<Event>(middleEvent, 3));
    harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4));
    harness.processElement(new StreamRecord<>(endEvent, 5));
    // Advance event time to the maximum so all buffered elements are processed.
    harness.processWatermark(new Watermark(Long.MAX_VALUE));
    ConcurrentLinkedQueue<Object> result = harness.getOutput();
    // watermark and the result
    assertEquals(2, result.size());
    Object resultObject = result.poll();
    assertTrue(resultObject instanceof StreamRecord);
    StreamRecord<?> resultRecord = (StreamRecord<?>) resultObject;
    assertTrue(resultRecord.getValue() instanceof Map);
    @SuppressWarnings("unchecked") Map<String, Event> patternMap = (Map<String, Event>) resultRecord.getValue();
    // The completed match must contain the pre-snapshot startEvent plus the two
    // events fed after the restore.
    assertEquals(startEvent, patternMap.get("start"));
    assertEquals(middleEvent, patternMap.get("middle"));
    assertEquals(endEvent, patternMap.get("end"));
    harness.close();
}
Also used : SubEvent(org.apache.flink.cep.SubEvent) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) NullByteKeySelector(org.apache.flink.api.java.functions.NullByteKeySelector) Event(org.apache.flink.cep.Event) SubEvent(org.apache.flink.cep.SubEvent) Map(java.util.Map) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 18 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in the Apache Flink project.

From class CEPMigration11to13Test, method testKeyedCEPOperatorMigratation.

// Verifies that a keyed CEP operator snapshot taken on Flink 1.1 can be restored by
// the Flink 1.3 operator and that pattern matching then resumes correctly. The
// commented-out block below is the code that originally produced the snapshot file.
@Test
public void testKeyedCEPOperatorMigratation() throws Exception {
    // Keys records by event id; all test events share id 42, so they land on one key.
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
    final Event startEvent = new Event(42, "start", 1.0);
    final SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    final Event endEvent = new Event(42, "end", 1.0);
    // uncomment these lines for regenerating the snapshot on Flink 1.1
    /*
		OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = new OneInputStreamOperatorTestHarness<>(
				new KeyedCEPPatternOperator<>(
						Event.createTypeSerializer(),
						false,
						keySelector,
						IntSerializer.INSTANCE,
						new NFAFactory()));
		harness.configureForKeyedStream(keySelector, BasicTypeInfo.INT_TYPE_INFO);
		harness.open();
		harness.processElement(new StreamRecord<Event>(startEvent, 1));
		harness.processElement(new StreamRecord<Event>(new Event(42, "foobar", 1.0), 2));
		harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3));
		harness.processWatermark(new Watermark(2));
		// simulate snapshot/restore with empty element queue but NFA state
		StreamTaskState snapshot = harness.snapshot(1, 1);
		FileOutputStream out = new FileOutputStream(
				"src/test/resources/cep-keyed-snapshot-1.1");
		ObjectOutputStream oos = new ObjectOutputStream(out);
		oos.writeObject(snapshot);
		out.close();
		harness.close();
		*/
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = new KeyedOneInputStreamOperatorTestHarness<>(new KeyedCEPPatternOperator<>(Event.createTypeSerializer(), false, keySelector, IntSerializer.INSTANCE, new NFAFactory(), true), keySelector, BasicTypeInfo.INT_TYPE_INFO);
    harness.setup();
    // Restore state from the snapshot produced on Flink 1.1 (see generation code above).
    harness.initializeStateFromLegacyCheckpoint(getResourceFilename("cep-keyed-snapshot-1.1"));
    harness.open();
    // Feed the remaining events; per the generation code above, startEvent was already
    // processed before the legacy snapshot was taken.
    harness.processElement(new StreamRecord<Event>(middleEvent, 3));
    harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4));
    harness.processElement(new StreamRecord<>(endEvent, 5));
    // Advance event time past all element timestamps so the match is emitted.
    harness.processWatermark(new Watermark(20));
    ConcurrentLinkedQueue<Object> result = harness.getOutput();
    // watermark and the result
    assertEquals(2, result.size());
    Object resultObject = result.poll();
    assertTrue(resultObject instanceof StreamRecord);
    StreamRecord<?> resultRecord = (StreamRecord<?>) resultObject;
    assertTrue(resultRecord.getValue() instanceof Map);
    @SuppressWarnings("unchecked") Map<String, Event> patternMap = (Map<String, Event>) resultRecord.getValue();
    // The completed match must contain the pre-snapshot startEvent plus the two
    // events fed after the restore.
    assertEquals(startEvent, patternMap.get("start"));
    assertEquals(middleEvent, patternMap.get("middle"));
    assertEquals(endEvent, patternMap.get("end"));
    harness.close();
}
Also used : SubEvent(org.apache.flink.cep.SubEvent) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) KeySelector(org.apache.flink.api.java.functions.KeySelector) NullByteKeySelector(org.apache.flink.api.java.functions.NullByteKeySelector) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) Event(org.apache.flink.cep.Event) SubEvent(org.apache.flink.cep.SubEvent) Map(java.util.Map) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 19 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in the Apache Flink project.

From class CEPOperatorTest, method testKeyedCEPOperatorCheckpointing.

// Checkpoint/restore round-trip test for the keyed CEP operator: the first snapshot
// is taken while elements still sit in the internal sorting queue, the second after
// the queue has drained but the NFA holds a partial match. Both restores must
// preserve element timestamps and NFA state.
@Test
public void testKeyedCEPOperatorCheckpointing() throws Exception {
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = getCepTestHarness(false);
    harness.open();
    Event startEvent = new Event(42, "start", 1.0);
    SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    Event endEvent = new Event(42, "end", 1.0);
    harness.processElement(new StreamRecord<>(startEvent, 1L));
    harness.processElement(new StreamRecord<>(new Event(42, "foobar", 1.0), 2L));
    // simulate snapshot/restore with some elements in internal sorting queue
    OperatorStateHandles snapshot = harness.snapshot(0L, 0L);
    harness.close();
    // Restore into a fresh harness from the first snapshot.
    harness = getCepTestHarness(false);
    harness.setup();
    harness.initializeState(snapshot);
    harness.open();
    harness.processWatermark(new Watermark(Long.MIN_VALUE));
    harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3L));
    // if element timestamps are not correctly checkpointed/restored this will lead to
    // a pruning time underflow exception in NFA
    harness.processWatermark(new Watermark(2L));
    harness.processElement(new StreamRecord<Event>(middleEvent, 3L));
    harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4L));
    harness.processElement(new StreamRecord<>(endEvent, 5L));
    // simulate snapshot/restore with empty element queue but NFA state
    OperatorStateHandles snapshot2 = harness.snapshot(1L, 1L);
    harness.close();
    // Restore into a second fresh harness from the second snapshot.
    harness = getCepTestHarness(false);
    harness.setup();
    harness.initializeState(snapshot2);
    harness.open();
    // Advance event time to the maximum so the restored NFA emits the completed match.
    harness.processWatermark(new Watermark(Long.MAX_VALUE));
    // get and verify the output
    Queue<Object> result = harness.getOutput();
    assertEquals(2, result.size());
    verifyPattern(result.poll(), startEvent, middleEvent, endEvent);
    verifyWatermark(result.poll(), Long.MAX_VALUE);
    harness.close();
}
Also used : SubEvent(org.apache.flink.cep.SubEvent) OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) Event(org.apache.flink.cep.Event) SubEvent(org.apache.flink.cep.SubEvent) HashMap(java.util.HashMap) Map(java.util.Map) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 20 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in the Apache Flink project.

From class TimestampITCase, method testTimestampExtractorWithLongMaxWatermarkFromSource.

/**
	 * This test verifies that the timestamp extractor forwards Long.MAX_VALUE watermarks.
	 */
@Test
public void testTimestampExtractorWithLongMaxWatermarkFromSource() throws Exception {
    final int NUM_ELEMENTS = 10;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(1);
    env.setParallelism(2);
    env.getConfig().disableSysoutLogging();
    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= NUM_ELEMENTS) {
                ctx.collectWithTimestamp(index, index);
                ctx.collectWithTimestamp(index - 1, index - 1);
                index++;
                ctx.emitWatermark(new Watermark(index - 2));
            }
            // emit the final Long.MAX_VALUE watermark, do it twice and verify that
            // we only see one in the result
            ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
            ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
        }

        @Override
        public void cancel() {
        }
    });
    source1.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Integer>() {

        @Override
        public long extractTimestamp(Integer element, long currentTimestamp) {
            return element;
        }

        @Override
        public Watermark checkAndGetNextWatermark(Integer element, long extractedTimestamp) {
            return null;
        }
    }).transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true));
    env.execute();
    Assert.assertTrue(CustomOperator.finalWatermarks[0].size() == 1);
    Assert.assertTrue(CustomOperator.finalWatermarks[0].get(0).getTimestamp() == Long.MAX_VALUE);
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) Test(org.junit.Test)

Aggregations

Watermark (org.apache.flink.streaming.api.watermark.Watermark)117 Test (org.junit.Test)92 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)52 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)36 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)36 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)31 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)29 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)21 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)17 ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor)16 OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles)16 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)15 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)13 ArrayList (java.util.ArrayList)12 Map (java.util.Map)10 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)10 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)9 Event (org.apache.flink.cep.Event)9 SubEvent (org.apache.flink.cep.SubEvent)9 HashMap (java.util.HashMap)8