Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.
Class CEPITCase, method testSimplePatternEventTime.
@Test
public void testSimplePatternEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // (Event, timestamp)
    DataStream<Event> input = env.fromElements(
        Tuple2.of(new Event(1, "start", 1.0), 5L),
        Tuple2.of(new Event(2, "middle", 2.0), 1L),
        Tuple2.of(new Event(3, "end", 3.0), 3L),
        Tuple2.of(new Event(4, "end", 4.0), 10L),
        Tuple2.of(new Event(5, "middle", 5.0), 7L),
        // last element for high final watermark
        Tuple2.of(new Event(5, "middle", 5.0), 100L)
    ).assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

        @Override
        public long extractTimestamp(Tuple2<Event, Long> element, long previousTimestamp) {
            return element.f1;
        }

        @Override
        public Watermark checkAndGetNextWatermark(Tuple2<Event, Long> lastElement, long extractedTimestamp) {
            return new Watermark(lastElement.f1 - 5);
        }
    }).map(new MapFunction<Tuple2<Event, Long>, Event>() {

        @Override
        public Event map(Tuple2<Event, Long> value) throws Exception {
            return value.f0;
        }
    });

    Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
        }
    }).followedBy("middle").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("middle");
        }
    }).followedBy("end").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
        }
    });

    DataStream<String> result = CEP.pattern(input, pattern).select(new PatternSelectFunction<Event, String>() {

        @Override
        public String select(Map<String, Event> pattern) {
            StringBuilder builder = new StringBuilder();
            builder.append(pattern.get("start").getId()).append(",")
                .append(pattern.get("middle").getId()).append(",")
                .append(pattern.get("end").getId());
            return builder.toString();
        }
    });

    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    // the expected sequence of matching event ids
    expected = "1,5,4";

    env.execute();
}
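In event time, matching follows timestamps rather than arrival order: sorted by timestamp the stream reads middle(ts 1), end(ts 3), start(ts 5), middle(ts 7), end(ts 10), middle(ts 100), so the first start/middle/end sequence has ids 1, 5, 4, which is exactly the expected result. On Flink 1.11 and later, AssignerWithPunctuatedWatermarks is deprecated in favor of WatermarkStrategy; a minimal sketch of a roughly equivalent assignment (an assumption, not part of the original test; bounded-out-of-orderness watermarks are emitted periodically rather than per element):

import java.time.Duration;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;

// Roughly mirrors the punctuated "timestamp - 5" watermark above with a
// bounded-out-of-orderness strategy of 5 ms.
DataStream<Event> input = env
    .fromElements(/* the same (Event, timestamp) tuples as above */)
    .assignTimestampsAndWatermarks(
        WatermarkStrategy
            .<Tuple2<Event, Long>>forBoundedOutOfOrderness(Duration.ofMillis(5))
            .withTimestampAssigner((element, recordTimestamp) -> element.f1))
    .map(t -> t.f0)
    .returns(Event.class);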
Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.
Class CEPMigration11to13Test, method testNonKeyedCEPFunctionMigration.
@Test
public void testNonKeyedCEPFunctionMigration() throws Exception {
    final Event startEvent = new Event(42, "start", 1.0);
    final SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    final Event endEvent = new Event(42, "end", 1.0);

    // uncomment these lines for regenerating the snapshot on Flink 1.1
    /*
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = new OneInputStreamOperatorTestHarness<>(
            new CEPPatternOperator<>(
                    Event.createTypeSerializer(),
                    false,
                    new NFAFactory()));
    harness.open();
    harness.processElement(new StreamRecord<Event>(startEvent, 1));
    harness.processElement(new StreamRecord<Event>(new Event(42, "foobar", 1.0), 2));
    harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3));
    harness.processWatermark(new Watermark(2));

    // simulate snapshot/restore with empty element queue but NFA state
    StreamTaskState snapshot = harness.snapshot(1, 1);
    FileOutputStream out = new FileOutputStream("src/test/resources/cep-non-keyed-snapshot-1.1");
    ObjectOutputStream oos = new ObjectOutputStream(out);
    oos.writeObject(snapshot);
    out.close();
    harness.close();
    */

    NullByteKeySelector keySelector = new NullByteKeySelector();

    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness =
        new KeyedOneInputStreamOperatorTestHarness<>(
            new KeyedCEPPatternOperator<>(
                Event.createTypeSerializer(),
                false,
                keySelector,
                ByteSerializer.INSTANCE,
                new NFAFactory(),
                false),
            keySelector,
            BasicTypeInfo.BYTE_TYPE_INFO);

    harness.setup();
    harness.initializeStateFromLegacyCheckpoint(getResourceFilename("cep-non-keyed-snapshot-1.1"));
    harness.open();

    harness.processElement(new StreamRecord<Event>(middleEvent, 3));
    harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4));
    harness.processElement(new StreamRecord<>(endEvent, 5));
    harness.processWatermark(new Watermark(Long.MAX_VALUE));

    ConcurrentLinkedQueue<Object> result = harness.getOutput();

    // watermark and the result
    assertEquals(2, result.size());

    Object resultObject = result.poll();
    assertTrue(resultObject instanceof StreamRecord);
    StreamRecord<?> resultRecord = (StreamRecord<?>) resultObject;
    assertTrue(resultRecord.getValue() instanceof Map);

    @SuppressWarnings("unchecked")
    Map<String, Event> patternMap = (Map<String, Event>) resultRecord.getValue();

    assertEquals(startEvent, patternMap.get("start"));
    assertEquals(middleEvent, patternMap.get("middle"));
    assertEquals(endEvent, patternMap.get("end"));

    harness.close();
}
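Because the non-keyed CEPPatternOperator from Flink 1.1 was removed, the 1.3 code restores the legacy snapshot into a keyed operator by mapping every record to the same dummy byte key. NullByteKeySelector ships with Flink (org.apache.flink.api.java.functions.NullByteKeySelector); to the best of my understanding it is essentially the following:

import org.apache.flink.api.java.functions.KeySelector;

// Maps every record to the constant key 0, so a logically non-keyed stream
// can run on keyed-state infrastructure with a single key.
public class NullByteKeySelector<T> implements KeySelector<T, Byte> {

    @Override
    public Byte getKey(T value) {
        return (byte) 0;
    }
}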
Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.
Class CEPMigration11to13Test, method testKeyedCEPOperatorMigratation.
@Test
public void testKeyedCEPOperatorMigratation() throws Exception {
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };

    final Event startEvent = new Event(42, "start", 1.0);
    final SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    final Event endEvent = new Event(42, "end", 1.0);

    // uncomment these lines for regenerating the snapshot on Flink 1.1
    /*
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = new OneInputStreamOperatorTestHarness<>(
            new KeyedCEPPatternOperator<>(
                    Event.createTypeSerializer(),
                    false,
                    keySelector,
                    IntSerializer.INSTANCE,
                    new NFAFactory()));
    harness.configureForKeyedStream(keySelector, BasicTypeInfo.INT_TYPE_INFO);
    harness.open();
    harness.processElement(new StreamRecord<Event>(startEvent, 1));
    harness.processElement(new StreamRecord<Event>(new Event(42, "foobar", 1.0), 2));
    harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3));
    harness.processWatermark(new Watermark(2));

    // simulate snapshot/restore with empty element queue but NFA state
    StreamTaskState snapshot = harness.snapshot(1, 1);
    FileOutputStream out = new FileOutputStream("src/test/resources/cep-keyed-snapshot-1.1");
    ObjectOutputStream oos = new ObjectOutputStream(out);
    oos.writeObject(snapshot);
    out.close();
    harness.close();
    */

    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness =
        new KeyedOneInputStreamOperatorTestHarness<>(
            new KeyedCEPPatternOperator<>(
                Event.createTypeSerializer(),
                false,
                keySelector,
                IntSerializer.INSTANCE,
                new NFAFactory(),
                true),
            keySelector,
            BasicTypeInfo.INT_TYPE_INFO);

    harness.setup();
    harness.initializeStateFromLegacyCheckpoint(getResourceFilename("cep-keyed-snapshot-1.1"));
    harness.open();

    harness.processElement(new StreamRecord<Event>(middleEvent, 3));
    harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4));
    harness.processElement(new StreamRecord<>(endEvent, 5));
    harness.processWatermark(new Watermark(20));

    ConcurrentLinkedQueue<Object> result = harness.getOutput();

    // watermark and the result
    assertEquals(2, result.size());

    Object resultObject = result.poll();
    assertTrue(resultObject instanceof StreamRecord);
    StreamRecord<?> resultRecord = (StreamRecord<?>) resultObject;
    assertTrue(resultRecord.getValue() instanceof Map);

    @SuppressWarnings("unchecked")
    Map<String, Event> patternMap = (Map<String, Event>) resultRecord.getValue();

    assertEquals(startEvent, patternMap.get("start"));
    assertEquals(middleEvent, patternMap.get("middle"));
    assertEquals(endEvent, patternMap.get("end"));

    harness.close();
}
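Note that the keyed variant finishes with new Watermark(20) rather than Long.MAX_VALUE: any watermark past the buffered timestamps (here at most 5) makes the operator sort and process its element queue. The Watermark class itself is just a timestamp holder; a small illustration (Watermark.MAX_WATERMARK is the predefined Long.MAX_VALUE instance):

import org.apache.flink.streaming.api.watermark.Watermark;

Watermark wm = new Watermark(20);
assert wm.getTimestamp() == 20;                                   // ordinary event-time watermark
assert Watermark.MAX_WATERMARK.getTimestamp() == Long.MAX_VALUE;  // end-of-stream marker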
Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.
Class CEPOperatorTest, method testKeyedCEPOperatorCheckpointing.
@Test
public void testKeyedCEPOperatorCheckpointing() throws Exception {
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = getCepTestHarness(false);
    harness.open();

    Event startEvent = new Event(42, "start", 1.0);
    SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    Event endEvent = new Event(42, "end", 1.0);

    harness.processElement(new StreamRecord<>(startEvent, 1L));
    harness.processElement(new StreamRecord<>(new Event(42, "foobar", 1.0), 2L));

    // simulate snapshot/restore with some elements in internal sorting queue
    OperatorStateHandles snapshot = harness.snapshot(0L, 0L);
    harness.close();

    harness = getCepTestHarness(false);
    harness.setup();
    harness.initializeState(snapshot);
    harness.open();

    harness.processWatermark(new Watermark(Long.MIN_VALUE));
    harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3L));

    // if element timestamps are not correctly checkpointed/restored this will lead to
    // a pruning time underflow exception in NFA
    harness.processWatermark(new Watermark(2L));

    harness.processElement(new StreamRecord<Event>(middleEvent, 3L));
    harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4L));
    harness.processElement(new StreamRecord<>(endEvent, 5L));

    // simulate snapshot/restore with empty element queue but NFA state
    OperatorStateHandles snapshot2 = harness.snapshot(1L, 1L);
    harness.close();

    harness = getCepTestHarness(false);
    harness.setup();
    harness.initializeState(snapshot2);
    harness.open();

    harness.processWatermark(new Watermark(Long.MAX_VALUE));

    // get and verify the output
    Queue<Object> result = harness.getOutput();
    assertEquals(2, result.size());

    verifyPattern(result.poll(), startEvent, middleEvent, endEvent);
    verifyWatermark(result.poll(), Long.MAX_VALUE);

    harness.close();
}
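verifyPattern and verifyWatermark are helpers defined elsewhere in CEPOperatorTest and are not shown here; judging from the call sites they presumably look roughly like the following sketch (an assumption, not the actual Flink code):

private void verifyWatermark(Object output, long timestamp) {
    assertTrue(output instanceof Watermark);
    assertEquals(timestamp, ((Watermark) output).getTimestamp());
}

private void verifyPattern(Object output, Event start, Event middle, Event end) {
    assertTrue(output instanceof StreamRecord);
    @SuppressWarnings("unchecked")
    Map<String, Event> map = (Map<String, Event>) ((StreamRecord<?>) output).getValue();
    assertEquals(start, map.get("start"));
    assertEquals(middle, map.get("middle"));
    assertEquals(end, map.get("end"));
}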
Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.
Class TimestampITCase, method testTimestampExtractorWithLongMaxWatermarkFromSource.
/**
 * This test verifies that the timestamp extractor forwards Long.MAX_VALUE watermarks.
 */
@Test
public void testTimestampExtractorWithLongMaxWatermarkFromSource() throws Exception {
    final int NUM_ELEMENTS = 10;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(1);
    env.setParallelism(2);
    env.getConfig().disableSysoutLogging();

    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= NUM_ELEMENTS) {
                ctx.collectWithTimestamp(index, index);
                ctx.collectWithTimestamp(index - 1, index - 1);
                index++;
                ctx.emitWatermark(new Watermark(index - 2));
            }

            // emit the final Long.MAX_VALUE watermark, do it twice and verify that
            // we only see one in the result
            ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
            ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
        }

        @Override
        public void cancel() {
        }
    });

    source1.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Integer>() {

        @Override
        public long extractTimestamp(Integer element, long currentTimestamp) {
            return element;
        }

        @Override
        public Watermark checkAndGetNextWatermark(Integer element, long extractedTimestamp) {
            return null;
        }
    }).transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true));

    env.execute();

    Assert.assertTrue(CustomOperator.finalWatermarks[0].size() == 1);
    Assert.assertTrue(CustomOperator.finalWatermarks[0].get(0).getTimestamp() == Long.MAX_VALUE);
}
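Returning null from checkAndGetNextWatermark means "emit no watermark for this element", so the extractor swallows the source's ordinary watermarks; only the terminal Long.MAX_VALUE watermark is forwarded, and it arrives downstream once even though the source emits it twice, which is exactly what the assertions check. CustomOperator records the watermarks it receives; a minimal sketch of such a recording operator (the name WatermarkRecordingOperator and its body are illustrative assumptions, not the real TimestampITCase helper):

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

public class WatermarkRecordingOperator extends AbstractStreamOperator<Integer>
        implements OneInputStreamOperator<Integer, Integer> {

    static final List<Watermark> seen = new CopyOnWriteArrayList<>();

    @Override
    public void processElement(StreamRecord<Integer> element) {
        output.collect(element); // pass elements through unchanged
    }

    @Override
    public void processWatermark(Watermark mark) throws Exception {
        seen.add(mark);          // record every watermark reaching this operator
        super.processWatermark(mark);
    }
}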