Search in sources:

Example 1 with AssignerWithPunctuatedWatermarks

use of org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks in project flink by apache.

the class AbstractFetcherTest method testSkipCorruptedRecordWithPunctuatedWatermarks.

/**
 * Feeds three valid records and then a {@code null} record through a fetcher that is set up
 * with a punctuated watermark assigner only. The {@code null} record must yield neither a new
 * element nor a watermark, while the offset kept in the partition state must still advance.
 */
@Test
public void testSkipCorruptedRecordWithPunctuatedWatermarks() throws Exception {
    final String topic = "test topic name";

    Map<KafkaTopicPartition, Long> partitions = new HashMap<>();
    partitions.put(
            new KafkaTopicPartition(topic, 1), KafkaTopicPartitionStateSentinel.LATEST_OFFSET);

    TestSourceContext<Long> context = new TestSourceContext<>();
    TestProcessingTimeService timeService = new TestProcessingTimeService();

    SerializedValue<AssignerWithPunctuatedWatermarks<Long>> punctuatedAssigner =
            new SerializedValue<AssignerWithPunctuatedWatermarks<Long>>(
                    new PunctuatedTestExtractor());

    TestFetcher<Long> fetcher =
            new TestFetcher<>(
                    context,
                    partitions,
                    null, /* periodic watermark assigner */
                    punctuatedAssigner, /* punctuated watermark assigner */
                    timeService,
                    0);

    final KafkaTopicPartitionState<Object> state = fetcher.subscribedPartitionStates()[0];

    // elements generate a watermark if the timestamp is a multiple of three
    fetcher.emitRecord(1L, state, 1L);
    fetcher.emitRecord(2L, state, 2L);
    fetcher.emitRecord(3L, state, 3L);

    // the third record is the latest element and also produced a watermark
    assertEquals(3L, context.getLatestElement().getValue().longValue());
    assertEquals(3L, context.getLatestElement().getTimestamp());
    assertTrue(context.hasWatermark());
    assertEquals(3L, context.getLatestWatermark().getTimestamp());
    assertEquals(3L, state.getOffset());

    // emit null record
    fetcher.emitRecord(null, state, 4L);

    // no elements or watermarks should have been collected
    assertEquals(3L, context.getLatestElement().getValue().longValue());
    assertEquals(3L, context.getLatestElement().getTimestamp());
    assertFalse(context.hasWatermark());

    // the offset in state still should have advanced
    assertEquals(4L, state.getOffset());
}
Also used : HashMap(java.util.HashMap) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) Test(org.junit.Test)

Example 2 with AssignerWithPunctuatedWatermarks

use of org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks in project flink by apache.

the class CEPITCase method testSimplePatternEventTime.

/**
 * Applies a "start", then "middle", then "end" pattern to a stream of events whose timestamps
 * arrive out of order, and expects exactly one match, rendered as {@code "1,5,4"}.
 */
@Test
public void testSimplePatternEventTime() throws Exception {
    StreamExecutionEnvironment env =
            StreamExecutionEnvironment.getExecutionEnvironment(envConfiguration);

    // timestamp comes from the tuple's second field; every element yields a watermark
    // that lags its own timestamp by 5
    AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>> timestampAssigner =
            new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

                @Override
                public long extractTimestamp(Tuple2<Event, Long> tuple, long previous) {
                    return tuple.f1;
                }

                @Override
                public Watermark checkAndGetNextWatermark(
                        Tuple2<Event, Long> lastTuple, long extracted) {
                    return new Watermark(lastTuple.f1 - 5);
                }
            };

    // strips the timestamp and keeps only the event
    MapFunction<Tuple2<Event, Long>, Event> toEvent =
            new MapFunction<Tuple2<Event, Long>, Event>() {

                @Override
                public Event map(Tuple2<Event, Long> tuple) throws Exception {
                    return tuple.f0;
                }
            };

    // (Event, timestamp)
    DataStream<Event> events =
            env.fromElements(
                            Tuple2.of(new Event(1, "start", 1.0), 5L),
                            Tuple2.of(new Event(2, "middle", 2.0), 1L),
                            Tuple2.of(new Event(3, "end", 3.0), 3L),
                            Tuple2.of(new Event(4, "end", 4.0), 10L),
                            Tuple2.of(new Event(5, "middle", 5.0), 7L),
                            // last element for high final watermark
                            Tuple2.of(new Event(5, "middle", 5.0), 100L))
                    .assignTimestampsAndWatermarks(timestampAssigner)
                    .map(toEvent);

    SimpleCondition<Event> isStart =
            new SimpleCondition<Event>() {

                @Override
                public boolean filter(Event event) throws Exception {
                    return event.getName().equals("start");
                }
            };
    SimpleCondition<Event> isMiddle =
            new SimpleCondition<Event>() {

                @Override
                public boolean filter(Event event) throws Exception {
                    return event.getName().equals("middle");
                }
            };
    SimpleCondition<Event> isEnd =
            new SimpleCondition<Event>() {

                @Override
                public boolean filter(Event event) throws Exception {
                    return event.getName().equals("end");
                }
            };

    Pattern<Event, ?> startMiddleEnd =
            Pattern.<Event>begin("start")
                    .where(isStart)
                    .followedByAny("middle")
                    .where(isMiddle)
                    .followedByAny("end")
                    .where(isEnd);

    // renders a match as "<startId>,<middleId>,<endId>" using the first event of each stage
    PatternSelectFunction<Event, String> toIdTriple =
            new PatternSelectFunction<Event, String>() {

                @Override
                public String select(Map<String, List<Event>> match) {
                    return match.get("start").get(0).getId()
                            + ","
                            + match.get("middle").get(0).getId()
                            + ","
                            + match.get("end").get(0).getId();
                }
            };

    DataStream<String> matches = CEP.pattern(events, startMiddleEnd).select(toIdTriple);

    List<String> collected = new ArrayList<>();
    DataStreamUtils.collect(matches).forEachRemaining(collected::add);
    collected.sort(String::compareTo);

    assertEquals(Arrays.asList("1,5,4"), collected);
}
Also used : ArrayList(java.util.ArrayList) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) List(java.util.List) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 3 with AssignerWithPunctuatedWatermarks

use of org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks in project flink by apache.

the class TimestampITCase method testTimestampExtractorWithLongMaxWatermarkFromSource.

/**
 * This test verifies that the timestamp extractor forwards Long.MAX_VALUE watermarks.
 *
 * <p>The source interleaves timestamped records with watermarks and finally emits a
 * {@code Long.MAX_VALUE} watermark twice. The assertions check that the first entry of
 * {@code CustomOperator.finalWatermarks} holds exactly one watermark, and that its timestamp is
 * {@code Long.MAX_VALUE}.
 */
@Test
public void testTimestampExtractorWithLongMaxWatermarkFromSource() throws Exception {
    final int numElements = 10;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setAutoWatermarkInterval(1);
    env.setParallelism(2);
    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= numElements) {
                // each round emits two records: index, then index - 1
                ctx.collectWithTimestamp(index, index);
                ctx.collectWithTimestamp(index - 1, index - 1);
                index++;
                // index was just incremented, so this equals the timestamp of the
                // second record emitted in this round
                ctx.emitWatermark(new Watermark(index - 2));
            }
            // emit the final Long.MAX_VALUE watermark, do it twice and verify that
            // we only see one in the result
            ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
            ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
        }

        @Override
        public void cancel() {
        }
    });
    source1.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Integer>() {

        @Override
        public long extractTimestamp(Integer element, long currentTimestamp) {
            return element;
        }

        @Override
        public Watermark checkAndGetNextWatermark(Integer element, long extractedTimestamp) {
            // this assigner never produces a watermark of its own
            return null;
        }
    }).transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true));
    env.execute();
    // assertEquals reports expected vs. actual on failure, unlike assertTrue(a == b)
    Assert.assertEquals(1, CustomOperator.finalWatermarks[0].size());
    Assert.assertEquals(Long.MAX_VALUE, CustomOperator.finalWatermarks[0].get(0).getTimestamp());
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) Test(org.junit.Test)

Example 4 with AssignerWithPunctuatedWatermarks

use of org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks in project flink by apache.

the class AbstractFetcherTest method testPunctuatedWatermarks.

// ------------------------------------------------------------------------
//   Timestamps & watermarks tests
// ------------------------------------------------------------------------
/**
 * Emits records into three partitions of a fetcher configured with a punctuated watermark
 * assigner and checks when a watermark becomes available and which timestamp it carries.
 *
 * <p>The asserted watermark values (3, then 12, then 15) are consistent with the emitted
 * watermark being the minimum of the per-partition watermarks; that logic lives in the fetcher
 * and is not visible in this test.
 */
@Test
public void testPunctuatedWatermarks() throws Exception {
    final String testTopic = "test topic name";
    Map<KafkaTopicPartition, Long> originalPartitions = new HashMap<>();
    originalPartitions.put(new KafkaTopicPartition(testTopic, 7), KafkaTopicPartitionStateSentinel.LATEST_OFFSET);
    originalPartitions.put(new KafkaTopicPartition(testTopic, 13), KafkaTopicPartitionStateSentinel.LATEST_OFFSET);
    originalPartitions.put(new KafkaTopicPartition(testTopic, 21), KafkaTopicPartitionStateSentinel.LATEST_OFFSET);
    TestSourceContext<Long> sourceContext = new TestSourceContext<>();
    TestProcessingTimeService processingTimeProvider = new TestProcessingTimeService();
    TestFetcher<Long> fetcher = new TestFetcher<>(
            sourceContext,
            originalPartitions,
            null, /* periodic watermark assigner */
            new SerializedValue<AssignerWithPunctuatedWatermarks<Long>>(new PunctuatedTestExtractor()), /* punctuated watermark assigner */
            processingTimeProvider,
            0);
    final KafkaTopicPartitionState<Object> part1 = fetcher.subscribedPartitionStates()[0];
    final KafkaTopicPartitionState<Object> part2 = fetcher.subscribedPartitionStates()[1];
    final KafkaTopicPartitionState<Object> part3 = fetcher.subscribedPartitionStates()[2];
    // elements generate a watermark if the timestamp is a multiple of three
    // elements for partition 1
    fetcher.emitRecord(1L, part1, 1L);
    fetcher.emitRecord(2L, part1, 2L);
    fetcher.emitRecord(3L, part1, 3L);
    assertEquals(3L, sourceContext.getLatestElement().getValue().longValue());
    assertEquals(3L, sourceContext.getLatestElement().getTimestamp());
    assertFalse(sourceContext.hasWatermark());
    // elements for partition 2
    fetcher.emitRecord(12L, part2, 1L);
    assertEquals(12L, sourceContext.getLatestElement().getValue().longValue());
    assertEquals(12L, sourceContext.getLatestElement().getTimestamp());
    assertFalse(sourceContext.hasWatermark());
    // elements for partition 3
    fetcher.emitRecord(101L, part3, 1L);
    fetcher.emitRecord(102L, part3, 2L);
    assertEquals(102L, sourceContext.getLatestElement().getValue().longValue());
    assertEquals(102L, sourceContext.getLatestElement().getTimestamp());
    // now, we should have a watermark
    assertTrue(sourceContext.hasWatermark());
    assertEquals(3L, sourceContext.getLatestWatermark().getTimestamp());
    // advance partition 3
    fetcher.emitRecord(1003L, part3, 3L);
    fetcher.emitRecord(1004L, part3, 4L);
    fetcher.emitRecord(1005L, part3, 5L);
    assertEquals(1005L, sourceContext.getLatestElement().getValue().longValue());
    assertEquals(1005L, sourceContext.getLatestElement().getTimestamp());
    // advance partition 1 beyond partition 2 - this bumps the watermark
    fetcher.emitRecord(30L, part1, 4L);
    assertEquals(30L, sourceContext.getLatestElement().getValue().longValue());
    assertEquals(30L, sourceContext.getLatestElement().getTimestamp());
    assertTrue(sourceContext.hasWatermark());
    assertEquals(12L, sourceContext.getLatestWatermark().getTimestamp());
    // advance partition 2 again - this bumps the watermark
    fetcher.emitRecord(13L, part2, 2L);
    assertFalse(sourceContext.hasWatermark());
    fetcher.emitRecord(14L, part2, 3L);
    assertFalse(sourceContext.hasWatermark());
    // offsets within a partition are strictly increasing, so this record gets offset 4
    // (the previous record already used offset 3)
    fetcher.emitRecord(15L, part2, 4L);
    assertTrue(sourceContext.hasWatermark());
    assertEquals(15L, sourceContext.getLatestWatermark().getTimestamp());
}
Also used : HashMap(java.util.HashMap) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) Test(org.junit.Test)

Example 5 with AssignerWithPunctuatedWatermarks

use of org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks in project flink by apache.

the class CEPITCase method testSimplePatternEventTimeWithComparator.

/**
 * Applies a "start", then "middle", then "end" pattern, together with a custom event
 * comparator, to a stream of events whose timestamps arrive out of order. Two "middle" events
 * share the same timestamp; the expected matches are {@code "1,5,4"} and {@code "1,6,4"}.
 *
 * @throws Exception propagated from building or running the pipeline
 */
@Test
public void testSimplePatternEventTimeWithComparator() throws Exception {
    StreamExecutionEnvironment env =
            StreamExecutionEnvironment.getExecutionEnvironment(envConfiguration);

    // timestamp comes from the tuple's second field; every element yields a watermark
    // that lags its own timestamp by 5
    AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>> timestampAssigner =
            new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

                @Override
                public long extractTimestamp(Tuple2<Event, Long> tuple, long previous) {
                    return tuple.f1;
                }

                @Override
                public Watermark checkAndGetNextWatermark(
                        Tuple2<Event, Long> lastTuple, long extracted) {
                    return new Watermark(lastTuple.f1 - 5);
                }
            };

    // strips the timestamp and keeps only the event
    MapFunction<Tuple2<Event, Long>, Event> toEvent =
            new MapFunction<Tuple2<Event, Long>, Event>() {

                @Override
                public Event map(Tuple2<Event, Long> tuple) throws Exception {
                    return tuple.f0;
                }
            };

    // (Event, timestamp)
    DataStream<Event> events =
            env.fromElements(
                            Tuple2.of(new Event(1, "start", 1.0), 5L),
                            Tuple2.of(new Event(2, "middle", 2.0), 1L),
                            Tuple2.of(new Event(3, "end", 3.0), 3L),
                            Tuple2.of(new Event(4, "end", 4.0), 10L),
                            Tuple2.of(new Event(5, "middle", 6.0), 7L),
                            Tuple2.of(new Event(6, "middle", 5.0), 7L),
                            // last element for high final watermark
                            Tuple2.of(new Event(7, "middle", 5.0), 100L))
                    .assignTimestampsAndWatermarks(timestampAssigner)
                    .map(toEvent);

    EventComparator<Event> eventOrder = new CustomEventComparator();

    SimpleCondition<Event> isStart =
            new SimpleCondition<Event>() {

                @Override
                public boolean filter(Event event) throws Exception {
                    return event.getName().equals("start");
                }
            };
    SimpleCondition<Event> isMiddle =
            new SimpleCondition<Event>() {

                @Override
                public boolean filter(Event event) throws Exception {
                    return event.getName().equals("middle");
                }
            };
    SimpleCondition<Event> isEnd =
            new SimpleCondition<Event>() {

                @Override
                public boolean filter(Event event) throws Exception {
                    return event.getName().equals("end");
                }
            };

    Pattern<Event, ? extends Event> startMiddleEnd =
            Pattern.<Event>begin("start")
                    .where(isStart)
                    .followedByAny("middle")
                    .where(isMiddle)
                    .followedByAny("end")
                    .where(isEnd);

    // renders a match as "<startId>,<middleId>,<endId>" using the first event of each stage
    PatternSelectFunction<Event, String> toIdTriple =
            new PatternSelectFunction<Event, String>() {

                @Override
                public String select(Map<String, List<Event>> match) {
                    return match.get("start").get(0).getId()
                            + ","
                            + match.get("middle").get(0).getId()
                            + ","
                            + match.get("end").get(0).getId();
                }
            };

    DataStream<String> matches =
            CEP.pattern(events, startMiddleEnd, eventOrder).select(toIdTriple);

    List<String> collected = new ArrayList<>();
    DataStreamUtils.collect(matches).forEachRemaining(collected::add);

    // sort both sides so the comparison does not depend on emission order
    List<String> expected = Arrays.asList("1,6,4", "1,5,4");
    expected.sort(String::compareTo);
    collected.sort(String::compareTo);

    assertEquals(expected, collected);
}
Also used : ArrayList(java.util.ArrayList) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) List(java.util.List) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Aggregations

AssignerWithPunctuatedWatermarks (org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks): 6; Test (org.junit.Test): 6; StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 4; Watermark (org.apache.flink.streaming.api.watermark.Watermark): 4; ArrayList (java.util.ArrayList): 3; List (java.util.List): 3; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 3; HashMap (java.util.HashMap): 2; TestProcessingTimeService (org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService): 2; Duration (java.time.Duration): 1; Arrays (java.util.Arrays): 1; Collection (java.util.Collection): 1; Comparator (java.util.Comparator): 1; Map (java.util.Map): 1; MapFunction (org.apache.flink.api.common.functions.MapFunction): 1; MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor): 1; Types (org.apache.flink.api.common.typeinfo.Types): 1; LongSerializer (org.apache.flink.api.common.typeutils.base.LongSerializer): 1; KeySelector (org.apache.flink.api.java.functions.KeySelector): 1; CEPCacheOptions (org.apache.flink.cep.configuration.CEPCacheOptions): 1