Search in sources :

Example 1 with KafkaShuffleRecord

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleRecord in project flink by apache.

The class KafkaShuffleITCase, method testWatermarkBroadcasting.

/**
 * To test watermark broadcasting with time characteristic: EventTime.
 *
 * <p>For every Kafka partition the test deserializes all consumed elements and checks that:
 *
 * <ul>
 *   <li>every Kafka record key is {@code null};
 *   <li>every data record carries {@code f1 == INIT_TIMESTAMP + f0} and a record timestamp equal
 *       to {@code f1};
 *   <li>the watermarks of each producer sub task seen in that partition arrive in order: {@code
 *       numElementsPerProducer} ascending watermarks starting at {@code INIT_TIMESTAMP},
 *       followed by one final {@code Watermark.MAX_WATERMARK}.
 * </ul>
 *
 * <p>Producer Parallelism = 2; Kafka Partition # = 3.
 */
@Test
public void testWatermarkBroadcasting() throws Exception {
    final int numberOfPartitions = 3;
    final int producerParallelism = 2;
    final int numElementsPerProducer = 1000;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results =
            testKafkaShuffleProducer(
                    topic("test_watermark_broadcast", EventTime),
                    env,
                    numberOfPartitions,
                    producerParallelism,
                    numElementsPerProducer,
                    EventTime);
    TypeSerializer<Tuple3<Integer, Long, Integer>> typeSerializer = createTypeSerializer(env);
    // NOTE(review): declared as a raw type; parameterizing it looks possible (the diamond
    // suggests the class is generic) but its signature is not visible here -- confirm.
    KafkaShuffleElementDeserializer deserializer = new KafkaShuffleElementDeserializer<>(typeSerializer);
    // Records in a single partition are kept in order
    for (int p = 0; p < numberOfPartitions; p++) {
        Collection<ConsumerRecord<byte[], byte[]>> records = results.get(p);
        // Watermarks of this partition, grouped by the producer sub task that emitted them.
        Map<Integer, List<KafkaShuffleWatermark>> watermarks = new HashMap<>();
        for (ConsumerRecord<byte[], byte[]> consumerRecord : records) {
            assertNull(consumerRecord.key());
            KafkaShuffleElement element = deserializer.deserialize(consumerRecord);
            if (element.isRecord()) {
                KafkaShuffleRecord<Tuple3<Integer, Long, Integer>> record = element.asRecord();
                // assertEquals takes (expected, actual).
                assertEquals(INIT_TIMESTAMP + record.getValue().f0, record.getValue().f1.longValue());
                assertEquals(record.getValue().f1.longValue(), record.getTimestamp().longValue());
            } else if (element.isWatermark()) {
                KafkaShuffleWatermark watermark = element.asWatermark();
                watermarks.computeIfAbsent(watermark.getSubtask(), k -> new ArrayList<>()).add(watermark);
            } else {
                fail("KafkaShuffleElement is either record or watermark");
            }
        }
        // Besides, watermarks from the same producer sub task should keep in order.
        // NOTE(review): nothing here asserts that every producer sub task actually shows up in
        // each partition (e.g. watermarks.size() == producerParallelism) -- confirm whether
        // that check is intended.
        for (List<KafkaShuffleWatermark> subTaskWatermarks : watermarks.values()) {
            assertEquals(numElementsPerProducer + 1, subTaskWatermarks.size());
            for (int index = 0; index < subTaskWatermarks.size(); index++) {
                KafkaShuffleWatermark watermark = subTaskWatermarks.get(index);
                if (index == numElementsPerProducer) {
                    // the last element is the watermark that signifies end-of-event-time
                    assertEquals(Watermark.MAX_WATERMARK.getTimestamp(), watermark.getWatermark());
                } else {
                    assertEquals(INIT_TIMESTAMP + index, watermark.getWatermark());
                }
            }
        }
    }
}
Also used : KafkaShuffleElementDeserializer(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleElementDeserializer) PARTITION_NUMBER(org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle.PARTITION_NUMBER) Tuple3(org.apache.flink.api.java.tuple.Tuple3) EventTime(org.apache.flink.streaming.api.TimeCharacteristic.EventTime) IngestionTime(org.apache.flink.streaming.api.TimeCharacteristic.IngestionTime) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashMap(java.util.HashMap) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) PropertiesUtil(org.apache.flink.util.PropertiesUtil) Lists(org.apache.flink.shaded.guava30.com.google.common.collect.Lists) ArrayList(java.util.ArrayList) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Map(java.util.Map) Timeout(org.junit.rules.Timeout) Assert.fail(org.junit.Assert.fail) Iterables(org.apache.flink.shaded.guava30.com.google.common.collect.Iterables) PRODUCER_PARALLELISM(org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle.PRODUCER_PARALLELISM) Tuple(org.apache.flink.api.java.tuple.Tuple) TestUtils.tryExecute(org.apache.flink.test.util.TestUtils.tryExecute) TimeCharacteristic(org.apache.flink.streaming.api.TimeCharacteristic) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) Properties(java.util.Properties) ProcessingTime(org.apache.flink.streaming.api.TimeCharacteristic.ProcessingTime) Assert.assertNotNull(org.junit.Assert.assertNotNull) KafkaShuffleElement(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleElement) Collection(java.util.Collection) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) Test(org.junit.Test) KafkaShuffleElementDeserializer(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleElementDeserializer) 
KafkaShuffleWatermark(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleWatermark) DataStream(org.apache.flink.streaming.api.datastream.DataStream) List(java.util.List) Rule(org.junit.Rule) Assert.assertNull(org.junit.Assert.assertNull) KafkaShuffleRecord(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleRecord) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Assert.assertEquals(org.junit.Assert.assertEquals) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) KafkaShuffleWatermark(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleWatermark) HashMap(java.util.HashMap) KafkaShuffleElement(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleElement) ArrayList(java.util.ArrayList) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Collection(java.util.Collection) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Properties (java.util.Properties)1 RestartStrategies (org.apache.flink.api.common.restartstrategy.RestartStrategies)1 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)1 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)1 Tuple (org.apache.flink.api.java.tuple.Tuple)1 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)1 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)1 Iterables (org.apache.flink.shaded.guava30.com.google.common.collect.Iterables)1 Lists (org.apache.flink.shaded.guava30.com.google.common.collect.Lists)1 TimeCharacteristic (org.apache.flink.streaming.api.TimeCharacteristic)1 EventTime (org.apache.flink.streaming.api.TimeCharacteristic.EventTime)1 IngestionTime (org.apache.flink.streaming.api.TimeCharacteristic.IngestionTime)1 ProcessingTime (org.apache.flink.streaming.api.TimeCharacteristic.ProcessingTime)1 DataStream (org.apache.flink.streaming.api.datastream.DataStream)1 KeyedStream (org.apache.flink.streaming.api.datastream.KeyedStream)1