Usage example of org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleRecord in the Apache Flink project: the testWatermarkBroadcasting method of the KafkaShuffleITCase class.
/**
 * Tests value and watermark serialization and deserialization with the EventTime time
 * characteristic.
 *
 * <p>Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 1.
 */
@Test
public void testWatermarkBroadcasting() throws Exception {
    final int numberOfPartitions = 3;
    final int producerParallelism = 2;
    final int numElementsPerProducer = 1000;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results =
            testKafkaShuffleProducer(
                    topic("test_watermark_broadcast", EventTime),
                    env,
                    numberOfPartitions,
                    producerParallelism,
                    numElementsPerProducer,
                    EventTime);
    TypeSerializer<Tuple3<Integer, Long, Integer>> typeSerializer = createTypeSerializer(env);
    // Parameterized type instead of the raw KafkaShuffleElementDeserializer.
    KafkaShuffleElementDeserializer<Tuple3<Integer, Long, Integer>> deserializer =
            new KafkaShuffleElementDeserializer<>(typeSerializer);

    // Records in a single partition are kept in order.
    for (int p = 0; p < numberOfPartitions; p++) {
        Collection<ConsumerRecord<byte[], byte[]>> records = results.get(p);
        // Watermarks grouped by the producer subtask that emitted them.
        Map<Integer, List<KafkaShuffleWatermark>> watermarks = new HashMap<>();

        for (ConsumerRecord<byte[], byte[]> consumerRecord : records) {
            assertNull(consumerRecord.key());
            KafkaShuffleElement element = deserializer.deserialize(consumerRecord);
            if (element.isRecord()) {
                KafkaShuffleRecord<Tuple3<Integer, Long, Integer>> record = element.asRecord();
                // assertEquals(expected, actual): f1 carries the event timestamp derived
                // from f0, and the record timestamp must match f1.
                assertEquals(
                        INIT_TIMESTAMP + record.getValue().f0,
                        record.getValue().f1.longValue());
                assertEquals(
                        record.getValue().f1.longValue(),
                        record.getTimestamp().longValue());
            } else if (element.isWatermark()) {
                KafkaShuffleWatermark watermark = element.asWatermark();
                // Single idiomatic call instead of computeIfAbsent followed by get.
                watermarks
                        .computeIfAbsent(watermark.getSubtask(), k -> new ArrayList<>())
                        .add(watermark);
            } else {
                fail("KafkaShuffleElement is either record or watermark");
            }
        }

        // Watermarks from the same producer subtask must stay in order: one per
        // produced element, plus the final end-of-event-time watermark.
        for (List<KafkaShuffleWatermark> subTaskWatermarks : watermarks.values()) {
            int index = 0;
            assertEquals(numElementsPerProducer + 1, subTaskWatermarks.size());
            for (KafkaShuffleWatermark watermark : subTaskWatermarks) {
                if (index == numElementsPerProducer) {
                    // The last element is the watermark that signifies end-of-event-time.
                    assertEquals(
                            Watermark.MAX_WATERMARK.getTimestamp(),
                            watermark.getWatermark());
                } else {
                    assertEquals(INIT_TIMESTAMP + index++, watermark.getWatermark());
                }
            }
        }
    }
}
Aggregations