Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleWatermark in project flink by apache.
The class KafkaShuffleITCase, method testWatermarkBroadcasting.
/**
 * To test that the watermarks of every producer subtask are written to every Kafka partition and
 * stay in order within a partition, with time characteristic: EventTime.
 *
 * <p>Producer Parallelism = 2; Kafka Partition # = 3.
 */
@Test
public void testWatermarkBroadcasting() throws Exception {
    final int numberOfPartitions = 3;
    final int producerParallelism = 2;
    final int numElementsPerProducer = 1000;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results =
            testKafkaShuffleProducer(
                    topic("test_watermark_broadcast", EventTime),
                    env,
                    numberOfPartitions,
                    producerParallelism,
                    numElementsPerProducer,
                    EventTime);
    TypeSerializer<Tuple3<Integer, Long, Integer>> typeSerializer = createTypeSerializer(env);
    KafkaShuffleElementDeserializer<Tuple3<Integer, Long, Integer>> deserializer =
            new KafkaShuffleElementDeserializer<>(typeSerializer);

    // Records in a single partition are kept in order
    for (int p = 0; p < numberOfPartitions; p++) {
        Collection<ConsumerRecord<byte[], byte[]>> records = results.get(p);
        // Watermarks seen in this partition, grouped by the producer subtask that emitted them.
        Map<Integer, List<KafkaShuffleWatermark>> watermarks = new HashMap<>();

        for (ConsumerRecord<byte[], byte[]> consumerRecord : records) {
            assertNull(consumerRecord.key());
            KafkaShuffleElement element = deserializer.deserialize(consumerRecord);
            if (element.isRecord()) {
                KafkaShuffleRecord<Tuple3<Integer, Long, Integer>> record = element.asRecord();
                // f1 carries the event timestamp, which is derived from the element index f0.
                assertEquals(
                        INIT_TIMESTAMP + record.getValue().f0,
                        record.getValue().f1.longValue());
                assertEquals(
                        record.getValue().f1.longValue(), record.getTimestamp().longValue());
            } else if (element.isWatermark()) {
                KafkaShuffleWatermark watermark = element.asWatermark();
                watermarks
                        .computeIfAbsent(watermark.getSubtask(), k -> new ArrayList<>())
                        .add(watermark);
            } else {
                fail("KafkaShuffleElement is either record or watermark");
            }
        }

        // Watermarks are expected to be broadcast, so every producer subtask must show up in
        // every partition; without this check a missing subtask would go unnoticed.
        assertEquals(producerParallelism, watermarks.size());

        // Besides, watermarks from the same producer sub task should keep in order.
        for (List<KafkaShuffleWatermark> subTaskWatermarks : watermarks.values()) {
            // One watermark per element plus the final end-of-event-time watermark.
            assertEquals(numElementsPerProducer + 1, subTaskWatermarks.size());
            int index = 0;
            for (KafkaShuffleWatermark watermark : subTaskWatermarks) {
                if (index == numElementsPerProducer) {
                    // the last element is the watermark that signifies end-of-event-time
                    assertEquals(
                            Watermark.MAX_WATERMARK.getTimestamp(), watermark.getWatermark());
                } else {
                    assertEquals(INIT_TIMESTAMP + index, watermark.getWatermark());
                    index++;
                }
            }
        }
    }
}
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleWatermark in project flink by apache.
The class KafkaShuffleITCase, method testRecordSerDe.
/**
 * Writes {@code numElementsPerProducer} elements through the Kafka shuffle producer using one
 * partition and one producer, then deserializes everything read back from that partition and
 * checks the records and watermarks against the given time characteristic.
 *
 * @param timeCharacteristic the time characteristic the producing job runs with; it decides
 *     which timestamps and how many watermarks are expected
 * @throws Exception if producing to or reading from the topic fails
 */
private void testRecordSerDe(TimeCharacteristic timeCharacteristic) throws Exception {
    final int numElementsPerProducer = 2000;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Records in a single partition are kept in order
    Collection<ConsumerRecord<byte[], byte[]>> records =
            Iterables.getOnlyElement(
                    testKafkaShuffleProducer(
                                    topic("test_serde", timeCharacteristic),
                                    env,
                                    1,
                                    1,
                                    numElementsPerProducer,
                                    timeCharacteristic)
                            .values());

    switch (timeCharacteristic) {
        case ProcessingTime:
            // NonTimestampContext, no intermediate watermarks, and one end-of-event-time
            // watermark
            assertEquals(numElementsPerProducer + 1, records.size());
            break;
        case IngestionTime:
            // with default interval 200, hence difficult to control the number of watermarks
            break;
        case EventTime:
            // ManualWatermarkContext
            // `numElementsPerProducer` records, `numElementsPerProducer` watermarks, and one
            // end-of-event-time watermark
            assertEquals(numElementsPerProducer * 2 + 1, records.size());
            break;
        default:
            fail("unknown TimeCharacteristic type");
    }

    TypeSerializer<Tuple3<Integer, Long, Integer>> typeSerializer = createTypeSerializer(env);
    KafkaShuffleElementDeserializer<Tuple3<Integer, Long, Integer>> deserializer =
            new KafkaShuffleElementDeserializer<>(typeSerializer);

    int recordIndex = 0;
    int watermarkIndex = 0;
    for (ConsumerRecord<byte[], byte[]> consumerRecord : records) {
        assertNull(consumerRecord.key());
        KafkaShuffleElement element = deserializer.deserialize(consumerRecord);
        if (element.isRecord()) {
            KafkaShuffleRecord<Tuple3<Integer, Long, Integer>> record = element.asRecord();
            switch (timeCharacteristic) {
                case ProcessingTime:
                    assertNull(record.getTimestamp());
                    break;
                case IngestionTime:
                    assertNotNull(record.getTimestamp());
                    break;
                case EventTime:
                    assertEquals(
                            record.getValue().f1.longValue(),
                            record.getTimestamp().longValue());
                    break;
                default:
                    fail("unknown TimeCharacteristic type");
            }
            // Elements must arrive in the order they were produced.
            assertEquals(recordIndex, record.getValue().f0.intValue());
            assertEquals(INIT_TIMESTAMP + recordIndex, record.getValue().f1.longValue());
            assertEquals(0, record.getValue().f2.intValue());
            recordIndex++;
        } else if (element.isWatermark()) {
            KafkaShuffleWatermark watermark = element.asWatermark();
            switch (timeCharacteristic) {
                case ProcessingTime:
                    assertEquals(0, watermark.getSubtask());
                    // the last element is the watermark that signifies end-of-event-time
                    assertEquals(numElementsPerProducer, recordIndex);
                    assertEquals(
                            Watermark.MAX_WATERMARK.getTimestamp(), watermark.getWatermark());
                    break;
                case IngestionTime:
                    break;
                case EventTime:
                    assertEquals(0, watermark.getSubtask());
                    // Each record is followed by its own watermark, so the counters only
                    // match once every record has already been paired with a watermark.
                    if (watermarkIndex == recordIndex) {
                        // the last element is the watermark that signifies end-of-event-time
                        assertEquals(
                                Watermark.MAX_WATERMARK.getTimestamp(),
                                watermark.getWatermark());
                    } else {
                        assertEquals(
                                INIT_TIMESTAMP + watermarkIndex, watermark.getWatermark());
                    }
                    break;
                default:
                    fail("unknown TimeCharacteristic type");
            }
            watermarkIndex++;
        } else {
            fail("KafkaShuffleElement is either record or watermark");
        }
    }

    // Every produced element must have been read back; this is the only record-count check
    // for IngestionTime, where the total number of Kafka records is not asserted above.
    assertEquals(numElementsPerProducer, recordIndex);
}
Aggregations