use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.
the class KafkaRecordDeserializationSchemaTest method getConsumerRecord.
private ConsumerRecord<byte[], byte[]> getConsumerRecord() throws JsonProcessingException {
    ObjectMapper mapper = new ObjectMapper();
    ObjectNode initialKey = mapper.createObjectNode();
    initialKey.put("index", 4);
    byte[] serializedKey = mapper.writeValueAsBytes(initialKey);
    ObjectNode initialValue = mapper.createObjectNode();
    initialValue.put("word", "world");
    byte[] serializedValue = mapper.writeValueAsBytes(initialValue);
    return new ConsumerRecord<>("topic#1", 3, 4L, serializedKey, serializedValue);
}
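A record built this way can also be pushed through a full key/value schema rather than a value-only one. A minimal sketch, assuming Flink's JSONKeyValueDeserializationSchema together with the TestingDeserializationContext and SimpleCollector helpers used elsewhere in this test class; the test name and assertions are illustrative:

@Test
public void testJsonKeyValueDeserialization() throws Exception {
    ConsumerRecord<byte[], byte[]> consumerRecord = getConsumerRecord();
    // Wrap a KafkaDeserializationSchema; 'true' asks for record metadata in the output node.
    KafkaRecordDeserializationSchema<ObjectNode> schema =
            KafkaRecordDeserializationSchema.of(new JSONKeyValueDeserializationSchema(true));
    schema.open(new TestingDeserializationContext());
    SimpleCollector<ObjectNode> collector = new SimpleCollector<>();
    schema.deserialize(consumerRecord, collector);
    ObjectNode node = collector.list.get(0);
    assertEquals(4, node.get("key").get("index").asInt());
    assertEquals("world", node.get("value").get("word").asText());
}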
use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.
the class KafkaRecordDeserializationSchemaTest method testKafkaValueDeserializerWrapper.
@Test
public void testKafkaValueDeserializerWrapper() throws Exception {
    final String topic = "Topic";
    byte[] value = new StringSerializer().serialize(topic, "world");
    final ConsumerRecord<byte[], byte[]> consumerRecord =
            new ConsumerRecord<>(topic, 0, 0L, null, value);
    KafkaRecordDeserializationSchema<String> schema =
            KafkaRecordDeserializationSchema.valueOnly(StringDeserializer.class);
    schema.open(new TestingDeserializationContext());
    SimpleCollector<String> collector = new SimpleCollector<>();
    schema.deserialize(consumerRecord, collector);
    assertEquals(1, collector.list.size());
    assertEquals("world", collector.list.get(0));
}
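Outside of tests, the same value-only wrapper is usually handed to a KafkaSource builder. A hedged wiring sketch; the broker address, topic, group id, and the surrounding StreamExecutionEnvironment env are placeholders, not part of the test above:

// Placeholder broker, topic, and group id; 'env' is an existing StreamExecutionEnvironment.
KafkaSource<String> source =
        KafkaSource.<String>builder()
                .setBootstrapServers("localhost:9092")
                .setTopics("Topic")
                .setGroupId("value-only-demo")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setDeserializer(KafkaRecordDeserializationSchema.valueOnly(StringDeserializer.class))
                .build();
DataStream<String> lines = env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka-source");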
use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.
the class KafkaPartitionSplitReader method fetch.
@Override
public RecordsWithSplitIds<ConsumerRecord<byte[], byte[]>> fetch() throws IOException {
    ConsumerRecords<byte[], byte[]> consumerRecords;
    try {
        consumerRecords = consumer.poll(Duration.ofMillis(POLL_TIMEOUT));
    } catch (WakeupException we) {
        return new KafkaPartitionSplitRecords(ConsumerRecords.empty(), kafkaSourceReaderMetrics);
    }
    KafkaPartitionSplitRecords recordsBySplits =
            new KafkaPartitionSplitRecords(consumerRecords, kafkaSourceReaderMetrics);
    List<TopicPartition> finishedPartitions = new ArrayList<>();
    for (TopicPartition tp : consumerRecords.partitions()) {
        long stoppingOffset = getStoppingOffset(tp);
        final List<ConsumerRecord<byte[], byte[]>> recordsFromPartition = consumerRecords.records(tp);
        if (recordsFromPartition.size() > 0) {
            final ConsumerRecord<byte[], byte[]> lastRecord =
                    recordsFromPartition.get(recordsFromPartition.size() - 1);
            // Finish the split once the record just before the stopping offset has been seen;
            // the record at the stopping offset may not exist, so polling for it could block forever.
            if (lastRecord.offset() >= stoppingOffset - 1) {
                recordsBySplits.setPartitionStoppingOffset(tp, stoppingOffset);
                finishSplitAtRecord(
                        tp, stoppingOffset, lastRecord.offset(), finishedPartitions, recordsBySplits);
            }
        }
        // Track this partition's record lag if it has not been tracked before.
        kafkaSourceReaderMetrics.maybeAddRecordsLagMetric(consumer, tp);
    }
    // Mark empty splits as finished so their state is cleaned up in the split fetcher and source reader.
    if (!emptySplits.isEmpty()) {
        recordsBySplits.finishedSplits.addAll(emptySplits);
        emptySplits.clear();
    }
    // Unassign the partitions that have finished.
    if (!finishedPartitions.isEmpty()) {
        finishedPartitions.forEach(kafkaSourceReaderMetrics::removeRecordsLagMetric);
        unassignPartitions(finishedPartitions);
    }
    // Update numBytesIn.
    kafkaSourceReaderMetrics.updateNumBytesInCounter();
    return recordsBySplits;
}
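The WakeupException branch exists because another thread may interrupt a blocking poll (the split reader's wakeUp() path). A minimal sketch of that contract with a plain KafkaConsumer; consumer and running are assumed to be set up elsewhere:

// Illustrative poll/wakeup pattern: some other thread calls consumer.wakeup(),
// which makes a blocking poll() throw WakeupException instead of returning records.
try {
    while (running) {
        ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(100));
        for (ConsumerRecord<byte[], byte[]> record : records) {
            // handle the record ...
        }
    }
} catch (WakeupException we) {
    // Expected after wakeup(): treat it as "no records this round" and stop fetching.
} finally {
    consumer.close();
}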
use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.
the class KafkaSinkITCase method testAbortTransactionsAfterScaleInBeforeFirstCheckpoint.
@Test
public void testAbortTransactionsAfterScaleInBeforeFirstCheckpoint() throws Exception {
    // Run a first job that opens 5 transactions, one per subtask, and fails in the async checkpoint phase.
    final Configuration config = new Configuration();
    config.set(CoreOptions.DEFAULT_PARALLELISM, 5);
    try {
        executeWithMapper(new FailAsyncCheckpointMapper(0), config, null);
    } catch (Exception e) {
        assertThat(e.getCause().getCause().getMessage(), containsString("Exceeded checkpoint tolerable failure"));
    }
    assertTrue(deserializeValues(drainAllRecordsFromTopic(topic, true)).isEmpty());
    // The second job aborts all transactions left over from the previous, higher-parallelism run.
    config.set(CoreOptions.DEFAULT_PARALLELISM, 1);
    failed.get().set(true);
    executeWithMapper(new FailingCheckpointMapper(failed, lastCheckpointedRecord), config, null);
    final List<ConsumerRecord<byte[], byte[]>> collectedRecords = drainAllRecordsFromTopic(topic, true);
    assertThat(
            deserializeValues(collectedRecords),
            contains(LongStream.range(1, lastCheckpointedRecord.get().get() + 1).boxed().toArray()));
}
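drainAllRecordsFromTopic and deserializeValues are helpers from the test base; the boolean flag is what makes the drain read committed data only, so records from aborted transactions stay invisible. A hedged sketch of what such a drain helper could look like with a plain KafkaConsumer; the property values and the stop condition are assumptions, not the actual helper:

// Illustrative helper: read every currently committed record from a topic.
static List<ConsumerRecord<byte[], byte[]>> drainCommittedRecords(String bootstrapServers, String topic) {
    Properties props = new Properties();
    props.put("bootstrap.servers", bootstrapServers);
    props.put("group.id", "drain-" + UUID.randomUUID());
    props.put("auto.offset.reset", "earliest");
    props.put("isolation.level", "read_committed"); // hide records from aborted transactions
    props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    List<ConsumerRecord<byte[], byte[]>> drained = new ArrayList<>();
    try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
        consumer.subscribe(Collections.singletonList(topic));
        int emptyPolls = 0;
        while (emptyPolls < 3) { // assumed stop condition: a few consecutive empty polls
            ConsumerRecords<byte[], byte[]> batch = consumer.poll(Duration.ofMillis(500));
            if (batch.isEmpty()) {
                emptyPolls++;
            } else {
                emptyPolls = 0;
                batch.forEach(drained::add);
            }
        }
    }
    return drained;
}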
use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.
the class KafkaShuffleITCase method testWatermarkBroadcasting.
/**
 * To test value and watermark serialization and deserialization with time characteristic:
 * EventTime.
 *
 * <p>Producer Parallelism = 2; Kafka Partition # = 3.
 */
@Test
public void testWatermarkBroadcasting() throws Exception {
    final int numberOfPartitions = 3;
    final int producerParallelism = 2;
    final int numElementsPerProducer = 1000;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results =
            testKafkaShuffleProducer(
                    topic("test_watermark_broadcast", EventTime),
                    env,
                    numberOfPartitions,
                    producerParallelism,
                    numElementsPerProducer,
                    EventTime);
    TypeSerializer<Tuple3<Integer, Long, Integer>> typeSerializer = createTypeSerializer(env);
    KafkaShuffleElementDeserializer deserializer = new KafkaShuffleElementDeserializer<>(typeSerializer);
    // Records in a single partition are kept in order.
    for (int p = 0; p < numberOfPartitions; p++) {
        Collection<ConsumerRecord<byte[], byte[]>> records = results.get(p);
        Map<Integer, List<KafkaShuffleWatermark>> watermarks = new HashMap<>();
        for (ConsumerRecord<byte[], byte[]> consumerRecord : records) {
            assertNull(consumerRecord.key());
            KafkaShuffleElement element = deserializer.deserialize(consumerRecord);
            if (element.isRecord()) {
                KafkaShuffleRecord<Tuple3<Integer, Long, Integer>> record = element.asRecord();
                assertEquals(record.getValue().f1.longValue(), INIT_TIMESTAMP + record.getValue().f0);
                assertEquals(record.getTimestamp().longValue(), record.getValue().f1.longValue());
            } else if (element.isWatermark()) {
                KafkaShuffleWatermark watermark = element.asWatermark();
                watermarks.computeIfAbsent(watermark.getSubtask(), k -> new ArrayList<>());
                watermarks.get(watermark.getSubtask()).add(watermark);
            } else {
                fail("KafkaShuffleElement is either record or watermark");
            }
        }
        // Watermarks from the same producer subtask should also stay in order.
        for (List<KafkaShuffleWatermark> subTaskWatermarks : watermarks.values()) {
            int index = 0;
            assertEquals(numElementsPerProducer + 1, subTaskWatermarks.size());
            for (KafkaShuffleWatermark watermark : subTaskWatermarks) {
                if (index == numElementsPerProducer) {
                    // The last element is the watermark that signifies end-of-event-time.
                    assertEquals(watermark.getWatermark(), Watermark.MAX_WATERMARK.getTimestamp());
                } else {
                    assertEquals(watermark.getWatermark(), INIT_TIMESTAMP + index++);
                }
            }
        }
    }
}
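testKafkaShuffleProducer returns the drained records grouped by Kafka partition, which is what the per-partition loop above relies on. A hedged sketch of that grouping step, assuming the records were already drained into a flat list (the helper name is illustrative):

// Illustrative: group drained records by their Kafka partition, matching the shape of 'results'.
static Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> groupByPartition(
        List<ConsumerRecord<byte[], byte[]>> records) {
    Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> byPartition = new HashMap<>();
    for (ConsumerRecord<byte[], byte[]> record : records) {
        byPartition.computeIfAbsent(record.partition(), p -> new ArrayList<>()).add(record);
    }
    return byPartition;
}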