
Example 71 with ConsumerRecord

use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.

the class KafkaRecordDeserializationSchemaTest method getConsumerRecord.

private ConsumerRecord<byte[], byte[]> getConsumerRecord() throws JsonProcessingException {
    ObjectMapper mapper = new ObjectMapper();
    ObjectNode initialKey = mapper.createObjectNode();
    initialKey.put("index", 4);
    byte[] serializedKey = mapper.writeValueAsBytes(initialKey);
    ObjectNode initialValue = mapper.createObjectNode();
    initialValue.put("word", "world");
    byte[] serializedValue = mapper.writeValueAsBytes(initialValue);
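    // ConsumerRecord(topic, partition, offset, key, value): topic "topic#1", partition 3, offset 4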
    return new ConsumerRecord<>("topic#1", 3, 4L, serializedKey, serializedValue);
}
Also used : ObjectNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode) ObjectMapper(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord)

Example 72 with ConsumerRecord

use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.

the class KafkaRecordDeserializationSchemaTest method testKafkaValueDeserializerWrapper.

@Test
public void testKafkaValueDeserializerWrapper() throws Exception {
    final String topic = "Topic";
    byte[] value = new StringSerializer().serialize(topic, "world");
    final ConsumerRecord<byte[], byte[]> consumerRecord = new ConsumerRecord<>(topic, 0, 0L, null, value);
    KafkaRecordDeserializationSchema<String> schema = KafkaRecordDeserializationSchema.valueOnly(StringDeserializer.class);
    schema.open(new TestingDeserializationContext());
    SimpleCollector<String> collector = new SimpleCollector<>();
    schema.deserialize(consumerRecord, collector);
    assertEquals(1, collector.list.size());
    assertEquals("world", collector.list.get(0));
}
Also used : TestingDeserializationContext(org.apache.flink.connector.testutils.source.deserialization.TestingDeserializationContext) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Test(org.junit.Test)
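
The SimpleCollector used above is a small test helper that is not shown in this snippet. A minimal sketch of such a collector, assuming Flink's org.apache.flink.util.Collector interface (the actual helper in KafkaRecordDeserializationSchemaTest may differ), could look like this:

private static class SimpleCollector<T> implements Collector<T> {

    // Buffers every emitted element so the test can assert on it afterwards.
    final List<T> list = new ArrayList<>();

    @Override
    public void collect(T record) {
        list.add(record);
    }

    @Override
    public void close() {
        // nothing to clean up
    }
}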

Example 73 with ConsumerRecord

use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.

the class KafkaPartitionSplitReader method fetch.

@Override
public RecordsWithSplitIds<ConsumerRecord<byte[], byte[]>> fetch() throws IOException {
    ConsumerRecords<byte[], byte[]> consumerRecords;
    try {
        consumerRecords = consumer.poll(Duration.ofMillis(POLL_TIMEOUT));
    } catch (WakeupException we) {
        return new KafkaPartitionSplitRecords(ConsumerRecords.empty(), kafkaSourceReaderMetrics);
    }
    KafkaPartitionSplitRecords recordsBySplits = new KafkaPartitionSplitRecords(consumerRecords, kafkaSourceReaderMetrics);
    List<TopicPartition> finishedPartitions = new ArrayList<>();
    for (TopicPartition tp : consumerRecords.partitions()) {
        long stoppingOffset = getStoppingOffset(tp);
        final List<ConsumerRecord<byte[], byte[]>> recordsFromPartition = consumerRecords.records(tp);
        if (recordsFromPartition.size() > 0) {
            final ConsumerRecord<byte[], byte[]> lastRecord = recordsFromPartition.get(recordsFromPartition.size() - 1);
            // Stop fetching this split once the last record's offset reaches stoppingOffset - 1:
            // the record at stoppingOffset may never exist, so polling further would block forever.
            if (lastRecord.offset() >= stoppingOffset - 1) {
                recordsBySplits.setPartitionStoppingOffset(tp, stoppingOffset);
                finishSplitAtRecord(tp, stoppingOffset, lastRecord.offset(), finishedPartitions, recordsBySplits);
            }
        }
        // Start tracking this partition's record lag if it has not been seen before
        kafkaSourceReaderMetrics.maybeAddRecordsLagMetric(consumer, tp);
    }
    // Splits discovered as empty are added to the finished splits so that their state can be
    // cleaned up in the split fetcher and the source reader.
    if (!emptySplits.isEmpty()) {
        recordsBySplits.finishedSplits.addAll(emptySplits);
        emptySplits.clear();
    }
    // Unassign the partitions that have finished.
    if (!finishedPartitions.isEmpty()) {
        finishedPartitions.forEach(kafkaSourceReaderMetrics::removeRecordsLagMetric);
        unassignPartitions(finishedPartitions);
    }
    // Update numBytesIn
    kafkaSourceReaderMetrics.updateNumBytesInCounter();
    return recordsBySplits;
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) ArrayList(java.util.ArrayList) WakeupException(org.apache.kafka.common.errors.WakeupException) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord)
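
The comparison against stoppingOffset - 1 above works because the stopping offset is treated as exclusive: a split with stopping offset N covers offsets up to N - 1, and a record at offset N may never exist. A minimal illustration with arbitrarily chosen values:

// Hypothetical values for illustration only.
long stoppingOffset = 10L;       // split covers offsets 0..9
long lastRecordOffset = 9L;      // offset of the last record returned by poll()
boolean splitFinished = lastRecordOffset >= stoppingOffset - 1;  // true, so the split is finished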

Example 74 with ConsumerRecord

use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.

the class KafkaSinkITCase method testAbortTransactionsAfterScaleInBeforeFirstCheckpoint.

@Test
public void testAbortTransactionsAfterScaleInBeforeFirstCheckpoint() throws Exception {
    // Run a first job that opens 5 transactions (one per subtask) and fails in the async checkpoint phase
    final Configuration config = new Configuration();
    config.set(CoreOptions.DEFAULT_PARALLELISM, 5);
    try {
        executeWithMapper(new FailAsyncCheckpointMapper(0), config, null);
    } catch (Exception e) {
        assertThat(e.getCause().getCause().getMessage(), containsString("Exceeded checkpoint tolerable failure"));
    }
    assertTrue(deserializeValues(drainAllRecordsFromTopic(topic, true)).isEmpty());
    // Second job aborts all transactions from previous runs with higher parallelism
    config.set(CoreOptions.DEFAULT_PARALLELISM, 1);
    failed.get().set(true);
    executeWithMapper(new FailingCheckpointMapper(failed, lastCheckpointedRecord), config, null);
    final List<ConsumerRecord<byte[], byte[]>> collectedRecords = drainAllRecordsFromTopic(topic, true);
    assertThat(deserializeValues(collectedRecords), contains(LongStream.range(1, lastCheckpointedRecord.get().get() + 1).boxed().toArray()));
}
Also used : Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Test(org.junit.Test)
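
drainAllRecordsFromTopic and deserializeValues are helpers defined elsewhere in KafkaSinkITCase and are not shown here. As a rough, hypothetical sketch only (the real helper's boolean flag presumably controls whether only committed records are read), draining a topic with a plain KafkaConsumer could look like the following, assuming props already carries bootstrap.servers and group.id:

static List<ConsumerRecord<byte[], byte[]>> drainAll(String topic, Properties props) {
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
        consumer.subscribe(Collections.singletonList(topic));
        List<ConsumerRecord<byte[], byte[]>> drained = new ArrayList<>();
        ConsumerRecords<byte[], byte[]> batch;
        do {
            // Keep polling until a poll returns no further records.
            batch = consumer.poll(Duration.ofSeconds(1));
            batch.forEach(drained::add);
        } while (!batch.isEmpty());
        return drained;
    }
}
Also used (in this sketch): ConsumerConfig, KafkaConsumer, ConsumerRecords (org.apache.kafka.clients.consumer), ByteArrayDeserializer (org.apache.kafka.common.serialization), Collections, Properties (java.util), Duration (java.time).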

Example 75 with ConsumerRecord

use of org.apache.kafka.clients.consumer.ConsumerRecord in project flink by apache.

the class KafkaShuffleITCase method testWatermarkBroadcasting.

/**
 * To test watermark broadcasting as well as value and watermark serialization and
 * deserialization with time characteristic: EventTime.
 *
 * <p>Producer Parallelism = 2; Kafka Partition # = 3.
 */
@Test
public void testWatermarkBroadcasting() throws Exception {
    final int numberOfPartitions = 3;
    final int producerParallelism = 2;
    final int numElementsPerProducer = 1000;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results = testKafkaShuffleProducer(topic("test_watermark_broadcast", EventTime), env, numberOfPartitions, producerParallelism, numElementsPerProducer, EventTime);
    TypeSerializer<Tuple3<Integer, Long, Integer>> typeSerializer = createTypeSerializer(env);
    KafkaShuffleElementDeserializer deserializer = new KafkaShuffleElementDeserializer<>(typeSerializer);
    // Records in a single partition are kept in order
    for (int p = 0; p < numberOfPartitions; p++) {
        Collection<ConsumerRecord<byte[], byte[]>> records = results.get(p);
        Map<Integer, List<KafkaShuffleWatermark>> watermarks = new HashMap<>();
        for (ConsumerRecord<byte[], byte[]> consumerRecord : records) {
            assertNull(consumerRecord.key());
            KafkaShuffleElement element = deserializer.deserialize(consumerRecord);
            if (element.isRecord()) {
                KafkaShuffleRecord<Tuple3<Integer, Long, Integer>> record = element.asRecord();
                assertEquals(record.getValue().f1.longValue(), INIT_TIMESTAMP + record.getValue().f0);
                assertEquals(record.getTimestamp().longValue(), record.getValue().f1.longValue());
            } else if (element.isWatermark()) {
                KafkaShuffleWatermark watermark = element.asWatermark();
                watermarks.computeIfAbsent(watermark.getSubtask(), k -> new ArrayList<>());
                watermarks.get(watermark.getSubtask()).add(watermark);
            } else {
                fail("KafkaShuffleElement is either record or watermark");
            }
        }
        // Watermarks are broadcast to every partition, so each partition should see watermarks
        // from all producer subtasks; watermarks from the same producer subtask must stay in order.
        for (List<KafkaShuffleWatermark> subTaskWatermarks : watermarks.values()) {
            int index = 0;
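            // expect one watermark per element plus the final end-of-event-time watermark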
            assertEquals(numElementsPerProducer + 1, subTaskWatermarks.size());
            for (KafkaShuffleWatermark watermark : subTaskWatermarks) {
                if (index == numElementsPerProducer) {
                    // the last element is the watermark that signifies end-of-event-time
                    assertEquals(watermark.getWatermark(), Watermark.MAX_WATERMARK.getTimestamp());
                } else {
                    assertEquals(watermark.getWatermark(), INIT_TIMESTAMP + index++);
                }
            }
        }
    }
}
Also used : KafkaShuffleElementDeserializer(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleElementDeserializer) KafkaShuffleElement(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleElement) KafkaShuffleRecord(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleRecord) KafkaShuffleWatermark(org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleWatermark) Watermark(org.apache.flink.streaming.api.watermark.Watermark) EventTime(org.apache.flink.streaming.api.TimeCharacteristic.EventTime) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) Tuple3(org.apache.flink.api.java.tuple.Tuple3) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) Collection(java.util.Collection) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Test(org.junit.Test) Assert.assertEquals(org.junit.Assert.assertEquals) Assert.assertNull(org.junit.Assert.assertNull) Assert.fail(org.junit.Assert.fail)

Aggregations

ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 314
TopicPartition (org.apache.kafka.common.TopicPartition): 160
Test (org.junit.Test): 145
ArrayList (java.util.ArrayList): 123
List (java.util.List): 100
HashMap (java.util.HashMap): 98
Map (java.util.Map): 70
RecordHeaders (org.apache.kafka.common.header.internals.RecordHeaders): 61
ConsumerRecords (org.apache.kafka.clients.consumer.ConsumerRecords): 51
Test (org.junit.jupiter.api.Test): 35
PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest): 33
KafkaConsumer (org.apache.kafka.clients.consumer.KafkaConsumer): 31
OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 31
LinkedHashMap (java.util.LinkedHashMap): 30
Header (org.apache.kafka.common.header.Header): 29
RecordHeader (org.apache.kafka.common.header.internals.RecordHeader): 28
TimeUnit (java.util.concurrent.TimeUnit): 27
Set (java.util.Set): 24
Collectors (java.util.stream.Collectors): 24
ByteBuffer (java.nio.ByteBuffer): 22