
Example 1 with OffsetResetStrategy

Use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project flink by apache.

From class KafkaDynamicSource, method createKafkaSource.

// --------------------------------------------------------------------------------------------
protected KafkaSource<RowData> createKafkaSource(DeserializationSchema<RowData> keyDeserialization, DeserializationSchema<RowData> valueDeserialization, TypeInformation<RowData> producedTypeInfo) {
    final KafkaDeserializationSchema<RowData> kafkaDeserializer = createKafkaDeserializationSchema(keyDeserialization, valueDeserialization, producedTypeInfo);
    final KafkaSourceBuilder<RowData> kafkaSourceBuilder = KafkaSource.builder();
    if (topics != null) {
        kafkaSourceBuilder.setTopics(topics);
    } else {
        kafkaSourceBuilder.setTopicPattern(topicPattern);
    }
    switch(startupMode) {
        case EARLIEST:
            kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.earliest());
            break;
        case LATEST:
            kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.latest());
            break;
        case GROUP_OFFSETS:
            String offsetResetConfig = properties.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, OffsetResetStrategy.NONE.name());
            OffsetResetStrategy offsetResetStrategy = getResetStrategy(offsetResetConfig);
            kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.committedOffsets(offsetResetStrategy));
            break;
        case SPECIFIC_OFFSETS:
            Map<TopicPartition, Long> offsets = new HashMap<>();
            specificStartupOffsets.forEach((tp, offset) -> offsets.put(new TopicPartition(tp.getTopic(), tp.getPartition()), offset));
            kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.offsets(offsets));
            break;
        case TIMESTAMP:
            kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.timestamp(startupTimestampMillis));
            break;
    }
    kafkaSourceBuilder.setProperties(properties).setDeserializer(KafkaRecordDeserializationSchema.of(kafkaDeserializer));
    return kafkaSourceBuilder.build();
}
Also used : RowData(org.apache.flink.table.data.RowData) OffsetResetStrategy(org.apache.kafka.clients.consumer.OffsetResetStrategy) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition)
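
The getResetStrategy helper called in the GROUP_OFFSETS branch is not part of this snippet; the following is a minimal sketch of what such a helper could look like, assuming it only maps the auto.offset.reset property value onto the OffsetResetStrategy enum and rejects anything else (java.util.Arrays is assumed to be imported):

private static OffsetResetStrategy getResetStrategy(String offsetResetConfig) {
    // auto.offset.reset accepts "earliest", "latest" and "none"; the enum constants
    // carry the same names in upper case, so a case-insensitive match is enough.
    return Arrays.stream(OffsetResetStrategy.values())
            .filter(strategy -> strategy.name().equalsIgnoreCase(offsetResetConfig))
            .findFirst()
            .orElseThrow(() -> new IllegalArgumentException(
                    "Unknown auto.offset.reset value: " + offsetResetConfig));
}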

Example 2 with OffsetResetStrategy

Use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project beam by apache.

From class KafkaIOTest, method mkMockConsumer.

// Update mock consumer with records distributed among the given topics, each with given number
// of partitions. Records are assigned in round-robin order among the partitions.
private static MockConsumer<byte[], byte[]> mkMockConsumer(List<String> topics, int partitionsPerTopic, int numElements, OffsetResetStrategy offsetResetStrategy, Map<String, Object> config, SerializableFunction<Integer, byte[]> keyFunction, SerializableFunction<Integer, byte[]> valueFunction) {
    final List<TopicPartition> partitions = new ArrayList<>();
    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> records = new HashMap<>();
    Map<String, List<PartitionInfo>> partitionMap = new HashMap<>();
    for (String topic : topics) {
        List<PartitionInfo> partIds = new ArrayList<>(partitionsPerTopic);
        for (int i = 0; i < partitionsPerTopic; i++) {
            TopicPartition tp = new TopicPartition(topic, i);
            partitions.add(tp);
            partIds.add(new PartitionInfo(topic, i, null, null, null));
            records.put(tp, new ArrayList<>());
        }
        partitionMap.put(topic, partIds);
    }
    int numPartitions = partitions.size();
    final long[] offsets = new long[numPartitions];
    long timestampStartMillis = (Long) config.getOrDefault(TIMESTAMP_START_MILLIS_CONFIG, LOG_APPEND_START_TIME.getMillis());
    TimestampType timestampType = TimestampType.forName((String) config.getOrDefault(TIMESTAMP_TYPE_CONFIG, TimestampType.LOG_APPEND_TIME.toString()));
    for (int i = 0; i < numElements; i++) {
        int pIdx = i % numPartitions;
        TopicPartition tp = partitions.get(pIdx);
        byte[] key = keyFunction.apply(i);
        byte[] value = valueFunction.apply(i);
        records.get(tp).add(new ConsumerRecord<>(tp.topic(), tp.partition(), offsets[pIdx]++, timestampStartMillis + Duration.standardSeconds(i).getMillis(), timestampType, 0, key.length, value.length, key, value));
    }
    // This is updated when reader assigns partitions.
    final AtomicReference<List<TopicPartition>> assignedPartitions = new AtomicReference<>(Collections.<TopicPartition>emptyList());
    final MockConsumer<byte[], byte[]> consumer = new MockConsumer<byte[], byte[]>(offsetResetStrategy) {

        @Override
        public synchronized void assign(final Collection<TopicPartition> assigned) {
            super.assign(assigned);
            assignedPartitions.set(ImmutableList.copyOf(assigned));
            for (TopicPartition tp : assigned) {
                updateBeginningOffsets(ImmutableMap.of(tp, 0L));
                updateEndOffsets(ImmutableMap.of(tp, (long) records.get(tp).size()));
            }
        }

        // Override offsetsForTimes() in order to look up the offsets by timestamp.
        @Override
        public synchronized Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes(Map<TopicPartition, Long> timestampsToSearch) {
            return timestampsToSearch.entrySet().stream().map(e -> {
                // In test scope, timestamp == offset.
                long maxOffset = offsets[partitions.indexOf(e.getKey())];
                long offset = e.getValue();
                OffsetAndTimestamp value = (offset >= maxOffset) ? null : new OffsetAndTimestamp(offset, offset);
                return new SimpleEntry<>(e.getKey(), value);
            }).collect(Collectors.toMap(SimpleEntry::getKey, SimpleEntry::getValue));
        }
    };
    for (String topic : topics) {
        consumer.updatePartitions(topic, partitionMap.get(topic));
    }
    // MockConsumer does not maintain any relationship between partition seek position and the
    // records added. e.g. if we add 10 records to a partition and then seek to end of the
    // partition, MockConsumer is still going to return the 10 records in next poll. It is
    // our responsibility to make sure currently enqueued records sync with partition offsets.
    // The following task will be called inside each invocation to MockConsumer.poll().
    // We enqueue only the records with the offset >= partition's current position.
    Runnable recordEnqueueTask = new Runnable() {

        @Override
        public void run() {
            // add all the records with offset >= current partition position.
            int recordsAdded = 0;
            for (TopicPartition tp : assignedPartitions.get()) {
                long curPos = consumer.position(tp);
                for (ConsumerRecord<byte[], byte[]> r : records.get(tp)) {
                    if (r.offset() >= curPos) {
                        consumer.addRecord(r);
                        recordsAdded++;
                    }
                }
            }
            if (recordsAdded == 0) {
                if (config.get("inject.error.at.eof") != null) {
                    consumer.setException(new KafkaException("Injected error in consumer.poll()"));
                }
                // MockConsumer.poll(timeout) does not actually wait even when there aren't any
                // records.
                // Add a small wait here in order to avoid busy looping in the reader.
                Uninterruptibles.sleepUninterruptibly(10, TimeUnit.MILLISECONDS);
            // TODO: BEAM-4086: testUnboundedSourceWithoutBoundedWrapper() occasionally hangs
            // without this wait. Need to look into it.
            }
            consumer.schedulePollTask(this);
        }
    };
    consumer.schedulePollTask(recordEnqueueTask);
    return consumer;
}
Also used : Count(org.apache.beam.sdk.transforms.Count) MetricName(org.apache.beam.sdk.metrics.MetricName) Arrays(java.util.Arrays) PipelineExecutionException(org.apache.beam.sdk.Pipeline.PipelineExecutionException) SchemaRegistryClient(io.confluent.kafka.schemaregistry.client.SchemaRegistryClient) CoderUtils(org.apache.beam.sdk.util.CoderUtils) KafkaAvroSerializer(io.confluent.kafka.serializers.KafkaAvroSerializer) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource) KafkaException(org.apache.kafka.common.KafkaException) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) OffsetResetStrategy(org.apache.kafka.clients.consumer.OffsetResetStrategy) Map(java.util.Map) Window(org.apache.beam.sdk.transforms.windowing.Window) Uninterruptibles(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.Uninterruptibles) TimestampType(org.apache.kafka.common.record.TimestampType) MockSchemaRegistry(io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry) MetricResult(org.apache.beam.sdk.metrics.MetricResult) UnboundedReader(org.apache.beam.sdk.io.UnboundedSource.UnboundedReader) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) PartitionInfo(org.apache.kafka.common.PartitionInfo) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) StandardCharsets(java.nio.charset.StandardCharsets) Executors(java.util.concurrent.Executors) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) MetricQueryResults(org.apache.beam.sdk.metrics.MetricQueryResults) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) DisplayDataMatchers.hasDisplayItem(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) RecordHeader(org.apache.kafka.common.header.internals.RecordHeader) ArrayList(java.util.ArrayList) SinkMetrics(org.apache.beam.sdk.metrics.SinkMetrics) Read(org.apache.beam.sdk.io.Read) Distinct(org.apache.beam.sdk.transforms.Distinct) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) IntegerSerializer(org.apache.kafka.common.serialization.IntegerSerializer) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Deserializer(org.apache.kafka.common.serialization.Deserializer) DoFn(org.apache.beam.sdk.transforms.DoFn) ByteArrayDeserializer(org.apache.kafka.common.serialization.ByteArrayDeserializer) ThrowableMessageMatcher.hasMessage(org.junit.internal.matchers.ThrowableMessageMatcher.hasMessage) PAssert(org.apache.beam.sdk.testing.PAssert) Producer(org.apache.kafka.clients.producer.Producer) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) AbstractKafkaAvroSerDeConfig(io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig) MetricResultsMatchers.attemptedMetricsResult(org.apache.beam.sdk.metrics.MetricResultsMatchers.attemptedMetricsResult) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Assert.assertNull(org.junit.Assert.assertNull) IsIterableWithSize(org.hamcrest.collection.IsIterableWithSize) Serializer(org.apache.kafka.common.serialization.Serializer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Assert.assertEquals(org.junit.Assert.assertEquals) 
MockProducer(org.apache.kafka.clients.producer.MockProducer) SourceMetrics(org.apache.beam.sdk.metrics.SourceMetrics) Matchers.isA(org.hamcrest.Matchers.isA) MockConsumer(org.apache.kafka.clients.consumer.MockConsumer) PipelineResult(org.apache.beam.sdk.PipelineResult) LoggerFactory(org.slf4j.LoggerFactory) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ByteBuffer(java.nio.ByteBuffer) PCollectionList(org.apache.beam.sdk.values.PCollectionList) ThrowableCauseMatcher.hasCause(org.junit.internal.matchers.ThrowableCauseMatcher.hasCause) Method(java.lang.reflect.Method) Flatten(org.apache.beam.sdk.transforms.Flatten) MapElements(org.apache.beam.sdk.transforms.MapElements) Min(org.apache.beam.sdk.transforms.Min) Consumer(org.apache.kafka.clients.consumer.Consumer) TopicPartition(org.apache.kafka.common.TopicPartition) Collection(java.util.Collection) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) LongSerializer(org.apache.kafka.common.serialization.LongSerializer) Collectors(java.util.stream.Collectors) List(java.util.List) Max(org.apache.beam.sdk.transforms.Max) ParDo(org.apache.beam.sdk.transforms.ParDo) Header(org.apache.kafka.common.header.Header) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) Optional(java.util.Optional) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Assume.assumeTrue(org.junit.Assume.assumeTrue) Values(org.apache.beam.sdk.transforms.Values) MetricNameFilter(org.apache.beam.sdk.metrics.MetricNameFilter) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) Headers(org.apache.kafka.common.header.Headers) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AvroGeneratedUser(org.apache.beam.sdk.io.AvroGeneratedUser) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) AtomicReference(java.util.concurrent.atomic.AtomicReference) ConcurrentMap(java.util.concurrent.ConcurrentMap) SimpleEntry(java.util.AbstractMap.SimpleEntry) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) ExpectedException(org.junit.rules.ExpectedException) ExecutorService(java.util.concurrent.ExecutorService) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Utils(org.apache.kafka.common.utils.Utils) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) Assert.assertNotNull(org.junit.Assert.assertNotNull) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) MetricsFilter(org.apache.beam.sdk.metrics.MetricsFilter) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) Ignore(org.junit.Ignore) ConfluentSchemaRegistryDeserializerProviderTest.mockDeserializerProvider(org.apache.beam.sdk.io.kafka.ConfluentSchemaRegistryDeserializerProviderTest.mockDeserializerProvider) Instant(org.joda.time.Instant) IntegerDeserializer(org.apache.kafka.common.serialization.IntegerDeserializer) Comparator(java.util.Comparator) Collections(java.util.Collections) 
IsIterableContainingInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder)
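
A consumer created by mkMockConsumer is typically injected into the pipeline under test through KafkaIO's consumer factory hook. The sketch below shows one plausible wiring; the topic name, partition count, element count, bootstrap server and key/value functions are illustrative assumptions, not values taken from the test:

// Hand the mock consumer to KafkaIO so the unbounded source reads the canned records.
KafkaIO.Read<byte[], byte[]> read =
        KafkaIO.<byte[], byte[]>read()
                .withBootstrapServers("none")
                .withTopic("test-topic")
                .withConsumerFactoryFn(config ->
                        mkMockConsumer(
                                ImmutableList.of("test-topic"),
                                // 10 partitions per topic, 1000 elements (illustrative)
                                10,
                                1000,
                                OffsetResetStrategy.EARLIEST,
                                config,
                                i -> ByteBuffer.wrap(new byte[8]).putLong(i).array(),
                                i -> ByteBuffer.wrap(new byte[8]).putLong(i).array()))
                .withKeyDeserializer(ByteArrayDeserializer.class)
                .withValueDeserializer(ByteArrayDeserializer.class)
                .withMaxNumRecords(1000);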

Example 3 with OffsetResetStrategy

Use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project beam by apache.

From class KafkaTestTable, method mkMockConsumer.

private MockConsumer<byte[], byte[]> mkMockConsumer(Map<String, Object> config) {
    OffsetResetStrategy offsetResetStrategy = OffsetResetStrategy.EARLIEST;
    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> kafkaRecords = new HashMap<>();
    Map<String, List<PartitionInfo>> partitionInfoMap = new HashMap<>();
    Map<String, List<TopicPartition>> partitionMap = new HashMap<>();
    // Create topic partitions
    for (String topic : this.getTopics()) {
        List<PartitionInfo> partIds = new ArrayList<>(partitionsPerTopic);
        List<TopicPartition> topicParitions = new ArrayList<>(partitionsPerTopic);
        for (int i = 0; i < partitionsPerTopic; i++) {
            TopicPartition tp = new TopicPartition(topic, i);
            topicParitions.add(tp);
            partIds.add(new PartitionInfo(topic, i, null, null, null));
            kafkaRecords.put(tp, new ArrayList<>());
        }
        partitionInfoMap.put(topic, partIds);
        partitionMap.put(topic, topicParitions);
    }
    TimestampType timestampType = TimestampType.forName((String) config.getOrDefault(TIMESTAMP_TYPE_CONFIG, TimestampType.LOG_APPEND_TIME.toString()));
    for (KafkaTestRecord record : this.records) {
        int partitionIndex = record.getKey().hashCode() % partitionsPerTopic;
        TopicPartition tp = partitionMap.get(record.getTopic()).get(partitionIndex);
        byte[] key = record.getKey().getBytes(UTF_8);
        byte[] value = record.getValue().toByteArray();
        kafkaRecords.get(tp).add(new ConsumerRecord<>(tp.topic(), tp.partition(), kafkaRecords.get(tp).size(), record.getTimeStamp(), timestampType, 0, key.length, value.length, key, value));
    }
    // This is updated when reader assigns partitions.
    final AtomicReference<List<TopicPartition>> assignedPartitions = new AtomicReference<>(Collections.emptyList());
    final MockConsumer<byte[], byte[]> consumer = new MockConsumer<byte[], byte[]>(offsetResetStrategy) {

        @Override
        public synchronized void assign(final Collection<TopicPartition> assigned) {
            Collection<TopicPartition> realPartitions = assigned.stream().map(part -> partitionMap.get(part.topic()).get(part.partition())).collect(Collectors.toList());
            super.assign(realPartitions);
            assignedPartitions.set(ImmutableList.copyOf(realPartitions));
            for (TopicPartition tp : realPartitions) {
                updateBeginningOffsets(ImmutableMap.of(tp, 0L));
                updateEndOffsets(ImmutableMap.of(tp, (long) kafkaRecords.get(tp).size()));
            }
        }

        // Override offsetsForTimes() in order to look up the offsets by timestamp.
        @Override
        public synchronized Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes(Map<TopicPartition, Long> timestampsToSearch) {
            return timestampsToSearch.entrySet().stream().map(e -> {
                // In test scope, timestamp == offset.
                long maxOffset = kafkaRecords.get(e.getKey()).size();
                long offset = e.getValue();
                OffsetAndTimestamp value = (offset >= maxOffset) ? null : new OffsetAndTimestamp(offset, offset);
                return new AbstractMap.SimpleEntry<>(e.getKey(), value);
            }).collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue));
        }
    };
    for (String topic : getTopics()) {
        consumer.updatePartitions(topic, partitionInfoMap.get(topic));
    }
    Runnable recordEnqueueTask = new Runnable() {

        @Override
        public void run() {
            // add all the records with offset >= current partition position.
            int recordsAdded = 0;
            for (TopicPartition tp : assignedPartitions.get()) {
                long curPos = consumer.position(tp);
                for (ConsumerRecord<byte[], byte[]> r : kafkaRecords.get(tp)) {
                    if (r.offset() >= curPos) {
                        consumer.addRecord(r);
                        recordsAdded++;
                    }
                }
            }
            if (recordsAdded == 0) {
                if (config.get("inject.error.at.eof") != null) {
                    consumer.setException(new KafkaException("Injected error in consumer.poll()"));
                }
                // MockConsumer.poll(timeout) does not actually wait even when there aren't any
                // records.
                // Add a small wait here in order to avoid busy looping in the reader.
                Uninterruptibles.sleepUninterruptibly(10, TimeUnit.MILLISECONDS);
            }
            consumer.schedulePollTask(this);
        }
    };
    consumer.schedulePollTask(recordEnqueueTask);
    return consumer;
}
Also used : ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) MockConsumer(org.apache.kafka.clients.consumer.MockConsumer) KafkaException(org.apache.kafka.common.KafkaException) HashMap(java.util.HashMap) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) OffsetResetStrategy(org.apache.kafka.clients.consumer.OffsetResetStrategy) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) KafkaIO(org.apache.beam.sdk.io.kafka.KafkaIO) PTransform(org.apache.beam.sdk.transforms.PTransform) Map(java.util.Map) Uninterruptibles(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.Uninterruptibles) Row(org.apache.beam.sdk.values.Row) TimestampType(org.apache.kafka.common.record.TimestampType) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaRecord(org.apache.beam.sdk.io.kafka.KafkaRecord) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Collection(java.util.Collection) PartitionInfo(org.apache.kafka.common.PartitionInfo) PCollection(org.apache.beam.sdk.values.PCollection) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) Collectors(java.util.stream.Collectors) Schema(org.apache.beam.sdk.schemas.Schema) TimeUnit(java.util.concurrent.TimeUnit) AbstractMap(java.util.AbstractMap) List(java.util.List) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Collections(java.util.Collections)
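
Note that record.getKey().hashCode() can be negative, so the modulo used above to pick a partition index may produce a negative value for some keys. A defensive variant (a sketch, not what KafkaTestTable actually does) is:

// Math.floorMod always yields a result in [0, partitionsPerTopic), even for
// negative hash codes, so every key maps to a valid partition index.
int partitionIndex = Math.floorMod(record.getKey().hashCode(), partitionsPerTopic);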

Example 4 with OffsetResetStrategy

Use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project kafka by apache.

From class Fetcher, method resetOffset.

/**
     * Reset offsets for the given partition using the offset reset strategy.
     *
     * @param partition The partition whose offset needs to be reset
     * @throws org.apache.kafka.clients.consumer.NoOffsetForPartitionException If no offset reset strategy is defined
     */
private void resetOffset(TopicPartition partition) {
    OffsetResetStrategy strategy = subscriptions.resetStrategy(partition);
    log.debug("Resetting offset for partition {} to {} offset.", partition, strategy.name().toLowerCase(Locale.ROOT));
    final long timestamp;
    if (strategy == OffsetResetStrategy.EARLIEST)
        timestamp = ListOffsetRequest.EARLIEST_TIMESTAMP;
    else if (strategy == OffsetResetStrategy.LATEST)
        timestamp = ListOffsetRequest.LATEST_TIMESTAMP;
    else
        throw new NoOffsetForPartitionException(partition);
    Map<TopicPartition, OffsetData> offsetsByTimes = retrieveOffsetsByTimes(Collections.singletonMap(partition, timestamp), Long.MAX_VALUE, false);
    OffsetData offsetData = offsetsByTimes.get(partition);
    if (offsetData == null)
        throw new NoOffsetForPartitionException(partition);
    long offset = offsetData.offset;
    // we might lose the assignment while fetching the offset, so check it is still active
    if (subscriptions.isAssigned(partition))
        this.subscriptions.seek(partition, offset);
}
Also used : OffsetResetStrategy(org.apache.kafka.clients.consumer.OffsetResetStrategy) TopicPartition(org.apache.kafka.common.TopicPartition) NoOffsetForPartitionException(org.apache.kafka.clients.consumer.NoOffsetForPartitionException)
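
The strategy consulted here comes from the consumer's auto.offset.reset setting. A minimal, self-contained illustration of how that setting feeds into this code path, using the current consumer API (broker address, group id and topic are placeholders):

Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
// "earliest" and "latest" select a reset strategy; "none" leaves the consumer with
// OffsetResetStrategy.NONE, so the branch above throws NoOffsetForPartitionException
// when no committed offset exists for an assigned partition.
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");

try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
    consumer.subscribe(Collections.singletonList("example-topic"));
    consumer.poll(Duration.ofSeconds(1)); // may throw NoOffsetForPartitionException
}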

Example 5 with OffsetResetStrategy

Use of org.apache.kafka.clients.consumer.OffsetResetStrategy in project kafka by apache.

From class FetcherTest, method testOffsetValidationWithGivenEpochOffset.

private void testOffsetValidationWithGivenEpochOffset(int leaderEpoch, long endOffset, OffsetResetStrategy offsetResetStrategy) {
    buildFetcher(offsetResetStrategy);
    assignFromUser(singleton(tp0));
    Map<String, Integer> partitionCounts = new HashMap<>();
    partitionCounts.put(tp0.topic(), 4);
    final int epochOne = 1;
    final long initialOffset = 5;
    metadata.updateWithCurrentRequestVersion(RequestTestUtils.metadataUpdateWithIds("dummy", 1, Collections.emptyMap(), partitionCounts, tp -> epochOne, topicIds), false, 0L);
    // Offset validation requires OffsetForLeaderEpoch request v3 or higher
    Node node = metadata.fetch().nodes().get(0);
    apiVersions.update(node.idString(), NodeApiVersions.create());
    Metadata.LeaderAndEpoch leaderAndEpoch = new Metadata.LeaderAndEpoch(metadata.currentLeader(tp0).leader, Optional.of(epochOne));
    subscriptions.seekUnvalidated(tp0, new SubscriptionState.FetchPosition(initialOffset, Optional.of(epochOne), leaderAndEpoch));
    fetcher.validateOffsetsIfNeeded();
    consumerClient.poll(time.timer(Duration.ZERO));
    assertTrue(subscriptions.awaitingValidation(tp0));
    assertTrue(client.hasInFlightRequests());
    client.respond(offsetsForLeaderEpochRequestMatcher(tp0, epochOne, epochOne), prepareOffsetsForLeaderEpochResponse(tp0, Errors.NONE, leaderEpoch, endOffset));
    consumerClient.poll(time.timer(Duration.ZERO));
    if (offsetResetStrategy == OffsetResetStrategy.NONE) {
        LogTruncationException thrown = assertThrows(LogTruncationException.class, () -> fetcher.validateOffsetsIfNeeded());
        assertEquals(singletonMap(tp0, initialOffset), thrown.offsetOutOfRangePartitions());
        if (endOffset == UNDEFINED_EPOCH_OFFSET || leaderEpoch == UNDEFINED_EPOCH) {
            assertEquals(Collections.emptyMap(), thrown.divergentOffsets());
        } else {
            OffsetAndMetadata expectedDivergentOffset = new OffsetAndMetadata(endOffset, Optional.of(leaderEpoch), "");
            assertEquals(singletonMap(tp0, expectedDivergentOffset), thrown.divergentOffsets());
        }
        assertTrue(subscriptions.awaitingValidation(tp0));
    } else {
        fetcher.validateOffsetsIfNeeded();
        assertFalse(subscriptions.awaitingValidation(tp0));
    }
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) MockTime(org.apache.kafka.common.utils.MockTime) Arrays(java.util.Arrays) ListOffsetsRequest(org.apache.kafka.common.requests.ListOffsetsRequest) SerializationException(org.apache.kafka.common.errors.SerializationException) KafkaException(org.apache.kafka.common.KafkaException) DefaultRecordBatch(org.apache.kafka.common.record.DefaultRecordBatch) OffsetResetStrategy(org.apache.kafka.clients.consumer.OffsetResetStrategy) Collections.singletonList(java.util.Collections.singletonList) ClientUtils(org.apache.kafka.clients.ClientUtils) Cluster(org.apache.kafka.common.Cluster) Future(java.util.concurrent.Future) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) DataOutputStream(java.io.DataOutputStream) ApiVersionsResponse(org.apache.kafka.common.requests.ApiVersionsResponse) Arrays.asList(java.util.Arrays.asList) RecordBatch(org.apache.kafka.common.record.RecordBatch) ListOffsetsResponse(org.apache.kafka.common.requests.ListOffsetsResponse) LogContext(org.apache.kafka.common.utils.LogContext) Duration(java.time.Duration) Map(java.util.Map) FetchResponse(org.apache.kafka.common.requests.FetchResponse) TimestampType(org.apache.kafka.common.record.TimestampType) Sensor(org.apache.kafka.common.metrics.Sensor) CompressionType(org.apache.kafka.common.record.CompressionType) TestUtils(org.apache.kafka.test.TestUtils) Utils.mkSet(org.apache.kafka.common.utils.Utils.mkSet) InvalidTopicException(org.apache.kafka.common.errors.InvalidTopicException) Set(java.util.Set) PartitionInfo(org.apache.kafka.common.PartitionInfo) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) TopicIdPartition(org.apache.kafka.common.TopicIdPartition) StandardCharsets(java.nio.charset.StandardCharsets) Executors(java.util.concurrent.Executors) MemoryRecords(org.apache.kafka.common.record.MemoryRecords) Metrics(org.apache.kafka.common.metrics.Metrics) ApiMessageType(org.apache.kafka.common.message.ApiMessageType) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ListOffsetsTopicResponse(org.apache.kafka.common.message.ListOffsetsResponseData.ListOffsetsTopicResponse) Errors(org.apache.kafka.common.protocol.Errors) Node(org.apache.kafka.common.Node) FetchRequest(org.apache.kafka.common.requests.FetchRequest) MetadataResponse(org.apache.kafka.common.requests.MetadataResponse) NodeApiVersions(org.apache.kafka.clients.NodeApiVersions) Records(org.apache.kafka.common.record.Records) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) Assertions.fail(org.junit.jupiter.api.Assertions.fail) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) OffsetForLeaderPartition(org.apache.kafka.common.message.OffsetForLeaderEpochRequestData.OffsetForLeaderPartition) ClientDnsLookup(org.apache.kafka.clients.ClientDnsLookup) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) RequestTestUtils(org.apache.kafka.common.requests.RequestTestUtils) RecordHeader(org.apache.kafka.common.header.internals.RecordHeader) ListOffsetsPartitionResponse(org.apache.kafka.common.message.ListOffsetsResponseData.ListOffsetsPartitionResponse) OffsetForLeaderEpochRequestData(org.apache.kafka.common.message.OffsetForLeaderEpochRequestData) INVALID_SESSION_ID(org.apache.kafka.common.requests.FetchMetadata.INVALID_SESSION_ID) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) 
UNDEFINED_EPOCH(org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.UNDEFINED_EPOCH) NetworkClient(org.apache.kafka.clients.NetworkClient) Deserializer(org.apache.kafka.common.serialization.Deserializer) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) TestUtils.assertOptional(org.apache.kafka.test.TestUtils.assertOptional) OffsetOutOfRangeException(org.apache.kafka.clients.consumer.OffsetOutOfRangeException) ByteArrayDeserializer(org.apache.kafka.common.serialization.ByteArrayDeserializer) MockSelector(org.apache.kafka.test.MockSelector) Field(java.lang.reflect.Field) ApiVersions(org.apache.kafka.clients.ApiVersions) MetricNameTemplate(org.apache.kafka.common.MetricNameTemplate) OffsetForLeaderEpochResponseData(org.apache.kafka.common.message.OffsetForLeaderEpochResponseData) Assertions.assertArrayEquals(org.junit.jupiter.api.Assertions.assertArrayEquals) AfterEach(org.junit.jupiter.api.AfterEach) NetworkReceive(org.apache.kafka.common.network.NetworkReceive) SimpleRecord(org.apache.kafka.common.record.SimpleRecord) PartitionData(org.apache.kafka.common.requests.FetchRequest.PartitionData) BytesDeserializer(org.apache.kafka.common.serialization.BytesDeserializer) ByteBufferOutputStream(org.apache.kafka.common.utils.ByteBufferOutputStream) LogTruncationException(org.apache.kafka.clients.consumer.LogTruncationException) Assertions.assertNotEquals(org.junit.jupiter.api.Assertions.assertNotEquals) AbstractRequest(org.apache.kafka.common.requests.AbstractRequest) ControlRecordType(org.apache.kafka.common.record.ControlRecordType) ByteBuffer(java.nio.ByteBuffer) ClientRequest(org.apache.kafka.clients.ClientRequest) FetchResponseData(org.apache.kafka.common.message.FetchResponseData) Record(org.apache.kafka.common.record.Record) Collections.singleton(java.util.Collections.singleton) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) BufferSupplier(org.apache.kafka.common.utils.BufferSupplier) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MetricName(org.apache.kafka.common.MetricName) OffsetForLeaderTopicResult(org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.OffsetForLeaderTopicResult) ListOffsetsTopic(org.apache.kafka.common.message.ListOffsetsRequestData.ListOffsetsTopic) TopicPartition(org.apache.kafka.common.TopicPartition) MemoryRecordsBuilder(org.apache.kafka.common.record.MemoryRecordsBuilder) LegacyRecord(org.apache.kafka.common.record.LegacyRecord) Collections.emptyList(java.util.Collections.emptyList) MetricConfig(org.apache.kafka.common.metrics.MetricConfig) ClusterResourceListeners(org.apache.kafka.common.internals.ClusterResourceListeners) Collectors(java.util.stream.Collectors) ListOffsetsResponseData(org.apache.kafka.common.message.ListOffsetsResponseData) Test(org.junit.jupiter.api.Test) ConsumerRebalanceListener(org.apache.kafka.clients.consumer.ConsumerRebalanceListener) List(java.util.List) Header(org.apache.kafka.common.header.Header) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) RecordTooLargeException(org.apache.kafka.common.errors.RecordTooLargeException) Optional(java.util.Optional) KafkaMetric(org.apache.kafka.common.metrics.KafkaMetric) OffsetsForLeaderEpochResponse(org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse) Uuid(org.apache.kafka.common.Uuid) EpochEndOffset(org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset) Metadata(org.apache.kafka.clients.Metadata) 
EndTransactionMarker(org.apache.kafka.common.record.EndTransactionMarker) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FetchSessionHandler(org.apache.kafka.clients.FetchSessionHandler) HashMap(java.util.HashMap) Function(java.util.function.Function) HashSet(java.util.HashSet) MetadataRequest(org.apache.kafka.common.requests.MetadataRequest) Collections.singletonMap(java.util.Collections.singletonMap) ExecutorService(java.util.concurrent.ExecutorService) Utils(org.apache.kafka.common.utils.Utils) UNDEFINED_EPOCH_OFFSET(org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.UNDEFINED_EPOCH_OFFSET) Collections.emptyMap(java.util.Collections.emptyMap) TimeoutException(org.apache.kafka.common.errors.TimeoutException) MockClient(org.apache.kafka.clients.MockClient) ListOffsetsPartition(org.apache.kafka.common.message.ListOffsetsRequestData.ListOffsetsPartition) Iterator(java.util.Iterator) ApiKeys(org.apache.kafka.common.protocol.ApiKeys) TimeUnit(java.util.concurrent.TimeUnit) IsolationLevel(org.apache.kafka.common.IsolationLevel) DelayedReceive(org.apache.kafka.test.DelayedReceive) TopicAuthorizationException(org.apache.kafka.common.errors.TopicAuthorizationException) OffsetsForLeaderEpochRequest(org.apache.kafka.common.requests.OffsetsForLeaderEpochRequest) Collections(java.util.Collections)
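
The helper is parameterized so the surrounding tests can exercise both outcomes: with a concrete reset strategy the fetcher resets after detecting truncation, and with OffsetResetStrategy.NONE it surfaces a LogTruncationException. Hypothetical invocations (the argument values are illustrative, not copied from the test class):

// Divergence detected (end offset 2 < initial offset 5); EARLIEST allows an automatic reset.
testOffsetValidationWithGivenEpochOffset(1, 2L, OffsetResetStrategy.EARLIEST);

// Same divergence with no reset strategy: validateOffsetsIfNeeded() throws LogTruncationException.
testOffsetValidationWithGivenEpochOffset(1, 2L, OffsetResetStrategy.NONE);

// Broker cannot resolve the epoch at all: the thrown exception reports no divergent offsets.
testOffsetValidationWithGivenEpochOffset(UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET, OffsetResetStrategy.NONE);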

Aggregations

OffsetResetStrategy (org.apache.kafka.clients.consumer.OffsetResetStrategy): 5
TopicPartition (org.apache.kafka.common.TopicPartition): 5
HashMap (java.util.HashMap): 4
ArrayList (java.util.ArrayList): 3
Collections (java.util.Collections): 3
List (java.util.List): 3
Map (java.util.Map): 3
TimeUnit (java.util.concurrent.TimeUnit): 3
Collectors (java.util.stream.Collectors): 3
ByteBuffer (java.nio.ByteBuffer): 2
StandardCharsets (java.nio.charset.StandardCharsets): 2
Arrays (java.util.Arrays): 2
Collection (java.util.Collection): 2
LinkedHashMap (java.util.LinkedHashMap): 2
Optional (java.util.Optional): 2
ExecutorService (java.util.concurrent.ExecutorService): 2
AtomicReference (java.util.concurrent.atomic.AtomicReference): 2
PCollection (org.apache.beam.sdk.values.PCollection): 2
ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList): 2
ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap): 2