Search in sources:

Example 1 with CheckpointStrategy

Use of io.mantisrx.connector.kafka.source.checkpoint.strategy.CheckpointStrategy in project mantis by Netflix.

From class KafkaSource, method createBackPressuredConsumerObs:

/**
 * Create an observable with back-pressure semantics from the consumer records fetched by the given consumer.
 *
 * @param mantisKafkaConsumer non-thread-safe KafkaConsumer
 * @param kafkaSourceConfig   configuration for the Mantis Kafka Source
 * @return Observable of KafkaAckables wrapping the consumed records
 */
private Observable<KafkaAckable> createBackPressuredConsumerObs(final MantisKafkaConsumer<?> mantisKafkaConsumer, final MantisKafkaSourceConfig kafkaSourceConfig) {
    CheckpointStrategy checkpointStrategy = mantisKafkaConsumer.getStrategy();
    final CheckpointTrigger trigger = mantisKafkaConsumer.getTrigger();
    final ConsumerMetrics consumerMetrics = mantisKafkaConsumer.getConsumerMetrics();
    final TopicPartitionStateManager partitionStateManager = mantisKafkaConsumer.getPartitionStateManager();
    int mantisKafkaConsumerId = mantisKafkaConsumer.getConsumerId();
    SyncOnSubscribe<Iterator<ConsumerRecord<String, byte[]>>, KafkaAckable> syncOnSubscribe = SyncOnSubscribe.createStateful(() -> {
        final ConsumerRecords<String, byte[]> records = mantisKafkaConsumer.poll(kafkaSourceConfig.getConsumerPollTimeoutMs());
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("topic listing for consumer {}", mantisKafkaConsumer.listTopics());
        }
        LOGGER.info("consumer subscribed to topic-partitions {}", mantisKafkaConsumer.assignment());
        return records.iterator();
    }, (consumerRecordIterator, observer) -> {
        Iterator<ConsumerRecord<String, byte[]>> it = consumerRecordIterator;
        final Set<TopicPartition> partitions = mantisKafkaConsumer.assignment();
        // persist consumed offsets once the checkpoint trigger fires
        if (trigger.shouldCheckpoint()) {
            long startTime = System.currentTimeMillis();
            final Map<TopicPartition, OffsetAndMetadata> checkpoint = partitionStateManager.createCheckpoint(partitions);
            checkpointStrategy.persistCheckpoint(checkpoint);
            long now = System.currentTimeMillis();
            consumerMetrics.recordCheckpointDelay(now - startTime);
            consumerMetrics.incrementCommitCount();
            trigger.reset();
        }
        if (!done.get()) {
            try {
                if (!consumerRecordIterator.hasNext()) {
                    final ConsumerRecords<String, byte[]> consumerRecords = mantisKafkaConsumer.poll(kafkaSourceConfig.getConsumerPollTimeoutMs());
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("poll returned {} records", consumerRecords.count());
                    }
                    it = consumerRecords.iterator();
                }
                if (it.hasNext()) {
                    final ConsumerRecord<String, byte[]> m = it.next();
                    final TopicPartition topicPartition = new TopicPartition(m.topic(), m.partition());
                    consumerMetrics.incrementInCount();
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("updating read offset to " + m.offset() + " read " + m.value());
                    }
                    if (m.value() != null) {
                        try {
                            trigger.update(getPayloadSize(m));
                            if (kafkaSourceConfig.getParseMessageInSource()) {
                                final Parser parser = ParserType.parser(kafkaSourceConfig.getMessageParserType()).getParser();
                                if (parser.canParse(m.value())) {
                                    final Map<String, Object> parsedKafkaValue = parser.parseMessage(m.value());
                                    final KafkaData kafkaData = new KafkaData(m, Optional.ofNullable(parsedKafkaValue), Optional.ofNullable(m.key()), mantisKafkaConsumerId);
                                    final KafkaAckable ackable = new KafkaAckable(kafkaData, ackSubject);
                                    // record offset consumed in TopicPartitionStateManager before onNext to avoid race condition with Ack being processed before the consume is recorded
                                    partitionStateManager.recordMessageRead(topicPartition, m.offset());
                                    consumerMetrics.recordReadOffset(topicPartition, m.offset());
                                    observer.onNext(ackable);
                                } else {
                                    consumerMetrics.incrementParseFailureCount();
                                }
                            } else {
                                final KafkaData kafkaData = new KafkaData(m, Optional.empty(), Optional.ofNullable(m.key()), mantisKafkaConsumerId);
                                final KafkaAckable ackable = new KafkaAckable(kafkaData, ackSubject);
                                // record offset consumed in TopicPartitionStateManager before onNext to avoid race condition with Ack being processed before the consume is recorded
                                partitionStateManager.recordMessageRead(topicPartition, m.offset());
                                consumerMetrics.recordReadOffset(topicPartition, m.offset());
                                observer.onNext(ackable);
                            }
                        } catch (ParseException pe) {
                            consumerMetrics.incrementErrorCount();
                            LOGGER.warn("failed to parse {}:{} message {}", m.topic(), m.partition(), m.value(), pe);
                        }
                    } else {
                        consumerMetrics.incrementKafkaMessageValueNullCount();
                    }
                } else {
                    consumerMetrics.incrementWaitForDataCount();
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("Reached head of partition, waiting for more data");
                    }
                    // reached head of partition; back off briefly before polling again
                    TimeUnit.MILLISECONDS.sleep(200);
                }
            } catch (TimeoutException toe) {
                consumerMetrics.incrementWaitForDataCount();
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Reached head of partition waiting for more data");
                }
            } catch (OffsetOutOfRangeException oore) {
                LOGGER.warn("offsets out of range " + oore.partitions() + " will seek to beginning", oore);
                final Set<TopicPartition> topicPartitionSet = oore.partitions();
                for (TopicPartition tp : topicPartitionSet) {
                    LOGGER.info("partition {} consumer position {}", tp, mantisKafkaConsumer.position(tp));
                }
                mantisKafkaConsumer.seekToBeginning(oore.partitions().toArray(new TopicPartition[oore.partitions().size()]));
            } catch (InvalidRecordException ire) {
                consumerMetrics.incrementErrorCount();
                LOGGER.warn("iterator error with invalid message. message will be dropped " + ire.getMessage());
            } catch (KafkaException e) {
                consumerMetrics.incrementErrorCount();
                LOGGER.warn("Other Kafka exception, message will be dropped. " + e.getMessage());
            } catch (InterruptedException ie) {
                LOGGER.error("consumer interrupted", ie);
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                consumerMetrics.incrementErrorCount();
                LOGGER.warn("caught exception", e);
            }
        } else {
            mantisKafkaConsumer.close();
        }
        return it;
    }, consumerRecordIterator -> {
        LOGGER.info("closing Kafka consumer on unsubscribe" + mantisKafkaConsumer.toString());
        mantisKafkaConsumer.close();
    });
    return Observable.create(syncOnSubscribe).subscribeOn(Schedulers.newThread()).doOnUnsubscribe(() -> LOGGER.info("consumer {} stopped due to unsubscribe", mantisKafkaConsumerId)).doOnError((t) -> {
        LOGGER.error("consumer {} stopped due to error", mantisKafkaConsumerId, t);
        consumerMetrics.incrementErrorCount();
    }).doOnTerminate(() -> LOGGER.info("consumer {} terminated", mantisKafkaConsumerId));
}
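
The loop above drives checkpointing through two collaborators: a CheckpointTrigger that is fed per-record payload sizes (trigger.update), polled each iteration (trigger.shouldCheckpoint), and reset after persisting, plus a CheckpointStrategy that actually persists the offsets. The trigger's contract is not shown on this page, so the following is a minimal sketch inferred from those call sites; the class name BytesOrTimeCheckpointTrigger and the byte/time thresholds are illustrative assumptions, not the connector's actual implementation.

// Hypothetical trigger that fires once enough bytes have been processed or
// enough time has elapsed. Method names mirror the calls made in
// createBackPressuredConsumerObs; the real CheckpointTrigger interface in
// mantis-connector-kafka may differ.
public class BytesOrTimeCheckpointTrigger {

    private final long maxBytes;
    private final long maxIntervalMs;
    private long bytesSinceCheckpoint = 0L;
    private long lastCheckpointAt = System.currentTimeMillis();

    public BytesOrTimeCheckpointTrigger(long maxBytes, long maxIntervalMs) {
        this.maxBytes = maxBytes;
        this.maxIntervalMs = maxIntervalMs;
    }

    // called once per consumed record with its payload size
    public void update(int payloadSizeBytes) {
        bytesSinceCheckpoint += payloadSizeBytes;
    }

    // polled at the top of each iteration of the consumer loop
    public boolean shouldCheckpoint() {
        return bytesSinceCheckpoint >= maxBytes
                || System.currentTimeMillis() - lastCheckpointAt >= maxIntervalMs;
    }

    // called after the checkpoint has been persisted
    public void reset() {
        bytesSinceCheckpoint = 0L;
        lastCheckpointAt = System.currentTimeMillis();
    }
}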
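
On the strategy side, the only call made in the example is checkpointStrategy.persistCheckpoint(Map&lt;TopicPartition, OffsetAndMetadata&gt;). The connector supports several strategies (see CheckpointStrategyOptions in the imports below); one natural choice is to commit the offsets back to Kafka, sketched here under the assumption that the strategy holds a reference to the single-threaded consumer. The class name KafkaCommitCheckpointStrategy is hypothetical; only the persistCheckpoint call shape is taken from the example.

import java.util.Map;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

// Hypothetical strategy that persists a checkpoint by committing the offsets
// back to Kafka.
public class KafkaCommitCheckpointStrategy {

    private final KafkaConsumer<String, byte[]> consumer;

    public KafkaCommitCheckpointStrategy(KafkaConsumer<String, byte[]> consumer) {
        this.consumer = consumer;
    }

    public void persistCheckpoint(Map<TopicPartition, OffsetAndMetadata> checkpoint) {
        if (checkpoint == null || checkpoint.isEmpty()) {
            // nothing was read since the last checkpoint
            return;
        }
        // synchronous commit: blocks until the broker acknowledges the offsets,
        // acceptable here because poll() and the commit run on the same thread
        consumer.commitSync(checkpoint);
    }
}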
Also used : Index(io.mantisrx.runtime.source.Index) KafkaSourceParameters(io.mantisrx.connector.kafka.KafkaSourceParameters) org.apache.kafka.clients.consumer(org.apache.kafka.clients.consumer) java.util(java.util) CONSUMER_RECORD_OVERHEAD_BYTES(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.CONSUMER_RECORD_OVERHEAD_BYTES) ParserType(io.mantisrx.connector.kafka.source.serde.ParserType) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) KafkaException(org.apache.kafka.common.KafkaException) Source(io.mantisrx.runtime.source.Source) SyncOnSubscribe(rx.observables.SyncOnSubscribe) InvalidRecordException(org.apache.kafka.common.record.InvalidRecordException) KafkaDataNotification(io.mantisrx.connector.kafka.KafkaDataNotification) CheckpointStrategyOptions(io.mantisrx.connector.kafka.source.checkpoint.strategy.CheckpointStrategyOptions) Observable(rx.Observable) BooleanParameter(io.mantisrx.runtime.parameter.type.BooleanParameter) KafkaData(io.mantisrx.connector.kafka.KafkaData) KafkaAckable(io.mantisrx.connector.kafka.KafkaAckable) SerializedSubject(rx.subjects.SerializedSubject) Schedulers(rx.schedulers.Schedulers) TopicPartition(org.apache.kafka.common.TopicPartition) TimeoutException(org.apache.kafka.common.errors.TimeoutException) Logger(org.slf4j.Logger) Parser(io.mantisrx.connector.kafka.source.serde.Parser) DEFAULT_PARSE_MSG_IN_SOURCE(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_PARSE_MSG_IN_SOURCE) CheckpointStrategy(io.mantisrx.connector.kafka.source.checkpoint.strategy.CheckpointStrategy) DEFAULT_NUM_KAFKA_CONSUMER_PER_WORKER(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_NUM_KAFKA_CONSUMER_PER_WORKER) ParseException(io.mantisrx.connector.kafka.source.serde.ParseException) Context(io.mantisrx.runtime.Context) DEFAULT_ENABLE_STATIC_PARTITION_ASSIGN(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_ENABLE_STATIC_PARTITION_ASSIGN) ParameterDefinition(io.mantisrx.runtime.parameter.ParameterDefinition) Validators(io.mantisrx.runtime.parameter.validator.Validators) TimeUnit(java.util.concurrent.TimeUnit) ConsumerMetrics(io.mantisrx.connector.kafka.source.metrics.ConsumerMetrics) DEFAULT_MAX_BYTES_IN_PROCESSING(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_MAX_BYTES_IN_PROCESSING) CheckpointTrigger(io.mantisrx.connector.kafka.source.checkpoint.trigger.CheckpointTrigger) Registry(com.netflix.spectator.api.Registry) IntParameter(io.mantisrx.runtime.parameter.type.IntParameter) StringParameter(io.mantisrx.runtime.parameter.type.StringParameter) PublishSubject(rx.subjects.PublishSubject)
