
Example 1 with Parser

use of io.mantisrx.connector.kafka.source.serde.Parser in project mantis by Netflix.

the class KafkaSource method createBackPressuredConsumerObs.

/**
 * Create an observable with back pressure semantics from the consumer records fetched using consumer.
 *
 * @param mantisKafkaConsumer non thread-safe KafkaConsumer
 * @param kafkaSourceConfig   configuration for the Mantis Kafka Source
 *
 * @return an Observable of KafkaAckable messages with back pressure semantics
 */
private Observable<KafkaAckable> createBackPressuredConsumerObs(final MantisKafkaConsumer<?> mantisKafkaConsumer, final MantisKafkaSourceConfig kafkaSourceConfig) {
    CheckpointStrategy checkpointStrategy = mantisKafkaConsumer.getStrategy();
    final CheckpointTrigger trigger = mantisKafkaConsumer.getTrigger();
    final ConsumerMetrics consumerMetrics = mantisKafkaConsumer.getConsumerMetrics();
    final TopicPartitionStateManager partitionStateManager = mantisKafkaConsumer.getPartitionStateManager();
    int mantisKafkaConsumerId = mantisKafkaConsumer.getConsumerId();
    SyncOnSubscribe<Iterator<ConsumerRecord<String, byte[]>>, KafkaAckable> syncOnSubscribe = SyncOnSubscribe.createStateful(() -> {
        final ConsumerRecords<String, byte[]> records = mantisKafkaConsumer.poll(kafkaSourceConfig.getConsumerPollTimeoutMs());
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("topic listing for consumer {}", mantisKafkaConsumer.listTopics());
        }
        LOGGER.info("consumer subscribed to topic-partitions {}", mantisKafkaConsumer.assignment());
        return records.iterator();
    }, (consumerRecordIterator, observer) -> {
        Iterator<ConsumerRecord<String, byte[]>> it = consumerRecordIterator;
        final Set<TopicPartition> partitions = mantisKafkaConsumer.assignment();
        if (trigger.shouldCheckpoint()) {
            long startTime = System.currentTimeMillis();
            final Map<TopicPartition, OffsetAndMetadata> checkpoint = partitionStateManager.createCheckpoint(partitions);
            checkpointStrategy.persistCheckpoint(checkpoint);
            long now = System.currentTimeMillis();
            consumerMetrics.recordCheckpointDelay(now - startTime);
            consumerMetrics.incrementCommitCount();
            trigger.reset();
        }
        if (!done.get()) {
            try {
                if (!consumerRecordIterator.hasNext()) {
                    final ConsumerRecords<String, byte[]> consumerRecords = mantisKafkaConsumer.poll(kafkaSourceConfig.getConsumerPollTimeoutMs());
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("poll returned {} records", consumerRecords.count());
                    }
                    it = consumerRecords.iterator();
                }
                if (it.hasNext()) {
                    final ConsumerRecord<String, byte[]> m = it.next();
                    final TopicPartition topicPartition = new TopicPartition(m.topic(), m.partition());
                    consumerMetrics.incrementInCount();
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("updating read offset to " + m.offset() + " read " + m.value());
                    }
                    if (m.value() != null) {
                        try {
                            trigger.update(getPayloadSize(m));
                            if (kafkaSourceConfig.getParseMessageInSource()) {
                                final Parser parser = ParserType.parser(kafkaSourceConfig.getMessageParserType()).getParser();
                                if (parser.canParse(m.value())) {
                                    final Map<String, Object> parsedKafkaValue = parser.parseMessage(m.value());
                                    final KafkaData kafkaData = new KafkaData(m, Optional.ofNullable(parsedKafkaValue), Optional.ofNullable(m.key()), mantisKafkaConsumerId);
                                    final KafkaAckable ackable = new KafkaAckable(kafkaData, ackSubject);
                                    // record offset consumed in TopicPartitionStateManager before onNext to avoid race condition with Ack being processed before the consume is recorded
                                    partitionStateManager.recordMessageRead(topicPartition, m.offset());
                                    consumerMetrics.recordReadOffset(topicPartition, m.offset());
                                    observer.onNext(ackable);
                                } else {
                                    consumerMetrics.incrementParseFailureCount();
                                }
                            } else {
                                final KafkaData kafkaData = new KafkaData(m, Optional.empty(), Optional.ofNullable(m.key()), mantisKafkaConsumerId);
                                final KafkaAckable ackable = new KafkaAckable(kafkaData, ackSubject);
                                // record offset consumed in TopicPartitionStateManager before onNext to avoid race condition with Ack being processed before the consume is recorded
                                partitionStateManager.recordMessageRead(topicPartition, m.offset());
                                consumerMetrics.recordReadOffset(topicPartition, m.offset());
                                observer.onNext(ackable);
                            }
                        } catch (ParseException pe) {
                            consumerMetrics.incrementErrorCount();
                            LOGGER.warn("failed to parse {}:{} message {}", m.topic(), m.partition(), m.value(), pe);
                        }
                    } else {
                        consumerMetrics.incrementKafkaMessageValueNullCount();
                    }
                } else {
                    consumerMetrics.incrementWaitForDataCount();
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("Reached head of partition, waiting for more data");
                    }
                    TimeUnit.MILLISECONDS.sleep(200);
                }
            } catch (TimeoutException toe) {
                consumerMetrics.incrementWaitForDataCount();
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Reached head of partition waiting for more data");
                }
            } catch (OffsetOutOfRangeException oore) {
                LOGGER.warn("offsets out of range " + oore.partitions() + " will seek to beginning", oore);
                final Set<TopicPartition> topicPartitionSet = oore.partitions();
                for (TopicPartition tp : topicPartitionSet) {
                    LOGGER.info("partition {} consumer position {}", tp, mantisKafkaConsumer.position(tp));
                }
                mantisKafkaConsumer.seekToBeginning(oore.partitions().toArray(new TopicPartition[oore.partitions().size()]));
            } catch (InvalidRecordException ire) {
                consumerMetrics.incrementErrorCount();
                LOGGER.warn("iterator error with invalid message. message will be dropped " + ire.getMessage());
            } catch (KafkaException e) {
                consumerMetrics.incrementErrorCount();
                LOGGER.warn("Other Kafka exception, message will be dropped. " + e.getMessage());
            } catch (InterruptedException ie) {
                LOGGER.error("consumer interrupted", ie);
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                consumerMetrics.incrementErrorCount();
                LOGGER.warn("caught exception", e);
            }
        } else {
            mantisKafkaConsumer.close();
        }
        return it;
    }, consumerRecordIterator -> {
        LOGGER.info("closing Kafka consumer on unsubscribe" + mantisKafkaConsumer.toString());
        mantisKafkaConsumer.close();
    });
    return Observable.create(syncOnSubscribe).subscribeOn(Schedulers.newThread()).doOnUnsubscribe(() -> LOGGER.info("consumer {} stopped due to unsubscribe", mantisKafkaConsumerId)).doOnError((t) -> {
        LOGGER.error("consumer {} stopped due to error", mantisKafkaConsumerId, t);
        consumerMetrics.incrementErrorCount();
    }).doOnTerminate(() -> LOGGER.info("consumer {} terminated", mantisKafkaConsumerId));
}
Also used : Index(io.mantisrx.runtime.source.Index) KafkaSourceParameters(io.mantisrx.connector.kafka.KafkaSourceParameters) org.apache.kafka.clients.consumer(org.apache.kafka.clients.consumer) java.util(java.util) CONSUMER_RECORD_OVERHEAD_BYTES(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.CONSUMER_RECORD_OVERHEAD_BYTES) ParserType(io.mantisrx.connector.kafka.source.serde.ParserType) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) KafkaException(org.apache.kafka.common.KafkaException) Source(io.mantisrx.runtime.source.Source) SyncOnSubscribe(rx.observables.SyncOnSubscribe) InvalidRecordException(org.apache.kafka.common.record.InvalidRecordException) KafkaDataNotification(io.mantisrx.connector.kafka.KafkaDataNotification) CheckpointStrategyOptions(io.mantisrx.connector.kafka.source.checkpoint.strategy.CheckpointStrategyOptions) Observable(rx.Observable) BooleanParameter(io.mantisrx.runtime.parameter.type.BooleanParameter) KafkaData(io.mantisrx.connector.kafka.KafkaData) KafkaAckable(io.mantisrx.connector.kafka.KafkaAckable) SerializedSubject(rx.subjects.SerializedSubject) Schedulers(rx.schedulers.Schedulers) TopicPartition(org.apache.kafka.common.TopicPartition) TimeoutException(org.apache.kafka.common.errors.TimeoutException) Logger(org.slf4j.Logger) Parser(io.mantisrx.connector.kafka.source.serde.Parser) DEFAULT_PARSE_MSG_IN_SOURCE(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_PARSE_MSG_IN_SOURCE) CheckpointStrategy(io.mantisrx.connector.kafka.source.checkpoint.strategy.CheckpointStrategy) DEFAULT_NUM_KAFKA_CONSUMER_PER_WORKER(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_NUM_KAFKA_CONSUMER_PER_WORKER) ParseException(io.mantisrx.connector.kafka.source.serde.ParseException) Context(io.mantisrx.runtime.Context) DEFAULT_ENABLE_STATIC_PARTITION_ASSIGN(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_ENABLE_STATIC_PARTITION_ASSIGN) ParameterDefinition(io.mantisrx.runtime.parameter.ParameterDefinition) Validators(io.mantisrx.runtime.parameter.validator.Validators) TimeUnit(java.util.concurrent.TimeUnit) ConsumerMetrics(io.mantisrx.connector.kafka.source.metrics.ConsumerMetrics) DEFAULT_MAX_BYTES_IN_PROCESSING(io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_MAX_BYTES_IN_PROCESSING) CheckpointTrigger(io.mantisrx.connector.kafka.source.checkpoint.trigger.CheckpointTrigger) Registry(com.netflix.spectator.api.Registry) IntParameter(io.mantisrx.runtime.parameter.type.IntParameter) StringParameter(io.mantisrx.runtime.parameter.type.StringParameter) PublishSubject(rx.subjects.PublishSubject)
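
For orientation, here is a minimal, hypothetical sketch of how the Observable<KafkaAckable> returned by createBackPressuredConsumerObs might be consumed downstream. Only getKafkaData(), getParsedEvent() and ack() are taken from the snippet above; AckableSubscriberSketch and handle(...) are illustrative placeholders, not part of the connector API.

import io.mantisrx.connector.kafka.KafkaAckable;
import java.util.Collections;
import java.util.Map;
import rx.Observable;

// Hypothetical downstream subscriber sketch; not part of the Mantis connector.
final class AckableSubscriberSketch {

    static void consume(Observable<KafkaAckable> ackables) {
        ackables.subscribe(ackable -> {
            try {
                // the parsed event is present when the source was configured to parse messages
                Map<String, Object> event =
                        ackable.getKafkaData().getParsedEvent().orElse(Collections.emptyMap());
                handle(event);
            } finally {
                // ack after processing so the offset becomes eligible for the next checkpoint
                ackable.ack();
            }
        });
    }

    private static void handle(Map<String, Object> event) {
        // placeholder for application logic
    }
}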

Example 2 with Parser

use of io.mantisrx.connector.kafka.source.serde.Parser in project mantis by Netflix.

the class AutoAckTaggingStage method processAndAck.

/**
 * Default implementation that acks the received data and returns the parsed Kafka data.
 *
 * @param context the Mantis runtime context used to read the parser job parameters
 * @param ackable the KafkaAckable wrapping the received Kafka record
 *
 * @return the parsed event as a map, or an empty map if the message could not be parsed
 */
@Override
protected Map<String, Object> processAndAck(final Context context, KafkaAckable ackable) {
    try {
        Boolean messageParsedInSource = (Boolean) context.getParameters().get(KafkaSourceParameters.PARSE_MSG_IN_SOURCE, DEFAULT_PARSE_MSG_IN_SOURCE);
        String messageParserType = (String) context.getParameters().get(KafkaSourceParameters.PARSER_TYPE, ParserType.SIMPLE_JSON.getPropName());
        if (messageParsedInSource) {
            final Optional<Map<String, Object>> parsedEventO = ackable.getKafkaData().getParsedEvent();
            return parsedEventO.orElse(Collections.emptyMap());
        } else {
            final Parser parser = ParserType.parser(messageParserType).getParser();
            if (parser.canParse(ackable.getKafkaData().getRawBytes())) {
                return parser.parseMessage(ackable.getKafkaData().getRawBytes());
            } else {
                LOG.warn("cannot parse message {}", ackable.getKafkaData().getRawBytes().toString());
                throw new ParseException("cannot parse message");
            }
        }
    } catch (Throwable t) {
        if (t instanceof ParseException) {
            LOGGER.warn("failed to parse message", t);
        } else {
            LOGGER.error("caught unexpected exception", t);
        }
    } finally {
        ackable.ack();
    }
    return Collections.emptyMap();
}
Also used : ParseException(io.mantisrx.connector.kafka.source.serde.ParseException) Map(java.util.Map) Parser(io.mantisrx.connector.kafka.source.serde.Parser)
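
Both examples gate parseMessage(...) behind canParse(...). The sketch below isolates that guard-then-parse pattern, assuming only the Parser, ParserType and ParseException usages visible in the snippets above; the class name, the sample JSON payload and the empty-map fallback are illustrative.

import io.mantisrx.connector.kafka.source.serde.ParseException;
import io.mantisrx.connector.kafka.source.serde.Parser;
import io.mantisrx.connector.kafka.source.serde.ParserType;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Map;

// Minimal guard-then-parse sketch using only calls seen in the examples above.
final class ParserUsageSketch {

    static Map<String, Object> parseOrEmpty(String parserTypeName, byte[] rawBytes) {
        final Parser parser = ParserType.parser(parserTypeName).getParser();
        if (!parser.canParse(rawBytes)) {
            // mirrors the parse-failure branches in both examples
            return Collections.emptyMap();
        }
        try {
            return parser.parseMessage(rawBytes);
        } catch (ParseException pe) {
            // Example 2 falls back to an empty map after a parse failure
            return Collections.emptyMap();
        }
    }

    public static void main(String[] args) {
        byte[] payload = "{\"event\":\"ping\"}".getBytes(StandardCharsets.UTF_8);
        // SIMPLE_JSON.getPropName() is the default parser type referenced in Example 2
        System.out.println(parseOrEmpty(ParserType.SIMPLE_JSON.getPropName(), payload));
    }
}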

Aggregations

ParseException (io.mantisrx.connector.kafka.source.serde.ParseException)2
Parser (io.mantisrx.connector.kafka.source.serde.Parser)2
Registry (com.netflix.spectator.api.Registry)1
KafkaAckable (io.mantisrx.connector.kafka.KafkaAckable)1
KafkaData (io.mantisrx.connector.kafka.KafkaData)1
KafkaDataNotification (io.mantisrx.connector.kafka.KafkaDataNotification)1
KafkaSourceParameters (io.mantisrx.connector.kafka.KafkaSourceParameters)1
CONSUMER_RECORD_OVERHEAD_BYTES (io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.CONSUMER_RECORD_OVERHEAD_BYTES)1
DEFAULT_ENABLE_STATIC_PARTITION_ASSIGN (io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_ENABLE_STATIC_PARTITION_ASSIGN)1
DEFAULT_MAX_BYTES_IN_PROCESSING (io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_MAX_BYTES_IN_PROCESSING)1
DEFAULT_NUM_KAFKA_CONSUMER_PER_WORKER (io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_NUM_KAFKA_CONSUMER_PER_WORKER)1
DEFAULT_PARSE_MSG_IN_SOURCE (io.mantisrx.connector.kafka.source.MantisKafkaSourceConfig.DEFAULT_PARSE_MSG_IN_SOURCE)1
CheckpointStrategy (io.mantisrx.connector.kafka.source.checkpoint.strategy.CheckpointStrategy)1
CheckpointStrategyOptions (io.mantisrx.connector.kafka.source.checkpoint.strategy.CheckpointStrategyOptions)1
CheckpointTrigger (io.mantisrx.connector.kafka.source.checkpoint.trigger.CheckpointTrigger)1
ConsumerMetrics (io.mantisrx.connector.kafka.source.metrics.ConsumerMetrics)1
ParserType (io.mantisrx.connector.kafka.source.serde.ParserType)1
Context (io.mantisrx.runtime.Context)1
ParameterDefinition (io.mantisrx.runtime.parameter.ParameterDefinition)1
BooleanParameter (io.mantisrx.runtime.parameter.type.BooleanParameter)1