Search in sources :

Example 1 with KeyValueSchema

use of org.apache.pulsar.client.api.schema.KeyValueSchema in project pulsar by apache.

the class PulsarClientImpl method preProcessSchemaBeforeSubscribe.

/**
 * Prepares the given schema for a subscription on {@code topicName}.
 *
 * <p>A {@code null} schema, or one that does not support schema versioning, is returned
 * unchanged. Otherwise the schema is cloned, the topic's {@link SchemaInfoProvider} is looked
 * up in the client's loading cache and attached to the clone. When the clone reports that it
 * requires fetching schema info, the latest schema info is obtained from the provider and
 * applied to the clone before the future completes.
 *
 * @param pulsarClientImpl client whose schema-provider cache is consulted
 * @param schema           schema supplied by the caller; may be {@code null}
 * @param topicName        topic the subscription targets
 * @return a future holding the schema to use; it completes exceptionally when the provider
 *         cannot be loaded, when no latest schema info exists for a schema that is neither an
 *         {@link AutoConsumeSchema} nor a {@link KeyValueSchema}, or when configuring the
 *         schema info throws a {@link RuntimeException}
 */
@SuppressWarnings("unchecked")
protected <T> CompletableFuture<Schema<T>> preProcessSchemaBeforeSubscribe(PulsarClientImpl pulsarClientImpl, Schema<T> schema, String topicName) {
    // Nothing to prepare for absent schemas or schemas without versioning support.
    if (schema == null || !schema.supportSchemaVersioning()) {
        return CompletableFuture.completedFuture(schema);
    }

    final SchemaInfoProvider provider;
    try {
        provider = pulsarClientImpl.getSchemaProviderLoadingCache().get(topicName);
    } catch (ExecutionException e) {
        log.error("Failed to load schema info provider for topic {}", topicName, e);
        return FutureUtil.failedFuture(e.getCause());
    }

    // From here on we work on a clone of the caller's schema instance.
    schema = schema.clone();
    if (!schema.requireFetchingSchemaInfo()) {
        schema.setSchemaInfoProvider(provider);
        return CompletableFuture.completedFuture(schema);
    }

    // The parameter was reassigned above, so capture the clone in a separate local for the lambda.
    @SuppressWarnings("rawtypes") Schema clonedSchema = schema;
    return provider.getLatestSchema().thenCompose(schemaInfo -> {
        boolean toleratesMissingInfo = clonedSchema instanceof AutoConsumeSchema || clonedSchema instanceof KeyValueSchema;
        if (schemaInfo == null && !toleratesMissingInfo) {
            // no schema info is found
            return FutureUtil.failedFuture(new PulsarClientException.NotFoundException("No latest schema found for topic " + topicName));
        }
        try {
            log.info("Configuring schema for topic {} : {}", topicName, schemaInfo);
            clonedSchema.configureSchemaInfo(topicName, "topic", schemaInfo);
        } catch (RuntimeException re) {
            return FutureUtil.failedFuture(re);
        }
        clonedSchema.setSchemaInfoProvider(provider);
        return CompletableFuture.completedFuture(clonedSchema);
    });
}
Also used : AutoConsumeSchema(org.apache.pulsar.client.impl.schema.AutoConsumeSchema) KeyValueSchema(org.apache.pulsar.client.api.schema.KeyValueSchema) AutoProduceBytesSchema(org.apache.pulsar.client.impl.schema.AutoProduceBytesSchema) Schema(org.apache.pulsar.client.api.Schema) PulsarClientException(org.apache.pulsar.client.api.PulsarClientException) ExecutionException(java.util.concurrent.ExecutionException) MultiVersionSchemaInfoProvider(org.apache.pulsar.client.impl.schema.generic.MultiVersionSchemaInfoProvider) SchemaInfoProvider(org.apache.pulsar.client.api.schema.SchemaInfoProvider)

Example 2 with KeyValueSchema

use of org.apache.pulsar.client.api.schema.KeyValueSchema in project pulsar by apache.

the class ElasticSearchSink method extractIdAndDocument.

/**
 * Derives the Elasticsearch {@code _id} and {@code _source} for a record.
 *
 * <p>With schema handling disabled, the id is {@code null} and the document is the raw message
 * payload decoded as UTF-8. With schema handling enabled, key and value (and their schemas) are
 * taken from the {@link KeyValueSchema} pair when the record carries one, otherwise from the
 * record itself; in that second case no key schema is set, so the id is not derived from the key.
 * When {@code primaryFields} is set and a document was produced, the id is recomputed from the
 * parsed JSON document.
 *
 * @param record the incoming record
 * @return a pair of ({@code _id}, {@code _source}); either element may be {@code null}
 * @throws JsonProcessingException if the document cannot be parsed as JSON while extracting
 *         the primary fields
 * @throws IllegalArgumentException if schema handling is disabled and the record carries no
 *         message
 */
public Pair<String, String> extractIdAndDocument(Record<GenericObject> record) throws JsonProcessingException {
    if (!elasticSearchConfig.isSchemaEnable()) {
        byte[] rawPayload = record.getMessage().orElseThrow(() -> new IllegalArgumentException("Record does not carry message information")).getData();
        return Pair.of(null, new String(rawPayload, StandardCharsets.UTF_8));
    }

    final Object recordKey;
    final GenericObject recordValue;
    Schema<?> schemaOfKey = null;
    final Schema<?> schemaOfValue;
    if (record.getSchema() instanceof KeyValueSchema) {
        KeyValueSchema<GenericObject, GenericObject> kvSchema = (KeyValueSchema) record.getSchema();
        schemaOfKey = kvSchema.getKeySchema();
        schemaOfValue = kvSchema.getValueSchema();
        KeyValue<GenericObject, GenericObject> pair = (KeyValue<GenericObject, GenericObject>) record.getValue().getNativeObject();
        recordKey = pair.getKey();
        recordValue = pair.getValue();
    } else {
        recordKey = record.getKey().orElse(null);
        schemaOfValue = record.getSchema();
        recordValue = record.getValue();
    }

    String id = null;
    boolean keyUsable = !elasticSearchConfig.isKeyIgnore() && recordKey != null && schemaOfKey != null;
    if (keyUsable) {
        id = stringifyKey(schemaOfKey, recordKey);
    }

    String doc = null;
    if (recordValue != null) {
        if (schemaOfValue != null) {
            doc = stringifyValue(schemaOfValue, recordValue);
        } else {
            Object nativeValue = recordValue.getNativeObject();
            // for BWC with the ES-Sink: raw bytes are decoded as UTF-8 text
            doc = (nativeValue instanceof byte[])
                    ? new String((byte[]) nativeValue, StandardCharsets.UTF_8)
                    : nativeValue.toString();
        }
    }

    if (doc != null && primaryFields != null) {
        try {
            // extract the PK from the JSON document
            JsonNode parsedDoc = objectMapper.readTree(doc);
            id = stringifyKey(parsedDoc, primaryFields);
        } catch (JsonProcessingException e) {
            log.error("Failed to read JSON", e);
            throw e;
        }
    }

    if (log.isDebugEnabled()) {
        SchemaType schemaType = null;
        if (record.getSchema() != null && record.getSchema().getSchemaInfo() != null) {
            schemaType = record.getSchema().getSchemaInfo().getType();
        }
        log.debug("recordType={} schemaType={} id={} doc={}", record.getClass().getName(), schemaType, id, doc);
    }
    return Pair.of(id, doc);
}
Also used : KeyValue(org.apache.pulsar.common.schema.KeyValue) GenericObject(org.apache.pulsar.client.api.schema.GenericObject) JsonNode(com.fasterxml.jackson.databind.JsonNode) KeyValueSchema(org.apache.pulsar.client.api.schema.KeyValueSchema) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) SchemaType(org.apache.pulsar.common.schema.SchemaType)

Example 3 with KeyValueSchema

use of org.apache.pulsar.client.api.schema.KeyValueSchema in project pulsar by yahoo.

the class TestGenericObjectSink method write.

/**
 * Logs the details of an incoming record, validates it against expectations carried in the
 * record's properties, and acknowledges it.
 *
 * <p>The property {@code expectedType} (default {@code "MISSING"}) must equal the name of the
 * record's schema type. If the property {@code expectedSchemaDefinition} is non-empty it must
 * equal the schema definition of the record's schema. For KEY_VALUE, AVRO and JSON schema
 * types the native object is additionally cast to the matching type and logged.
 *
 * @param record the record to inspect
 * @throws RuntimeException if the schema type or the schema definition differs from the
 *         expected one
 */
public void write(Record<GenericObject> record) {
    log.info("topic {}", record.getTopicName().orElse(null));
    log.info("properties {}", record.getProperties());
    log.info("received record {} {}", record, record.getClass());
    log.info("schema {}", record.getSchema());
    log.info("native schema {}", record.getSchema().getNativeSchema().orElse(null));
    log.info("schemaInfo {}", record.getSchema().getSchemaInfo());

    final SchemaType actualType = record.getSchema().getSchemaInfo().getType();
    log.info("schemaInfo.type {}", actualType);

    final String expectedRecordType = record.getProperties().getOrDefault("expectedType", "MISSING");
    log.info("expectedRecordType {}", expectedRecordType);
    final String actualTypeName = actualType.name();
    if (!expectedRecordType.equals(actualTypeName)) {
        throw new RuntimeException("Unexpected record type " + actualTypeName + " is not " + expectedRecordType);
    }

    final GenericObject value = record.getValue();
    log.info("value {}", value);
    log.info("value schema type {}", value.getSchemaType());
    log.info("value native object {}", value.getNativeObject());

    if (actualType == SchemaType.KEY_VALUE) {
        // assert that we are able to access the schema (leads to ClassCastException if there is a problem)
        KeyValueSchema keyValueSchema = (KeyValueSchema) record.getSchema();
        log.info("key schema type {}", keyValueSchema.getKeySchema());
        log.info("value schema type {}", keyValueSchema.getValueSchema());
        log.info("key encoding {}", keyValueSchema.getKeyValueEncodingType());
        KeyValue pair = (KeyValue) value.getNativeObject();
        log.info("kvkey {}", pair.getKey());
        log.info("kvvalue {}", pair.getValue());
    }

    final Object nativeObject = value.getNativeObject();
    log.info("value {}", value);
    log.info("value schema type {}", value.getSchemaType());
    log.info("value native object {} class {}", nativeObject, nativeObject.getClass());

    final String expectedSchemaDefinition = record.getProperties().getOrDefault("expectedSchemaDefinition", "");
    final String schemaDefinition = record.getSchema().getSchemaInfo().getSchemaDefinition();
    log.info("schemaDefinition {}", schemaDefinition);
    log.info("expectedSchemaDefinition {}", expectedSchemaDefinition);
    if (!expectedSchemaDefinition.isEmpty() && !expectedSchemaDefinition.equals(schemaDefinition)) {
        throw new RuntimeException("Unexpected schema definition " + schemaDefinition + " is not " + expectedSchemaDefinition);
    }

    // testing that actually the Sink is able to use Native AVRO
    if (actualType == SchemaType.AVRO) {
        GenericRecord avroRecord = (GenericRecord) nativeObject;
        log.info("Schema from AVRO generic object {}", avroRecord.getSchema());
    }
    // testing that actually the Sink is able to use Native JSON
    if (actualType == SchemaType.JSON) {
        JsonNode jsonNode = (JsonNode) nativeObject;
        log.info("NodeType from JsonNode generic object {}", jsonNode.getNodeType());
    }
    record.ack();
}
Also used : KeyValue(org.apache.pulsar.common.schema.KeyValue) JsonNode(com.fasterxml.jackson.databind.JsonNode) GenericRecord(org.apache.avro.generic.GenericRecord) KeyValueSchema(org.apache.pulsar.client.api.schema.KeyValueSchema)

Example 4 with KeyValueSchema

use of org.apache.pulsar.client.api.schema.KeyValueSchema in project nosqlbench by nosqlbench.

the class PulsarProducerOp method getKeyAvroSchemaFromConfiguration.

/**
 * Returns the Avro schema for the key part of the configured KEY_VALUE Pulsar schema,
 * parsing it from the key schema's definition on first use and caching it in
 * {@code avroKeySchema}.
 *
 * <p>No locking is done here: if several callers race on the first call, the definition may
 * simply be parsed more than once.
 *
 * @return the cached Avro schema of the key
 * @throws RuntimeException if the configured Pulsar schema is not of type KEY_VALUE
 */
private org.apache.avro.Schema getKeyAvroSchemaFromConfiguration() {
    if (avroKeySchema != null) {
        return avroKeySchema;
    }
    if (pulsarSchema.getSchemaInfo().getType() != SchemaType.KEY_VALUE) {
        throw new RuntimeException("We are not using KEY_VALUE schema, so no Schema for the Key!");
    }
    KeyValueSchema keyValueSchema = (KeyValueSchema) pulsarSchema;
    String keyDefinition = keyValueSchema.getKeySchema().getSchemaInfo().getSchemaDefinition();
    avroKeySchema = AvroUtil.GetSchema_ApacheAvro(keyDefinition);
    return avroKeySchema;
}
Also used : GenericAvroSchema(org.apache.pulsar.client.impl.schema.generic.GenericAvroSchema) KeyValueSchema(org.apache.pulsar.client.api.schema.KeyValueSchema)

Example 5 with KeyValueSchema

use of org.apache.pulsar.client.api.schema.KeyValueSchema in project nosqlbench by nosqlbench.

the class PulsarProducerOp method run.

/**
 * Builds one Pulsar message from the configured key, properties and payload and sends it,
 * either synchronously or asynchronously, optionally inside a transaction.
 *
 * <p>The payload is encoded according to the configured schema: a KEY_VALUE payload of the
 * form <code>{KEY IN JSON}||{VALUE IN JSON}</code> is split and converted to a pair of generic
 * records, an Avro payload is converted to a generic record, and anything else is sent as
 * UTF-8 bytes. The message size is recorded in the histogram and byte counter before sending.
 *
 * @param timeTracker callback invoked once the send (and commit, when in a transaction) has
 *                    been attempted; in the asynchronous path it runs when the future completes
 * @throws PulsarDriverParamException      if the message payload is blank
 * @throws IllegalArgumentException        if a KEY_VALUE payload lacks the <code>}||{</code> separator
 * @throws PulsarDriverUnexpectedException if the synchronous send or commit fails, or if
 *                                         setting up the asynchronous send throws
 */
@Override
public void run(Runnable timeTracker) {
    if (StringUtils.isBlank(msgPayload)) {
        throw new PulsarDriverParamException("Message payload (\"msg-value\") can't be empty!");
    }
    TypedMessageBuilder typedMessageBuilder;
    final Transaction transaction;
    if (useTransaction) {
        // if you are in a transaction you cannot set the schema per-message
        transaction = transactionSupplier.get();
        typedMessageBuilder = producer.newMessage(transaction);
    } else {
        transaction = null;
        typedMessageBuilder = producer.newMessage(pulsarSchema);
    }
    // set message key
    if (!StringUtils.isBlank(msgKey)) {
        typedMessageBuilder = typedMessageBuilder.key(msgKey);
    }
    // set message properties
    if (!msgProperties.isEmpty()) {
        typedMessageBuilder = typedMessageBuilder.properties(msgProperties);
    }
    // set message payload
    int messageSize;
    SchemaType schemaType = pulsarSchema.getSchemaInfo().getType();
    if (pulsarSchema instanceof KeyValueSchema) {
        // {KEY IN JSON}||{VALUE IN JSON}
        int separator = msgPayload.indexOf("}||{");
        if (separator < 0) {
            throw new IllegalArgumentException("KeyValue payload MUST be in form {KEY IN JSON}||{VALUE IN JSON} (with 2 pipes that separate the KEY part from the VALUE part)");
        }
        // keep the closing '}' with the key and the opening '{' with the value
        String keyInput = msgPayload.substring(0, separator + 1);
        String valueInput = msgPayload.substring(separator + 3);
        KeyValueSchema keyValueSchema = (KeyValueSchema) pulsarSchema;
        org.apache.avro.Schema avroSchema = getAvroSchemaFromConfiguration();
        GenericRecord payload = AvroUtil.GetGenericRecord_PulsarAvro((GenericAvroSchema) keyValueSchema.getValueSchema(), avroSchema, valueInput);
        org.apache.avro.Schema avroSchemaForKey = getKeyAvroSchemaFromConfiguration();
        GenericRecord key = AvroUtil.GetGenericRecord_PulsarAvro((GenericAvroSchema) keyValueSchema.getKeySchema(), avroSchemaForKey, keyInput);
        typedMessageBuilder = typedMessageBuilder.value(new KeyValue(key, payload));
        // TODO: add a way to calculate the message size for KEY_VALUE messages
        messageSize = msgPayload.length();
    } else if (PulsarActivityUtil.isAvroSchemaTypeStr(schemaType.name())) {
        GenericRecord payload = AvroUtil.GetGenericRecord_PulsarAvro((GenericAvroSchema) pulsarSchema, pulsarSchema.getSchemaInfo().getSchemaDefinition(), msgPayload);
        typedMessageBuilder = typedMessageBuilder.value(payload);
        // TODO: add a way to calculate the message size for AVRO messages
        messageSize = msgPayload.length();
    } else {
        byte[] array = msgPayload.getBytes(StandardCharsets.UTF_8);
        typedMessageBuilder = typedMessageBuilder.value(array);
        messageSize = array.length;
    }
    messageSizeHistogram.update(messageSize);
    bytesCounter.inc(messageSize);
    // TODO: add error handling with failed message production
    if (!asyncPulsarOp) {
        try {
            logger.trace("Sending message");
            typedMessageBuilder.send();
            if (useTransaction) {
                try (Timer.Context ctx = transactionCommitTimer.time()) {
                    transaction.commit().get();
                }
            }
            if (logger.isDebugEnabled()) {
                if (PulsarActivityUtil.isAvroSchemaTypeStr(schemaType.name())) {
                    org.apache.avro.Schema avroSchema = getAvroSchemaFromConfiguration();
                    org.apache.avro.generic.GenericRecord avroGenericRecord = AvroUtil.GetGenericRecord_ApacheAvro(avroSchema, msgPayload);
                    logger.debug("({}) Sync message sent: msg-key={}; msg-properties={}; msg-payload={})", producer.getProducerName(), msgKey, msgProperties, avroGenericRecord.toString());
                } else {
                    logger.debug("({}) Sync message sent; msg-key={}; msg-properties={}; msg-payload={}", producer.getProducerName(), msgKey, msgProperties, msgPayload);
                }
            }
        } catch (PulsarClientException | ExecutionException | InterruptedException pce) {
            if (pce instanceof InterruptedException) {
                // restore the interrupt flag so code further up the stack can still observe it
                Thread.currentThread().interrupt();
            }
            String errMsg = "Sync message sending failed: " + "key - " + msgKey + "; " + "properties - " + msgProperties + "; " + "payload - " + msgPayload;
            // pass the throwable along so the root cause is not lost from the logs
            logger.trace(errMsg, pce);
            throw new PulsarDriverUnexpectedException(errMsg);
        }
        timeTracker.run();
    } else {
        try {
            // we rely on blockIfQueueIsFull in order to throttle the request in this case
            CompletableFuture<?> future = typedMessageBuilder.sendAsync();
            if (useTransaction) {
                // add commit step
                future = future.thenCompose(msg -> {
                    Timer.Context ctx = transactionCommitTimer.time();
                    return transaction.commit().whenComplete((m, e) -> ctx.close()).thenApply(v -> msg);
                });
            }
            future.whenComplete((messageId, error) -> {
                // whenComplete also fires on failure; only report "sent" when there was no error
                if (error == null && logger.isDebugEnabled()) {
                    if (PulsarActivityUtil.isAvroSchemaTypeStr(schemaType.name())) {
                        org.apache.avro.Schema avroSchema = getAvroSchemaFromConfiguration();
                        org.apache.avro.generic.GenericRecord avroGenericRecord = AvroUtil.GetGenericRecord_ApacheAvro(avroSchema, msgPayload);
                        logger.debug("({}) Aysnc message sent: msg-key={}; msg-properties={}; msg-payload={})", producer.getProducerName(), msgKey, msgProperties, avroGenericRecord.toString());
                    } else {
                        logger.debug("({}) Aysnc message sent: msg-key={}; msg-properties={}; msg-payload={}", producer.getProducerName(), msgKey, msgProperties, msgPayload);
                    }
                }
                timeTracker.run();
            }).exceptionally(ex -> {
                logger.error("Async message sending failed: " + "key - " + msgKey + "; " + "properties - " + msgProperties + "; " + "payload - " + msgPayload);
                pulsarActivity.asyncOperationFailed(ex);
                return null;
            });
        } catch (Exception e) {
            throw new PulsarDriverUnexpectedException(e);
        }
    }
}
Also used : Histogram(com.codahale.metrics.Histogram) org.apache.pulsar.client.api(org.apache.pulsar.client.api) CompletableFuture(java.util.concurrent.CompletableFuture) PulsarActivity(io.nosqlbench.driver.pulsar.PulsarActivity) StringUtils(org.apache.commons.lang3.StringUtils) Supplier(java.util.function.Supplier) Transaction(org.apache.pulsar.client.api.transaction.Transaction) SchemaType(org.apache.pulsar.common.schema.SchemaType) StandardCharsets(java.nio.charset.StandardCharsets) PulsarDriverParamException(io.nosqlbench.driver.pulsar.exception.PulsarDriverParamException) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) KeyValue(org.apache.pulsar.common.schema.KeyValue) ExecutionException(java.util.concurrent.ExecutionException) PulsarDriverUnexpectedException(io.nosqlbench.driver.pulsar.exception.PulsarDriverUnexpectedException) Logger(org.apache.logging.log4j.Logger) PulsarActivityUtil(io.nosqlbench.driver.pulsar.util.PulsarActivityUtil) AvroUtil(io.nosqlbench.driver.pulsar.util.AvroUtil) Map(java.util.Map) Counter(com.codahale.metrics.Counter) GenericAvroSchema(org.apache.pulsar.client.impl.schema.generic.GenericAvroSchema) Timer(com.codahale.metrics.Timer) KeyValueSchema(org.apache.pulsar.client.api.schema.KeyValueSchema) LogManager(org.apache.logging.log4j.LogManager)

Aggregations

KeyValueSchema (org.apache.pulsar.client.api.schema.KeyValueSchema)19 KeyValue (org.apache.pulsar.common.schema.KeyValue)12 GenericObject (org.apache.pulsar.client.api.schema.GenericObject)9 JsonNode (com.fasterxml.jackson.databind.JsonNode)7 SchemaType (org.apache.pulsar.common.schema.SchemaType)6 Schema (org.apache.pulsar.client.api.Schema)5 ExecutionException (java.util.concurrent.ExecutionException)4 GenericRecord (org.apache.pulsar.client.api.schema.GenericRecord)4 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)3 AtomicLong (java.util.concurrent.atomic.AtomicLong)3 GenericRecord (org.apache.avro.generic.GenericRecord)3 TopicPartition (org.apache.kafka.common.TopicPartition)3 TimestampType (org.apache.kafka.common.record.TimestampType)3 Schema (org.apache.kafka.connect.data.Schema)3 SinkRecord (org.apache.kafka.connect.sink.SinkRecord)3 PulsarClientException (org.apache.pulsar.client.api.PulsarClientException)3 SchemaInfoProvider (org.apache.pulsar.client.api.schema.SchemaInfoProvider)3 AutoConsumeSchema (org.apache.pulsar.client.impl.schema.AutoConsumeSchema)3 GenericAvroSchema (org.apache.pulsar.client.impl.schema.generic.GenericAvroSchema)3 PulsarSchemaToKafkaSchema (org.apache.pulsar.io.kafka.connect.schema.PulsarSchemaToKafkaSchema)3