Search in sources :

Example 1 with Source

use of org.apache.pulsar.io.core.Source in project pulsar by apache.

the class JavaInstanceRunnable method setupInput.

/**
 * Creates the {@link Source} configured in the function details, verifies that it
 * implements {@link Source} and opens it.
 *
 * <p>When the source spec carries no class name, a Pulsar source is built from the
 * input specs; otherwise the named class is instantiated reflectively. While the
 * source is being opened the thread context class loader is switched to the function
 * class loader (only for components of type SOURCE) and it is always reset to the
 * instance class loader afterwards.
 *
 * @param contextImpl context handed to {@code Source.open}; it also supplies the
 *                    subscription type and receives the input consumers of a
 *                    {@link PulsarSource}
 * @throws IllegalArgumentException if a topics pattern is set but has no entry among
 *                                  the configured inputs
 * @throws RuntimeException         if the created object is not a {@link Source}
 * @throws Exception                any failure raised while creating or opening the source
 */
private void setupInput(ContextImpl contextImpl) throws Exception {
    SourceSpec sourceSpec = this.instanceConfig.getFunctionDetails().getSource();
    Object object;
    if (sourceSpec.getClassName().isEmpty()) {
        // If source classname is not set, we default pulsar source
        object = createDefaultPulsarSource(sourceSpec, contextImpl);
    } else if (sourceSpec.getClassName().equals(BatchSourceExecutor.class.getName())) {
        // the batch source executor is instantiated with the instance class loader
        object = Reflections.createInstance(sourceSpec.getClassName(), this.instanceClassLoader);
    } else {
        object = Reflections.createInstance(sourceSpec.getClassName(), this.functionClassLoader);
    }
    if (!(object instanceof Source)) {
        throw new RuntimeException("Source does not implement correct interface");
    }
    Class<?>[] typeArgs = TypeResolver.resolveRawArguments(Source.class, object.getClass());
    assert typeArgs.length > 0;
    this.source = (Source<?>) object;
    if (componentType == org.apache.pulsar.functions.proto.Function.FunctionDetails.ComponentType.SOURCE) {
        Thread.currentThread().setContextClassLoader(this.functionClassLoader);
    }
    try {
        if (sourceSpec.getConfigs().isEmpty()) {
            this.source.open(new HashMap<>(), contextImpl);
        } else {
            // the configs are a JSON document that is handed to the source as a map
            Map<String, Object> sourceConfigs = ObjectMapperFactory.getThreadLocal().readValue(sourceSpec.getConfigs(), new TypeReference<Map<String, Object>>() {
            });
            this.source.open(sourceConfigs, contextImpl);
        }
        if (this.source instanceof PulsarSource) {
            contextImpl.setInputConsumers(((PulsarSource) this.source).getInputConsumers());
        }
    } catch (Exception e) {
        log.error("Source open produced uncaught exception: ", e);
        throw e;
    } finally {
        // always hand the thread back with the instance class loader installed
        Thread.currentThread().setContextClassLoader(this.instanceClassLoader);
    }
}

/**
 * Builds the per-topic consumer configuration from the input specs, the
 * topic-to-serde map and the topics pattern of the given source spec.
 *
 * @param sourceSpec source spec to read the inputs from
 * @return consumer configurations keyed by topic name (or topic pattern), sorted by key
 * @throws IllegalArgumentException if a topics pattern is set but no input is registered under it
 */
private Map<String, ConsumerConfig> buildTopicSchema(SourceSpec sourceSpec) {
    Map<String, ConsumerConfig> topicSchema = new TreeMap<>();
    sourceSpec.getInputSpecsMap().forEach((topic, conf) -> {
        ConsumerConfig consumerConfig = ConsumerConfig.builder().isRegexPattern(conf.getIsRegexPattern()).build();
        // a schema type takes precedence over a serde class name
        if (conf.getSchemaType() != null && !conf.getSchemaType().isEmpty()) {
            consumerConfig.setSchemaType(conf.getSchemaType());
        } else if (conf.getSerdeClassName() != null && !conf.getSerdeClassName().isEmpty()) {
            consumerConfig.setSerdeClassName(conf.getSerdeClassName());
        }
        consumerConfig.setSchemaProperties(conf.getSchemaPropertiesMap());
        consumerConfig.setConsumerProperties(conf.getConsumerPropertiesMap());
        if (conf.hasReceiverQueueSize()) {
            consumerConfig.setReceiverQueueSize(conf.getReceiverQueueSize().getValue());
        }
        if (conf.hasCryptoSpec()) {
            consumerConfig.setCryptoConfig(CryptoUtils.convertFromSpec(conf.getCryptoSpec()));
        }
        consumerConfig.setPoolMessages(conf.getPoolMessages());
        topicSchema.put(topic, consumerConfig);
    });
    // entries of the topic-to-serde map overwrite input specs registered for the same topic
    sourceSpec.getTopicsToSerDeClassNameMap().forEach((topic, serde) -> {
        topicSchema.put(topic, ConsumerConfig.builder().serdeClassName(serde).isRegexPattern(false).build());
    });
    String topicsPattern = sourceSpec.getTopicsPattern();
    if (!StringUtils.isEmpty(topicsPattern)) {
        ConsumerConfig patternConfig = topicSchema.get(topicsPattern);
        if (patternConfig == null) {
            // fail with a readable message instead of a bare NullPointerException
            throw new IllegalArgumentException("Topics pattern '" + topicsPattern + "' has no matching entry in the source inputs");
        }
        patternConfig.setRegexPattern(true);
    }
    return topicSchema;
}

/**
 * Creates the Pulsar source used when the source spec names no class: a
 * {@link SingleConsumerPulsarSource} when exactly one topic entry is configured,
 * a {@link MultiConsumerPulsarSource} otherwise.
 *
 * @param sourceSpec  source spec describing the inputs and the subscription
 * @param contextImpl context supplying the subscription type (must not be null)
 * @return the newly created, not yet opened, Pulsar source
 * @throws Exception any failure raised while building the configuration or the source
 */
private Object createDefaultPulsarSource(SourceSpec sourceSpec, ContextImpl contextImpl) throws Exception {
    Map<String, ConsumerConfig> topicSchema = buildTopicSchema(sourceSpec);
    PulsarSourceConfig pulsarSourceConfig;
    if (topicSchema.size() == 1) {
        // we can use a single consumer to read
        SingleConsumerPulsarSourceConfig singleConsumerPulsarSourceConfig = new SingleConsumerPulsarSourceConfig();
        Map.Entry<String, ConsumerConfig> entry = topicSchema.entrySet().iterator().next();
        singleConsumerPulsarSourceConfig.setTopic(entry.getKey());
        singleConsumerPulsarSourceConfig.setConsumerConfig(entry.getValue());
        pulsarSourceConfig = singleConsumerPulsarSourceConfig;
    } else {
        MultiConsumerPulsarSourceConfig multiConsumerPulsarSourceConfig = new MultiConsumerPulsarSourceConfig();
        multiConsumerPulsarSourceConfig.setTopicSchema(topicSchema);
        pulsarSourceConfig = multiConsumerPulsarSourceConfig;
    }
    // fall back to the default subscription name when none is configured
    pulsarSourceConfig.setSubscriptionName(StringUtils.isNotBlank(sourceSpec.getSubscriptionName()) ? sourceSpec.getSubscriptionName() : InstanceUtils.getDefaultSubscriptionName(instanceConfig.getFunctionDetails()));
    pulsarSourceConfig.setProcessingGuarantees(FunctionConfig.ProcessingGuarantees.valueOf(this.instanceConfig.getFunctionDetails().getProcessingGuarantees().name()));
    pulsarSourceConfig.setSubscriptionPosition(convertFromFunctionDetailsSubscriptionPosition(sourceSpec.getSubscriptionPosition()));
    checkNotNull(contextImpl.getSubscriptionType());
    pulsarSourceConfig.setSubscriptionType(contextImpl.getSubscriptionType());
    pulsarSourceConfig.setTypeClassName(sourceSpec.getTypeClassName());
    // non-positive values leave the defaults of the source config untouched
    if (sourceSpec.getTimeoutMs() > 0) {
        pulsarSourceConfig.setTimeoutMs(sourceSpec.getTimeoutMs());
    }
    if (sourceSpec.getNegativeAckRedeliveryDelayMs() > 0) {
        pulsarSourceConfig.setNegativeAckRedeliveryDelayMs(sourceSpec.getNegativeAckRedeliveryDelayMs());
    }
    if (this.instanceConfig.getFunctionDetails().hasRetryDetails()) {
        pulsarSourceConfig.setMaxMessageRetries(this.instanceConfig.getFunctionDetails().getRetryDetails().getMaxMessageRetries());
        pulsarSourceConfig.setDeadLetterTopic(this.instanceConfig.getFunctionDetails().getRetryDetails().getDeadLetterTopic());
    }
    // pick the source implementation that matches the config type chosen above
    if (pulsarSourceConfig instanceof SingleConsumerPulsarSourceConfig) {
        return new SingleConsumerPulsarSource(this.client, (SingleConsumerPulsarSourceConfig) pulsarSourceConfig, this.properties, this.functionClassLoader);
    }
    return new MultiConsumerPulsarSource(this.client, (MultiConsumerPulsarSourceConfig) pulsarSourceConfig, this.properties, this.functionClassLoader);
}
Also used : SourceSpec(org.apache.pulsar.functions.proto.Function.SourceSpec) PulsarSource(org.apache.pulsar.functions.source.PulsarSource) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) PulsarSourceConfig(org.apache.pulsar.functions.source.PulsarSourceConfig) SingleConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.SingleConsumerPulsarSourceConfig) MultiConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.MultiConsumerPulsarSourceConfig) TreeMap(java.util.TreeMap) MultiConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.MultiConsumerPulsarSourceConfig) PulsarSource(org.apache.pulsar.functions.source.PulsarSource) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) Source(org.apache.pulsar.io.core.Source) PulsarClientException(org.apache.pulsar.client.api.PulsarClientException) IOException(java.io.IOException) SingleConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.SingleConsumerPulsarSourceConfig) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) BatchSourceExecutor(org.apache.pulsar.functions.source.batch.BatchSourceExecutor) ConsumerConfig(org.apache.pulsar.common.functions.ConsumerConfig) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource)

Example 2 with Source

use of org.apache.pulsar.io.core.Source in project cdc-apache-cassandra by datastax.

the class CassandraSource method batchRead.

/**
 * Reads a batch of mutation messages from the consumer and resolves each one
 * through an ordered per-key task that queries Cassandra.
 *
 * <p>The method keeps polling (one second per attempt) until at least one message
 * has been received, and stops as soon as either the configured batch size is
 * reached or a poll returns nothing while the buffer is non-empty. Mutations that
 * the mutation cache reports as already processed are acknowledged immediately and
 * left out of the result.
 *
 * <p>When a query fails with a read timeout, an overload or an all-nodes-failed
 * error, every buffered message is negatively acknowledged, a backoff is applied
 * and an empty list is returned. Any other failure negatively acknowledges (where
 * the buffer is reachable) and is rethrown.
 *
 * @return the records whose query produced a result; empty after a retriable failure
 * @throws Exception on any non-retriable failure
 */
@SuppressWarnings("unchecked")
private List<MyKVRecord> batchRead() throws Exception {
    batchTotalLatency.set(0);
    batchTotalQuery.set(0);
    List<MyKVRecord> newRecords = new ArrayList<>();
    if (this.queryExecutors == null) {
        initQueryExecutors();
    }
    try {
        maybeInitCassandraClient();
        // this method will block until we receive at least one record
        while (newRecords.size() < this.config.getBatchSize()) {
            final Message<KeyValue<GenericRecord, MutationValue>> msg = consumer.receive(1, TimeUnit.SECONDS);
            if (msg == null) {
                if (!newRecords.isEmpty()) {
                    log.debug("no message received, buffer size {}", newRecords.size());
                    // no more records within the timeout, but we have at least one record
                    break;
                } else {
                    log.debug("no message received");
                    continue;
                }
            }
            final KeyValue<GenericRecord, MutationValue> kv = msg.getValue();
            final GenericRecord mutationKey = kv.getKey();
            final MutationValue mutationValue = kv.getValue();
            log.debug("Message from producer={} msgId={} key={} value={} schema {}\n", msg.getProducerName(), msg.getMessageId(), kv.getKey(), kv.getValue(), msg.getReaderSchema().orElse(null));
            List<Object> pk = (List<Object>) mutationKeyConverter.fromConnectData(mutationKey.getNativeObject());
            // ensure the schema is the one used when building the struct.
            final ConverterAndQuery converterAndQueryFinal = this.valueConverterAndQuery;
            CompletableFuture<KeyValue<Object, Object>> queryResult = new CompletableFuture<>();
            // we have to process sequentially the records from the same key
            // otherwise our mutation cache will not be enough efficient
            // in deduplicating mutations coming from different nodes
            executeOrdered(msg.getKey(), () -> {
                try {
                    if (mutationCache.isMutationProcessed(msg.getKey(), mutationValue.getMd5Digest())) {
                        log.debug("Message key={} md5={} already processed", msg.getKey(), mutationValue.getMd5Digest());
                        // ignore duplicated mutation
                        consumer.acknowledge(msg);
                        // a null result marks the record as discarded for the caller
                        queryResult.complete(null);
                        recordMutationCacheMetrics();
                        sourceContext.recordMetric(QUERY_LATENCY, 0);
                        sourceContext.recordMetric(QUERY_EXECUTORS, queryExecutors.size());
                        if (msg.hasProperty(Constants.WRITETIME)) {
                            // the write time property is divided by 1000 to compare it with milliseconds
                            sourceContext.recordMetric(REPLICATION_LATENCY, System.currentTimeMillis() - (Long.parseLong(msg.getProperty(Constants.WRITETIME)) / 1000L));
                        }
                        return null;
                    }
                    List<Object> nonNullPkValues = pk.stream().filter(e -> e != null).collect(Collectors.toList());
                    long start = System.currentTimeMillis();
                    Tuple3<Row, ConsistencyLevel, UUID> tuple = cassandraClient.selectRow(nonNullPkValues, mutationValue.getNodeId(), Lists.newArrayList(ConsistencyLevel.LOCAL_QUORUM, ConsistencyLevel.LOCAL_ONE), getSelectStatement(converterAndQueryFinal, nonNullPkValues.size()), mutationValue.getMd5Digest());
                    recordMutationCacheMetrics();
                    long end = System.currentTimeMillis();
                    sourceContext.recordMetric(QUERY_LATENCY, end - start);
                    sourceContext.recordMetric(QUERY_EXECUTORS, queryExecutors.size());
                    batchTotalLatency.addAndGet(end - start);
                    batchTotalQuery.incrementAndGet();
                    if (msg.hasProperty(Constants.WRITETIME)) {
                        sourceContext.recordMetric(REPLICATION_LATENCY, end - (Long.parseLong(msg.getProperty(Constants.WRITETIME)) / 1000L));
                    }
                    // a missing row yields a null value
                    Object value = tuple._1 == null ? null : converterAndQueryFinal.getConverter().toConnectData(tuple._1);
                    if (ConsistencyLevel.LOCAL_QUORUM.equals(tuple._2()) && (!config.getCacheOnlyIfCoordinatorMatch() || (tuple._3 != null && tuple._3.equals(mutationValue.getNodeId())))) {
                        log.debug("Caching mutation key={} md5={} pk={}", msg.getKey(), mutationValue.getMd5Digest(), nonNullPkValues);
                        // cache the mutation digest if the coordinator is the source of this event.
                        mutationCache.addMutationMd5(msg.getKey(), mutationValue.getMd5Digest());
                    } else {
                        log.debug("Not caching mutation key={} md5={} pk={} CL={} coordinator={}", msg.getKey(), mutationValue.getMd5Digest(), nonNullPkValues, tuple._2(), tuple._3());
                    }
                    queryResult.complete(new KeyValue<>(msg.getKeyBytes(), value));
                } catch (Throwable err) {
                    // surface the failure to the thread that joins on the future
                    queryResult.completeExceptionally(err);
                }
                return null;
            });
            final MyKVRecord record = new MyKVRecord(converterAndQueryFinal, queryResult, msg);
            newRecords.add(record);
        }
        Preconditions.checkState(!newRecords.isEmpty(), "Buffer cannot be empty here");
        List<MyKVRecord> usefulRecords = new ArrayList<>(newRecords.size());
        int cacheHits = 0;
        long start = System.currentTimeMillis();
        // wait for all queries to complete
        for (MyKVRecord record : newRecords) {
            if (record.keyValue.join() != null) {
                usefulRecords.add(record);
            } else {
                // if the result is "null" the mutation has been discarded
                cacheHits++;
            }
        }
        long duration = System.currentTimeMillis() - start;
        long throughput = duration > 0 ? (1000L * newRecords.size()) / duration : 0;
        log.debug("Query time for {} msg in {} ms throughput={} msg/s cacheHits={}", newRecords.size(), duration, throughput, cacheHits);
        if (batchTotalQuery.get() > 0) {
            adjustExecutors();
        }
        consecutiveUnavailableException = 0;
        return usefulRecords;
    } catch (CompletionException e) {
        // unwrap the failure reported by the query future
        Throwable e2 = e.getCause();
        if (e2 instanceof ExecutionException) {
            e2 = e2.getCause();
        }
        log.info("CompletionException cause:", e2);
        if (e2 instanceof com.datastax.oss.driver.api.core.servererrors.ReadTimeoutException || e2 instanceof com.datastax.oss.driver.api.core.servererrors.OverloadedException) {
            decreaseExecutors(e2);
        } else if (e2 instanceof com.datastax.oss.driver.api.core.AllNodesFailedException) {
            // just retry
        } else {
            // the trailing throwable is logged with its stack trace, the others fill the placeholders
            log.warn("Unexpected exception class={} message={} cause={}", e.getClass(), e.getMessage(), e.getCause(), e);
            throw e;
        }
        // fail every message in the buffer
        negativeAcknowledgeAll(newRecords);
        backoffRetry(e2);
        return Collections.emptyList();
    } catch (com.datastax.oss.driver.api.core.AllNodesFailedException e) {
        log.info("AllNodesFailedException:", e);
        // fail every message in the buffer
        negativeAcknowledgeAll(newRecords);
        backoffRetry(e);
        return Collections.emptyList();
    } catch (Throwable e) {
        log.error("Unrecoverable error:", e);
        negativeAcknowledgeAll(newRecords);
        throw e;
    }
}

/** Publishes the current hit, miss, eviction and size figures of the mutation cache as metrics. */
private void recordMutationCacheMetrics() {
    CacheStats cacheStats = mutationCache.stats();
    sourceContext.recordMetric(CACHE_HITS, cacheStats.hitCount());
    sourceContext.recordMetric(CACHE_MISSES, cacheStats.missCount());
    sourceContext.recordMetric(CACHE_EVICTIONS, cacheStats.evictionCount());
    sourceContext.recordMetric(CACHE_SIZE, mutationCache.estimatedSize());
}

/** Negatively acknowledges the message of every given record on the consumer. */
private void negativeAcknowledgeAll(List<MyKVRecord> records) {
    for (MyKVRecord record : records) {
        negativeAcknowledge(consumer, record.getMsg());
    }
}
Also used : Arrays(java.util.Arrays) CqlLogicalTypes(com.datastax.oss.cdc.CqlLogicalTypes) KVRecord(org.apache.pulsar.functions.api.KVRecord) SneakyThrows(lombok.SneakyThrows) SubscriptionMode(org.apache.pulsar.client.api.SubscriptionMode) SourceContext(org.apache.pulsar.io.core.SourceContext) Future(java.util.concurrent.Future) CassandraClient(com.datastax.oss.cdc.CassandraClient) Locale(java.util.Locale) NonNull(edu.umd.cs.findbugs.annotations.NonNull) Duration(java.time.Duration) Map(java.util.Map) ConsistencyLevel(com.datastax.oss.driver.api.core.ConsistencyLevel) KeySharedPolicy(org.apache.pulsar.client.api.KeySharedPolicy) Version(com.datastax.oss.cdc.Version) MutationValue(com.datastax.oss.cdc.MutationValue) Record(org.apache.pulsar.functions.api.Record) SpecificData(org.apache.avro.specific.SpecificData) Conversions(org.apache.avro.Conversions) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) CompletionException(java.util.concurrent.CompletionException) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Connector(org.apache.pulsar.io.core.annotations.Connector) InvocationTargetException(java.lang.reflect.InvocationTargetException) AggregateMetadata(com.datastax.oss.driver.api.core.metadata.schema.AggregateMetadata) Objects(java.util.Objects) Consumer(org.apache.pulsar.client.api.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) KeyspaceMetadata(com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata) Slf4j(lombok.extern.slf4j.Slf4j) List(java.util.List) IOType(org.apache.pulsar.io.core.annotations.IOType) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) CacheStats(com.github.benmanes.caffeine.cache.stats.CacheStats) ConsumerBuilder(org.apache.pulsar.client.api.ConsumerBuilder) Callable(java.util.concurrent.Callable) CompletableFuture(java.util.concurrent.CompletableFuture) 
Message(org.apache.pulsar.client.api.Message) ViewMetadata(com.datastax.oss.driver.api.core.metadata.schema.ViewMetadata) SubscriptionInitialPosition(org.apache.pulsar.client.api.SubscriptionInitialPosition) ArrayList(java.util.ArrayList) KeyValue(org.apache.pulsar.common.schema.KeyValue) Strings(com.google.common.base.Strings) Lists(com.google.common.collect.Lists) NativeAvroConverter(com.datastax.oss.pulsar.source.converters.NativeAvroConverter) KeyValueEncodingType(org.apache.pulsar.common.schema.KeyValueEncodingType) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) MutationCache(com.datastax.oss.cdc.MutationCache) Source(org.apache.pulsar.io.core.Source) Row(com.datastax.oss.driver.api.core.cql.Row) ExecutorService(java.util.concurrent.ExecutorService) CassandraSourceConnectorConfig(com.datastax.oss.cdc.CassandraSourceConnectorConfig) TableMetadata(com.datastax.oss.driver.api.core.metadata.schema.TableMetadata) ConfigUtil(com.datastax.oss.cdc.ConfigUtil) PreparedStatement(com.datastax.oss.driver.api.core.cql.PreparedStatement) SubscriptionType(org.apache.pulsar.client.api.SubscriptionType) Constants(com.datastax.oss.cdc.Constants) ColumnMetadata(com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata) Schema(org.apache.pulsar.client.api.Schema) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) UserDefinedType(com.datastax.oss.driver.api.core.type.UserDefinedType) AtomicLong(java.util.concurrent.atomic.AtomicLong) Tuple2(io.vavr.Tuple2) SchemaChangeListener(com.datastax.oss.driver.api.core.metadata.schema.SchemaChangeListener) Tuple3(io.vavr.Tuple3) FunctionMetadata(com.datastax.oss.driver.api.core.metadata.schema.FunctionMetadata) Preconditions(com.google.common.base.Preconditions) Collections(java.util.Collections) KeyValue(org.apache.pulsar.common.schema.KeyValue) ArrayList(java.util.ArrayList) 
ConsistencyLevel(com.datastax.oss.driver.api.core.ConsistencyLevel) CompletableFuture(java.util.concurrent.CompletableFuture) List(java.util.List) ArrayList(java.util.ArrayList) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) UUID(java.util.UUID) ExecutionException(java.util.concurrent.ExecutionException) MutationValue(com.datastax.oss.cdc.MutationValue) CompletionException(java.util.concurrent.CompletionException) CacheStats(com.github.benmanes.caffeine.cache.stats.CacheStats) Row(com.datastax.oss.driver.api.core.cql.Row)

Example 3 with Source

use of org.apache.pulsar.io.core.Source in project incubator-pulsar by apache.

the class JavaInstanceRunnable method setupInput.

/**
 * Creates the {@link Source} configured in the function details, verifies that it
 * implements {@link Source} and opens it.
 *
 * <p>When the source spec carries no class name, a Pulsar source is built from the
 * input specs; otherwise the named class is instantiated reflectively. While the
 * source is being opened the thread context class loader is switched to the function
 * class loader (only for components of type SOURCE) and it is always reset to the
 * instance class loader afterwards.
 *
 * @param contextImpl context handed to {@code Source.open}; it also supplies the
 *                    subscription type and receives the input consumers of a
 *                    {@link PulsarSource}
 * @throws IllegalArgumentException if a topics pattern is set but has no entry among
 *                                  the configured inputs
 * @throws RuntimeException         if the created object is not a {@link Source}
 * @throws Exception                any failure raised while creating or opening the source
 */
private void setupInput(ContextImpl contextImpl) throws Exception {
    SourceSpec sourceSpec = this.instanceConfig.getFunctionDetails().getSource();
    Object object;
    if (sourceSpec.getClassName().isEmpty()) {
        // If source classname is not set, we default pulsar source
        object = createDefaultPulsarSource(sourceSpec, contextImpl);
    } else if (sourceSpec.getClassName().equals(BatchSourceExecutor.class.getName())) {
        // the batch source executor is instantiated with the instance class loader
        object = Reflections.createInstance(sourceSpec.getClassName(), this.instanceClassLoader);
    } else {
        object = Reflections.createInstance(sourceSpec.getClassName(), this.functionClassLoader);
    }
    if (!(object instanceof Source)) {
        throw new RuntimeException("Source does not implement correct interface");
    }
    Class<?>[] typeArgs = TypeResolver.resolveRawArguments(Source.class, object.getClass());
    assert typeArgs.length > 0;
    this.source = (Source<?>) object;
    if (componentType == org.apache.pulsar.functions.proto.Function.FunctionDetails.ComponentType.SOURCE) {
        Thread.currentThread().setContextClassLoader(this.functionClassLoader);
    }
    try {
        if (sourceSpec.getConfigs().isEmpty()) {
            this.source.open(new HashMap<>(), contextImpl);
        } else {
            // the configs are a JSON document that is handed to the source as a map
            Map<String, Object> sourceConfigs = ObjectMapperFactory.getThreadLocal().readValue(sourceSpec.getConfigs(), new TypeReference<Map<String, Object>>() {
            });
            this.source.open(sourceConfigs, contextImpl);
        }
        if (this.source instanceof PulsarSource) {
            contextImpl.setInputConsumers(((PulsarSource) this.source).getInputConsumers());
        }
    } catch (Exception e) {
        log.error("Source open produced uncaught exception: ", e);
        throw e;
    } finally {
        // always hand the thread back with the instance class loader installed
        Thread.currentThread().setContextClassLoader(this.instanceClassLoader);
    }
}

/**
 * Builds the per-topic consumer configuration from the input specs, the
 * topic-to-serde map and the topics pattern of the given source spec.
 *
 * @param sourceSpec source spec to read the inputs from
 * @return consumer configurations keyed by topic name (or topic pattern), sorted by key
 * @throws IllegalArgumentException if a topics pattern is set but no input is registered under it
 */
private Map<String, ConsumerConfig> buildTopicSchema(SourceSpec sourceSpec) {
    Map<String, ConsumerConfig> topicSchema = new TreeMap<>();
    sourceSpec.getInputSpecsMap().forEach((topic, conf) -> {
        ConsumerConfig consumerConfig = ConsumerConfig.builder().isRegexPattern(conf.getIsRegexPattern()).build();
        // a schema type takes precedence over a serde class name
        if (conf.getSchemaType() != null && !conf.getSchemaType().isEmpty()) {
            consumerConfig.setSchemaType(conf.getSchemaType());
        } else if (conf.getSerdeClassName() != null && !conf.getSerdeClassName().isEmpty()) {
            consumerConfig.setSerdeClassName(conf.getSerdeClassName());
        }
        consumerConfig.setSchemaProperties(conf.getSchemaPropertiesMap());
        consumerConfig.setConsumerProperties(conf.getConsumerPropertiesMap());
        if (conf.hasReceiverQueueSize()) {
            consumerConfig.setReceiverQueueSize(conf.getReceiverQueueSize().getValue());
        }
        if (conf.hasCryptoSpec()) {
            consumerConfig.setCryptoConfig(CryptoUtils.convertFromSpec(conf.getCryptoSpec()));
        }
        consumerConfig.setPoolMessages(conf.getPoolMessages());
        topicSchema.put(topic, consumerConfig);
    });
    // entries of the topic-to-serde map overwrite input specs registered for the same topic
    sourceSpec.getTopicsToSerDeClassNameMap().forEach((topic, serde) -> {
        topicSchema.put(topic, ConsumerConfig.builder().serdeClassName(serde).isRegexPattern(false).build());
    });
    String topicsPattern = sourceSpec.getTopicsPattern();
    if (!StringUtils.isEmpty(topicsPattern)) {
        ConsumerConfig patternConfig = topicSchema.get(topicsPattern);
        if (patternConfig == null) {
            // fail with a readable message instead of a bare NullPointerException
            throw new IllegalArgumentException("Topics pattern '" + topicsPattern + "' has no matching entry in the source inputs");
        }
        patternConfig.setRegexPattern(true);
    }
    return topicSchema;
}

/**
 * Creates the Pulsar source used when the source spec names no class: a
 * {@link SingleConsumerPulsarSource} when exactly one topic entry is configured,
 * a {@link MultiConsumerPulsarSource} otherwise.
 *
 * @param sourceSpec  source spec describing the inputs and the subscription
 * @param contextImpl context supplying the subscription type (must not be null)
 * @return the newly created, not yet opened, Pulsar source
 * @throws Exception any failure raised while building the configuration or the source
 */
private Object createDefaultPulsarSource(SourceSpec sourceSpec, ContextImpl contextImpl) throws Exception {
    Map<String, ConsumerConfig> topicSchema = buildTopicSchema(sourceSpec);
    PulsarSourceConfig pulsarSourceConfig;
    if (topicSchema.size() == 1) {
        // we can use a single consumer to read
        SingleConsumerPulsarSourceConfig singleConsumerPulsarSourceConfig = new SingleConsumerPulsarSourceConfig();
        Map.Entry<String, ConsumerConfig> entry = topicSchema.entrySet().iterator().next();
        singleConsumerPulsarSourceConfig.setTopic(entry.getKey());
        singleConsumerPulsarSourceConfig.setConsumerConfig(entry.getValue());
        pulsarSourceConfig = singleConsumerPulsarSourceConfig;
    } else {
        MultiConsumerPulsarSourceConfig multiConsumerPulsarSourceConfig = new MultiConsumerPulsarSourceConfig();
        multiConsumerPulsarSourceConfig.setTopicSchema(topicSchema);
        pulsarSourceConfig = multiConsumerPulsarSourceConfig;
    }
    // fall back to the default subscription name when none is configured
    pulsarSourceConfig.setSubscriptionName(StringUtils.isNotBlank(sourceSpec.getSubscriptionName()) ? sourceSpec.getSubscriptionName() : InstanceUtils.getDefaultSubscriptionName(instanceConfig.getFunctionDetails()));
    pulsarSourceConfig.setProcessingGuarantees(FunctionConfig.ProcessingGuarantees.valueOf(this.instanceConfig.getFunctionDetails().getProcessingGuarantees().name()));
    pulsarSourceConfig.setSubscriptionPosition(convertFromFunctionDetailsSubscriptionPosition(sourceSpec.getSubscriptionPosition()));
    checkNotNull(contextImpl.getSubscriptionType());
    pulsarSourceConfig.setSubscriptionType(contextImpl.getSubscriptionType());
    pulsarSourceConfig.setTypeClassName(sourceSpec.getTypeClassName());
    // non-positive values leave the defaults of the source config untouched
    if (sourceSpec.getTimeoutMs() > 0) {
        pulsarSourceConfig.setTimeoutMs(sourceSpec.getTimeoutMs());
    }
    if (sourceSpec.getNegativeAckRedeliveryDelayMs() > 0) {
        pulsarSourceConfig.setNegativeAckRedeliveryDelayMs(sourceSpec.getNegativeAckRedeliveryDelayMs());
    }
    if (this.instanceConfig.getFunctionDetails().hasRetryDetails()) {
        pulsarSourceConfig.setMaxMessageRetries(this.instanceConfig.getFunctionDetails().getRetryDetails().getMaxMessageRetries());
        pulsarSourceConfig.setDeadLetterTopic(this.instanceConfig.getFunctionDetails().getRetryDetails().getDeadLetterTopic());
    }
    // pick the source implementation that matches the config type chosen above
    if (pulsarSourceConfig instanceof SingleConsumerPulsarSourceConfig) {
        return new SingleConsumerPulsarSource(this.client, (SingleConsumerPulsarSourceConfig) pulsarSourceConfig, this.properties, this.functionClassLoader);
    }
    return new MultiConsumerPulsarSource(this.client, (MultiConsumerPulsarSourceConfig) pulsarSourceConfig, this.properties, this.functionClassLoader);
}
Also used : SourceSpec(org.apache.pulsar.functions.proto.Function.SourceSpec) PulsarSource(org.apache.pulsar.functions.source.PulsarSource) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) PulsarSourceConfig(org.apache.pulsar.functions.source.PulsarSourceConfig) SingleConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.SingleConsumerPulsarSourceConfig) MultiConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.MultiConsumerPulsarSourceConfig) TreeMap(java.util.TreeMap) MultiConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.MultiConsumerPulsarSourceConfig) PulsarSource(org.apache.pulsar.functions.source.PulsarSource) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) Source(org.apache.pulsar.io.core.Source) PulsarClientException(org.apache.pulsar.client.api.PulsarClientException) IOException(java.io.IOException) SingleConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.SingleConsumerPulsarSourceConfig) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) BatchSourceExecutor(org.apache.pulsar.functions.source.batch.BatchSourceExecutor) ConsumerConfig(org.apache.pulsar.common.functions.ConsumerConfig) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource)

Example 4 with Source

use of org.apache.pulsar.io.core.Source in project pulsar by yahoo.

the class JavaInstanceRunnable method setupInput.

/**
 * Creates the input {@link Source} for this instance, stores it in {@code this.source} and opens it.
 *
 * <p>When the source spec carries no class name, a Pulsar-backed source is built from the input specs,
 * the topic-to-serde map and the optional topics pattern: a {@link SingleConsumerPulsarSource} when
 * exactly one topic entry results, a {@link MultiConsumerPulsarSource} otherwise. When a class name is
 * present, the class is instantiated reflectively, using the instance class loader for
 * {@link BatchSourceExecutor} and the function class loader for everything else.
 *
 * <p>On exit from the open step (normal or exceptional) the thread context class loader is set to
 * the instance class loader.
 *
 * @param contextImpl context handed to {@code Source.open}; its subscription type must be non-null when
 *                    the default Pulsar source is used, and it receives the input consumers when the
 *                    opened source is a {@link PulsarSource}
 * @throws IllegalStateException if a topics pattern is configured but has no entry among the collected
 *                               topic configurations
 * @throws RuntimeException      if the created object does not implement {@link Source}
 * @throws Exception             anything raised while instantiating or opening the source; failures
 *                               from the open step are logged and rethrown unchanged
 */
private void setupInput(ContextImpl contextImpl) throws Exception {
    SourceSpec sourceSpec = this.instanceConfig.getFunctionDetails().getSource();
    Object object;
    // If source classname is not set, we default pulsar source
    if (sourceSpec.getClassName().isEmpty()) {
        // Sorted map of topic (or topic pattern) -> consumer settings.
        Map<String, ConsumerConfig> topicSchema = new TreeMap<>();
        sourceSpec.getInputSpecsMap().forEach((topic, conf) -> {
            ConsumerConfig consumerConfig = ConsumerConfig.builder().isRegexPattern(conf.getIsRegexPattern()).build();
            // A non-empty schema type takes precedence over a serde class name.
            if (conf.getSchemaType() != null && !conf.getSchemaType().isEmpty()) {
                consumerConfig.setSchemaType(conf.getSchemaType());
            } else if (conf.getSerdeClassName() != null && !conf.getSerdeClassName().isEmpty()) {
                consumerConfig.setSerdeClassName(conf.getSerdeClassName());
            }
            consumerConfig.setSchemaProperties(conf.getSchemaPropertiesMap());
            consumerConfig.setConsumerProperties(conf.getConsumerPropertiesMap());
            if (conf.hasReceiverQueueSize()) {
                consumerConfig.setReceiverQueueSize(conf.getReceiverQueueSize().getValue());
            }
            if (conf.hasCryptoSpec()) {
                consumerConfig.setCryptoConfig(CryptoUtils.convertFromSpec(conf.getCryptoSpec()));
            }
            consumerConfig.setPoolMessages(conf.getPoolMessages());
            topicSchema.put(topic, consumerConfig);
        });
        // Entries from the topic -> serde map are applied second, so they replace any input spec
        // registered above for the same topic.
        sourceSpec.getTopicsToSerDeClassNameMap().forEach((topic, serde) -> {
            topicSchema.put(topic, ConsumerConfig.builder().serdeClassName(serde).isRegexPattern(false).build());
        });
        if (!StringUtils.isEmpty(sourceSpec.getTopicsPattern())) {
            // The pattern is expected to be registered by one of the two maps above. Fail with a
            // descriptive error instead of a bare NullPointerException when it is not.
            ConsumerConfig patternConfig = topicSchema.get(sourceSpec.getTopicsPattern());
            if (patternConfig == null) {
                throw new IllegalStateException("Topics pattern '" + sourceSpec.getTopicsPattern()
                        + "' has no entry in the input specs or the topic-to-serde map");
            }
            patternConfig.setRegexPattern(true);
        }
        PulsarSourceConfig pulsarSourceConfig;
        // With exactly one topic entry we can use a single consumer to read
        if (topicSchema.size() == 1) {
            SingleConsumerPulsarSourceConfig singleConsumerPulsarSourceConfig = new SingleConsumerPulsarSourceConfig();
            Map.Entry<String, ConsumerConfig> entry = topicSchema.entrySet().iterator().next();
            singleConsumerPulsarSourceConfig.setTopic(entry.getKey());
            singleConsumerPulsarSourceConfig.setConsumerConfig(entry.getValue());
            pulsarSourceConfig = singleConsumerPulsarSourceConfig;
        } else {
            MultiConsumerPulsarSourceConfig multiConsumerPulsarSourceConfig = new MultiConsumerPulsarSourceConfig();
            multiConsumerPulsarSourceConfig.setTopicSchema(topicSchema);
            pulsarSourceConfig = multiConsumerPulsarSourceConfig;
        }
        // Fall back to the default subscription name when the spec's name is blank.
        pulsarSourceConfig.setSubscriptionName(StringUtils.isNotBlank(sourceSpec.getSubscriptionName()) ? sourceSpec.getSubscriptionName() : InstanceUtils.getDefaultSubscriptionName(instanceConfig.getFunctionDetails()));
        pulsarSourceConfig.setProcessingGuarantees(FunctionConfig.ProcessingGuarantees.valueOf(this.instanceConfig.getFunctionDetails().getProcessingGuarantees().name()));
        pulsarSourceConfig.setSubscriptionPosition(convertFromFunctionDetailsSubscriptionPosition(sourceSpec.getSubscriptionPosition()));
        checkNotNull(contextImpl.getSubscriptionType());
        pulsarSourceConfig.setSubscriptionType(contextImpl.getSubscriptionType());
        pulsarSourceConfig.setTypeClassName(sourceSpec.getTypeClassName());
        // Timeout and negative-ack delay are only applied when positive; otherwise the config keeps
        // whatever value it was constructed with.
        if (sourceSpec.getTimeoutMs() > 0) {
            pulsarSourceConfig.setTimeoutMs(sourceSpec.getTimeoutMs());
        }
        if (sourceSpec.getNegativeAckRedeliveryDelayMs() > 0) {
            pulsarSourceConfig.setNegativeAckRedeliveryDelayMs(sourceSpec.getNegativeAckRedeliveryDelayMs());
        }
        if (this.instanceConfig.getFunctionDetails().hasRetryDetails()) {
            pulsarSourceConfig.setMaxMessageRetries(this.instanceConfig.getFunctionDetails().getRetryDetails().getMaxMessageRetries());
            pulsarSourceConfig.setDeadLetterTopic(this.instanceConfig.getFunctionDetails().getRetryDetails().getDeadLetterTopic());
        }
        // Instantiate the source implementation that matches the config type chosen above.
        if (pulsarSourceConfig instanceof SingleConsumerPulsarSourceConfig) {
            object = new SingleConsumerPulsarSource(this.client, (SingleConsumerPulsarSourceConfig) pulsarSourceConfig, this.properties, this.functionClassLoader);
        } else {
            object = new MultiConsumerPulsarSource(this.client, (MultiConsumerPulsarSourceConfig) pulsarSourceConfig, this.properties, this.functionClassLoader);
        }
    } else {
        // check if source is a batch source
        if (sourceSpec.getClassName().equals(BatchSourceExecutor.class.getName())) {
            object = Reflections.createInstance(sourceSpec.getClassName(), this.instanceClassLoader);
        } else {
            object = Reflections.createInstance(sourceSpec.getClassName(), this.functionClassLoader);
        }
    }
    Class<?>[] typeArgs;
    if (object instanceof Source) {
        typeArgs = TypeResolver.resolveRawArguments(Source.class, object.getClass());
        assert typeArgs.length > 0;
    } else {
        throw new RuntimeException("Source does not implement correct interface");
    }
    this.source = (Source<?>) object;
    // Only SOURCE components switch the context class loader to the function class loader for open().
    if (componentType == org.apache.pulsar.functions.proto.Function.FunctionDetails.ComponentType.SOURCE) {
        Thread.currentThread().setContextClassLoader(this.functionClassLoader);
    }
    try {
        if (sourceSpec.getConfigs().isEmpty()) {
            this.source.open(new HashMap<>(), contextImpl);
        } else {
            // Non-empty configs are parsed as a JSON object into a String -> Object map.
            this.source.open(ObjectMapperFactory.getThreadLocal().readValue(sourceSpec.getConfigs(), new TypeReference<Map<String, Object>>() {
            }), contextImpl);
        }
        if (this.source instanceof PulsarSource) {
            contextImpl.setInputConsumers(((PulsarSource) this.source).getInputConsumers());
        }
    } catch (Exception e) {
        log.error("Source open produced uncaught exception: ", e);
        throw e;
    } finally {
        // Always leave the thread on the instance class loader, whether or not it was switched above.
        Thread.currentThread().setContextClassLoader(this.instanceClassLoader);
    }
}
Also used : SourceSpec(org.apache.pulsar.functions.proto.Function.SourceSpec) PulsarSource(org.apache.pulsar.functions.source.PulsarSource) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) PulsarSourceConfig(org.apache.pulsar.functions.source.PulsarSourceConfig) SingleConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.SingleConsumerPulsarSourceConfig) MultiConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.MultiConsumerPulsarSourceConfig) TreeMap(java.util.TreeMap) MultiConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.MultiConsumerPulsarSourceConfig) PulsarSource(org.apache.pulsar.functions.source.PulsarSource) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) Source(org.apache.pulsar.io.core.Source) PulsarClientException(org.apache.pulsar.client.api.PulsarClientException) IOException(java.io.IOException) SingleConsumerPulsarSourceConfig(org.apache.pulsar.functions.source.SingleConsumerPulsarSourceConfig) SingleConsumerPulsarSource(org.apache.pulsar.functions.source.SingleConsumerPulsarSource) BatchSourceExecutor(org.apache.pulsar.functions.source.batch.BatchSourceExecutor) ConsumerConfig(org.apache.pulsar.common.functions.ConsumerConfig) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) MultiConsumerPulsarSource(org.apache.pulsar.functions.source.MultiConsumerPulsarSource)

Aggregations

Map (java.util.Map)4 Source (org.apache.pulsar.io.core.Source)4 TypeReference (com.fasterxml.jackson.core.type.TypeReference)3 IOException (java.io.IOException)3 HashMap (java.util.HashMap)3 TreeMap (java.util.TreeMap)3 PulsarClientException (org.apache.pulsar.client.api.PulsarClientException)3 ConsumerConfig (org.apache.pulsar.common.functions.ConsumerConfig)3 SourceSpec (org.apache.pulsar.functions.proto.Function.SourceSpec)3 MultiConsumerPulsarSource (org.apache.pulsar.functions.source.MultiConsumerPulsarSource)3 MultiConsumerPulsarSourceConfig (org.apache.pulsar.functions.source.MultiConsumerPulsarSourceConfig)3 PulsarSource (org.apache.pulsar.functions.source.PulsarSource)3 PulsarSourceConfig (org.apache.pulsar.functions.source.PulsarSourceConfig)3 SingleConsumerPulsarSource (org.apache.pulsar.functions.source.SingleConsumerPulsarSource)3 SingleConsumerPulsarSourceConfig (org.apache.pulsar.functions.source.SingleConsumerPulsarSourceConfig)3 BatchSourceExecutor (org.apache.pulsar.functions.source.batch.BatchSourceExecutor)3 CassandraClient (com.datastax.oss.cdc.CassandraClient)1 CassandraSourceConnectorConfig (com.datastax.oss.cdc.CassandraSourceConnectorConfig)1 ConfigUtil (com.datastax.oss.cdc.ConfigUtil)1 Constants (com.datastax.oss.cdc.Constants)1