Example 1 with MutationValue

Use of com.datastax.oss.cdc.MutationValue in the project cdc-apache-cassandra by DataStax.

In the class CassandraSource, the method batchRead:

@SuppressWarnings("unchecked")
private List<MyKVRecord> batchRead() throws Exception {
    batchTotalLatency.set(0);
    batchTotalQuery.set(0);
    List<MyKVRecord> newRecords = new ArrayList<>();
    if (this.queryExecutors == null)
        initQueryExecutors();
    try {
        maybeInitCassandraClient();
        // this method will block until we receive at least one record
        while (newRecords.size() < this.config.getBatchSize()) {
            final Message<KeyValue<GenericRecord, MutationValue>> msg = consumer.receive(1, TimeUnit.SECONDS);
            if (msg == null) {
                if (!newRecords.isEmpty()) {
                    log.debug("no message received, buffer size {}", newRecords.size());
                    // no more records within the timeout, but we have at least one record
                    break;
                } else {
                    log.debug("no message received");
                    continue;
                }
            }
            final KeyValue<GenericRecord, MutationValue> kv = msg.getValue();
            final GenericRecord mutationKey = kv.getKey();
            final MutationValue mutationValue = kv.getValue();
            log.debug("Message from producer={} msgId={} key={} value={} schema {}\n", msg.getProducerName(), msg.getMessageId(), kv.getKey(), kv.getValue(), msg.getReaderSchema().orElse(null));
            List<Object> pk = (List<Object>) mutationKeyConverter.fromConnectData(mutationKey.getNativeObject());
            // ensure the schema is the one used when building the struct.
            final ConverterAndQuery converterAndQueryFinal = this.valueConverterAndQuery;
            CompletableFuture<KeyValue<Object, Object>> queryResult = new CompletableFuture<>();
            // records with the same key must be processed sequentially,
            // otherwise the mutation cache is not effective at deduplicating
            // mutations coming from different nodes
            executeOrdered(msg.getKey(), () -> {
                try {
                    if (mutationCache.isMutationProcessed(msg.getKey(), mutationValue.getMd5Digest())) {
                        log.debug("Message key={} md5={} already processed", msg.getKey(), mutationValue.getMd5Digest());
                        // ignore duplicate mutation
                        consumer.acknowledge(msg);
                        queryResult.complete(null);
                        CacheStats cacheStats = mutationCache.stats();
                        sourceContext.recordMetric(CACHE_HITS, cacheStats.hitCount());
                        sourceContext.recordMetric(CACHE_MISSES, cacheStats.missCount());
                        sourceContext.recordMetric(CACHE_EVICTIONS, cacheStats.evictionCount());
                        sourceContext.recordMetric(CACHE_SIZE, mutationCache.estimatedSize());
                        sourceContext.recordMetric(QUERY_LATENCY, 0);
                        sourceContext.recordMetric(QUERY_EXECUTORS, queryExecutors.size());
                        if (msg.hasProperty(Constants.WRITETIME))
                            sourceContext.recordMetric(REPLICATION_LATENCY, System.currentTimeMillis() - (Long.parseLong(msg.getProperty(Constants.WRITETIME)) / 1000L));
                        return null;
                    }
                    List<Object> nonNullPkValues = pk.stream().filter(Objects::nonNull).collect(Collectors.toList());
                    long start = System.currentTimeMillis();
                    Tuple3<Row, ConsistencyLevel, UUID> tuple = cassandraClient.selectRow(
                            nonNullPkValues,
                            mutationValue.getNodeId(),
                            Lists.newArrayList(ConsistencyLevel.LOCAL_QUORUM, ConsistencyLevel.LOCAL_ONE),
                            getSelectStatement(converterAndQueryFinal, nonNullPkValues.size()),
                            mutationValue.getMd5Digest());
                    CacheStats cacheStats = mutationCache.stats();
                    sourceContext.recordMetric(CACHE_HITS, cacheStats.hitCount());
                    sourceContext.recordMetric(CACHE_MISSES, cacheStats.missCount());
                    sourceContext.recordMetric(CACHE_EVICTIONS, cacheStats.evictionCount());
                    sourceContext.recordMetric(CACHE_SIZE, mutationCache.estimatedSize());
                    long end = System.currentTimeMillis();
                    sourceContext.recordMetric(QUERY_LATENCY, end - start);
                    sourceContext.recordMetric(QUERY_EXECUTORS, queryExecutors.size());
                    batchTotalLatency.addAndGet(end - start);
                    batchTotalQuery.incrementAndGet();
                    if (msg.hasProperty(Constants.WRITETIME))
                        sourceContext.recordMetric(REPLICATION_LATENCY, end - (Long.parseLong(msg.getProperty(Constants.WRITETIME)) / 1000L));
                    Object value = tuple._1 == null ? null : converterAndQueryFinal.getConverter().toConnectData(tuple._1);
                    if (ConsistencyLevel.LOCAL_QUORUM.equals(tuple._2()) && (!config.getCacheOnlyIfCoordinatorMatch() || (tuple._3 != null && tuple._3.equals(mutationValue.getNodeId())))) {
                        log.debug("Caching mutation key={} md5={} pk={}", msg.getKey(), mutationValue.getMd5Digest(), nonNullPkValues);
                        // cache the mutation digest if the coordinator is the source of this event.
                        mutationCache.addMutationMd5(msg.getKey(), mutationValue.getMd5Digest());
                    } else {
                        log.debug("Not caching mutation key={} md5={} pk={} CL={} coordinator={}", msg.getKey(), mutationValue.getMd5Digest(), nonNullPkValues, tuple._2(), tuple._3());
                    }
                    queryResult.complete(new KeyValue<>(msg.getKeyBytes(), value));
                } catch (Throwable err) {
                    queryResult.completeExceptionally(err);
                }
                return null;
            });
            final MyKVRecord record = new MyKVRecord(converterAndQueryFinal, queryResult, msg);
            newRecords.add(record);
        }
        Preconditions.checkState(!newRecords.isEmpty(), "Buffer cannot be empty here");
        List<MyKVRecord> usefulRecords = new ArrayList<>(newRecords.size());
        int cacheHits = 0;
        long start = System.currentTimeMillis();
        // wait for all queries to complete
        for (MyKVRecord record : newRecords) {
            KeyValue res = record.keyValue.join();
            if (res != null) {
                usefulRecords.add(record);
            } else {
                // a null result means the mutation was a duplicate and was discarded
                cacheHits++;
            }
        }
        long duration = System.currentTimeMillis() - start;
        long throughput = duration > 0 ? (1000L * newRecords.size()) / duration : 0;
        log.debug("Query time for {} msg in {} ms throughput={} msg/s cacheHits={}", newRecords.size(), duration, throughput, cacheHits);
        if (batchTotalQuery.get() > 0) {
            adjustExecutors();
        }
        consecutiveUnavailableException = 0;
        return usefulRecords;
    } catch (CompletionException e) {
        Throwable e2 = e.getCause();
        if (e2 instanceof ExecutionException) {
            e2 = e2.getCause();
        }
        log.info("CompletionException cause:", e2);
        if (e2 instanceof com.datastax.oss.driver.api.core.servererrors.ReadTimeoutException || e2 instanceof com.datastax.oss.driver.api.core.servererrors.OverloadedException) {
            decreaseExecutors(e2);
        } else if (e2 instanceof com.datastax.oss.driver.api.core.AllNodesFailedException) {
        // just retry
        } else {
            log.warn("Unexpected exception class=" + e.getClass() + " message=" + e.getMessage() + " cause={}" + e.getCause(), e);
            throw e;
        }
        for (MyKVRecord record : newRecords) {
            // fail every message in the buffer
            negativeAcknowledge(consumer, record.getMsg());
        }
        backoffRetry(e2);
        return Collections.emptyList();
    } catch (com.datastax.oss.driver.api.core.AllNodesFailedException e) {
        log.info("AllNodesFailedException:", e);
        for (MyKVRecord record : newRecords) {
            // fail every message in the buffer
            negativeAcknowledge(consumer, record.getMsg());
        }
        backoffRetry(e);
        return Collections.emptyList();
    } catch (Throwable e) {
        log.error("Unrecoverable error:", e);
        for (MyKVRecord record : newRecords) {
            negativeAcknowledge(consumer, record.getMsg());
        }
        throw e;
    }
}
Also used : Arrays(java.util.Arrays) CqlLogicalTypes(com.datastax.oss.cdc.CqlLogicalTypes) KVRecord(org.apache.pulsar.functions.api.KVRecord) SneakyThrows(lombok.SneakyThrows) SubscriptionMode(org.apache.pulsar.client.api.SubscriptionMode) SourceContext(org.apache.pulsar.io.core.SourceContext) Future(java.util.concurrent.Future) CassandraClient(com.datastax.oss.cdc.CassandraClient) Locale(java.util.Locale) NonNull(edu.umd.cs.findbugs.annotations.NonNull) Duration(java.time.Duration) Map(java.util.Map) ConsistencyLevel(com.datastax.oss.driver.api.core.ConsistencyLevel) KeySharedPolicy(org.apache.pulsar.client.api.KeySharedPolicy) Version(com.datastax.oss.cdc.Version) MutationValue(com.datastax.oss.cdc.MutationValue) Record(org.apache.pulsar.functions.api.Record) SpecificData(org.apache.avro.specific.SpecificData) Conversions(org.apache.avro.Conversions) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) CompletionException(java.util.concurrent.CompletionException) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Connector(org.apache.pulsar.io.core.annotations.Connector) InvocationTargetException(java.lang.reflect.InvocationTargetException) AggregateMetadata(com.datastax.oss.driver.api.core.metadata.schema.AggregateMetadata) Objects(java.util.Objects) Consumer(org.apache.pulsar.client.api.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) KeyspaceMetadata(com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata) Slf4j(lombok.extern.slf4j.Slf4j) List(java.util.List) IOType(org.apache.pulsar.io.core.annotations.IOType) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) CacheStats(com.github.benmanes.caffeine.cache.stats.CacheStats) ConsumerBuilder(org.apache.pulsar.client.api.ConsumerBuilder) Callable(java.util.concurrent.Callable) CompletableFuture(java.util.concurrent.CompletableFuture) Message(org.apache.pulsar.client.api.Message) ViewMetadata(com.datastax.oss.driver.api.core.metadata.schema.ViewMetadata) SubscriptionInitialPosition(org.apache.pulsar.client.api.SubscriptionInitialPosition) ArrayList(java.util.ArrayList) KeyValue(org.apache.pulsar.common.schema.KeyValue) Strings(com.google.common.base.Strings) Lists(com.google.common.collect.Lists) NativeAvroConverter(com.datastax.oss.pulsar.source.converters.NativeAvroConverter) KeyValueEncodingType(org.apache.pulsar.common.schema.KeyValueEncodingType) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) MutationCache(com.datastax.oss.cdc.MutationCache) Source(org.apache.pulsar.io.core.Source) Row(com.datastax.oss.driver.api.core.cql.Row) ExecutorService(java.util.concurrent.ExecutorService) CassandraSourceConnectorConfig(com.datastax.oss.cdc.CassandraSourceConnectorConfig) TableMetadata(com.datastax.oss.driver.api.core.metadata.schema.TableMetadata) ConfigUtil(com.datastax.oss.cdc.ConfigUtil) PreparedStatement(com.datastax.oss.driver.api.core.cql.PreparedStatement) SubscriptionType(org.apache.pulsar.client.api.SubscriptionType) Constants(com.datastax.oss.cdc.Constants) ColumnMetadata(com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata) Schema(org.apache.pulsar.client.api.Schema) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) UserDefinedType(com.datastax.oss.driver.api.core.type.UserDefinedType) AtomicLong(java.util.concurrent.atomic.AtomicLong) 
Tuple2(io.vavr.Tuple2) SchemaChangeListener(com.datastax.oss.driver.api.core.metadata.schema.SchemaChangeListener) Tuple3(io.vavr.Tuple3) FunctionMetadata(com.datastax.oss.driver.api.core.metadata.schema.FunctionMetadata) Preconditions(com.google.common.base.Preconditions) Collections(java.util.Collections)
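
The per-key ordering relies on an executeOrdered helper that is not shown in this excerpt. Below is a minimal sketch of what such a helper could look like, assuming a map that chains a CompletableFuture per message key; the names (pendingTasksPerKey, orderedExecutor) and the single-thread executor are illustrative assumptions, not the connector's actual implementation, which spreads work across its pool of queryExecutors. Cleanup of completed entries is also omitted here.

// Hypothetical sketch of a per-key ordering helper; field names and executor choice are assumptions.
private final ConcurrentHashMap<String, CompletableFuture<?>> pendingTasksPerKey = new ConcurrentHashMap<>();
private final ExecutorService orderedExecutor = Executors.newSingleThreadExecutor();

private void executeOrdered(String key, Callable<Void> task) {
    pendingTasksPerKey.compute(key, (k, previous) -> {
        CompletableFuture<?> tail = (previous == null) ? CompletableFuture.completedFuture(null) : previous;
        // chain the task after the previous task registered for the same key,
        // so mutations sharing a key are processed strictly in order
        return tail.thenRunAsync(() -> {
            try {
                task.call();
            } catch (Exception e) {
                throw new CompletionException(e);
            }
        }, orderedExecutor);
    });
}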

Example 2 with MutationValue

Use of com.datastax.oss.cdc.MutationValue in the project cdc-apache-cassandra by DataStax.

In the class AbstractPulsarMutationSender, the method getProducer:

/**
 * Build the Pulsar producer for the provided table metadata.
 * @param tm table metadata
 * @return the Pulsar producer
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public Producer<KeyValue<byte[], MutationValue>> getProducer(final TableInfo tm) throws PulsarClientException {
    if (this.client == null) {
        synchronized (this) {
            if (this.client == null)
                initialize(config);
        }
    }
    final TopicAndProducerName topicAndProducerName = topicAndProducerName(tm);
    return producers.computeIfAbsent(topicAndProducerName.topicName, k -> {
        try {
            org.apache.pulsar.client.api.Schema<KeyValue<byte[], MutationValue>> keyValueSchema =
                    org.apache.pulsar.client.api.Schema.KeyValue(
                            new AvroSchemaWrapper(getAvroKeySchema(tm).schema),
                            org.apache.pulsar.client.api.Schema.AVRO(MutationValue.class),
                            KeyValueEncodingType.SEPARATED);
            ProducerBuilder<KeyValue<byte[], MutationValue>> producerBuilder = client.newProducer(keyValueSchema)
                    .producerName(topicAndProducerName.producerName)
                    .topic(k)
                    .sendTimeout(0, TimeUnit.SECONDS)
                    .hashingScheme(HashingScheme.Murmur3_32Hash)
                    .blockIfQueueFull(true)
                    .maxPendingMessages(config.pulsarMaxPendingMessages)
                    .maxPendingMessagesAcrossPartitions(config.pulsarMaxPendingMessagesAcrossPartitions)
                    .autoUpdatePartitions(true);
            if (config.pulsarBatchDelayInMs > 0) {
                producerBuilder.enableBatching(true).batchingMaxPublishDelay(config.pulsarBatchDelayInMs, TimeUnit.MILLISECONDS);
            } else {
                producerBuilder.enableBatching(false);
            }
            if (config.pulsarKeyBasedBatcher) {
                // only for single non-partitioned topic and Key_Shared subscription source connector
                producerBuilder.batcherBuilder(BatcherBuilder.KEY_BASED);
            }
            if (useMurmur3Partitioner) {
                producerBuilder.messageRoutingMode(MessageRoutingMode.CustomPartition).messageRouter(Murmur3MessageRouter.instance);
            }
            log.info("Pulsar producer name={} created with batching delay={}ms", topicAndProducerName.producerName, config.pulsarBatchDelayInMs);
            return producerBuilder.create();
        } catch (Exception e) {
            log.error("Failed to get a pulsar producer", e);
            throw new RuntimeException(e);
        }
    });
}
Also used : KeyValue(org.apache.pulsar.common.schema.KeyValue) AvroSchemaWrapper(com.datastax.oss.cdc.AvroSchemaWrapper) MutationValue(com.datastax.oss.cdc.MutationValue) org.apache.pulsar.client.api(org.apache.pulsar.client.api) CassandraConnectorSchemaException(com.datastax.oss.cdc.agent.exceptions.CassandraConnectorSchemaException) IOException(java.io.IOException)
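
For context, the sketch below shows how a producer obtained from getProducer might be used to publish one mutation event. The publishMutation method, its parameters, and the writetime property value are illustrative assumptions, not the agent's actual send path; only the Pulsar TypedMessageBuilder calls (newMessage, value, property, sendAsync) and the KeyValue constructor are standard API.

// Hypothetical usage sketch, not part of AbstractPulsarMutationSender.
void publishMutation(TableInfo tableInfo, byte[] serializedKey, MutationValue mutationValue, long writetimeMicros)
        throws PulsarClientException {
    getProducer(tableInfo)
            .newMessage()
            .value(new KeyValue<>(serializedKey, mutationValue))
            // assumed property name: the source connector reads Constants.WRITETIME to compute replication latency
            .property(Constants.WRITETIME, Long.toString(writetimeMicros))
            .sendAsync()
            .thenAccept(messageId -> log.debug("Sent mutation msgId={}", messageId));
}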

Aggregations

MutationValue (com.datastax.oss.cdc.MutationValue) 2
KeyValue (org.apache.pulsar.common.schema.KeyValue) 2
AvroSchemaWrapper (com.datastax.oss.cdc.AvroSchemaWrapper) 1
CassandraClient (com.datastax.oss.cdc.CassandraClient) 1
CassandraSourceConnectorConfig (com.datastax.oss.cdc.CassandraSourceConnectorConfig) 1
ConfigUtil (com.datastax.oss.cdc.ConfigUtil) 1
Constants (com.datastax.oss.cdc.Constants) 1
CqlLogicalTypes (com.datastax.oss.cdc.CqlLogicalTypes) 1
MutationCache (com.datastax.oss.cdc.MutationCache) 1
Version (com.datastax.oss.cdc.Version) 1
CassandraConnectorSchemaException (com.datastax.oss.cdc.agent.exceptions.CassandraConnectorSchemaException) 1
ConsistencyLevel (com.datastax.oss.driver.api.core.ConsistencyLevel) 1
PreparedStatement (com.datastax.oss.driver.api.core.cql.PreparedStatement) 1
Row (com.datastax.oss.driver.api.core.cql.Row) 1
AggregateMetadata (com.datastax.oss.driver.api.core.metadata.schema.AggregateMetadata) 1
ColumnMetadata (com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata) 1
FunctionMetadata (com.datastax.oss.driver.api.core.metadata.schema.FunctionMetadata) 1
KeyspaceMetadata (com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata) 1
SchemaChangeListener (com.datastax.oss.driver.api.core.metadata.schema.SchemaChangeListener) 1
TableMetadata (com.datastax.oss.driver.api.core.metadata.schema.TableMetadata) 1