Use of com.datastax.oss.cdc.MutationValue in project cdc-apache-cassandra by datastax.
The class CassandraSource, method batchRead:
@SuppressWarnings("unchecked")
private List<MyKVRecord> batchRead() throws Exception {
batchTotalLatency.set(0);
batchTotalQuery.set(0);
List<MyKVRecord> newRecords = new ArrayList<>();
if (this.queryExecutors == null)
initQueryExecutors();
try {
maybeInitCassandraClient();
// this method will block until we receive at least one record
while (newRecords.size() < this.config.getBatchSize()) {
final Message<KeyValue<GenericRecord, MutationValue>> msg = consumer.receive(1, TimeUnit.SECONDS);
if (msg == null) {
if (!newRecords.isEmpty()) {
log.debug("no message received, buffer size {}", newRecords.size());
// no more records within the timeout, but we have at least one record
break;
} else {
log.debug("no message received");
continue;
}
}
final KeyValue<GenericRecord, MutationValue> kv = msg.getValue();
final GenericRecord mutationKey = kv.getKey();
final MutationValue mutationValue = kv.getValue();
log.debug("Message from producer={} msgId={} key={} value={} schema {}\n", msg.getProducerName(), msg.getMessageId(), kv.getKey(), kv.getValue(), msg.getReaderSchema().orElse(null));
List<Object> pk = (List<Object>) mutationKeyConverter.fromConnectData(mutationKey.getNativeObject());
// ensure the schema is the one used when building the struct.
final ConverterAndQuery converterAndQueryFinal = this.valueConverterAndQuery;
CompletableFuture<KeyValue<Object, Object>> queryResult = new CompletableFuture<>();
// we have to process sequentially the records from the same key
// otherwise our mutation cache will not be enough efficient
// in deduplicating mutations coming from different nodes
executeOrdered(msg.getKey(), () -> {
try {
if (mutationCache.isMutationProcessed(msg.getKey(), mutationValue.getMd5Digest())) {
log.debug("Message key={} md5={} already processed", msg.getKey(), mutationValue.getMd5Digest());
// ignore duplicated mutation
consumer.acknowledge(msg);
queryResult.complete(null);
CacheStats cacheStats = mutationCache.stats();
sourceContext.recordMetric(CACHE_HITS, cacheStats.hitCount());
sourceContext.recordMetric(CACHE_MISSES, cacheStats.missCount());
sourceContext.recordMetric(CACHE_EVICTIONS, cacheStats.evictionCount());
sourceContext.recordMetric(CACHE_SIZE, mutationCache.estimatedSize());
sourceContext.recordMetric(QUERY_LATENCY, 0);
sourceContext.recordMetric(QUERY_EXECUTORS, queryExecutors.size());
if (msg.hasProperty(Constants.WRITETIME))
sourceContext.recordMetric(REPLICATION_LATENCY, System.currentTimeMillis() - (Long.parseLong(msg.getProperty(Constants.WRITETIME)) / 1000L));
return null;
}
List<Object> nonNullPkValues = pk.stream().filter(e -> e != null).collect(Collectors.toList());
long start = System.currentTimeMillis();
Tuple3<Row, ConsistencyLevel, UUID> tuple = cassandraClient.selectRow(nonNullPkValues, mutationValue.getNodeId(), Lists.newArrayList(ConsistencyLevel.LOCAL_QUORUM, ConsistencyLevel.LOCAL_ONE), getSelectStatement(converterAndQueryFinal, nonNullPkValues.size()), mutationValue.getMd5Digest());
CacheStats cacheStats = mutationCache.stats();
sourceContext.recordMetric(CACHE_HITS, cacheStats.hitCount());
sourceContext.recordMetric(CACHE_MISSES, cacheStats.missCount());
sourceContext.recordMetric(CACHE_EVICTIONS, cacheStats.evictionCount());
sourceContext.recordMetric(CACHE_SIZE, mutationCache.estimatedSize());
long end = System.currentTimeMillis();
sourceContext.recordMetric(QUERY_LATENCY, end - start);
sourceContext.recordMetric(QUERY_EXECUTORS, queryExecutors.size());
batchTotalLatency.addAndGet(end - start);
batchTotalQuery.incrementAndGet();
if (msg.hasProperty(Constants.WRITETIME))
sourceContext.recordMetric(REPLICATION_LATENCY, end - (Long.parseLong(msg.getProperty(Constants.WRITETIME)) / 1000L));
Object value = tuple._1 == null ? null : converterAndQueryFinal.getConverter().toConnectData(tuple._1);
if (ConsistencyLevel.LOCAL_QUORUM.equals(tuple._2()) && (!config.getCacheOnlyIfCoordinatorMatch() || (tuple._3 != null && tuple._3.equals(mutationValue.getNodeId())))) {
log.debug("Caching mutation key={} md5={} pk={}", msg.getKey(), mutationValue.getMd5Digest(), nonNullPkValues);
// cache the mutation digest if the coordinator is the source of this event.
mutationCache.addMutationMd5(msg.getKey(), mutationValue.getMd5Digest());
} else {
log.debug("Not caching mutation key={} md5={} pk={} CL={} coordinator={}", msg.getKey(), mutationValue.getMd5Digest(), nonNullPkValues, tuple._2(), tuple._3());
}
queryResult.complete(new KeyValue(msg.getKeyBytes(), value));
} catch (Throwable err) {
queryResult.completeExceptionally(err);
}
return null;
});
final MyKVRecord record = new MyKVRecord(converterAndQueryFinal, queryResult, msg);
newRecords.add(record);
}
Preconditions.checkState(!newRecords.isEmpty(), "Buffer cannot be empty here");
List<MyKVRecord> usefulRecords = new ArrayList<>(newRecords.size());
int cacheHits = 0;
long start = System.currentTimeMillis();
// wait for all queries to complete
for (MyKVRecord record : newRecords) {
KeyValue res = record.keyValue.join();
if (res != null) {
// if the result is "null" the mutation has been discarded
usefulRecords.add(record);
} else {
cacheHits++;
}
}
long duration = System.currentTimeMillis() - start;
long throughput = duration > 0 ? (1000L * newRecords.size()) / duration : 0;
log.debug("Query time for {} msg in {} ms throughput={} msg/s cacheHits={}", newRecords.size(), duration, throughput, cacheHits);
if (batchTotalQuery.get() > 0) {
adjustExecutors();
}
consecutiveUnavailableException = 0;
return usefulRecords;
} catch (CompletionException e) {
Throwable e2 = e.getCause();
if (e2 instanceof ExecutionException) {
e2 = e2.getCause();
}
log.info("CompletionException cause:", e2);
if (e2 instanceof com.datastax.oss.driver.api.core.servererrors.ReadTimeoutException || e2 instanceof com.datastax.oss.driver.api.core.servererrors.OverloadedException) {
decreaseExecutors(e2);
} else if (e2 instanceof com.datastax.oss.driver.api.core.AllNodesFailedException) {
// just retry
} else {
log.warn("Unexpected exception class=" + e.getClass() + " message=" + e.getMessage() + " cause={}" + e.getCause(), e);
throw e;
}
for (MyKVRecord record : newRecords) {
// fail every message in the buffer
negativeAcknowledge(consumer, record.getMsg());
}
backoffRetry(e2);
return Collections.emptyList();
} catch (com.datastax.oss.driver.api.core.AllNodesFailedException e) {
log.info("AllNodesFailedException:", e);
for (MyKVRecord record : newRecords) {
// fail every message in the buffer
negativeAcknowledge(consumer, record.getMsg());
}
backoffRetry(e);
return Collections.emptyList();
} catch (Throwable e) {
log.error("Unrecoverable error:", e);
for (MyKVRecord record : newRecords) {
negativeAcknowledge(consumer, record.getMsg());
}
throw e;
}
}
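The executeOrdered helper that enforces the per-key ordering mentioned above is not part of this snippet. A minimal sketch of one way such a helper could work, assuming the connector keeps a pool of single-threaded executors and routes each task by hashing the message key (the OrderedExecutor class, its constructor, and its field names are illustrative assumptions, not the project's actual code):

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;

// Illustrative sketch only: route tasks that share the same Pulsar message key to the
// same single-threaded executor, so mutations for a given key run sequentially and the
// mutation cache can deduplicate md5 digests reliably.
class OrderedExecutor {

    private final List<ExecutorService> queryExecutors;

    OrderedExecutor(List<ExecutorService> queryExecutors) {
        this.queryExecutors = queryExecutors;
    }

    <T> void executeOrdered(String key, Callable<T> task) {
        // derive a non-negative bucket index from the key hash
        int bucket = (key.hashCode() & 0x7fffffff) % queryExecutors.size();
        queryExecutors.get(bucket).submit(task);
    }
}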
Use of com.datastax.oss.cdc.MutationValue in project cdc-apache-cassandra by datastax.
The class AbstractPulsarMutationSender, method getProducer:
/**
 * Build the Pulsar producer for the provided table metadata.
 * @param tm table metadata
 * @return the pulsar producer
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public Producer<KeyValue<byte[], MutationValue>> getProducer(final TableInfo tm) throws PulsarClientException {
    if (this.client == null) {
        synchronized (this) {
            if (this.client == null)
                initialize(config);
        }
    }
    final TopicAndProducerName topicAndProducerName = topicAndProducerName(tm);
    return producers.computeIfAbsent(topicAndProducerName.topicName, k -> {
        try {
            org.apache.pulsar.client.api.Schema<KeyValue<byte[], MutationValue>> keyValueSchema = org.apache.pulsar.client.api.Schema.KeyValue(
                    new AvroSchemaWrapper(getAvroKeySchema(tm).schema),
                    org.apache.pulsar.client.api.Schema.AVRO(MutationValue.class),
                    KeyValueEncodingType.SEPARATED);
            ProducerBuilder<KeyValue<byte[], MutationValue>> producerBuilder = client.newProducer(keyValueSchema)
                    .producerName(topicAndProducerName.producerName)
                    .topic(k)
                    .sendTimeout(0, TimeUnit.SECONDS)
                    .hashingScheme(HashingScheme.Murmur3_32Hash)
                    .blockIfQueueFull(true)
                    .maxPendingMessages(config.pulsarMaxPendingMessages)
                    .maxPendingMessagesAcrossPartitions(config.pulsarMaxPendingMessagesAcrossPartitions)
                    .autoUpdatePartitions(true);
            if (config.pulsarBatchDelayInMs > 0) {
                producerBuilder.enableBatching(true).batchingMaxPublishDelay(config.pulsarBatchDelayInMs, TimeUnit.MILLISECONDS);
            } else {
                producerBuilder.enableBatching(false);
            }
            if (config.pulsarKeyBasedBatcher) {
                // only for single non-partitioned topic and Key_Shared subscription source connector
                producerBuilder.batcherBuilder(BatcherBuilder.KEY_BASED);
            }
            if (useMurmur3Partitioner) {
                producerBuilder.messageRoutingMode(MessageRoutingMode.CustomPartition).messageRouter(Murmur3MessageRouter.instance);
            }
            log.info("Pulsar producer name={} created with batching delay={}ms", topicAndProducerName.producerName, config.pulsarBatchDelayInMs);
            return producerBuilder.create();
        } catch (Exception e) {
            log.error("Failed to get a pulsar producer", e);
            throw new RuntimeException(e);
        }
    });
}
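A producer returned by getProducer publishes one KeyValue<byte[], MutationValue> per mutation; because the schema uses KeyValueEncodingType.SEPARATED, the Avro-encoded primary key travels as the Pulsar message key. A hedged usage sketch of a caller in the same class, assuming keyBytes and mutationValue are built elsewhere (sendMutationAsync and writetimeMicros are illustrative names, not the project's API):

// Illustrative sketch only: send one CDC event with the producer built by getProducer(tm).
// keyBytes (Avro-encoded primary key) and mutationValue are assumed to come from the caller;
// the WRITETIME property is shown in microseconds because CassandraSource divides it by 1000.
CompletableFuture<MessageId> sendMutationAsync(TableInfo tm, byte[] keyBytes, MutationValue mutationValue, long writetimeMicros)
        throws PulsarClientException {
    Producer<KeyValue<byte[], MutationValue>> producer = getProducer(tm);
    return producer.newMessage()
            .value(new KeyValue<>(keyBytes, mutationValue))
            .property(Constants.WRITETIME, Long.toString(writetimeMicros))
            .sendAsync();
}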