Use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.
From class UtilsTest, method testPrimitiveSerializeRecordToJsonExpandingValue.
@Test
public void testPrimitiveSerializeRecordToJsonExpandingValue() throws Exception {
    GenericObject genericObject = new GenericObject() {
        @Override
        public SchemaType getSchemaType() {
            return SchemaType.STRING;
        }
        @Override
        public Object getNativeObject() {
            return "message-value";
        }
    };
    Map<String, String> properties = new HashMap<>();
    properties.put("prop-key", "prop-value");
    Record<GenericObject> genericObjectRecord = new Record<GenericObject>() {
        @Override
        public Optional<String> getTopicName() {
            return Optional.of("data-ks1.table1");
        }
        @Override
        public org.apache.pulsar.client.api.Schema getSchema() {
            return Schema.STRING;
        }
        @Override
        public Optional<String> getKey() {
            return Optional.of("message-key");
        }
        @Override
        public GenericObject getValue() {
            return genericObject;
        }
        @Override
        public Map<String, String> getProperties() {
            return properties;
        }
        @Override
        public Optional<Long> getEventTime() {
            return Optional.of(1648502845803L);
        }
    };
    ObjectMapper objectMapper = new ObjectMapper().setSerializationInclusion(JsonInclude.Include.NON_NULL);
    String json = Utils.serializeRecordToJsonExpandingValue(objectMapper, genericObjectRecord, false);
    assertEquals(json, "{\"topicName\":\"data-ks1.table1\",\"key\":\"message-key\",\"payload\":\"message-value\","
            + "\"properties\":{\"prop-key\":\"prop-value\"},\"eventTime\":1648502845803}");
}
Use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.
From class UtilsTest, method testKeyValueSerializeRecordToJsonExpandingValue.
@Test(dataProvider = "schemaType")
public void testKeyValueSerializeRecordToJsonExpandingValue(SchemaType schemaType) throws Exception {
    RecordSchemaBuilder keySchemaBuilder = org.apache.pulsar.client.api.schema.SchemaBuilder.record("key");
    keySchemaBuilder.field("a").type(SchemaType.STRING).optional().defaultValue(null);
    keySchemaBuilder.field("b").type(SchemaType.INT32).optional().defaultValue(null);
    GenericSchema<GenericRecord> keySchema = Schema.generic(keySchemaBuilder.build(schemaType));
    GenericRecord keyGenericRecord = keySchema.newRecordBuilder().set("a", "1").set("b", 1).build();
    RecordSchemaBuilder valueSchemaBuilder = org.apache.pulsar.client.api.schema.SchemaBuilder.record("value");
    valueSchemaBuilder.field("c").type(SchemaType.STRING).optional().defaultValue(null);
    valueSchemaBuilder.field("d").type(SchemaType.INT32).optional().defaultValue(null);
    RecordSchemaBuilder udtSchemaBuilder = SchemaBuilder.record("type1");
    udtSchemaBuilder.field("a").type(SchemaType.STRING).optional().defaultValue(null);
    udtSchemaBuilder.field("b").type(SchemaType.BOOLEAN).optional().defaultValue(null);
    udtSchemaBuilder.field("d").type(SchemaType.DOUBLE).optional().defaultValue(null);
    udtSchemaBuilder.field("f").type(SchemaType.FLOAT).optional().defaultValue(null);
    udtSchemaBuilder.field("i").type(SchemaType.INT32).optional().defaultValue(null);
    udtSchemaBuilder.field("l").type(SchemaType.INT64).optional().defaultValue(null);
    GenericSchema<GenericRecord> udtGenericSchema = Schema.generic(udtSchemaBuilder.build(schemaType));
    valueSchemaBuilder.field("e", udtGenericSchema).type(schemaType).optional().defaultValue(null);
    GenericSchema<GenericRecord> valueSchema = Schema.generic(valueSchemaBuilder.build(schemaType));
    GenericRecord valueGenericRecord = valueSchema.newRecordBuilder()
            .set("c", "1")
            .set("d", 1)
            .set("e", udtGenericSchema.newRecordBuilder()
                    .set("a", "a")
                    .set("b", true)
                    .set("d", 1.0)
                    .set("f", 1.0f)
                    .set("i", 1)
                    .set("l", 10L)
                    .build())
            .build();
    Schema<org.apache.pulsar.common.schema.KeyValue<GenericRecord, GenericRecord>> keyValueSchema =
            Schema.KeyValue(keySchema, valueSchema, KeyValueEncodingType.INLINE);
    org.apache.pulsar.common.schema.KeyValue<GenericRecord, GenericRecord> keyValue =
            new org.apache.pulsar.common.schema.KeyValue<>(keyGenericRecord, valueGenericRecord);
    GenericObject genericObject = new GenericObject() {
        @Override
        public SchemaType getSchemaType() {
            return SchemaType.KEY_VALUE;
        }
        @Override
        public Object getNativeObject() {
            return keyValue;
        }
    };
    Map<String, String> properties = new HashMap<>();
    properties.put("prop-key", "prop-value");
    Record<GenericObject> genericObjectRecord = new Record<GenericObject>() {
        @Override
        public Optional<String> getTopicName() {
            return Optional.of("data-ks1.table1");
        }
        @Override
        public org.apache.pulsar.client.api.Schema getSchema() {
            return keyValueSchema;
        }
        @Override
        public Optional<String> getKey() {
            return Optional.of("message-key");
        }
        @Override
        public GenericObject getValue() {
            return genericObject;
        }
        @Override
        public Map<String, String> getProperties() {
            return properties;
        }
        @Override
        public Optional<Long> getEventTime() {
            return Optional.of(1648502845803L);
        }
    };
    ObjectMapper objectMapper = new ObjectMapper().setSerializationInclusion(JsonInclude.Include.NON_NULL);
    String json = Utils.serializeRecordToJsonExpandingValue(objectMapper, genericObjectRecord, false);
    assertEquals(json, "{\"topicName\":\"data-ks1.table1\",\"key\":\"message-key\","
            + "\"payload\":{\"value\":{\"c\":\"1\",\"d\":1,\"e\":{\"a\":\"a\",\"b\":true,\"d\":1.0,\"f\":1.0,"
            + "\"i\":1,\"l\":10}},\"key\":{\"a\":\"1\",\"b\":1}},\"properties\":{\"prop-key\":\"prop-value\"},"
            + "\"eventTime\":1648502845803}");
    json = Utils.serializeRecordToJsonExpandingValue(objectMapper, genericObjectRecord, true);
    assertEquals(json, "{\"topicName\":\"data-ks1.table1\",\"key\":\"message-key\",\"payload.value.c\":\"1\","
            + "\"payload.value.d\":1,\"payload.value.e.a\":\"a\",\"payload.value.e.b\":true,\"payload.value.e"
            + ".d\":1.0,\"payload.value.e.f\":1.0,\"payload.value.e.i\":1,\"payload.value.e.l\":10,\"payload.key"
            + ".a\":\"1\",\"payload.key.b\":1,\"properties.prop-key\":\"prop-value\",\"eventTime\":1648502845803}");
}
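The final boolean toggles between a nested payload and dot-separated flattened keys, as the two assertions show. A minimal sketch of such a flattening pass over a Jackson tree (illustrative only; this is not the actual Utils implementation):

// Illustrative recursive flattening of a com.fasterxml.jackson.databind.JsonNode,
// e.g. {"payload":{"key":{"a":"1"}}} becomes {"payload.key.a":"1"}.
// Not the actual Utils code; shown only to explain the flattened assertion above.
static void flatten(String prefix, JsonNode node, ObjectNode out) {
    if (node.isObject()) {
        node.fields().forEachRemaining(field ->
                flatten(prefix.isEmpty() ? field.getKey() : prefix + "." + field.getKey(),
                        field.getValue(), out));
    } else {
        out.set(prefix, node);
    }
}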
Use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.
From class ElasticSearchAuthTests, method ensureCalls.
@SneakyThrows
private void ensureCalls(ElasticSearchClient client, String indexName) {
    AtomicInteger ackCount = new AtomicInteger();
    assertTrue(client.createIndexIfNeeded(indexName));
    Record mockRecord = mock(Record.class);
    doAnswer(invocation -> ackCount.incrementAndGet()).when(mockRecord).ack();
    assertTrue(client.indexDocument(mockRecord, Pair.of("1", "{\"a\":1}")));
    assertTrue(client.deleteDocument(mockRecord, "1"));
    client.bulkIndex(mockRecord, Pair.of("1", "{\"a\":1}"));
    client.bulkDelete(mockRecord, "1");
    client.flush();
    // Four acks expected: two from the synchronous index/delete calls,
    // two from the bulk operations acknowledged on flush.
    assertEquals(ackCount.get(), 4);
}
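org.apache.pulsar.functions.api.Record exposes fail() next to ack(), so the same Mockito pattern can count negative acknowledgements. A short sketch, assuming only the Record interface and the mocking setup already used above:

// Sketch: count Record.fail() calls the same way ackCount counts Record.ack().
AtomicInteger failCount = new AtomicInteger();
Record mockFailRecord = mock(Record.class);
doAnswer(invocation -> failCount.incrementAndGet()).when(mockFailRecord).fail();
mockFailRecord.fail();
assertEquals(failCount.get(), 1);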
Use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.
From class PulsarSinkTest, method testWriteGenericRecords.
private void testWriteGenericRecords(ProcessingGuarantees guarantees) throws Exception {
    String defaultTopic = "default";
    PulsarSinkConfig sinkConfig = getPulsarConfigs();
    sinkConfig.setTopic(defaultTopic);
    sinkConfig.setTypeClassName(GenericRecord.class.getName());
    sinkConfig.setProcessingGuarantees(guarantees);
    PulsarClient client = getPulsarClient();
    PulsarSink pulsarSink = new PulsarSink(client, sinkConfig, new HashMap<>(),
            mock(ComponentStatsManager.class), Thread.currentThread().getContextClassLoader());
    pulsarSink.open(new HashMap<>(), mock(SinkContext.class));
    if (ProcessingGuarantees.ATMOST_ONCE == guarantees) {
        assertTrue(pulsarSink.pulsarSinkProcessor instanceof PulsarSink.PulsarSinkAtMostOnceProcessor);
    } else if (ProcessingGuarantees.ATLEAST_ONCE == guarantees) {
        assertTrue(pulsarSink.pulsarSinkProcessor instanceof PulsarSink.PulsarSinkAtLeastOnceProcessor);
    } else {
        assertTrue(pulsarSink.pulsarSinkProcessor instanceof PulsarSink.PulsarSinkEffectivelyOnceProcessor);
    }
    PulsarSinkProcessorBase processor = (PulsarSinkProcessorBase) pulsarSink.pulsarSinkProcessor;
    assertFalse(processor.publishProducers.containsKey(defaultTopic));
    String[] topics = { "topic-1", "topic-2", "topic-3" };
    for (String topic : topics) {
        RecordSchemaBuilder builder = SchemaBuilder.record("MyRecord");
        builder.field("number").type(SchemaType.INT32);
        builder.field("text").type(SchemaType.STRING);
        GenericSchema<GenericRecord> schema = Schema.generic(builder.build(SchemaType.AVRO));
        GenericRecordBuilder recordBuilder = schema.newRecordBuilder();
        recordBuilder.set("number", 1);
        recordBuilder.set("text", topic);
        GenericRecord genericRecord = recordBuilder.build();
        SinkRecord<GenericRecord> record = new SinkRecord<>(new Record<GenericRecord>() {
            @Override
            public Optional<String> getDestinationTopic() {
                return Optional.of(topic);
            }
            @Override
            public Schema<GenericRecord> getSchema() {
                return schema;
            }
            @Override
            public GenericRecord getValue() {
                return genericRecord;
            }
            @Override
            public Optional<String> getPartitionId() {
                return Optional.of(topic + "-id-1");
            }
            @Override
            public Optional<Long> getRecordSequence() {
                return Optional.of(1L);
            }
        }, genericRecord);
        pulsarSink.write(record);
        if (ProcessingGuarantees.EFFECTIVELY_ONCE == guarantees) {
            assertTrue(processor.publishProducers.containsKey(String.format("%s-%s-id-1", topic, topic)));
        } else {
            assertTrue(processor.publishProducers.containsKey(topic));
        }
        verify(client.newProducer(), times(1)).topic(argThat(
                otherTopic -> topic != null ? topic.equals(otherTopic) : defaultTopic.equals(otherTopic)));
        verify(client, times(1)).newProducer(argThat(otherSchema -> Objects.equals(otherSchema, schema)));
    }
}
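The assertions encode how the sink keys its producer cache: by topic alone for at-most-once and at-least-once, and by topic plus partition id for effectively-once, so each partition gets its own deduplicating producer. A hypothetical helper mirroring that rule (producerKey is not a PulsarSink method, just an illustration of the asserted keys):

// Hypothetical illustration of the cache keys asserted above; not PulsarSink API.
static String producerKey(ProcessingGuarantees guarantees, String topic, String partitionId) {
    return ProcessingGuarantees.EFFECTIVELY_ONCE == guarantees
            ? topic + "-" + partitionId   // e.g. "topic-1" + "-" + "topic-1-id-1"
            : topic;                      // e.g. "topic-1"
}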
Use of org.apache.pulsar.functions.api.Record in project cdc-apache-cassandra by datastax.
From class CassandraSource, method batchRead.
@SuppressWarnings("unchecked")
private List<MyKVRecord> batchRead() throws Exception {
    batchTotalLatency.set(0);
    batchTotalQuery.set(0);
    List<MyKVRecord> newRecords = new ArrayList<>();
    if (this.queryExecutors == null)
        initQueryExecutors();
    try {
        maybeInitCassandraClient();
        // This loop blocks until we receive at least one record.
        while (newRecords.size() < this.config.getBatchSize()) {
            final Message<KeyValue<GenericRecord, MutationValue>> msg = consumer.receive(1, TimeUnit.SECONDS);
            if (msg == null) {
                if (!newRecords.isEmpty()) {
                    log.debug("no message received, buffer size {}", newRecords.size());
                    // No more records within the timeout, but we have at least one record.
                    break;
                } else {
                    log.debug("no message received");
                    continue;
                }
            }
            final KeyValue<GenericRecord, MutationValue> kv = msg.getValue();
            final GenericRecord mutationKey = kv.getKey();
            final MutationValue mutationValue = kv.getValue();
            log.debug("Message from producer={} msgId={} key={} value={} schema {}\n",
                    msg.getProducerName(), msg.getMessageId(), kv.getKey(), kv.getValue(),
                    msg.getReaderSchema().orElse(null));
            List<Object> pk = (List<Object>) mutationKeyConverter.fromConnectData(mutationKey.getNativeObject());
            // Ensure the schema is the one used when building the struct.
            final ConverterAndQuery converterAndQueryFinal = this.valueConverterAndQuery;
            CompletableFuture<KeyValue<Object, Object>> queryResult = new CompletableFuture<>();
            // Records with the same key must be processed sequentially; otherwise the
            // mutation cache is not efficient enough at deduplicating mutations
            // coming from different nodes.
            executeOrdered(msg.getKey(), () -> {
                try {
                    if (mutationCache.isMutationProcessed(msg.getKey(), mutationValue.getMd5Digest())) {
                        log.debug("Message key={} md5={} already processed", msg.getKey(), mutationValue.getMd5Digest());
                        // Ignore the duplicated mutation.
                        consumer.acknowledge(msg);
                        queryResult.complete(null);
                        CacheStats cacheStats = mutationCache.stats();
                        sourceContext.recordMetric(CACHE_HITS, cacheStats.hitCount());
                        sourceContext.recordMetric(CACHE_MISSES, cacheStats.missCount());
                        sourceContext.recordMetric(CACHE_EVICTIONS, cacheStats.evictionCount());
                        sourceContext.recordMetric(CACHE_SIZE, mutationCache.estimatedSize());
                        sourceContext.recordMetric(QUERY_LATENCY, 0);
                        sourceContext.recordMetric(QUERY_EXECUTORS, queryExecutors.size());
                        if (msg.hasProperty(Constants.WRITETIME))
                            sourceContext.recordMetric(REPLICATION_LATENCY,
                                    System.currentTimeMillis() - (Long.parseLong(msg.getProperty(Constants.WRITETIME)) / 1000L));
                        return null;
                    }
                    List<Object> nonNullPkValues = pk.stream().filter(e -> e != null).collect(Collectors.toList());
                    long start = System.currentTimeMillis();
                    Tuple3<Row, ConsistencyLevel, UUID> tuple = cassandraClient.selectRow(nonNullPkValues,
                            mutationValue.getNodeId(),
                            Lists.newArrayList(ConsistencyLevel.LOCAL_QUORUM, ConsistencyLevel.LOCAL_ONE),
                            getSelectStatement(converterAndQueryFinal, nonNullPkValues.size()),
                            mutationValue.getMd5Digest());
                    CacheStats cacheStats = mutationCache.stats();
                    sourceContext.recordMetric(CACHE_HITS, cacheStats.hitCount());
                    sourceContext.recordMetric(CACHE_MISSES, cacheStats.missCount());
                    sourceContext.recordMetric(CACHE_EVICTIONS, cacheStats.evictionCount());
                    sourceContext.recordMetric(CACHE_SIZE, mutationCache.estimatedSize());
                    long end = System.currentTimeMillis();
                    sourceContext.recordMetric(QUERY_LATENCY, end - start);
                    sourceContext.recordMetric(QUERY_EXECUTORS, queryExecutors.size());
                    batchTotalLatency.addAndGet(end - start);
                    batchTotalQuery.incrementAndGet();
                    if (msg.hasProperty(Constants.WRITETIME))
                        sourceContext.recordMetric(REPLICATION_LATENCY,
                                end - (Long.parseLong(msg.getProperty(Constants.WRITETIME)) / 1000L));
                    Object value = tuple._1 == null ? null : converterAndQueryFinal.getConverter().toConnectData(tuple._1);
                    if (ConsistencyLevel.LOCAL_QUORUM.equals(tuple._2())
                            && (!config.getCacheOnlyIfCoordinatorMatch()
                                    || (tuple._3 != null && tuple._3.equals(mutationValue.getNodeId())))) {
                        log.debug("Caching mutation key={} md5={} pk={}", msg.getKey(), mutationValue.getMd5Digest(), nonNullPkValues);
                        // Cache the mutation digest if the coordinator is the source of this event.
                        mutationCache.addMutationMd5(msg.getKey(), mutationValue.getMd5Digest());
                    } else {
                        log.debug("Not caching mutation key={} md5={} pk={} CL={} coordinator={}",
                                msg.getKey(), mutationValue.getMd5Digest(), nonNullPkValues, tuple._2(), tuple._3());
                    }
                    queryResult.complete(new KeyValue<>(msg.getKeyBytes(), value));
                } catch (Throwable err) {
                    queryResult.completeExceptionally(err);
                }
                return null;
            });
            final MyKVRecord record = new MyKVRecord(converterAndQueryFinal, queryResult, msg);
            newRecords.add(record);
        }
        Preconditions.checkState(!newRecords.isEmpty(), "Buffer cannot be empty here");
        List<MyKVRecord> usefulRecords = new ArrayList<>(newRecords.size());
        int cacheHits = 0;
        long start = System.currentTimeMillis();
        // Wait for all queries to complete.
        for (MyKVRecord record : newRecords) {
            KeyValue res = record.keyValue.join();
            if (res != null) {
                usefulRecords.add(record);
            } else {
                // A null result means the mutation was discarded as a duplicate.
                cacheHits++;
            }
        }
        long duration = System.currentTimeMillis() - start;
        long throughput = duration > 0 ? (1000L * newRecords.size()) / duration : 0;
        log.debug("Query time for {} msg in {} ms throughput={} msg/s cacheHits={}",
                newRecords.size(), duration, throughput, cacheHits);
        if (batchTotalQuery.get() > 0) {
            adjustExecutors();
        }
        consecutiveUnavailableException = 0;
        return usefulRecords;
    } catch (CompletionException e) {
        Throwable e2 = e.getCause();
        if (e2 instanceof ExecutionException) {
            e2 = e2.getCause();
        }
        log.info("CompletionException cause:", e2);
        if (e2 instanceof com.datastax.oss.driver.api.core.servererrors.ReadTimeoutException
                || e2 instanceof com.datastax.oss.driver.api.core.servererrors.OverloadedException) {
            decreaseExecutors(e2);
        } else if (e2 instanceof com.datastax.oss.driver.api.core.AllNodesFailedException) {
            // Just retry.
        } else {
            log.warn("Unexpected exception class=" + e.getClass() + " message=" + e.getMessage()
                    + " cause=" + e.getCause(), e);
            throw e;
        }
        // Fail every message in the buffer.
        for (MyKVRecord record : newRecords) {
            negativeAcknowledge(consumer, record.getMsg());
        }
        backoffRetry(e2);
        return Collections.emptyList();
    } catch (com.datastax.oss.driver.api.core.AllNodesFailedException e) {
        log.info("AllNodesFailedException:", e);
        // Fail every message in the buffer.
        for (MyKVRecord record : newRecords) {
            negativeAcknowledge(consumer, record.getMsg());
        }
        backoffRetry(e);
        return Collections.emptyList();
    } catch (Throwable e) {
        log.error("Unrecoverable error:", e);
        for (MyKVRecord record : newRecords) {
            negativeAcknowledge(consumer, record.getMsg());
        }
        throw e;
    }
}
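executeOrdered serializes the work for a given message key so the mutation cache always sees mutations for the same key in arrival order, while different keys still proceed in parallel. A minimal sketch of that idea, assuming a per-key chain of CompletableFutures (illustrative; the connector's actual executeOrdered may differ):

// Illustrative per-key ordered execution: each task is chained onto the previous
// future for its key, so tasks sharing a key never run concurrently while tasks
// for different keys run in parallel on the pool. A production version would
// also prune completed tails to avoid unbounded map growth.
private final ConcurrentHashMap<String, CompletableFuture<Void>> tails = new ConcurrentHashMap<>();
private final ExecutorService pool = Executors.newFixedThreadPool(4);

void executeOrdered(String key, Runnable task) {
    tails.compute(key, (k, tail) ->
            (tail == null ? CompletableFuture.completedFuture((Void) null) : tail)
                    .thenRunAsync(task, pool));
}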