use of org.apache.pulsar.client.api.schema.RecordSchemaBuilder in project pulsar by yahoo.
the class PulsarSinkTest method testWriteGenericRecords.
/**
 * Opens a {@link PulsarSink} typed on {@link GenericRecord} with the given processing
 * guarantees, writes one AVRO generic record to each of three destination topics and
 * checks that a producer is registered and requested for every topic.
 *
 * @param guarantees processing guarantees the sink is configured with; they select the
 *                   processor implementation and the producer-cache key that is expected
 * @throws Exception if the sink cannot be opened or a write fails
 */
private void testWriteGenericRecords(ProcessingGuarantees guarantees) throws Exception {
    final String defaultTopic = "default";

    PulsarSinkConfig config = getPulsarConfigs();
    config.setTopic(defaultTopic);
    config.setTypeClassName(GenericRecord.class.getName());
    config.setProcessingGuarantees(guarantees);

    PulsarClient client = getPulsarClient();
    PulsarSink pulsarSink = new PulsarSink(
            client,
            config,
            new HashMap<>(),
            mock(ComponentStatsManager.class),
            Thread.currentThread().getContextClassLoader());
    pulsarSink.open(new HashMap<>(), mock(SinkContext.class));

    // Anything that is neither at-most-once nor at-least-once is expected to be effectively-once.
    final Class<?> expectedProcessorType;
    if (ProcessingGuarantees.ATMOST_ONCE == guarantees) {
        expectedProcessorType = PulsarSink.PulsarSinkAtMostOnceProcessor.class;
    } else if (ProcessingGuarantees.ATLEAST_ONCE == guarantees) {
        expectedProcessorType = PulsarSink.PulsarSinkAtLeastOnceProcessor.class;
    } else {
        expectedProcessorType = PulsarSink.PulsarSinkEffectivelyOnceProcessor.class;
    }
    assertTrue(expectedProcessorType.isInstance(pulsarSink.pulsarSinkProcessor));

    PulsarSinkProcessorBase processor = (PulsarSinkProcessorBase) pulsarSink.pulsarSinkProcessor;
    // No producer is registered for the default topic right after open().
    assertFalse(processor.publishProducers.containsKey(defaultTopic));

    for (String topic : new String[] { "topic-1", "topic-2", "topic-3" }) {
        // A fresh schema instance is built for every topic; the newProducer(schema)
        // verification below is done against this exact instance.
        RecordSchemaBuilder schemaBuilder = SchemaBuilder.record("MyRecord");
        schemaBuilder.field("number").type(SchemaType.INT32);
        schemaBuilder.field("text").type(SchemaType.STRING);
        GenericSchema<GenericRecord> schema = Schema.generic(schemaBuilder.build(SchemaType.AVRO));

        GenericRecord genericRecord = schema.newRecordBuilder()
                .set("number", 1)
                .set("text", topic)
                .build();

        Record<GenericRecord> source = new Record<GenericRecord>() {
            @Override
            public GenericRecord getValue() {
                return genericRecord;
            }

            @Override
            public Schema<GenericRecord> getSchema() {
                return schema;
            }

            @Override
            public Optional<String> getDestinationTopic() {
                return Optional.of(topic);
            }

            @Override
            public Optional<String> getPartitionId() {
                return Optional.of(topic + "-id-1");
            }

            @Override
            public Optional<Long> getRecordSequence() {
                return Optional.of(1L);
            }
        };
        pulsarSink.write(new SinkRecord<>(source, genericRecord));

        // Effectively-once producers are keyed by "<topic>-<partitionId>", the others by topic.
        String expectedProducerKey = ProcessingGuarantees.EFFECTIVELY_ONCE == guarantees
                ? String.format("%s-%s-id-1", topic, topic)
                : topic;
        assertTrue(processor.publishProducers.containsKey(expectedProducerKey));

        verify(client.newProducer(), times(1)).topic(
                argThat(candidate -> (topic != null ? topic : defaultTopic).equals(candidate)));
        verify(client, times(1)).newProducer(
                argThat(candidate -> Objects.equals(candidate, schema)));
    }
}
use of org.apache.pulsar.client.api.schema.RecordSchemaBuilder in project cdc-apache-cassandra by datastax.
the class PulsarDualNodeTests method testProducer.
/**
 * Starts two Cassandra nodes with CDC enabled, inserts three rows into each of two tables in
 * a keyspace with replication factor 2, and consumes the resulting mutation events from the
 * {@code events-ks1.table1} and {@code events-ks1.table2} topics.
 *
 * <p>For every primary key the test expects two mutations carrying two distinct node ids
 * and a single shared MD5 digest.
 *
 * @throws InterruptedException if interrupted while waiting on the containers
 * @throws IOException if the Pulsar client or a consumer fails
 */
@Test
public void testProducer() throws InterruptedException, IOException {
    String pulsarServiceUrl = "pulsar://pulsar:" + pulsarContainer.BROKER_PORT;
    try (CassandraContainer<?> cassandraContainer1 = createCassandraContainer(1, pulsarServiceUrl, testNetwork);
         CassandraContainer<?> cassandraContainer2 = createCassandraContainer(2, pulsarServiceUrl, testNetwork)) {
        cassandraContainer1.start();
        cassandraContainer2.start();
        try (CqlSession cqlSession = cassandraContainer1.getCqlSession()) {
            cqlSession.execute("CREATE KEYSPACE IF NOT EXISTS ks1 WITH replication = {'class':'SimpleStrategy','replication_factor':'2'};");
            cqlSession.execute("CREATE TABLE IF NOT EXISTS ks1.table1 (id text PRIMARY KEY, a int) WITH cdc=true");
            cqlSession.execute("INSERT INTO ks1.table1 (id, a) VALUES('1',1)");
            cqlSession.execute("INSERT INTO ks1.table1 (id, a) VALUES('2',1)");
            cqlSession.execute("INSERT INTO ks1.table1 (id, a) VALUES('3',1)");
            cqlSession.execute("CREATE TABLE IF NOT EXISTS ks1.table2 (a text, b int, c int, PRIMARY KEY(a,b)) WITH cdc=true");
            cqlSession.execute("INSERT INTO ks1.table2 (a,b,c) VALUES('1',1,1)");
            cqlSession.execute("INSERT INTO ks1.table2 (a,b,c) VALUES('2',1,1)");
            cqlSession.execute("INSERT INTO ks1.table2 (a,b,c) VALUES('3',1,1)");
        }
        drain(cassandraContainer1, cassandraContainer2);

        // Per primary key: the node ids and digests seen in the consumed mutations.
        Map<String, List<UUID>> nodesTable1 = new HashMap<>();
        Map<String, List<UUID>> nodesTable2 = new HashMap<>();
        Map<String, List<String>> digestsTable1 = new HashMap<>();
        Map<String, List<String>> digestsTable2 = new HashMap<>();

        try (PulsarClient pulsarClient = PulsarClient.builder().serviceUrl(pulsarContainer.getPulsarBrokerUrl()).build()) {
            RecordSchemaBuilder recordSchemaBuilder1 = SchemaBuilder.record("ks1.table1");
            recordSchemaBuilder1.field("id").type(SchemaType.STRING).required();
            SchemaInfo keySchemaInfo1 = recordSchemaBuilder1.build(SchemaType.AVRO);
            Schema<GenericRecord> keySchema1 = Schema.generic(keySchemaInfo1);
            Schema<KeyValue<GenericRecord, MutationValue>> schema1 = Schema.KeyValue(keySchema1, Schema.AVRO(MutationValue.class), KeyValueEncodingType.SEPARATED);
            // pulsar-admin topics peek-messages persistent://public/default/events-ks1.table1-partition-0 --count 3 --subscription sub1
            try (Consumer<KeyValue<GenericRecord, MutationValue>> consumer = pulsarClient.newConsumer(schema1)
                    .topic("events-ks1.table1")
                    .subscriptionName("sub1")
                    .subscriptionType(SubscriptionType.Key_Shared)
                    .subscriptionMode(SubscriptionMode.Durable)
                    .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest)
                    .subscribe()) {
                Message<KeyValue<GenericRecord, MutationValue>> msg;
                // Check the collected count BEFORE receiving: once all 6 mutations are in, the loop
                // ends immediately instead of blocking up to 30s on (and then dropping) another message.
                while (nodesTable1.values().stream().mapToInt(List::size).sum() < 6
                        && (msg = consumer.receive(30, TimeUnit.SECONDS)) != null) {
                    KeyValue<GenericRecord, MutationValue> kv = msg.getValue();
                    GenericRecord key = kv.getKey();
                    MutationValue val = kv.getValue();
                    System.out.println("Consumer Record: topicName=" + msg.getTopicName() + " key=" + AgentTestUtil.genericRecordToString(key) + " value=" + val);
                    String id = (String) key.getField("id");
                    nodesTable1.computeIfAbsent(id, k -> new ArrayList<>()).add(val.getNodeId());
                    digestsTable1.computeIfAbsent(id, k -> new ArrayList<>()).add(val.getMd5Digest());
                    consumer.acknowledgeAsync(msg);
                }
            }
            // check we have exactly one mutation per node for each key.
            for (int i = 1; i < 4; i++) {
                Assert.assertNotNull(nodesTable1.get(Integer.toString(i)));
                assertEquals(2, nodesTable1.get(Integer.toString(i)).size());
                assertEquals(2, nodesTable1.get(Integer.toString(i)).stream().collect(Collectors.toSet()).size());
            }
            // check we have exactly 2 identical digests.
            for (int i = 1; i < 4; i++) {
                Assert.assertNotNull(digestsTable1.get(Integer.toString(i)));
                assertEquals(2, digestsTable1.get(Integer.toString(i)).size());
                assertEquals(1, digestsTable1.get(Integer.toString(i)).stream().collect(Collectors.toSet()).size());
            }

            // pulsar-admin schemas get "persistent://public/default/events-ks1.table2"
            // pulsar-admin topics peek-messages persistent://public/default/events-ks1.table2-partition-0 --count 3 --subscription sub1
            RecordSchemaBuilder recordSchemaBuilder2 = SchemaBuilder.record("ks1.table2");
            recordSchemaBuilder2.field("a").type(SchemaType.STRING).required();
            recordSchemaBuilder2.field("b").type(SchemaType.INT32).optional().defaultValue(null);
            SchemaInfo keySchemaInfo2 = recordSchemaBuilder2.build(SchemaType.AVRO);
            Schema<GenericRecord> keySchema2 = Schema.generic(keySchemaInfo2);
            Schema<KeyValue<GenericRecord, MutationValue>> schema2 = Schema.KeyValue(keySchema2, Schema.AVRO(MutationValue.class), KeyValueEncodingType.SEPARATED);
            try (Consumer<KeyValue<GenericRecord, MutationValue>> consumer = pulsarClient.newConsumer(schema2)
                    .topic("events-ks1.table2")
                    .subscriptionName("sub1")
                    .subscriptionType(SubscriptionType.Key_Shared)
                    .subscriptionMode(SubscriptionMode.Durable)
                    .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest)
                    .subscribe()) {
                Message<KeyValue<GenericRecord, MutationValue>> msg;
                // Same ordering as for table1: count first, blocking receive second.
                while (nodesTable2.values().stream().mapToInt(List::size).sum() < 6
                        && (msg = consumer.receive(30, TimeUnit.SECONDS)) != null) {
                    KeyValue<GenericRecord, MutationValue> kv = msg.getValue();
                    GenericRecord key = kv.getKey();
                    MutationValue val = kv.getValue();
                    System.out.println("Consumer Record: topicName=" + msg.getTopicName() + " key=" + AgentTestUtil.genericRecordToString(key) + " value=" + val);
                    assertEquals(1, key.getField("b"));
                    String a = (String) key.getField("a");
                    nodesTable2.computeIfAbsent(a, k -> new ArrayList<>()).add(val.getNodeId());
                    digestsTable2.computeIfAbsent(a, k -> new ArrayList<>()).add(val.getMd5Digest());
                    consumer.acknowledgeAsync(msg);
                }
            }
            // check we have exactly one mutation per node for each key.
            for (int i = 1; i < 4; i++) {
                // Fail with an assertion (not a NullPointerException) when a key was never received.
                Assert.assertNotNull(nodesTable2.get(Integer.toString(i)));
                assertEquals(2, nodesTable2.get(Integer.toString(i)).size());
                assertEquals(2, nodesTable2.get(Integer.toString(i)).stream().collect(Collectors.toSet()).size());
            }
            // check we have exactly 2 identical digests.
            for (int i = 1; i < 4; i++) {
                Assert.assertNotNull(digestsTable2.get(Integer.toString(i)));
                assertEquals(2, digestsTable2.get(Integer.toString(i)).size());
                assertEquals(1, digestsTable2.get(Integer.toString(i)).stream().collect(Collectors.toSet()).size());
            }
        }
    }
}
use of org.apache.pulsar.client.api.schema.RecordSchemaBuilder in project incubator-pulsar by apache.
the class SqliteJdbcSinkTest method testKeyValueSchema.
/**
 * Writes a single INLINE-encoded key/value record, whose key and value are generic records
 * built with the given schema type, through a {@link SqliteJdbcAutoSchemaSink} and checks
 * the row that ends up in the {@code kvtable} table.
 *
 * @param schemaType schema type used to build both the key and the value schema
 * @throws Exception if the table cannot be created or the sink fails
 */
@Test(dataProvider = "schemaType")
public void testKeyValueSchema(SchemaType schemaType) throws Exception {
    // Key: a record with a single optional string field.
    RecordSchemaBuilder keyBuilder = org.apache.pulsar.client.api.schema.SchemaBuilder.record("key");
    keyBuilder.field("key").type(SchemaType.STRING).optional().defaultValue(null);
    GenericSchema<GenericRecord> keySchema = Schema.generic(keyBuilder.build(schemaType));
    GenericRecord keyRecord = keySchema.newRecordBuilder()
            .set("key", "mykey")
            .build();

    // Value: one optional field per primitive type under test.
    RecordSchemaBuilder valueBuilder = org.apache.pulsar.client.api.schema.SchemaBuilder.record("value");
    valueBuilder.field("string").type(SchemaType.STRING).optional().defaultValue(null);
    valueBuilder.field("stringutf8").type(SchemaType.STRING).optional().defaultValue(null);
    valueBuilder.field("int").type(SchemaType.INT32).optional().defaultValue(null);
    valueBuilder.field("bool").type(SchemaType.BOOLEAN).optional().defaultValue(null);
    valueBuilder.field("double").type(SchemaType.DOUBLE).optional().defaultValue(null);
    valueBuilder.field("float").type(SchemaType.FLOAT).optional().defaultValue(null);
    valueBuilder.field("long").type(SchemaType.INT64).optional().defaultValue(null);
    GenericSchema<GenericRecord> valueSchema = Schema.generic(valueBuilder.build(schemaType));

    // For AVRO the second string field is supplied as an Avro Utf8 instead of a String.
    final Object utf8Value;
    if (schemaType == SchemaType.AVRO) {
        utf8Value = new Utf8("thestringutf8");
    } else {
        utf8Value = "thestringutf8";
    }
    GenericRecord valueRecord = valueSchema.newRecordBuilder()
            .set("string", "thestring")
            .set("stringutf8", utf8Value)
            .set("int", Integer.MAX_VALUE)
            .set("bool", true)
            .set("double", Double.MAX_VALUE)
            .set("float", Float.MAX_VALUE)
            .set("long", Long.MIN_VALUE)
            .build();

    Schema<KeyValue<GenericRecord, GenericRecord>> keyValueSchema =
            Schema.KeyValue(keySchema, valueSchema, KeyValueEncodingType.INLINE);
    KeyValue<GenericRecord, GenericRecord> keyValue = new KeyValue<>(keyRecord, valueRecord);

    GenericObject wrappedKeyValue = new GenericObject() {
        @Override
        public Object getNativeObject() {
            return keyValue;
        }

        @Override
        public SchemaType getSchemaType() {
            return SchemaType.KEY_VALUE;
        }
    };
    Record<GenericObject> record = new Record<>() {
        @Override
        public GenericObject getValue() {
            return wrappedKeyValue;
        }

        @Override
        public org.apache.pulsar.client.api.Schema getSchema() {
            return keyValueSchema;
        }

        @Override
        public Optional<String> getTopicName() {
            return Optional.of("topic");
        }
    };

    // The shared sink is closed before a dedicated table and sink are set up for this test.
    jdbcSink.close();
    sqliteUtils.createTable("CREATE TABLE kvtable ("
            + " key TEXT,"
            + " int INTEGER,"
            + " string TEXT,"
            + " stringutf8 TEXT,"
            + " nulltext TEXT,"
            + " bool NUMERIC,"
            + " double NUMERIC,"
            + " float NUMERIC,"
            + " long INTEGER,"
            + "PRIMARY KEY (key));");

    Map<String, Object> sinkConfig = Maps.newHashMap();
    sinkConfig.put("jdbcUrl", sqliteUtils.sqliteUri());
    sinkConfig.put("tableName", "kvtable");
    sinkConfig.put("key", "key");
    sinkConfig.put("nonKey", "long,int,double,float,bool,nulltext,string,stringutf8");
    // change batchSize to 1, to flush on each write.
    sinkConfig.put("batchSize", 1);

    try (SqliteJdbcAutoSchemaSink sink = new SqliteJdbcAutoSchemaSink()) {
        sink.open(sinkConfig, null);
        sink.write(record);
        Awaitility.await().untilAsserted(() -> {
            // Column positions follow the select list: int, string, stringutf8, bool,
            // double, float, long, nulltext.
            final int rows = sqliteUtils.select("select int,string,stringutf8,bool,double,float," + "long,nulltext from kvtable where key='mykey'", row -> {
                Assert.assertEquals(row.getInt(1), Integer.MAX_VALUE);
                Assert.assertEquals(row.getString(2), "thestring");
                Assert.assertEquals(row.getString(3), "thestringutf8");
                Assert.assertEquals(row.getBoolean(4), true);
                Assert.assertEquals(row.getDouble(5), Double.MAX_VALUE);
                Assert.assertEquals(row.getFloat(6), Float.MAX_VALUE);
                Assert.assertEquals(row.getLong(7), Long.MIN_VALUE);
                Assert.assertNull(row.getString(8));
            });
            Assert.assertEquals(rows, 1);
        });
    }
}
use of org.apache.pulsar.client.api.schema.RecordSchemaBuilder in project incubator-pulsar by apache.
the class ElasticSearchExtractTests method getKeyValueGenericObject.
/**
 * Builds a record on topic {@code data-ks1.table1} whose value is an INLINE-encoded
 * {@link KeyValue}: the supplied key record paired with a one-field value record
 * ({@code value = "value"}) created with the given schema type.
 *
 * @param schemaType       schema type used to build the value schema
 * @param keySchema        schema of the key part
 * @param keyGenericRecord key part of the key/value pair
 * @return a record exposing the key/value pair as a {@code KEY_VALUE} generic object
 */
private Record<GenericObject> getKeyValueGenericObject(SchemaType schemaType, GenericSchema<GenericRecord> keySchema, GenericRecord keyGenericRecord) {
    RecordSchemaBuilder valueBuilder = SchemaBuilder.record("value");
    valueBuilder.field("value").type(SchemaType.STRING);
    GenericSchema<GenericRecord> valueSchema = Schema.generic(valueBuilder.build(schemaType));
    GenericRecord valueRecord = valueSchema.newRecordBuilder()
            .set("value", "value")
            .build();

    Schema<KeyValue<GenericRecord, GenericRecord>> pairSchema =
            Schema.KeyValue(keySchema, valueSchema, KeyValueEncodingType.INLINE);
    KeyValue<GenericRecord, GenericRecord> pair = new KeyValue<>(keyGenericRecord, valueRecord);

    GenericObject wrappedPair = new GenericObject() {
        @Override
        public Object getNativeObject() {
            return pair;
        }

        @Override
        public SchemaType getSchemaType() {
            return SchemaType.KEY_VALUE;
        }
    };

    return new Record<GenericObject>() {
        @Override
        public GenericObject getValue() {
            return wrappedPair;
        }

        @Override
        public Schema getSchema() {
            return pairSchema;
        }

        @Override
        public Optional<String> getTopicName() {
            return Optional.of("data-ks1.table1");
        }
    };
}
use of org.apache.pulsar.client.api.schema.RecordSchemaBuilder in project incubator-pulsar by apache.
the class ElasticSearchExtractTests method testGenericRecord.
/**
 * Checks {@code ElasticSearchSink.extractIdAndDocument} on a generic record that contains
 * two scalar fields and one nested record, under four sink configurations: a single-field
 * primary key, a two-field primary key, no primary key, and no primary key with a null value.
 *
 * @param schemaType schema type used to build the nested and the outer record schema
 * @throws Exception if a sink cannot be opened or extraction fails
 */
@Test(dataProvider = "schemaType")
public void testGenericRecord(SchemaType schemaType) throws Exception {
    RecordSchemaBuilder valueSchemaBuilder = org.apache.pulsar.client.api.schema.SchemaBuilder.record("value");
    valueSchemaBuilder.field("c").type(SchemaType.STRING).optional().defaultValue(null);
    valueSchemaBuilder.field("d").type(SchemaType.INT32).optional().defaultValue(null);

    // Nested record type stored in field "e" of the outer record.
    RecordSchemaBuilder udtSchemaBuilder = SchemaBuilder.record("type1");
    udtSchemaBuilder.field("a").type(SchemaType.STRING).optional().defaultValue(null);
    udtSchemaBuilder.field("b").type(SchemaType.BOOLEAN).optional().defaultValue(null);
    udtSchemaBuilder.field("d").type(SchemaType.DOUBLE).optional().defaultValue(null);
    udtSchemaBuilder.field("f").type(SchemaType.FLOAT).optional().defaultValue(null);
    udtSchemaBuilder.field("i").type(SchemaType.INT32).optional().defaultValue(null);
    udtSchemaBuilder.field("l").type(SchemaType.INT64).optional().defaultValue(null);
    GenericSchema<GenericRecord> udtGenericSchema = Schema.generic(udtSchemaBuilder.build(schemaType));
    valueSchemaBuilder.field("e", udtGenericSchema).type(schemaType).optional().defaultValue(null);

    GenericSchema<GenericRecord> valueSchema = Schema.generic(valueSchemaBuilder.build(schemaType));
    GenericRecord valueGenericRecord = valueSchema.newRecordBuilder()
            .set("c", "1")
            .set("d", 1)
            .set("e", udtGenericSchema.newRecordBuilder()
                    .set("a", "a")
                    .set("b", true)
                    .set("d", 1.0)
                    .set("f", 1.0f)
                    .set("i", 1)
                    .set("l", 10L)
                    .build())
            .build();
    Record<GenericObject> genericObjectRecord = new Record<GenericObject>() {
        @Override
        public Optional<String> getTopicName() {
            return Optional.of("data-ks1.table1");
        }

        @Override
        public org.apache.pulsar.client.api.Schema getSchema() {
            return valueSchema;
        }

        @Override
        public GenericObject getValue() {
            return valueGenericRecord;
        }
    };

    // The extracted document is the same for every configuration; only the id differs.
    final String expectedDocument = "{\"c\":\"1\",\"d\":1,\"e\":{\"a\":\"a\",\"b\":true,\"d\":1.0,\"f\":1.0,\"i\":1,\"l\":10}}";

    // single field PK
    ElasticSearchSink elasticSearchSink = new ElasticSearchSink();
    elasticSearchSink.open(ImmutableMap.of("elasticSearchUrl", "http://localhost:9200", "compatibilityMode", "ELASTICSEARCH", "primaryFields", "c", "schemaEnable", "true", "keyIgnore", "true"), null);
    Pair<String, String> pair = elasticSearchSink.extractIdAndDocument(genericObjectRecord);
    assertEquals(pair.getLeft(), "1");
    assertEquals(pair.getRight(), expectedDocument);

    // two fields PK
    ElasticSearchSink elasticSearchSink2 = new ElasticSearchSink();
    elasticSearchSink2.open(ImmutableMap.of("elasticSearchUrl", "http://localhost:9200", "compatibilityMode", "ELASTICSEARCH", "primaryFields", "c,d", "schemaEnable", "true", "keyIgnore", "true"), null);
    Pair<String, String> pair2 = elasticSearchSink2.extractIdAndDocument(genericObjectRecord);
    assertEquals(pair2.getLeft(), "[\"1\",1]");
    assertEquals(pair2.getRight(), expectedDocument);

    // default config with null PK => indexed with auto generated _id
    ElasticSearchSink elasticSearchSink3 = new ElasticSearchSink();
    elasticSearchSink3.open(ImmutableMap.of("elasticSearchUrl", "http://localhost:9200", "schemaEnable", "true", "compatibilityMode", "ELASTICSEARCH"), null);
    Pair<String, String> pair3 = elasticSearchSink3.extractIdAndDocument(genericObjectRecord);
    assertNull(pair3.getLeft());
    assertEquals(pair3.getRight(), expectedDocument);

    // default config with null PK + null value
    ElasticSearchSink elasticSearchSink4 = new ElasticSearchSink();
    elasticSearchSink4.open(ImmutableMap.of("elasticSearchUrl", "http://localhost:9200", "compatibilityMode", "ELASTICSEARCH", "schemaEnable", "true"), null);
    // Exercise the sink opened for this case; the previous code called elasticSearchSink3
    // here and left elasticSearchSink4 unused.
    Pair<String, String> pair4 = elasticSearchSink4.extractIdAndDocument(new Record<GenericObject>() {
        @Override
        public Optional<String> getTopicName() {
            return Optional.of("data-ks1.table1");
        }

        @Override
        public org.apache.pulsar.client.api.Schema getSchema() {
            return valueSchema;
        }

        @Override
        public GenericObject getValue() {
            return null;
        }
    });
    assertNull(pair4.getLeft());
    assertNull(pair4.getRight());
}
Aggregations