use of org.apache.pulsar.functions.source.PulsarSourceConfig in project pulsar by apache.
the class JavaInstanceRunnable method setupInput.
/**
 * Builds the input {@link Source} described by the function details and opens it.
 *
 * <p>When the source spec carries no class name, a Pulsar-backed source is assembled from the
 * configured input topics: a {@code SingleConsumerPulsarSource} when exactly one topic entry is
 * configured, otherwise a {@code MultiConsumerPulsarSource}. When a class name is present the
 * source is instantiated reflectively, with the instance classloader for
 * {@code BatchSourceExecutor} and the function classloader for every other class.
 *
 * <p>The thread context classloader is switched to the function classloader before the source is
 * opened only when this instance's component type is SOURCE; it is always set to the instance
 * classloader once opening finishes, whether it succeeded or failed.
 *
 * @param contextImpl context passed to the source on open; it supplies the subscription type and
 *                    receives the input consumers when the opened source is a {@code PulsarSource}
 * @throws Exception if the source cannot be created or opened; failures raised while opening are
 *                   logged and rethrown unchanged
 * @throws RuntimeException if the created object does not implement {@link Source}
 */
private void setupInput(ContextImpl contextImpl) throws Exception {
    final org.apache.pulsar.functions.proto.Function.FunctionDetails details =
            this.instanceConfig.getFunctionDetails();
    final SourceSpec spec = details.getSource();
    final String sourceClassName = spec.getClassName();
    final Object candidate;

    if (!sourceClassName.isEmpty()) {
        // An explicit source class: batch sources are loaded with the instance classloader,
        // everything else with the function classloader.
        final ClassLoader loader = sourceClassName.equals(BatchSourceExecutor.class.getName())
                ? this.instanceClassLoader
                : this.functionClassLoader;
        candidate = Reflections.createInstance(sourceClassName, loader);
    } else {
        // No source class configured: fall back to a Pulsar topic source.
        final Map<String, ConsumerConfig> topicSchema = new TreeMap<>();

        spec.getInputSpecsMap().forEach((topic, inputSpec) -> {
            ConsumerConfig topicConfig = ConsumerConfig.builder()
                    .isRegexPattern(inputSpec.getIsRegexPattern())
                    .build();
            // A schema type, when present, takes precedence over a serde class name.
            if (StringUtils.isNotEmpty(inputSpec.getSchemaType())) {
                topicConfig.setSchemaType(inputSpec.getSchemaType());
            } else if (StringUtils.isNotEmpty(inputSpec.getSerdeClassName())) {
                topicConfig.setSerdeClassName(inputSpec.getSerdeClassName());
            }
            topicConfig.setSchemaProperties(inputSpec.getSchemaPropertiesMap());
            topicConfig.setConsumerProperties(inputSpec.getConsumerPropertiesMap());
            if (inputSpec.hasReceiverQueueSize()) {
                topicConfig.setReceiverQueueSize(inputSpec.getReceiverQueueSize().getValue());
            }
            if (inputSpec.hasCryptoSpec()) {
                topicConfig.setCryptoConfig(CryptoUtils.convertFromSpec(inputSpec.getCryptoSpec()));
            }
            topicConfig.setPoolMessages(inputSpec.getPoolMessages());
            topicSchema.put(topic, topicConfig);
        });

        // Entries from the topic-to-serde map are added afterwards, so they replace any
        // entry already stored for the same topic.
        spec.getTopicsToSerDeClassNameMap().forEach((topic, serde) ->
                topicSchema.put(topic,
                        ConsumerConfig.builder().serdeClassName(serde).isRegexPattern(false).build()));

        if (StringUtils.isNotEmpty(spec.getTopicsPattern())) {
            topicSchema.get(spec.getTopicsPattern()).setRegexPattern(true);
        }

        // Exactly one topic entry can be served by a single consumer.
        final boolean singleTopic = topicSchema.size() == 1;
        final PulsarSourceConfig pulsarSourceConfig;
        if (singleTopic) {
            SingleConsumerPulsarSourceConfig singleConfig = new SingleConsumerPulsarSourceConfig();
            Map.Entry<String, ConsumerConfig> onlyTopic = topicSchema.entrySet().iterator().next();
            singleConfig.setTopic(onlyTopic.getKey());
            singleConfig.setConsumerConfig(onlyTopic.getValue());
            pulsarSourceConfig = singleConfig;
        } else {
            MultiConsumerPulsarSourceConfig multiConfig = new MultiConsumerPulsarSourceConfig();
            multiConfig.setTopicSchema(topicSchema);
            pulsarSourceConfig = multiConfig;
        }

        // Settings shared by both source flavours.
        final String subscriptionName;
        if (StringUtils.isNotBlank(spec.getSubscriptionName())) {
            subscriptionName = spec.getSubscriptionName();
        } else {
            subscriptionName = InstanceUtils.getDefaultSubscriptionName(details);
        }
        pulsarSourceConfig.setSubscriptionName(subscriptionName);
        pulsarSourceConfig.setProcessingGuarantees(
                FunctionConfig.ProcessingGuarantees.valueOf(details.getProcessingGuarantees().name()));
        pulsarSourceConfig.setSubscriptionPosition(
                convertFromFunctionDetailsSubscriptionPosition(spec.getSubscriptionPosition()));
        checkNotNull(contextImpl.getSubscriptionType());
        pulsarSourceConfig.setSubscriptionType(contextImpl.getSubscriptionType());
        pulsarSourceConfig.setTypeClassName(spec.getTypeClassName());

        // Only positive values are copied over; otherwise the config keeps its own values.
        if (spec.getTimeoutMs() > 0) {
            pulsarSourceConfig.setTimeoutMs(spec.getTimeoutMs());
        }
        if (spec.getNegativeAckRedeliveryDelayMs() > 0) {
            pulsarSourceConfig.setNegativeAckRedeliveryDelayMs(spec.getNegativeAckRedeliveryDelayMs());
        }
        if (details.hasRetryDetails()) {
            pulsarSourceConfig.setMaxMessageRetries(details.getRetryDetails().getMaxMessageRetries());
            pulsarSourceConfig.setDeadLetterTopic(details.getRetryDetails().getDeadLetterTopic());
        }

        if (singleTopic) {
            candidate = new SingleConsumerPulsarSource(
                    this.client,
                    (SingleConsumerPulsarSourceConfig) pulsarSourceConfig,
                    this.properties,
                    this.functionClassLoader);
        } else {
            candidate = new MultiConsumerPulsarSource(
                    this.client,
                    (MultiConsumerPulsarSourceConfig) pulsarSourceConfig,
                    this.properties,
                    this.functionClassLoader);
        }
    }

    if (!(candidate instanceof Source)) {
        throw new RuntimeException("Source does not implement correct interface");
    }
    final Class<?>[] typeArgs = TypeResolver.resolveRawArguments(Source.class, candidate.getClass());
    assert typeArgs.length > 0;
    this.source = (Source<?>) candidate;

    if (componentType == org.apache.pulsar.functions.proto.Function.FunctionDetails.ComponentType.SOURCE) {
        Thread.currentThread().setContextClassLoader(this.functionClassLoader);
    }
    try {
        // An empty configs string means "no configuration"; otherwise it is parsed into a map.
        final Map<String, Object> sourceConfigs;
        if (spec.getConfigs().isEmpty()) {
            sourceConfigs = new HashMap<>();
        } else {
            sourceConfigs = ObjectMapperFactory.getThreadLocal().readValue(
                    spec.getConfigs(), new TypeReference<Map<String, Object>>() {
                    });
        }
        this.source.open(sourceConfigs, contextImpl);

        if (this.source instanceof PulsarSource) {
            contextImpl.setInputConsumers(((PulsarSource) this.source).getInputConsumers());
        }
    } catch (Exception e) {
        log.error("Source open produced uncaught exception: ", e);
        throw e;
    } finally {
        Thread.currentThread().setContextClassLoader(this.instanceClassLoader);
    }
}
use of org.apache.pulsar.functions.source.PulsarSourceConfig in project pulsar by apache.
the class HbaseGenericRecordSinkTest method TestOpenAndWriteSink.
/**
 * Writes one Avro-encoded {@code Foo} record through {@link HbaseGenericRecordSink} and reads
 * the row back from the configured HBase table to verify every configured qualifier.
 *
 * <p>The test is disabled; its configuration points at a ZooKeeper quorum on
 * {@code localhost:2181}, so it presumably needs a locally running HBase (confirm before
 * enabling).
 *
 * @throws Exception if opening the sink, writing the record, or reading from HBase fails
 */
@Test(enabled = false)
public void TestOpenAndWriteSink() throws Exception {
    message = mock(MessageImpl.class);

    // Sink configuration.
    Map<String, Object> map = new HashMap<>();
    map.put("zookeeperQuorum", "localhost");
    map.put("zookeeperClientPort", "2181");
    map.put("zookeeperZnodeParent", "/hbase");
    map.put("tableName", "default:pulsar_hbase");
    map.put("rowKeyName", rowKeyName);
    map.put("familyName", familyName);
    List<String> qualifierNames = new ArrayList<>();
    qualifierNames.add(name);
    qualifierNames.add(address);
    qualifierNames.add(age);
    qualifierNames.add(flag);
    map.put("qualifierNames", qualifierNames);

    mockSinkContext = mock(SinkContext.class);
    HbaseGenericRecordSink sink = new HbaseGenericRecordSink();

    // prepare a foo Record
    Foo obj = new Foo();
    obj.setRowKey("rowKey_value");
    obj.setName("name_value");
    obj.setAddress("address_value");
    obj.setAge(30);
    obj.setFlag(true);
    AvroSchema<Foo> schema = AvroSchema.of(SchemaDefinition.<Foo>builder().withPojo(Foo.class).build());
    byte[] bytes = schema.encode(obj);

    PulsarSourceConfig pulsarSourceConfig = mock(PulsarSourceConfig.class);
    Consumer consumer = mock(Consumer.class);
    Record<GenericRecord> record = PulsarRecord.<GenericRecord>builder()
            .message(message)
            .topicName("fake_topic_name")
            .ackFunction(() -> {
                if (pulsarSourceConfig.getProcessingGuarantees()
                        == FunctionConfig.ProcessingGuarantees.EFFECTIVELY_ONCE) {
                    consumer.acknowledgeCumulativeAsync(message);
                } else {
                    consumer.acknowledgeAsync(message);
                }
            })
            .failFunction(() -> {
                if (pulsarSourceConfig.getProcessingGuarantees()
                        == FunctionConfig.ProcessingGuarantees.EFFECTIVELY_ONCE) {
                    throw new RuntimeException("Failed to process message: " + message.getMessageId());
                }
            })
            .build();

    // The mocked message hands back the decoded generic record for the encoded Foo.
    GenericSchema<GenericRecord> genericAvroSchema = new GenericAvroSchema(schema.getSchemaInfo());
    when(message.getValue()).thenReturn(genericAvroSchema.decode(bytes));
    log.info("foo:{}, Message.getValue: {}, record.getValue: {}", obj.toString(), message.getValue().toString(), record.getValue().toString());

    // change batchSize to 1, to flush on each write.
    map.put("batchTimeMs", 1);
    map.put("batchSize", 1);

    // open should succeed
    sink.open(map, mockSinkContext);
    try {
        // write should succeed.
        sink.write(record);
        log.info("executed write");
        // sleep to wait for the backend flush to complete
        Thread.sleep(500);

        // value has been written to the hbase table, read it out and verify.
        Table table = TableUtils.getTable(map);
        try {
            Get get = new Get(Bytes.toBytes(obj.getRowKey()));
            Result result = table.get(get);
            byte[] byteName = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(name));
            byte[] byteAddress = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(address));
            byte[] byteAge = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(age));
            byte[] byteFlag = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(flag));
            // TestNG's assertEquals takes (actual, expected).
            Assert.assertEquals(Bytes.toString(byteName), obj.getName());
            Assert.assertEquals(Bytes.toString(byteAddress), obj.getAddress());
            Assert.assertEquals(Bytes.toInt(byteAge), obj.getAge());
            Assert.assertEquals(Bytes.toBoolean(byteFlag), obj.isFlag());
        } finally {
            // Release the table even when a read or an assertion fails.
            table.close();
        }
    } finally {
        // Always close the opened sink so a failing test does not leak it.
        sink.close();
    }
}
use of org.apache.pulsar.functions.source.PulsarSourceConfig in project pulsar by yahoo.
the class HbaseGenericRecordSinkTest method TestOpenAndWriteSink.
/**
 * Writes one Avro-encoded {@code Foo} record through {@link HbaseGenericRecordSink} and reads
 * the row back from the configured HBase table to verify every configured qualifier.
 *
 * <p>The test is disabled; its configuration points at a ZooKeeper quorum on
 * {@code localhost:2181}, so it presumably needs a locally running HBase (confirm before
 * enabling).
 *
 * @throws Exception if opening the sink, writing the record, or reading from HBase fails
 */
@Test(enabled = false)
public void TestOpenAndWriteSink() throws Exception {
    message = mock(MessageImpl.class);

    // Sink configuration.
    Map<String, Object> map = new HashMap<>();
    map.put("zookeeperQuorum", "localhost");
    map.put("zookeeperClientPort", "2181");
    map.put("zookeeperZnodeParent", "/hbase");
    map.put("tableName", "default:pulsar_hbase");
    map.put("rowKeyName", rowKeyName);
    map.put("familyName", familyName);
    List<String> qualifierNames = new ArrayList<>();
    qualifierNames.add(name);
    qualifierNames.add(address);
    qualifierNames.add(age);
    qualifierNames.add(flag);
    map.put("qualifierNames", qualifierNames);

    mockSinkContext = mock(SinkContext.class);
    HbaseGenericRecordSink sink = new HbaseGenericRecordSink();

    // prepare a foo Record
    Foo obj = new Foo();
    obj.setRowKey("rowKey_value");
    obj.setName("name_value");
    obj.setAddress("address_value");
    obj.setAge(30);
    obj.setFlag(true);
    AvroSchema<Foo> schema = AvroSchema.of(SchemaDefinition.<Foo>builder().withPojo(Foo.class).build());
    byte[] bytes = schema.encode(obj);

    PulsarSourceConfig pulsarSourceConfig = mock(PulsarSourceConfig.class);
    Consumer consumer = mock(Consumer.class);
    Record<GenericRecord> record = PulsarRecord.<GenericRecord>builder()
            .message(message)
            .topicName("fake_topic_name")
            .ackFunction(() -> {
                if (pulsarSourceConfig.getProcessingGuarantees()
                        == FunctionConfig.ProcessingGuarantees.EFFECTIVELY_ONCE) {
                    consumer.acknowledgeCumulativeAsync(message);
                } else {
                    consumer.acknowledgeAsync(message);
                }
            })
            .failFunction(() -> {
                if (pulsarSourceConfig.getProcessingGuarantees()
                        == FunctionConfig.ProcessingGuarantees.EFFECTIVELY_ONCE) {
                    throw new RuntimeException("Failed to process message: " + message.getMessageId());
                }
            })
            .build();

    // The mocked message hands back the decoded generic record for the encoded Foo.
    GenericSchema<GenericRecord> genericAvroSchema = new GenericAvroSchema(schema.getSchemaInfo());
    when(message.getValue()).thenReturn(genericAvroSchema.decode(bytes));
    log.info("foo:{}, Message.getValue: {}, record.getValue: {}", obj.toString(), message.getValue().toString(), record.getValue().toString());

    // change batchSize to 1, to flush on each write.
    map.put("batchTimeMs", 1);
    map.put("batchSize", 1);

    // open should succeed
    sink.open(map, mockSinkContext);
    try {
        // write should succeed.
        sink.write(record);
        log.info("executed write");
        // sleep to wait for the backend flush to complete
        Thread.sleep(500);

        // value has been written to the hbase table, read it out and verify.
        Table table = TableUtils.getTable(map);
        try {
            Get get = new Get(Bytes.toBytes(obj.getRowKey()));
            Result result = table.get(get);
            byte[] byteName = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(name));
            byte[] byteAddress = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(address));
            byte[] byteAge = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(age));
            byte[] byteFlag = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(flag));
            // TestNG's assertEquals takes (actual, expected).
            Assert.assertEquals(Bytes.toString(byteName), obj.getName());
            Assert.assertEquals(Bytes.toString(byteAddress), obj.getAddress());
            Assert.assertEquals(Bytes.toInt(byteAge), obj.getAge());
            Assert.assertEquals(Bytes.toBoolean(byteFlag), obj.isFlag());
        } finally {
            // Release the table even when a read or an assertion fails.
            table.close();
        }
    } finally {
        // Always close the opened sink so a failing test does not leak it.
        sink.close();
    }
}
use of org.apache.pulsar.functions.source.PulsarSourceConfig in project incubator-pulsar by apache.
the class JavaInstanceRunnable method setupInput.
/**
 * Builds the input {@link Source} described by the function details and opens it.
 *
 * <p>When the source spec carries no class name, a Pulsar-backed source is assembled from the
 * configured input topics: a {@code SingleConsumerPulsarSource} when exactly one topic entry is
 * configured, otherwise a {@code MultiConsumerPulsarSource}. When a class name is present the
 * source is instantiated reflectively, with the instance classloader for
 * {@code BatchSourceExecutor} and the function classloader for every other class.
 *
 * <p>The thread context classloader is switched to the function classloader before the source is
 * opened only when this instance's component type is SOURCE; it is always set to the instance
 * classloader once opening finishes, whether it succeeded or failed.
 *
 * @param contextImpl context passed to the source on open; it supplies the subscription type and
 *                    receives the input consumers when the opened source is a {@code PulsarSource}
 * @throws Exception if the source cannot be created or opened; failures raised while opening are
 *                   logged and rethrown unchanged
 * @throws RuntimeException if the created object does not implement {@link Source}
 */
private void setupInput(ContextImpl contextImpl) throws Exception {
    final org.apache.pulsar.functions.proto.Function.FunctionDetails details =
            this.instanceConfig.getFunctionDetails();
    final SourceSpec spec = details.getSource();
    final String sourceClassName = spec.getClassName();
    final Object candidate;

    if (!sourceClassName.isEmpty()) {
        // An explicit source class: batch sources are loaded with the instance classloader,
        // everything else with the function classloader.
        final ClassLoader loader = sourceClassName.equals(BatchSourceExecutor.class.getName())
                ? this.instanceClassLoader
                : this.functionClassLoader;
        candidate = Reflections.createInstance(sourceClassName, loader);
    } else {
        // No source class configured: fall back to a Pulsar topic source.
        final Map<String, ConsumerConfig> topicSchema = new TreeMap<>();

        spec.getInputSpecsMap().forEach((topic, inputSpec) -> {
            ConsumerConfig topicConfig = ConsumerConfig.builder()
                    .isRegexPattern(inputSpec.getIsRegexPattern())
                    .build();
            // A schema type, when present, takes precedence over a serde class name.
            if (StringUtils.isNotEmpty(inputSpec.getSchemaType())) {
                topicConfig.setSchemaType(inputSpec.getSchemaType());
            } else if (StringUtils.isNotEmpty(inputSpec.getSerdeClassName())) {
                topicConfig.setSerdeClassName(inputSpec.getSerdeClassName());
            }
            topicConfig.setSchemaProperties(inputSpec.getSchemaPropertiesMap());
            topicConfig.setConsumerProperties(inputSpec.getConsumerPropertiesMap());
            if (inputSpec.hasReceiverQueueSize()) {
                topicConfig.setReceiverQueueSize(inputSpec.getReceiverQueueSize().getValue());
            }
            if (inputSpec.hasCryptoSpec()) {
                topicConfig.setCryptoConfig(CryptoUtils.convertFromSpec(inputSpec.getCryptoSpec()));
            }
            topicConfig.setPoolMessages(inputSpec.getPoolMessages());
            topicSchema.put(topic, topicConfig);
        });

        // Entries from the topic-to-serde map are added afterwards, so they replace any
        // entry already stored for the same topic.
        spec.getTopicsToSerDeClassNameMap().forEach((topic, serde) ->
                topicSchema.put(topic,
                        ConsumerConfig.builder().serdeClassName(serde).isRegexPattern(false).build()));

        if (StringUtils.isNotEmpty(spec.getTopicsPattern())) {
            topicSchema.get(spec.getTopicsPattern()).setRegexPattern(true);
        }

        // Exactly one topic entry can be served by a single consumer.
        final boolean singleTopic = topicSchema.size() == 1;
        final PulsarSourceConfig pulsarSourceConfig;
        if (singleTopic) {
            SingleConsumerPulsarSourceConfig singleConfig = new SingleConsumerPulsarSourceConfig();
            Map.Entry<String, ConsumerConfig> onlyTopic = topicSchema.entrySet().iterator().next();
            singleConfig.setTopic(onlyTopic.getKey());
            singleConfig.setConsumerConfig(onlyTopic.getValue());
            pulsarSourceConfig = singleConfig;
        } else {
            MultiConsumerPulsarSourceConfig multiConfig = new MultiConsumerPulsarSourceConfig();
            multiConfig.setTopicSchema(topicSchema);
            pulsarSourceConfig = multiConfig;
        }

        // Settings shared by both source flavours.
        final String subscriptionName;
        if (StringUtils.isNotBlank(spec.getSubscriptionName())) {
            subscriptionName = spec.getSubscriptionName();
        } else {
            subscriptionName = InstanceUtils.getDefaultSubscriptionName(details);
        }
        pulsarSourceConfig.setSubscriptionName(subscriptionName);
        pulsarSourceConfig.setProcessingGuarantees(
                FunctionConfig.ProcessingGuarantees.valueOf(details.getProcessingGuarantees().name()));
        pulsarSourceConfig.setSubscriptionPosition(
                convertFromFunctionDetailsSubscriptionPosition(spec.getSubscriptionPosition()));
        checkNotNull(contextImpl.getSubscriptionType());
        pulsarSourceConfig.setSubscriptionType(contextImpl.getSubscriptionType());
        pulsarSourceConfig.setTypeClassName(spec.getTypeClassName());

        // Only positive values are copied over; otherwise the config keeps its own values.
        if (spec.getTimeoutMs() > 0) {
            pulsarSourceConfig.setTimeoutMs(spec.getTimeoutMs());
        }
        if (spec.getNegativeAckRedeliveryDelayMs() > 0) {
            pulsarSourceConfig.setNegativeAckRedeliveryDelayMs(spec.getNegativeAckRedeliveryDelayMs());
        }
        if (details.hasRetryDetails()) {
            pulsarSourceConfig.setMaxMessageRetries(details.getRetryDetails().getMaxMessageRetries());
            pulsarSourceConfig.setDeadLetterTopic(details.getRetryDetails().getDeadLetterTopic());
        }

        if (singleTopic) {
            candidate = new SingleConsumerPulsarSource(
                    this.client,
                    (SingleConsumerPulsarSourceConfig) pulsarSourceConfig,
                    this.properties,
                    this.functionClassLoader);
        } else {
            candidate = new MultiConsumerPulsarSource(
                    this.client,
                    (MultiConsumerPulsarSourceConfig) pulsarSourceConfig,
                    this.properties,
                    this.functionClassLoader);
        }
    }

    if (!(candidate instanceof Source)) {
        throw new RuntimeException("Source does not implement correct interface");
    }
    final Class<?>[] typeArgs = TypeResolver.resolveRawArguments(Source.class, candidate.getClass());
    assert typeArgs.length > 0;
    this.source = (Source<?>) candidate;

    if (componentType == org.apache.pulsar.functions.proto.Function.FunctionDetails.ComponentType.SOURCE) {
        Thread.currentThread().setContextClassLoader(this.functionClassLoader);
    }
    try {
        // An empty configs string means "no configuration"; otherwise it is parsed into a map.
        final Map<String, Object> sourceConfigs;
        if (spec.getConfigs().isEmpty()) {
            sourceConfigs = new HashMap<>();
        } else {
            sourceConfigs = ObjectMapperFactory.getThreadLocal().readValue(
                    spec.getConfigs(), new TypeReference<Map<String, Object>>() {
                    });
        }
        this.source.open(sourceConfigs, contextImpl);

        if (this.source instanceof PulsarSource) {
            contextImpl.setInputConsumers(((PulsarSource) this.source).getInputConsumers());
        }
    } catch (Exception e) {
        log.error("Source open produced uncaught exception: ", e);
        throw e;
    } finally {
        Thread.currentThread().setContextClassLoader(this.instanceClassLoader);
    }
}
use of org.apache.pulsar.functions.source.PulsarSourceConfig in project incubator-pulsar by apache.
the class HbaseGenericRecordSinkTest method TestOpenAndWriteSink.
/**
 * Writes one Avro-encoded {@code Foo} record through {@link HbaseGenericRecordSink} and reads
 * the row back from the configured HBase table to verify every configured qualifier.
 *
 * <p>The test is disabled; its configuration points at a ZooKeeper quorum on
 * {@code localhost:2181}, so it presumably needs a locally running HBase (confirm before
 * enabling).
 *
 * @throws Exception if opening the sink, writing the record, or reading from HBase fails
 */
@Test(enabled = false)
public void TestOpenAndWriteSink() throws Exception {
    message = mock(MessageImpl.class);

    // Sink configuration.
    Map<String, Object> map = new HashMap<>();
    map.put("zookeeperQuorum", "localhost");
    map.put("zookeeperClientPort", "2181");
    map.put("zookeeperZnodeParent", "/hbase");
    map.put("tableName", "default:pulsar_hbase");
    map.put("rowKeyName", rowKeyName);
    map.put("familyName", familyName);
    List<String> qualifierNames = new ArrayList<>();
    qualifierNames.add(name);
    qualifierNames.add(address);
    qualifierNames.add(age);
    qualifierNames.add(flag);
    map.put("qualifierNames", qualifierNames);

    mockSinkContext = mock(SinkContext.class);
    HbaseGenericRecordSink sink = new HbaseGenericRecordSink();

    // prepare a foo Record
    Foo obj = new Foo();
    obj.setRowKey("rowKey_value");
    obj.setName("name_value");
    obj.setAddress("address_value");
    obj.setAge(30);
    obj.setFlag(true);
    AvroSchema<Foo> schema = AvroSchema.of(SchemaDefinition.<Foo>builder().withPojo(Foo.class).build());
    byte[] bytes = schema.encode(obj);

    PulsarSourceConfig pulsarSourceConfig = mock(PulsarSourceConfig.class);
    Consumer consumer = mock(Consumer.class);
    Record<GenericRecord> record = PulsarRecord.<GenericRecord>builder()
            .message(message)
            .topicName("fake_topic_name")
            .ackFunction(() -> {
                if (pulsarSourceConfig.getProcessingGuarantees()
                        == FunctionConfig.ProcessingGuarantees.EFFECTIVELY_ONCE) {
                    consumer.acknowledgeCumulativeAsync(message);
                } else {
                    consumer.acknowledgeAsync(message);
                }
            })
            .failFunction(() -> {
                if (pulsarSourceConfig.getProcessingGuarantees()
                        == FunctionConfig.ProcessingGuarantees.EFFECTIVELY_ONCE) {
                    throw new RuntimeException("Failed to process message: " + message.getMessageId());
                }
            })
            .build();

    // The mocked message hands back the decoded generic record for the encoded Foo.
    GenericSchema<GenericRecord> genericAvroSchema = new GenericAvroSchema(schema.getSchemaInfo());
    when(message.getValue()).thenReturn(genericAvroSchema.decode(bytes));
    log.info("foo:{}, Message.getValue: {}, record.getValue: {}", obj.toString(), message.getValue().toString(), record.getValue().toString());

    // change batchSize to 1, to flush on each write.
    map.put("batchTimeMs", 1);
    map.put("batchSize", 1);

    // open should succeed
    sink.open(map, mockSinkContext);
    try {
        // write should succeed.
        sink.write(record);
        log.info("executed write");
        // sleep to wait for the backend flush to complete
        Thread.sleep(500);

        // value has been written to the hbase table, read it out and verify.
        Table table = TableUtils.getTable(map);
        try {
            Get get = new Get(Bytes.toBytes(obj.getRowKey()));
            Result result = table.get(get);
            byte[] byteName = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(name));
            byte[] byteAddress = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(address));
            byte[] byteAge = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(age));
            byte[] byteFlag = result.getValue(Bytes.toBytes(familyName), Bytes.toBytes(flag));
            // TestNG's assertEquals takes (actual, expected).
            Assert.assertEquals(Bytes.toString(byteName), obj.getName());
            Assert.assertEquals(Bytes.toString(byteAddress), obj.getAddress());
            Assert.assertEquals(Bytes.toInt(byteAge), obj.getAge());
            Assert.assertEquals(Bytes.toBoolean(byteFlag), obj.isFlag());
        } finally {
            // Release the table even when a read or an assertion fails.
            table.close();
        }
    } finally {
        // Always close the opened sink so a failing test does not leak it.
        sink.close();
    }
}
Aggregations