use of org.talend.components.kafka.input.KafkaInputProperties in project components by Talend.
the class KafkaDatasetRuntime method getSample.
/**
 * Reads a sample of records from the Kafka dataset and hands each one to the given callback.
 * <p>
 * Kafka is an unbounded source, so reading has to be stopped by a time-out: the input is configured with a maximum
 * read time of 1000 (1 second according to the original note), and the read ends after that whether or not any
 * record was sampled.
 *
 * @param limit the maximum number of records to return.
 * @param consumer a callback that will be applied to each sampled record. This callback should throw a
 * {@link org.talend.daikon.exception.TalendRuntimeException} if there was an error processing the record.
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Describe a time-bounded read of the dataset, starting from the earliest offset.
    KafkaInputProperties sampleProperties = new KafkaInputProperties(null);
    sampleProperties.init();
    sampleProperties.setDatasetProperties(dataset);
    sampleProperties.useMaxReadTime.setValue(true);
    sampleProperties.maxReadTime.setValue(1000L);
    sampleProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    // TODO: BEAM-1847: Enable both stopping conditions when they can be set, and remove Sample transform from job.
    // sampleProperties.useMaxNumRecords.setValue(true);
    // sampleProperties.maxNumRecords.setValue(Long.valueOf(limit));

    // Build the input runtime from those properties.
    KafkaInputPTransformRuntime sampleSource = new KafkaInputPTransformRuntime();
    sampleSource.initialize(null, sampleProperties);

    // Run a pipeline that reads from the source, keeps at most `limit` records and feeds them to the callback.
    final Pipeline samplingPipeline = Pipeline.create(PipelineOptionsFactory.create());
    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        samplingPipeline.apply(sampleSource).apply(Sample.<IndexedRecord>any(limit)).apply(sink);
        samplingPipeline.run().waitUntilFinish();
    }
}
use of org.talend.components.kafka.input.KafkaInputProperties in project components by Talend.
the class KafkaAvroBeamRuntimeTestIT method avroBasicTest.
/**
 * Read avro(Person) format and write avro(Person) format with schema, using round-robin partitioning on the output.
 * <p>
 * The test publishes {@code maxRecords} random persons tagged with a unique test id to {@code TOPIC_AVRO_IN}, runs a
 * pipeline that copies the records of this test id to {@code TOPIC_AVRO_OUT}, then reads them back and compares them
 * with what was sent.
 *
 * @throws IOException declared for the Avro (de)serialization helpers of {@code Person}.
 */
@Test
public void avroBasicTest() throws IOException {
    String testID = "avroBasicTest" + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);

    // ----------------- Send data to TOPIC_AVRO_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
    // try-with-resources closes (and therefore flushes) the producer even if serialization or send fails.
    try (Producer<Void, byte[]> producer = new KafkaProducer<>(props)) {
        for (Person person : expectedPersons) {
            ProducerRecord<Void, byte[]> message = new ProducerRecord<>(TOPIC_AVRO_IN, person.serToAvroBytes());
            producer.send(message);
        }
    }
    // ----------------- Send data to TOPIC_AVRO_IN done --------------------

    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);

    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.ROUND_ROBIN);
    outputProperties.useCompress.setValue(false);

    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);

    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new KafkaCsvBeamRuntimeTestIT.FilterByGroup(testID))).apply(outputRuntime);
    // Wait for the pipeline to complete before the bounded read-back below starts its clock.
    pipeline.run().waitUntilFinish();
    // ----------------- pipeline done --------------------

    // ----------------- Read data from TOPIC_AVRO_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("auto.offset.reset", "earliest");
    List<Person> results = new ArrayList<>();
    // Upper bound on the read-back so that missing records fail the assertion instead of hanging the build.
    final long readDeadline = System.currentTimeMillis() + 60000L;
    try (KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props)) {
        consumer.subscribe(Arrays.asList(TOPIC_AVRO_OUT));
        do {
            ConsumerRecords<String, byte[]> records = consumer.poll(100);
            for (ConsumerRecord<String, byte[]> record : records) {
                Person person = Person.desFromAvroBytes(record.value());
                // The topic is shared between runs: keep only the records produced by this test.
                if (testID.equals(person.group)) {
                    results.add(person);
                }
            }
        } while (results.size() < maxRecords && System.currentTimeMillis() < readDeadline);
    }
    // ----------------- Read data from TOPIC_AVRO_OUT done --------------------

    assertEquals(expectedPersons, results);
}
use of org.talend.components.kafka.input.KafkaInputProperties in project components by Talend.
the class KafkaAvroBeamRuntimeTestIT method avroBasicTest2.
/**
 * Read avro(Person) format and write avro(Person) format with schema, partitioning the output by the {@code name}
 * column.
 * <p>
 * Besides comparing the records read back from {@code TOPIC_AVRO_OUT} with the ones sent to {@code TOPIC_AVRO_IN},
 * the test checks that the Kafka key of every output record equals the person's name.
 *
 * @throws IOException declared for the Avro (de)serialization helpers of {@code Person}.
 */
@Test
public void avroBasicTest2() throws IOException {
    String testID = "avroBasicTest2" + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);

    // ----------------- Send data to TOPIC_AVRO_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
    // try-with-resources closes (and therefore flushes) the producer even if serialization or send fails.
    try (Producer<Void, byte[]> producer = new KafkaProducer<>(props)) {
        for (Person person : expectedPersons) {
            ProducerRecord<Void, byte[]> message = new ProducerRecord<>(TOPIC_AVRO_IN, person.serToAvroBytes());
            producer.send(message);
        }
    }
    // ----------------- Send data to TOPIC_AVRO_IN done --------------------

    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);

    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.COLUMN);
    outputProperties.keyColumn.setValue("name");
    outputProperties.useCompress.setValue(false);

    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);

    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new KafkaCsvBeamRuntimeTestIT.FilterByGroup(testID))).apply(outputRuntime);
    // Wait for the pipeline to complete before the bounded read-back below starts its clock.
    pipeline.run().waitUntilFinish();
    // ----------------- pipeline done --------------------

    // ----------------- Read data from TOPIC_AVRO_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("auto.offset.reset", "earliest");
    List<Person> results = new ArrayList<>();
    List<String> keys = new ArrayList<>();
    // Upper bound on the read-back so that missing records fail the assertion instead of hanging the build.
    final long readDeadline = System.currentTimeMillis() + 60000L;
    try (KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props)) {
        consumer.subscribe(Arrays.asList(TOPIC_AVRO_OUT));
        do {
            ConsumerRecords<String, byte[]> records = consumer.poll(100);
            for (ConsumerRecord<String, byte[]> record : records) {
                Person person = Person.desFromAvroBytes(record.value());
                // The topic is shared between runs: keep only the records produced by this test.
                if (testID.equals(person.group)) {
                    keys.add(record.key());
                    results.add(person);
                }
            }
        } while (results.size() < maxRecords && System.currentTimeMillis() < readDeadline);
    }
    // ----------------- Read data from TOPIC_AVRO_OUT done --------------------

    assertEquals(expectedPersons, results);

    // The output was keyed by the "name" column, so each record key must be the person's name.
    List<String> expectedKeys = new ArrayList<>();
    for (Person person : results) {
        expectedKeys.add(person.name);
    }
    assertEquals(expectedKeys, keys);
}
use of org.talend.components.kafka.input.KafkaInputProperties in project components by Talend.
the class KafkaCsvBeamRuntimeTestIT method basicTest.
/**
 * Sends CSV-formatted persons to the input topic, copies them to the output topic through a pipeline that uses
 * round-robin partitioning, and checks that the same persons are read back.
 *
 * @param title prefix of the unique test id used to tag (and later filter) the generated persons.
 * @param topicSuffix suffix appended to {@code TOPIC_IN} and {@code TOPIC_OUT} to select the topics of this run.
 * @param fieldDelim field delimiter used when converting persons to and from CSV.
 */
public void basicTest(String title, String topicSuffix, String fieldDelim) {
    String testID = title + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);

    // ----------------- Send data to TOPIC_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    // try-with-resources closes (and therefore flushes) the producer even if a send fails.
    try (Producer<Void, String> producer = new KafkaProducer<>(props)) {
        for (Person person : expectedPersons) {
            ProducerRecord<Void, String> message = new ProducerRecord<>(TOPIC_IN + topicSuffix, person.toCSV(fieldDelim));
            producer.send(message);
        }
    }
    // ----------------- Send data to TOPIC_IN done --------------------

    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);

    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.ROUND_ROBIN);
    outputProperties.useCompress.setValue(false);

    // Point the shared dataset properties at the topics of this run.
    inputDatasetProperties.topic.setValue(TOPIC_IN + topicSuffix);
    outputDatasetProperties.topic.setValue(TOPIC_OUT + topicSuffix);

    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);

    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new FilterByGroup(testID))).apply(outputRuntime);
    // Wait for the pipeline to complete before the bounded read-back below starts its clock.
    pipeline.run().waitUntilFinish();
    // ----------------- pipeline done --------------------

    // ----------------- Read data from TOPIC_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    List<Person> results = new ArrayList<>();
    // Upper bound on the read-back so that missing records fail the assertion instead of hanging the build.
    final long readDeadline = System.currentTimeMillis() + 60000L;
    // Keys are typed as String to match the configured StringDeserializer; they are not inspected here.
    try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
        consumer.subscribe(Arrays.asList(TOPIC_OUT + topicSuffix));
        do {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                Person person = Person.fromCSV(record.value(), fieldDelim);
                // The topic is shared between runs: keep only the records produced by this test.
                if (testID.equals(person.group)) {
                    results.add(person);
                }
            }
        } while (results.size() < maxRecords && System.currentTimeMillis() < readDeadline);
    }
    // ----------------- Read data from TOPIC_OUT end --------------------

    assertEquals(expectedPersons, results);
}
use of org.talend.components.kafka.input.KafkaInputProperties in project components by Talend.
the class KafkaCsvBeamRuntimeTestIT method basicTest2.
/**
 * Sends CSV-formatted persons to the input topic, copies them to the output topic through a pipeline that
 * partitions by the {@code field1} column, and checks both the persons read back and their Kafka keys.
 *
 * @param title prefix of the unique test id used to tag (and later filter) the generated persons.
 * @param topicSuffix suffix appended to {@code TOPIC_IN} and {@code TOPIC_OUT} to select the topics of this run.
 * @param fieldDelim field delimiter used when converting persons to and from CSV.
 */
public void basicTest2(String title, String topicSuffix, String fieldDelim) {
    String testID = title + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);

    // ----------------- Send data to TOPIC_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    // try-with-resources closes (and therefore flushes) the producer even if a send fails.
    try (Producer<Void, String> producer = new KafkaProducer<>(props)) {
        for (Person person : expectedPersons) {
            ProducerRecord<Void, String> message = new ProducerRecord<>(TOPIC_IN + topicSuffix, person.toCSV(fieldDelim));
            producer.send(message);
        }
    }
    // ----------------- Send data to TOPIC_IN done --------------------

    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);

    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.COLUMN);
    // name generated by KafkaAvroRegistry
    outputProperties.keyColumn.setValue("field1");
    outputProperties.useCompress.setValue(false);

    // Point the shared dataset properties at the topics of this run.
    inputDatasetProperties.topic.setValue(TOPIC_IN + topicSuffix);
    outputDatasetProperties.topic.setValue(TOPIC_OUT + topicSuffix);

    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);

    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new FilterByGroup(testID))).apply(outputRuntime);
    // Wait for the pipeline to complete before the bounded read-back below starts its clock.
    pipeline.run().waitUntilFinish();
    // ----------------- pipeline done --------------------

    // ----------------- Read data from TOPIC_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    List<Person> results = new ArrayList<>();
    List<String> keys = new ArrayList<>();
    // Upper bound on the read-back so that missing records fail the assertion instead of hanging the build.
    final long readDeadline = System.currentTimeMillis() + 60000L;
    try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
        consumer.subscribe(Arrays.asList(TOPIC_OUT + topicSuffix));
        do {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                Person person = Person.fromCSV(record.value(), fieldDelim);
                // The topic is shared between runs: keep only the records produced by this test.
                if (testID.equals(person.group)) {
                    keys.add(record.key());
                    results.add(person);
                }
            }
        } while (results.size() < maxRecords && System.currentTimeMillis() < readDeadline);
    }
    // ----------------- Read data from TOPIC_OUT end --------------------

    assertEquals(expectedPersons, results);

    // The key column is expected to carry the person's name, so each record key must equal it.
    List<String> expectedKeys = new ArrayList<>();
    for (Person person : results) {
        expectedKeys.add(person.name);
    }
    assertEquals(expectedKeys, keys);
}
Aggregations