
Example 1 with KafkaInputProperties

Use of org.talend.components.kafka.input.KafkaInputProperties in the components project by Talend.

From the class KafkaDatasetRuntime, method getSample.

/**
 * @param limit the maximum number of records to return.
 * @param consumer a callback that will be applied to each sampled record. This callback should throw a
 * {@link org.talend.daikon.exception.TalendRuntimeException} if there was an error processing the record. Kafka is
 * an unbounded source, so a read timeout must be set to stop sampling: getSample uses 1 second, whether or not any
 * records arrive in that window.
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Create an input runtime based on the properties.
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    KafkaInputProperties inputProperties = new KafkaInputProperties(null);
    inputProperties.init();
    inputProperties.setDatasetProperties(dataset);
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(1000L);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    // TODO: BEAM-1847: Enable both stopping conditions when they can be set, and remove Sample transform from job.
    // inputProperties.useMaxNumRecords.setValue(true);
    // inputProperties.maxNumRecords.setValue(Long.valueOf(limit));
    inputRuntime.initialize(null, inputProperties);
    // Create a pipeline using the input component to get records.
    PipelineOptions options = PipelineOptionsFactory.create();
    final Pipeline p = Pipeline.create(options);
    try (DirectConsumerCollector<IndexedRecord> collector = DirectConsumerCollector.of(consumer)) {
        // Collect a sample of the input records.
        // 
        p.apply(inputRuntime).apply(Sample.<IndexedRecord>any(limit)).apply(collector);
        p.run().waitUntilFinish();
    }
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) KafkaInputProperties(org.talend.components.kafka.input.KafkaInputProperties) Pipeline(org.apache.beam.sdk.Pipeline)
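For reference, a minimal sketch of how this sampling method might be called from client code; the KafkaDatasetRuntime construction, the initialize call, and the datasetProperties variable below are assumptions inferred from the surrounding examples, not a documented usage.

import org.apache.avro.generic.IndexedRecord;
// The Consumer in the getSample signature is assumed to be Talend daikon's java8 backport.
import org.talend.daikon.java8.Consumer;

KafkaDatasetRuntime runtime = new KafkaDatasetRuntime();
runtime.initialize(null, datasetProperties);
// Ask for at most 10 records; the callback is invoked once per sampled record.
runtime.getSample(10, new Consumer<IndexedRecord>() {

    @Override
    public void accept(IndexedRecord record) {
        // Sampling ends after the 1 second read timeout configured above,
        // whether or not any records arrived in that window.
        System.out.println(record);
    }
});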

Example 2 with KafkaInputProperties

Use of org.talend.components.kafka.input.KafkaInputProperties in the components project by Talend.

From the class KafkaAvroBeamRuntimeTestIT, method avroBasicTest.

/**
 * Read avro(Person) format and write avro(Person) format with schema.
 */
@Test
public void avroBasicTest() throws IOException {
    String testID = "avroBasicTest" + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);
    // ----------------- Send data to TOPIC_AVRO_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
    Producer<Void, byte[]> producer = new KafkaProducer<>(props);
    for (Person person : expectedPersons) {
        ProducerRecord<Void, byte[]> message = new ProducerRecord<>(TOPIC_AVRO_IN, person.serToAvroBytes());
        producer.send(message);
    }
    producer.close();
    // ----------------- Send data to TOPIC_AVRO_IN done --------------------
    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);
    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.ROUND_ROBIN);
    outputProperties.useCompress.setValue(false);
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);
    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new KafkaCsvBeamRuntimeTestIT.FilterByGroup(testID))).apply(outputRuntime);
    PipelineResult result = pipeline.run();
    // ----------------- pipeline done --------------------
    // ----------------- Read data from TOPIC_AVRO_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("auto.offset.reset", "earliest");
    KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList(TOPIC_AVRO_OUT));
    List<Person> results = new ArrayList<>();
    while (true) {
        ConsumerRecords<String, byte[]> records = consumer.poll(100);
        for (ConsumerRecord<String, byte[]> record : records) {
            Person person = Person.desFromAvroBytes(record.value());
            if (testID.equals(person.group)) {
                results.add(person);
            }
        }
        if (results.size() >= maxRecords) {
            break;
        }
    }
    // ----------------- Read data from TOPIC_AVRO_OUT done --------------------
    assertEquals(expectedPersons, results);
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) KafkaInputProperties(org.talend.components.kafka.input.KafkaInputProperties) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) KafkaDatasetProperties(org.talend.components.kafka.dataset.KafkaDatasetProperties) Properties(java.util.Properties) KafkaDatastoreProperties(org.talend.components.kafka.datastore.KafkaDatastoreProperties) KafkaOutputProperties(org.talend.components.kafka.output.KafkaOutputProperties) Random(java.util.Random) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) Test(org.junit.Test)
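The pipeline above keeps only records belonging to this test run by filtering on the group field. Below is a rough sketch of what such a predicate for Beam's Filter.by could look like; the class body, the lookup of a field named "group", and the constructor argument are assumptions, not the actual FilterByGroup inner class from KafkaCsvBeamRuntimeTestIT.

import org.apache.avro.generic.IndexedRecord;
import org.apache.beam.sdk.transforms.SerializableFunction;

// Hypothetical predicate in the spirit of FilterByGroup: keep records whose
// "group" field matches the test ID (the field name is an assumption).
public class FilterByGroupSketch implements SerializableFunction<IndexedRecord, Boolean> {

    private final String groupID;

    public FilterByGroupSketch(String groupID) {
        this.groupID = groupID;
    }

    @Override
    public Boolean apply(IndexedRecord input) {
        Object group = input.get(input.getSchema().getField("group").pos());
        return group != null && groupID.equals(group.toString());
    }
}

It would be plugged into the pipeline the same way as in the test, for example Filter.by(new FilterByGroupSketch(testID)).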

Example 3 with KafkaInputProperties

Use of org.talend.components.kafka.input.KafkaInputProperties in the components project by Talend.

From the class KafkaAvroBeamRuntimeTestIT, method avroBasicTest2.

/**
 * Read avro(Person) format and write avro(Person) format with schema.
 */
@Test
public void avroBasicTest2() throws IOException {
    String testID = "avroBasicTest2" + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);
    // ----------------- Send data to TOPIC_AVRO_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
    Producer<Void, byte[]> producer = new KafkaProducer<>(props);
    for (Person person : expectedPersons) {
        ProducerRecord<Void, byte[]> message = new ProducerRecord<>(TOPIC_AVRO_IN, person.serToAvroBytes());
        producer.send(message);
    }
    producer.close();
    // ----------------- Send data to TOPIC_AVRO_IN done --------------------
    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);
    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.COLUMN);
    outputProperties.keyColumn.setValue("name");
    outputProperties.useCompress.setValue(false);
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);
    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new KafkaCsvBeamRuntimeTestIT.FilterByGroup(testID))).apply(outputRuntime);
    PipelineResult result = pipeline.run();
    // ----------------- pipeline done --------------------
    // ----------------- Read data from TOPIC_AVRO_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("auto.offset.reset", "earliest");
    KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList(TOPIC_AVRO_OUT));
    List<Person> results = new ArrayList<>();
    List<String> keys = new ArrayList<>();
    while (true) {
        ConsumerRecords<String, byte[]> records = consumer.poll(100);
        for (ConsumerRecord<String, byte[]> record : records) {
            Person person = Person.desFromAvroBytes(record.value());
            if (testID.equals(person.group)) {
                keys.add(record.key());
                results.add(person);
            }
        }
        if (results.size() >= maxRecords) {
            break;
        }
    }
    // ----------------- Read data from TOPIC_AVRO_OUT done --------------------
    assertEquals(expectedPersons, results);
    List<String> expectedKeys = new ArrayList<>();
    for (Person person : results) {
        expectedKeys.add(person.name);
    }
    assertEquals(expectedKeys, keys);
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) KafkaInputProperties(org.talend.components.kafka.input.KafkaInputProperties) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) KafkaDatasetProperties(org.talend.components.kafka.dataset.KafkaDatasetProperties) Properties(java.util.Properties) KafkaDatastoreProperties(org.talend.components.kafka.datastore.KafkaDatastoreProperties) KafkaOutputProperties(org.talend.components.kafka.output.KafkaOutputProperties) Random(java.util.Random) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) Test(org.junit.Test)

Example 4 with KafkaInputProperties

Use of org.talend.components.kafka.input.KafkaInputProperties in the components project by Talend.

From the class KafkaCsvBeamRuntimeTestIT, method basicTest.

public void basicTest(String title, String topicSuffix, String fieldDelim) {
    String testID = title + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);
    // ----------------- Send data to TOPIC_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    Producer<Void, String> producer = new KafkaProducer<>(props);
    for (Person person : expectedPersons) {
        ProducerRecord<Void, String> message = new ProducerRecord<>(TOPIC_IN + topicSuffix, person.toCSV(fieldDelim));
        producer.send(message);
    }
    producer.close();
    // ----------------- Send data to TOPIC_IN done --------------------
    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);
    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.ROUND_ROBIN);
    outputProperties.useCompress.setValue(false);
    inputDatasetProperties.topic.setValue(TOPIC_IN + topicSuffix);
    outputDatasetProperties.topic.setValue(TOPIC_OUT + topicSuffix);
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);
    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new FilterByGroup(testID))).apply(outputRuntime);
    PipelineResult result = pipeline.run();
    // ----------------- pipeline done --------------------
    // ----------------- Read data from TOPIC_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    KafkaConsumer<Void, String> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList(TOPIC_OUT + topicSuffix));
    List<Person> results = new ArrayList<>();
    while (true) {
        ConsumerRecords<Void, String> records = consumer.poll(100);
        for (ConsumerRecord<Void, String> record : records) {
            Person person = Person.fromCSV(record.value(), fieldDelim);
            if (testID.equals(person.group)) {
                results.add(person);
            }
        }
        if (results.size() >= maxRecords) {
            break;
        }
    }
    // ----------------- Read data from TOPIC_OUT end --------------------
    assertEquals(expectedPersons, results);
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) KafkaInputProperties(org.talend.components.kafka.input.KafkaInputProperties) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) KafkaDatasetProperties(org.talend.components.kafka.dataset.KafkaDatasetProperties) Properties(java.util.Properties) KafkaDatastoreProperties(org.talend.components.kafka.datastore.KafkaDatastoreProperties) KafkaOutputProperties(org.talend.components.kafka.output.KafkaOutputProperties) Random(java.util.Random) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord)
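The CSV variants above round-trip each Person through toCSV and fromCSV with the delimiter passed to the test. A minimal sketch of what such helpers could look like is shown below, assuming only the two fields (group, name) that these tests reference; the real Person class in the project also carries the Avro helpers and may have more fields.

import java.util.regex.Pattern;

// Hypothetical, trimmed-down Person with CSV helpers; the field set and their
// order in the CSV line are assumptions based on the fields used in the tests.
public class PersonSketch {

    public String group;

    public String name;

    public String toCSV(String fieldDelim) {
        // Serialize the fields in a fixed order, joined by the dataset delimiter.
        return group + fieldDelim + name;
    }

    public static PersonSketch fromCSV(String csv, String fieldDelim) {
        // Quote the delimiter so characters like "|" or ";" are not treated as regex.
        String[] fields = csv.split(Pattern.quote(fieldDelim));
        PersonSketch person = new PersonSketch();
        person.group = fields[0];
        person.name = fields[1];
        return person;
    }
}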

Example 5 with KafkaInputProperties

Use of org.talend.components.kafka.input.KafkaInputProperties in the components project by Talend.

From the class KafkaCsvBeamRuntimeTestIT, method basicTest2.

public void basicTest2(String title, String topicSuffix, String fieldDelim) {
    String testID = title + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);
    // ----------------- Send data to TOPIC_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    Producer<Void, String> producer = new KafkaProducer<>(props);
    for (Person person : expectedPersons) {
        ProducerRecord<Void, String> message = new ProducerRecord<>(TOPIC_IN + topicSuffix, person.toCSV(fieldDelim));
        producer.send(message);
    }
    producer.close();
    // ----------------- Send data to TOPIC_IN done --------------------
    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);
    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.COLUMN);
    // name generated by KafkaAvroRegistry
    outputProperties.keyColumn.setValue("field1");
    outputProperties.useCompress.setValue(false);
    inputDatasetProperties.topic.setValue(TOPIC_IN + topicSuffix);
    outputDatasetProperties.topic.setValue(TOPIC_OUT + topicSuffix);
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);
    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new FilterByGroup(testID))).apply(outputRuntime);
    PipelineResult result = pipeline.run();
    // ----------------- pipeline done --------------------
    // ----------------- Read data from TOPIC_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList(TOPIC_OUT + topicSuffix));
    List<Person> results = new ArrayList<>();
    List<String> keys = new ArrayList<>();
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        for (ConsumerRecord<String, String> record : records) {
            Person person = Person.fromCSV(record.value(), fieldDelim);
            if (testID.equals(person.group)) {
                keys.add(record.key());
                results.add(person);
            }
        }
        if (results.size() >= maxRecords) {
            break;
        }
    }
    // ----------------- Read data from TOPIC_OUT end --------------------
    assertEquals(expectedPersons, results);
    List<String> expectedKeys = new ArrayList<>();
    for (Person person : results) {
        expectedKeys.add(person.name);
    }
    assertEquals(expectedKeys, keys);
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) KafkaInputProperties(org.talend.components.kafka.input.KafkaInputProperties) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) KafkaDatasetProperties(org.talend.components.kafka.dataset.KafkaDatasetProperties) Properties(java.util.Properties) KafkaDatastoreProperties(org.talend.components.kafka.datastore.KafkaDatastoreProperties) KafkaOutputProperties(org.talend.components.kafka.output.KafkaOutputProperties) Random(java.util.Random) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord)

Aggregations

KafkaInputProperties (org.talend.components.kafka.input.KafkaInputProperties): 5
ArrayList (java.util.ArrayList): 4
Properties (java.util.Properties): 4
Random (java.util.Random): 4
PipelineResult (org.apache.beam.sdk.PipelineResult): 4
KafkaConsumer (org.apache.kafka.clients.consumer.KafkaConsumer): 4
KafkaProducer (org.apache.kafka.clients.producer.KafkaProducer): 4
ProducerRecord (org.apache.kafka.clients.producer.ProducerRecord): 4
KafkaDatasetProperties (org.talend.components.kafka.dataset.KafkaDatasetProperties): 4
KafkaDatastoreProperties (org.talend.components.kafka.datastore.KafkaDatastoreProperties): 4
KafkaOutputProperties (org.talend.components.kafka.output.KafkaOutputProperties): 4
Test (org.junit.Test): 2
IndexedRecord (org.apache.avro.generic.IndexedRecord): 1
Pipeline (org.apache.beam.sdk.Pipeline): 1
PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 1