
Example 56 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

The class MetricsTest, method testCommittedCounterMetrics:

@Category({ ValidatesRunner.class, UsesCommittedMetrics.class, UsesCounterMetrics.class })
@Test
public void testCommittedCounterMetrics() {
    PipelineResult result = runPipelineWithMetrics();
    MetricQueryResults metrics = queryTestMetrics(result);
    assertCounterMetrics(metrics, true);
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
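
The helpers runPipelineWithMetrics() and queryTestMetrics() are defined elsewhere in MetricsTest and are not reproduced here. A minimal sketch of the PipelineResult metrics API they presumably wrap, assuming a pipeline object that increments a counter in the MetricsTest.class namespace:

PipelineResult result = pipeline.run();
result.waitUntilFinish();
// Query every metric reported under the MetricsTest namespace.
MetricQueryResults metrics = result.metrics().queryMetrics(
        MetricsFilter.builder()
                .addNameFilter(MetricNameFilter.inNamespace(MetricsTest.class))
                .build());
for (MetricResult<Long> counter : metrics.getCounters()) {
    // Committed values are only guaranteed on runners that support committed metrics
    // (the UsesCommittedMetrics category above); getAttempted() is always available.
    System.out.println(counter.getName() + " committed=" + counter.getCommitted());
}
Also used : MetricsFilter(org.apache.beam.sdk.metrics.MetricsFilter) MetricNameFilter(org.apache.beam.sdk.metrics.MetricNameFilter) MetricResult(org.apache.beam.sdk.metrics.MetricResult) MetricQueryResults(org.apache.beam.sdk.metrics.MetricQueryResults)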

Example 57 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

The class MetricsTest, method testCommittedDistributionMetrics:

@Category({ ValidatesRunner.class, UsesCommittedMetrics.class, UsesDistributionMetrics.class })
@Test
public void testCommittedDistributionMetrics() {
    PipelineResult result = runPipelineWithMetrics();
    MetricQueryResults metrics = queryTestMetrics(result);
    assertDistributionMetrics(metrics, true);
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 58 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project components by Talend.

The class BigQueryDatasetRuntime, method getSample:

@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Create a pipeline using the input component to get records.
    DirectOptions options = BeamLocalRunnerOption.getOptions();
    final Pipeline p = Pipeline.create(options);
    // Create an input runtime based on the properties.
    BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
    BigQueryInputProperties inputProperties = new BigQueryInputProperties(null);
    inputProperties.init();
    inputProperties.setDatasetProperties(properties);
    inputRuntime.initialize(new BeamJobRuntimeContainer(options), inputProperties);
    try (DirectConsumerCollector<IndexedRecord> collector = DirectConsumerCollector.of(consumer)) {
        // Collect a sample of the input records.
        p.apply(inputRuntime).apply(Sample.<IndexedRecord>any(limit)).apply(collector);
        PipelineResult pr = p.run();
        pr.waitUntilFinish();
    }
}
Also used : BeamJobRuntimeContainer(org.talend.components.adapter.beam.BeamJobRuntimeContainer) IndexedRecord(org.apache.avro.generic.IndexedRecord) PipelineResult(org.apache.beam.sdk.PipelineResult) BigQueryInputProperties(org.talend.components.bigquery.input.BigQueryInputProperties) DirectOptions(org.apache.beam.runners.direct.DirectOptions) Pipeline(org.apache.beam.sdk.Pipeline)
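
A hypothetical caller of getSample, collecting up to a fixed number of records into a list; the BigQueryDatasetRuntime instance and its dataset properties are assumed to have been initialized elsewhere, following the same pattern as the BigQueryInputRuntime call above:

// Collect at most `limit` sample records from an already-initialized BigQueryDatasetRuntime.
public static List<IndexedRecord> collectSample(BigQueryDatasetRuntime runtime, int limit) {
    List<IndexedRecord> sample = new ArrayList<>();
    // The consumer simply accumulates each record handed to it by the sampling pipeline.
    runtime.getSample(limit, sample::add);
    return sample;
}
Also used : List(java.util.List) ArrayList(java.util.ArrayList) IndexedRecord(org.apache.avro.generic.IndexedRecord)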

Example 59 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project components by Talend.

The class KafkaCsvBeamRuntimeTestIT, method basicTest:

public void basicTest(String title, String topicSuffix, String fieldDelim) {
    String testID = title + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);
    // ----------------- Send data to TOPIC_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    Producer<Void, String> producer = new KafkaProducer<>(props);
    for (Person person : expectedPersons) {
        ProducerRecord<Void, String> message = new ProducerRecord<>(TOPIC_IN + topicSuffix, person.toCSV(fieldDelim));
        producer.send(message);
    }
    producer.close();
    // ----------------- Send data to TOPIC_IN done --------------------
    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);
    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.ROUND_ROBIN);
    outputProperties.useCompress.setValue(false);
    inputDatasetProperties.topic.setValue(TOPIC_IN + topicSuffix);
    outputDatasetProperties.topic.setValue(TOPIC_OUT + topicSuffix);
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);
    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new FilterByGroup(testID))).apply(outputRuntime);
    PipelineResult result = pipeline.run();
    // ----------------- pipeline done --------------------
    // ----------------- Read data from TOPIC_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    KafkaConsumer<Void, String> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList(TOPIC_OUT + topicSuffix));
    List<Person> results = new ArrayList<>();
    while (true) {
        ConsumerRecords<Void, String> records = consumer.poll(100);
        for (ConsumerRecord<Void, String> record : records) {
            Person person = Person.fromCSV(record.value(), fieldDelim);
            if (testID.equals(person.group)) {
                results.add(person);
            }
        }
        if (results.size() >= maxRecords) {
            break;
        }
    }
    // ----------------- Read data from TOPIC_OUT end --------------------
    assertEquals(expectedPersons, results);
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) KafkaInputProperties(org.talend.components.kafka.input.KafkaInputProperties) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) KafkaDatasetProperties(org.talend.components.kafka.dataset.KafkaDatasetProperties) Properties(java.util.Properties) KafkaDatastoreProperties(org.talend.components.kafka.datastore.KafkaDatastoreProperties) KafkaOutputProperties(org.talend.components.kafka.output.KafkaOutputProperties) Random(java.util.Random) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord)
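
FilterByGroup is not shown in this snippet. A hypothetical sketch of such a predicate for Beam's Filter.by, assuming the Kafka input emits Avro IndexedRecords and that the group identifier sits in the record's second field (the field index is an assumption, not taken from the source):

static class FilterByGroup implements SerializableFunction<IndexedRecord, Boolean> {

    private final String groupID;

    FilterByGroup(String groupID) {
        this.groupID = groupID;
    }

    @Override
    public Boolean apply(IndexedRecord record) {
        // Keep only the records produced by this test run.
        return groupID.equals(String.valueOf(record.get(1)));
    }
}
Also used : SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) IndexedRecord(org.apache.avro.generic.IndexedRecord)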

Example 60 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project components by Talend.

The class KafkaCsvBeamRuntimeTestIT, method basicTest2:

public void basicTest2(String title, String topicSuffix, String fieldDelim) {
    String testID = title + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);
    // ----------------- Send data to TOPIC_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    Producer<Void, String> producer = new KafkaProducer<>(props);
    for (Person person : expectedPersons) {
        ProducerRecord<Void, String> message = new ProducerRecord<>(TOPIC_IN + topicSuffix, person.toCSV(fieldDelim));
        producer.send(message);
    }
    producer.close();
    // ----------------- Send data to TOPIC_IN done --------------------
    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);
    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.COLUMN);
    // name generated by KafkaAvroRegistry
    outputProperties.keyColumn.setValue("field1");
    outputProperties.useCompress.setValue(false);
    inputDatasetProperties.topic.setValue(TOPIC_IN + topicSuffix);
    outputDatasetProperties.topic.setValue(TOPIC_OUT + topicSuffix);
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);
    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new FilterByGroup(testID))).apply(outputRuntime);
    PipelineResult result = pipeline.run();
    // ----------------- pipeline done --------------------
    // ----------------- Read data from TOPIC_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList(TOPIC_OUT + topicSuffix));
    List<Person> results = new ArrayList<>();
    List<String> keys = new ArrayList<>();
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        for (ConsumerRecord<String, String> record : records) {
            Person person = Person.fromCSV(record.value(), fieldDelim);
            if (testID.equals(person.group)) {
                keys.add(record.key());
                results.add(person);
            }
        }
        if (results.size() >= maxRecords) {
            break;
        }
    }
    // ----------------- Read data from TOPIC_OUT end --------------------
    assertEquals(expectedPersons, results);
    List<String> expectedKeys = new ArrayList<>();
    for (Person person : results) {
        expectedKeys.add(person.name);
    }
    assertEquals(expectedKeys, keys);
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) KafkaInputProperties(org.talend.components.kafka.input.KafkaInputProperties) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) KafkaDatasetProperties(org.talend.components.kafka.dataset.KafkaDatasetProperties) Properties(java.util.Properties) KafkaDatastoreProperties(org.talend.components.kafka.datastore.KafkaDatastoreProperties) KafkaOutputProperties(org.talend.components.kafka.output.KafkaOutputProperties) Random(java.util.Random) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord)
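
Neither Kafka test waits on the PipelineResult returned by pipeline.run(); the read is bounded only by maxReadTime. A sketch of how a test could bound the run explicitly, assuming the runner in use supports waitUntilFinish with a timeout and cancel():

PipelineResult result = pipeline.run();
// Wait up to 30 seconds for the pipeline to reach a terminal state.
PipelineResult.State state = result.waitUntilFinish(Duration.standardSeconds(30));
if (state == null || !state.isTerminal()) {
    try {
        // Stop a run that did not finish within the timeout.
        result.cancel();
    } catch (IOException e) {
        throw new RuntimeException("Failed to cancel pipeline", e);
    }
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Duration(org.joda.time.Duration) IOException(java.io.IOException)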

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult): 105
Test (org.junit.Test): 66
Pipeline (org.apache.beam.sdk.Pipeline): 29
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 18
PCollection (org.apache.beam.sdk.values.PCollection): 18
TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor): 14
ArrayList (java.util.ArrayList): 12
Category (org.junit.experimental.categories.Category): 12
KV (org.apache.beam.sdk.values.KV): 11
Rule (org.junit.Rule): 11
IOException (java.io.IOException): 10
ExampleUtils (org.apache.beam.examples.common.ExampleUtils): 10
DoFn (org.apache.beam.sdk.transforms.DoFn): 10
HashingFn (org.apache.beam.sdk.io.common.HashingFn): 9
RunWith (org.junit.runner.RunWith): 9
MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults): 8
ParDo (org.apache.beam.sdk.transforms.ParDo): 8
Duration (org.joda.time.Duration): 8
Map (java.util.Map): 7
TableReference (com.google.api.services.bigquery.model.TableReference): 6