Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class MetricsTest, method testCommittedCounterMetrics.
@Category({ ValidatesRunner.class, UsesCommittedMetrics.class, UsesCounterMetrics.class })
@Test
public void testCommittedCounterMetrics() {
    PipelineResult result = runPipelineWithMetrics();
    MetricQueryResults metrics = queryTestMetrics(result);
    assertCounterMetrics(metrics, true);
}
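For context, a minimal sketch of what a helper like queryTestMetrics could look like is given below. The exact filter used by MetricsTest is an assumption here; only PipelineResult.metrics(), MetricsFilter, and MetricNameFilter are standard Beam SDK API.

import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.metrics.MetricNameFilter;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricsFilter;

// Hypothetical helper: query only metrics reported in the MetricsTest namespace.
private static MetricQueryResults queryTestMetrics(PipelineResult result) {
    return result.metrics().queryMetrics(
            MetricsFilter.builder()
                    .addNameFilter(MetricNameFilter.inNamespace(MetricsTest.class))
                    .build());
}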
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class MetricsTest, method testCommittedDistributionMetrics.
@Category({ ValidatesRunner.class, UsesCommittedMetrics.class, UsesDistributionMetrics.class })
@Test
public void testCommittedDistributionMetrics() {
    PipelineResult result = runPipelineWithMetrics();
    MetricQueryResults metrics = queryTestMetrics(result);
    assertDistributionMetrics(metrics, true);
}
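A hedged sketch of an assertion helper in the style of assertDistributionMetrics follows. The checks are illustrative rather than the original assertions; the boolean flag selects between committed and attempted values on each MetricResult.

import static org.junit.Assert.assertTrue;

import org.apache.beam.sdk.metrics.DistributionResult;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricResult;

// Illustrative helper: sanity-check every queried distribution result.
private static void assertDistributionMetrics(MetricQueryResults metrics, boolean isCommitted) {
    for (MetricResult<DistributionResult> result : metrics.getDistributions()) {
        DistributionResult value = isCommitted ? result.getCommitted() : result.getAttempted();
        assertTrue(value.getCount() >= 0);
        if (value.getCount() > 0) {
            assertTrue(value.getMin() <= value.getMax());
        }
    }
}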
Use of org.apache.beam.sdk.PipelineResult in project components by Talend.
From the class BigQueryDatasetRuntime, method getSample.
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Create a pipeline using the input component to get records.
    DirectOptions options = BeamLocalRunnerOption.getOptions();
    final Pipeline p = Pipeline.create(options);
    // Create an input runtime based on the properties.
    BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
    BigQueryInputProperties inputProperties = new BigQueryInputProperties(null);
    inputProperties.init();
    inputProperties.setDatasetProperties(properties);
    inputRuntime.initialize(new BeamJobRuntimeContainer(options), inputProperties);
    try (DirectConsumerCollector<IndexedRecord> collector = DirectConsumerCollector.of(consumer)) {
        // Collect a sample of the input records.
        p.apply(inputRuntime).apply(Sample.<IndexedRecord>any(limit)).apply(collector);
        PipelineResult pr = p.run();
        pr.waitUntilFinish();
    }
}
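Since waitUntilFinish() returns the pipeline's terminal PipelineResult.State, the sample run above could also verify that the local pipeline completed successfully. A minimal variant of the last lines, sketched under that assumption:

// Run the sample pipeline and fail fast if it did not reach the DONE state.
PipelineResult pr = p.run();
PipelineResult.State state = pr.waitUntilFinish();
if (state != PipelineResult.State.DONE) {
    throw new IllegalStateException("Sample pipeline ended in state " + state);
}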
Use of org.apache.beam.sdk.PipelineResult in project components by Talend.
From the class KafkaCsvBeamRuntimeTestIT, method basicTest.
public void basicTest(String title, String topicSuffix, String fieldDelim) {
    String testID = title + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);
    // ----------------- Send data to TOPIC_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    Producer<Void, String> producer = new KafkaProducer<>(props);
    for (Person person : expectedPersons) {
        ProducerRecord<Void, String> message = new ProducerRecord<>(TOPIC_IN + topicSuffix, person.toCSV(fieldDelim));
        producer.send(message);
    }
    producer.close();
    // ----------------- Send data to TOPIC_IN done --------------------
    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);
    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.ROUND_ROBIN);
    outputProperties.useCompress.setValue(false);
    inputDatasetProperties.topic.setValue(TOPIC_IN + topicSuffix);
    outputDatasetProperties.topic.setValue(TOPIC_OUT + topicSuffix);
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);
    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new FilterByGroup(testID))).apply(outputRuntime);
    PipelineResult result = pipeline.run();
    // ----------------- pipeline done --------------------
    // ----------------- Read data from TOPIC_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    KafkaConsumer<Void, String> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList(TOPIC_OUT + topicSuffix));
    List<Person> results = new ArrayList<>();
    while (true) {
        ConsumerRecords<Void, String> records = consumer.poll(100);
        for (ConsumerRecord<Void, String> record : records) {
            Person person = Person.fromCSV(record.value(), fieldDelim);
            if (testID.equals(person.group)) {
                results.add(person);
            }
        }
        if (results.size() >= maxRecords) {
            break;
        }
    }
    // ----------------- Read data from TOPIC_OUT end --------------------
    assertEquals(expectedPersons, results);
}
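Note that basicTest does not block on the returned PipelineResult; it polls TOPIC_OUT until enough matching records arrive. If explicit synchronization were preferred, the result could instead be waited on with a bounded timeout. A short sketch, where the 30-second timeout is an arbitrary choice and not taken from the original test:

import org.joda.time.Duration;

// Block for at most 30 seconds; waitUntilFinish(Duration) returns the terminal state,
// or null if the timeout elapses before the pipeline finishes.
PipelineResult result = pipeline.run();
PipelineResult.State state = result.waitUntilFinish(Duration.standardSeconds(30));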
Use of org.apache.beam.sdk.PipelineResult in project components by Talend.
From the class KafkaCsvBeamRuntimeTestIT, method basicTest2.
public void basicTest2(String title, String topicSuffix, String fieldDelim) {
    String testID = title + new Random().nextInt();
    expectedPersons = Person.genRandomList(testID, maxRecords);
    // ----------------- Send data to TOPIC_IN start --------------------
    Properties props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    Producer<Void, String> producer = new KafkaProducer<>(props);
    for (Person person : expectedPersons) {
        ProducerRecord<Void, String> message = new ProducerRecord<>(TOPIC_IN + topicSuffix, person.toCSV(fieldDelim));
        producer.send(message);
    }
    producer.close();
    // ----------------- Send data to TOPIC_IN done --------------------
    KafkaInputProperties inputProperties = new KafkaInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(inputDatasetProperties);
    inputProperties.autoOffsetReset.setValue(KafkaInputProperties.OffsetType.EARLIEST);
    inputProperties.useMaxNumRecords.setValue(false);
    // inputProperties.maxNumRecords.setValue(maxRecords.longValue());
    inputProperties.useMaxReadTime.setValue(true);
    inputProperties.maxReadTime.setValue(5000L);
    KafkaOutputProperties outputProperties = new KafkaOutputProperties("output");
    outputProperties.init();
    outputProperties.setDatasetProperties(outputDatasetProperties);
    outputProperties.partitionType.setValue(KafkaOutputProperties.PartitionType.COLUMN);
    // name generated by KafkaAvroRegistry
    outputProperties.keyColumn.setValue("field1");
    outputProperties.useCompress.setValue(false);
    inputDatasetProperties.topic.setValue(TOPIC_IN + topicSuffix);
    outputDatasetProperties.topic.setValue(TOPIC_OUT + topicSuffix);
    KafkaInputPTransformRuntime inputRuntime = new KafkaInputPTransformRuntime();
    inputRuntime.initialize(null, inputProperties);
    KafkaOutputPTransformRuntime outputRuntime = new KafkaOutputPTransformRuntime();
    outputRuntime.initialize(null, outputProperties);
    // ----------------- pipeline start --------------------
    pipeline.apply(inputRuntime).apply(Filter.by(new FilterByGroup(testID))).apply(outputRuntime);
    PipelineResult result = pipeline.run();
    // ----------------- pipeline done --------------------
    // ----------------- Read data from TOPIC_OUT start --------------------
    props = new Properties();
    props.put("bootstrap.servers", BOOTSTRAP_HOST);
    props.put("group.id", "getResult");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Arrays.asList(TOPIC_OUT + topicSuffix));
    List<Person> results = new ArrayList<>();
    List<String> keys = new ArrayList<>();
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        for (ConsumerRecord<String, String> record : records) {
            Person person = Person.fromCSV(record.value(), fieldDelim);
            if (testID.equals(person.group)) {
                keys.add(record.key());
                results.add(person);
            }
        }
        if (results.size() >= maxRecords) {
            break;
        }
    }
    // ----------------- Read data from TOPIC_OUT end --------------------
    assertEquals(expectedPersons, results);
    List<String> expectedKeys = new ArrayList<>();
    for (Person person : results) {
        expectedKeys.add(person.name);
    }
    assertEquals(expectedKeys, keys);
}