Use of org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource in project beam by apache.
In the class KafkaIOIT, the method testKafkaIOReadsAndWritesCorrectlyInStreaming:
@Test
public void testKafkaIOReadsAndWritesCorrectlyInStreaming() throws IOException {
// Use batch pipeline to write records.
writePipeline
    .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions)))
    .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME)))
    .apply("Write to Kafka", writeToKafka());
// Use streaming pipeline to read Kafka records.
readPipeline.getOptions().as(Options.class).setStreaming(true);
readPipeline
    .apply("Read from unbounded Kafka", readFromKafka())
    .apply("Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME)))
    .apply("Map records to strings", MapElements.via(new MapKafkaRecordsToStrings()))
    .apply("Counting element", ParDo.of(new CountingFn(NAMESPACE, READ_ELEMENT_METRIC_NAME)));
PipelineResult writeResult = writePipeline.run();
writeResult.waitUntilFinish();
PipelineResult readResult = readPipeline.run();
PipelineResult.State readState = readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
cancelIfTimeouted(readResult, readState);
assertEquals(sourceOptions.numRecords, readElementMetric(readResult, NAMESPACE, READ_ELEMENT_METRIC_NAME));
if (!options.isWithTestcontainers()) {
  Set<NamedTestResult> metrics = readMetrics(writeResult, readResult);
  IOITMetrics.publishToInflux(TEST_ID, TIMESTAMP, metrics, settings);
}
}
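The sourceOptions used by both pipelines above are configured elsewhere in KafkaIOIT and are not part of this excerpt. A minimal sketch of how such options can be built from a JSON spec, using the SyntheticOptions.fromJsonString helper that also appears in SyntheticDataPublisher below; the field values here are placeholders, not the ones the test actually uses:
// Illustrative only: parse synthetic-source settings from a JSON string.
// numRecords, keySizeBytes and valueSizeBytes are SyntheticSourceOptions fields;
// the concrete values are placeholders.
SyntheticSourceOptions sourceOptions =
    SyntheticOptions.fromJsonString(
        "{\"numRecords\": 1000, \"keySizeBytes\": 10, \"valueSizeBytes\": 90}",
        SyntheticSourceOptions.class);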
Use of org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource in project beam by apache.
In the class BigQueryIOIT, the method testWrite:
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
Pipeline pipeline = Pipeline.create(options);
BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());
pipeline.apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions))).apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName))).apply("Map records", ParDo.of(new MapKVToV())).apply("Write to BQ", writeIO.to(tableQualifier).withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot)).withMethod(method).withSchema(new TableSchema().setFields(Collections.singletonList(new TableFieldSchema().setName("data").setType("BYTES")))));
PipelineResult pipelineResult = pipeline.run();
pipelineResult.waitUntilFinish();
extractAndPublishTime(pipelineResult, metricName);
}
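The MapKVToV transform referenced above is a small DoFn defined in BigQueryIOIT and is not shown in this excerpt. A plausible sketch, assuming it simply drops the synthetic key and keeps the value bytes so they can be written to the single BYTES column "data":
// Assumed shape of a value-extracting DoFn like MapKVToV (not the verbatim Beam source):
// emit only the value of each synthetic KV<byte[], byte[]> record.
static class MapKVToV extends DoFn<KV<byte[], byte[]>, byte[]> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    c.output(c.element().getValue());
  }
}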
Use of org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource in project beam by apache.
In the class SyntheticDataPublisher, the method main:
public static void main(String[] args) throws IOException {
options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
SyntheticSourceOptions sourceOptions = SyntheticOptions.fromJsonString(options.getSourceOptions(), SyntheticSourceOptions.class);
Pipeline pipeline = Pipeline.create(options);
PCollection<KV<byte[], byte[]>> syntheticData = pipeline.apply("Read synthetic data", Read.from(new SyntheticBoundedSource(sourceOptions)));
if (options.getKafkaBootstrapServerAddress() != null && options.getKafkaTopic() != null) {
writeToKafka(syntheticData);
}
if (options.getPubSubTopic() != null) {
writeToPubSub(syntheticData);
}
if (allKinesisOptionsConfigured()) {
writeToKinesis(syntheticData);
}
pipeline.run().waitUntilFinish();
}
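The writeToKafka helper used above is defined elsewhere in SyntheticDataPublisher. A minimal sketch of what it might look like, assuming plain KafkaIO with byte-array serializers (ByteArraySerializer comes from org.apache.kafka.common.serialization):
// Hypothetical sketch of the writeToKafka helper: forward the synthetic
// KV<byte[], byte[]> records to the configured Kafka topic.
private static void writeToKafka(PCollection<KV<byte[], byte[]>> records) {
  records.apply(
      "Write to Kafka",
      KafkaIO.<byte[], byte[]>write()
          .withBootstrapServers(options.getKafkaBootstrapServerAddress())
          .withTopic(options.getKafkaTopic())
          .withKeySerializer(ByteArraySerializer.class)
          .withValueSerializer(ByteArraySerializer.class));
}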
Use of org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource in project beam by apache.
In the class KafkaIOIT, the method testKafkaIOReadsAndWritesCorrectlyInBatch:
@Test
public void testKafkaIOReadsAndWritesCorrectlyInBatch() throws IOException {
// Expected hashes for collections of a given record count, with 100-byte records (10-byte key, 90-byte value).
Map<Long, String> expectedHashes =
    ImmutableMap.of(
        1000L, "4507649971ee7c51abbb446e65a5c660",
        100_000_000L, "0f12c27c9a7672e14775594be66cad9a");
expectedHashcode = getHashForRecordCount(sourceOptions.numRecords, expectedHashes);
writePipeline
    .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions)))
    .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME)))
    .apply("Write to Kafka", writeToKafka());
PCollection<String> hashcode =
    readPipeline
        .apply("Read from bounded Kafka", readFromBoundedKafka())
        .apply("Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME)))
        .apply("Map records to strings", MapElements.via(new MapKafkaRecordsToStrings()))
        .apply("Calculate hashcode", Combine.globally(new HashingFn()).withoutDefaults());
PAssert.thatSingleton(hashcode).isEqualTo(expectedHashcode);
PipelineResult writeResult = writePipeline.run();
writeResult.waitUntilFinish();
PipelineResult readResult = readPipeline.run();
PipelineResult.State readState = readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
cancelIfTimeouted(readResult, readState);
if (!options.isWithTestcontainers()) {
  Set<NamedTestResult> metrics = readMetrics(writeResult, readResult);
  IOITMetrics.publishToInflux(TEST_ID, TIMESTAMP, metrics, settings);
}
}
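getHashForRecordCount is a small lookup helper in KafkaIOIT that is not part of this excerpt. A minimal sketch, assuming it resolves the expected hash for the configured record count and fails fast when no reference hash is known:
// Assumed shape of the getHashForRecordCount helper (illustrative, not the verbatim
// Beam source): look up the reference hash for the given record count.
private static String getHashForRecordCount(long recordCount, Map<Long, String> hashes) {
  String hash = hashes.get(recordCount);
  if (hash == null) {
    throw new UnsupportedOperationException("No expected hash for record count: " + recordCount);
  }
  return hash;
}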