Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In class ParquetIOIT, the method writeThenReadAll:
@Test
public void writeThenReadAll() {
  PCollection<String> testFiles =
      pipeline
          .apply("Generate sequence", GenerateSequence.from(0).to(numberOfTextLines))
          .apply(
              "Produce text lines",
              ParDo.of(new FileBasedIOITHelper.DeterministicallyConstructTestTextLineFn()))
          .apply("Produce Avro records", ParDo.of(new DeterministicallyConstructAvroRecordsFn()))
          .setCoder(AvroCoder.of(SCHEMA))
          .apply(
              "Gather write start times",
              ParDo.of(new TimeMonitor<>(PARQUET_NAMESPACE, "writeStart")))
          .apply(
              "Write Parquet files",
              FileIO.<GenericRecord>write().via(ParquetIO.sink(SCHEMA)).to(filenamePrefix))
          .getPerDestinationOutputFilenames()
          .apply(
              "Gather write end times",
              ParDo.of(new TimeMonitor<>(PARQUET_NAMESPACE, "writeEnd")))
          .apply("Get file names", Values.create());

  PCollection<String> consolidatedHashcode =
      testFiles
          .apply("Find files", FileIO.matchAll())
          .apply("Read matched files", FileIO.readMatches())
          .apply(
              "Gather read start time",
              ParDo.of(new TimeMonitor<>(PARQUET_NAMESPACE, "readStart")))
          .apply("Read parquet files", ParquetIO.readFiles(SCHEMA))
          .apply(
              "Gather read end time",
              ParDo.of(new TimeMonitor<>(PARQUET_NAMESPACE, "readEnd")))
          .apply(
              "Map records to strings",
              MapElements.into(strings())
                  .via(
                      (SerializableFunction<GenericRecord, String>)
                          record -> String.valueOf(record.get("row"))))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));

  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  testFiles.apply(
      "Delete test files",
      ParDo.of(new FileBasedIOITHelper.DeleteFileFn())
          .withSideInputs(consolidatedHashcode.apply(View.asSingleton())));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result);
}
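The collectAndPublishMetrics helper is not reproduced on this page. As a minimal sketch of what such a step could look like, the timing distributions recorded by TimeMonitor can be read back from the finished PipelineResult through its MetricResults (classes MetricsFilter, MetricNameFilter, MetricQueryResults, MetricResult, and DistributionResult live in org.apache.beam.sdk.metrics). The helper name and the logging are assumptions, not the actual Beam test code:

// Minimal sketch (assumption): not the real collectAndPublishMetrics helper.
// Queries the TimeMonitor distributions from the finished pipeline.
static void logTimeMetrics(PipelineResult result) {
  MetricQueryResults metrics =
      result
          .metrics()
          .queryMetrics(
              MetricsFilter.builder()
                  .addNameFilter(MetricNameFilter.inNamespace(PARQUET_NAMESPACE))
                  .build());
  for (MetricResult<DistributionResult> timing : metrics.getDistributions()) {
    // getAttempted() is supported by all runners; getCommitted() may throw on some.
    DistributionResult values = timing.getAttempted();
    System.out.printf(
        "%s: min=%d max=%d%n", timing.getName(), values.getMin(), values.getMax());
  }
}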
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In class TFRecordIOIT, the method writeThenReadAll:
// TODO: There are two pipelines due to: https://issues.apache.org/jira/browse/BEAM-3267
@Test
public void writeThenReadAll() {
  final TFRecordIO.Write writeTransform =
      TFRecordIO.write()
          .to(filenamePrefix)
          .withCompression(compressionType)
          .withSuffix(".tfrecord");

  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(numberOfTextLines))
      .apply(
          "Produce text lines",
          ParDo.of(new FileBasedIOITHelper.DeterministicallyConstructTestTextLineFn()))
      .apply("Transform strings to bytes", MapElements.via(new StringToByteArray()))
      .apply(
          "Record time before writing",
          ParDo.of(new TimeMonitor<>(TFRECORD_NAMESPACE, WRITE_TIME)))
      .apply("Write content to files", writeTransform);

  final PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();

  String filenamePattern = createFilenamePattern();

  PCollection<String> consolidatedHashcode =
      readPipeline
          .apply(TFRecordIO.read().from(filenamePattern).withCompression(AUTO))
          .apply(
              "Record time after reading",
              ParDo.of(new TimeMonitor<>(TFRECORD_NAMESPACE, READ_TIME)))
          .apply("Transform bytes to strings", MapElements.via(new ByteArrayToString()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()))
          .apply(Reshuffle.viaRandomKey());

  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  readPipeline
      .apply(Create.of(filenamePattern))
      .apply(
          "Delete test files",
          ParDo.of(new DeleteFileFn())
              .withSideInputs(consolidatedHashcode.apply(View.asSingleton())));

  final PipelineResult readResult = readPipeline.run();
  readResult.waitUntilFinish();
  collectAndPublishMetrics(writeResult, readResult);
}
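Because of the BEAM-3267 workaround noted above, the write and read run as separate pipelines, and collectAndPublishMetrics takes both PipelineResults. waitUntilFinish also has a bounded overload taking an org.joda.time.Duration and returning the pipeline's terminal state; a minimal sketch of guarding a run with it (the 30-minute timeout and the AssertionError are assumptions, not part of this test):

// Minimal sketch (assumption): bounded wait plus a state check on the PipelineResult.
PipelineResult writeResult = writePipeline.run();
PipelineResult.State state = writeResult.waitUntilFinish(Duration.standardMinutes(30));
// waitUntilFinish(Duration) returns null if the timeout expires before a terminal state.
if (state != PipelineResult.State.DONE) {
  throw new AssertionError("Write pipeline did not reach DONE, got: " + state);
}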
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In class StreamingSnowflakeIOIT, the method writeStreamToSnowflake:
private void writeStreamToSnowflake() {
  TestStream<TestRow> stringsStream =
      TestStream.create(SerializableCoder.of(TestRow.class))
          .advanceWatermarkTo(Instant.now())
          .addElements(
              testRows.get(0), testRows.subList(1, testRows.size()).toArray(new TestRow[0]))
          .advanceWatermarkToInfinity();

  pipeline
      .apply(stringsStream)
      .apply(
          "Write SnowflakeIO",
          SnowflakeIO.<TestRow>write()
              .withDataSourceConfiguration(dc)
              .withUserDataMapper(getTestRowDataMapper())
              .withSnowPipe(options.getSnowPipe())
              .withStorageIntegrationName(storageIntegrationName)
              .withStagingBucketName(stagingBucketName)
              .withFlushTimeLimit(Duration.millis(18000))
              .withFlushRowLimit(50000)
              .withDebugMode(StreamingLogLevel.ERROR));

  PipelineResult pipelineResult = pipeline.run(options);
  pipelineResult.waitUntilFinish();
}
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In class BatchSnowflakeIOIT, the method testWriteThenRead:
@Test
public void testWriteThenRead() {
  PipelineResult writeResult = runWrite();
  writeResult.waitUntilFinish();

  PipelineResult readResult = runRead();
  readResult.waitUntilFinish();
}
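runWrite() and runRead() are not reproduced on this page; each one builds its own pipeline, applies SnowflakeIO.write() or SnowflakeIO.read(), and returns the PipelineResult that the test then waits on. A minimal sketch of the write half, where the pipeline field, record count, table name, and data-source configuration names are assumptions:

// Minimal sketch (assumption): the shape of a runWrite() helper returning a PipelineResult.
private PipelineResult runWrite() {
  pipelineWrite
      .apply(GenerateSequence.from(0).to(numberOfRecords))
      .apply(ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
      .apply(
          SnowflakeIO.<TestRow>write()
              .withDataSourceConfiguration(dc)
              .withUserDataMapper(getTestRowDataMapper())
              .to(tableName)
              .withStagingBucketName(stagingBucketName)
              .withStorageIntegrationName(storageIntegrationName));
  return pipelineWrite.run();
}

runRead() would mirror this shape with SnowflakeIO.<TestRow>read().fromTable(...), a PAssert on the rows read back, and a final pipelineRead.run().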
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In class FhirIOReadIT, the method testFhirIORead:
@Test
public void testFhirIORead() throws Exception {
  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  FhirIO.Read.Result result =
      pipeline
          .apply(PubsubIO.readStrings().fromSubscription(pubsubSubscription))
          .apply(FhirIO.readResources());

  PCollection<String> resources = result.getResources();
  resources.apply(
      "waitForAnyMessage",
      signal.signalSuccessWhen(resources.getCoder(), anyResources -> true));
  // wait for any resource
  Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
  pipeline.apply(signal.signalStart());

  PipelineResult job = pipeline.run();
  start.get();
  signal.waitForSuccess(Duration.standardMinutes(5));

  // A runner may not support cancel
  try {
    job.cancel();
  } catch (UnsupportedOperationException exc) {
    // noop
  }
}
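Because setBlockOnRun(false) makes the direct runner return from run() immediately, the test keeps the PipelineResult so the streaming job can be cancelled once the test signal reports success. A minimal sketch of also inspecting the job state around the cancel; the state logging is an assumption, not part of the original test, and like the original it relies on the enclosing method declaring throws Exception, since cancel() can throw IOException:

// Minimal sketch (assumption): cancel if the runner supports it, then look at the state.
try {
  job.cancel();
  // Cancellation can be asynchronous; getState() may still report RUNNING for a short while.
  System.out.println("State after cancel: " + job.getState());
} catch (UnsupportedOperationException exc) {
  // The runner does not support cancel; the job is left to terminate on its own.
}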