Search in sources :

Example 81 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class ParquetIOIT method writeThenReadAll.

/**
 * Builds a single pipeline that writes generated Avro records to Parquet files, reads the written
 * files back, and asserts that the combined hash of each record's {@code "row"} field equals
 * {@code expectedHash}. The written file names are also passed to {@code DeleteFileFn}, with the
 * hash as a side input. After the run finishes, the result is handed to
 * {@code collectAndPublishMetrics}.
 */
@Test
public void writeThenReadAll() {
    // Write side, part 1: sequence -> text lines -> Avro records with an explicit coder.
    PCollection<GenericRecord> avroRecords =
        pipeline
            .apply("Generate sequence", GenerateSequence.from(0).to(numberOfTextLines))
            .apply(
                "Produce text lines",
                ParDo.of(new FileBasedIOITHelper.DeterministicallyConstructTestTextLineFn()))
            .apply("Produce Avro records", ParDo.of(new DeterministicallyConstructAvroRecordsFn()))
            .setCoder(AvroCoder.of(SCHEMA));

    // Write side, part 2: write Parquet files, bracketed by TimeMonitor steps, and keep the
    // names of the files that were produced.
    PCollection<String> writtenFiles =
        avroRecords
            .apply(
                "Gather write start times",
                ParDo.of(new TimeMonitor<>(PARQUET_NAMESPACE, "writeStart")))
            .apply(
                "Write Parquet files",
                FileIO.<GenericRecord>write().via(ParquetIO.sink(SCHEMA)).to(filenamePrefix))
            .getPerDestinationOutputFilenames()
            .apply(
                "Gather write end times",
                ParDo.of(new TimeMonitor<>(PARQUET_NAMESPACE, "writeEnd")))
            .apply("Get file names", Values.create());

    // Read side: match and read the written files, then reduce the "row" values to one hash.
    PCollection<String> hashOfRows =
        writtenFiles
            .apply("Find files", FileIO.matchAll())
            .apply("Read matched files", FileIO.readMatches())
            .apply(
                "Gather read start time",
                ParDo.of(new TimeMonitor<>(PARQUET_NAMESPACE, "readStart")))
            .apply("Read parquet files", ParquetIO.readFiles(SCHEMA))
            .apply(
                "Gather read end time",
                ParDo.of(new TimeMonitor<>(PARQUET_NAMESPACE, "readEnd")))
            .apply(
                "Map records to strings",
                MapElements.into(strings())
                    .via(
                        (SerializableFunction<GenericRecord, String>)
                            rec -> String.valueOf(rec.get("row"))))
            .apply("Calculate hashcode", Combine.globally(new HashingFn()));

    PAssert.thatSingleton(hashOfRows).isEqualTo(expectedHash);

    // The hash is supplied as a side input to the delete step.
    writtenFiles.apply(
        "Delete test files",
        ParDo.of(new FileBasedIOITHelper.DeleteFileFn())
            .withSideInputs(hashOfRows.apply(View.asSingleton())));

    PipelineResult pipelineResult = pipeline.run();
    pipelineResult.waitUntilFinish();
    collectAndPublishMetrics(pipelineResult);
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) FileIO(org.apache.beam.sdk.io.FileIO) BeforeClass(org.junit.BeforeClass) PipelineResult(org.apache.beam.sdk.PipelineResult) MetricsReader(org.apache.beam.sdk.testutils.metrics.MetricsReader) Combine(org.apache.beam.sdk.transforms.Combine) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) View(org.apache.beam.sdk.transforms.View) Timestamp(com.google.cloud.Timestamp) FileBasedIOITHelper(org.apache.beam.sdk.io.common.FileBasedIOITHelper) Function(java.util.function.Function) HashSet(java.util.HashSet) InfluxDBSettings(org.apache.beam.sdk.testutils.publishing.InfluxDBSettings) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TypeDescriptors.strings(org.apache.beam.sdk.values.TypeDescriptors.strings) NamedTestResult(org.apache.beam.sdk.testutils.NamedTestResult) DoFn(org.apache.beam.sdk.transforms.DoFn) MapElements(org.apache.beam.sdk.transforms.MapElements) GenericRecord(org.apache.avro.generic.GenericRecord) HashingFn(org.apache.beam.sdk.io.common.HashingFn) Schema(org.apache.avro.Schema) FileBasedIOITHelper.readFileBasedIOITPipelineOptions(org.apache.beam.sdk.io.common.FileBasedIOITHelper.readFileBasedIOITPipelineOptions) PAssert(org.apache.beam.sdk.testing.PAssert) FileBasedIOITHelper.appendTimestampSuffix(org.apache.beam.sdk.io.common.FileBasedIOITHelper.appendTimestampSuffix) TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) Set(java.util.Set) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) Test(org.junit.Test) UUID(java.util.UUID) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) IOITMetrics(org.apache.beam.sdk.testutils.metrics.IOITMetrics) Rule(org.junit.Rule) ParDo(org.apache.beam.sdk.transforms.ParDo) 
FileBasedIOTestPipelineOptions(org.apache.beam.sdk.io.common.FileBasedIOTestPipelineOptions) Values(org.apache.beam.sdk.transforms.Values) FileBasedIOITHelper(org.apache.beam.sdk.io.common.FileBasedIOITHelper) PipelineResult(org.apache.beam.sdk.PipelineResult) GenericRecord(org.apache.avro.generic.GenericRecord) HashingFn(org.apache.beam.sdk.io.common.HashingFn) Test(org.junit.Test)

Example 82 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class TFRecordIOIT method writeThenReadAll.

// TODO: There are two pipelines due to: https://issues.apache.org/jira/browse/BEAM-3267
/**
 * Runs a write pipeline that produces TFRecord files, waits for it to finish, then runs a read
 * pipeline over the written files and asserts that the combined hash of the content equals
 * {@code expectedHash}. The file pattern is also passed to {@code DeleteFileFn}, with the hash as
 * a side input. Both pipeline results are handed to {@code collectAndPublishMetrics}.
 */
@Test
public void writeThenReadAll() {
    final TFRecordIO.Write tfRecordWrite =
        TFRecordIO.write()
            .to(filenamePrefix)
            .withCompression(compressionType)
            .withSuffix(".tfrecord");

    // Write pipeline: sequence -> text lines -> bytes -> TFRecord files.
    writePipeline
        .apply("Generate sequence", GenerateSequence.from(0).to(numberOfTextLines))
        .apply(
            "Produce text lines",
            ParDo.of(new FileBasedIOITHelper.DeterministicallyConstructTestTextLineFn()))
        .apply("Transform strings to bytes", MapElements.via(new StringToByteArray()))
        .apply(
            "Record time before writing",
            ParDo.of(new TimeMonitor<>(TFRECORD_NAMESPACE, WRITE_TIME)))
        .apply("Write content to files", tfRecordWrite);

    final PipelineResult writeOutcome = writePipeline.run();
    writeOutcome.waitUntilFinish();

    // The pattern is created only after the write pipeline has finished.
    String writtenFilesPattern = createFilenamePattern();

    // Read pipeline: read the records back and reduce them to a single hash.
    PCollection<String> hashOfContent =
        readPipeline
            .apply(TFRecordIO.read().from(writtenFilesPattern).withCompression(AUTO))
            .apply(
                "Record time after reading",
                ParDo.of(new TimeMonitor<>(TFRECORD_NAMESPACE, READ_TIME)))
            .apply("Transform bytes to strings", MapElements.via(new ByteArrayToString()))
            .apply("Calculate hashcode", Combine.globally(new HashingFn()))
            .apply(Reshuffle.viaRandomKey());

    PAssert.thatSingleton(hashOfContent).isEqualTo(expectedHash);

    // The hash is supplied as a side input to the delete step.
    readPipeline
        .apply(Create.of(writtenFilesPattern))
        .apply(
            "Delete test files",
            ParDo.of(new DeleteFileFn()).withSideInputs(hashOfContent.apply(View.asSingleton())));

    final PipelineResult readOutcome = readPipeline.run();
    readOutcome.waitUntilFinish();

    collectAndPublishMetrics(writeOutcome, readOutcome);
}
Also used : FileBasedIOITHelper(org.apache.beam.sdk.io.common.FileBasedIOITHelper) PipelineResult(org.apache.beam.sdk.PipelineResult) HashingFn(org.apache.beam.sdk.io.common.HashingFn) DeleteFileFn(org.apache.beam.sdk.io.common.FileBasedIOITHelper.DeleteFileFn) TFRecordIO(org.apache.beam.sdk.io.TFRecordIO) Test(org.junit.Test)

Example 83 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class StreamingSnowflakeIOIT method writeStreamToSnowflake.

/**
 * Feeds every element of {@code testRows} through a {@code TestStream} into a streaming
 * {@code SnowflakeIO} write, runs the pipeline with {@code options}, and waits for it to finish.
 */
private void writeStreamToSnowflake() {
    // addElements takes a first element plus varargs, so the list is split into head and tail.
    TestStream<TestRow> rowStream =
        TestStream.create(SerializableCoder.of(TestRow.class))
            .advanceWatermarkTo(Instant.now())
            .addElements(
                testRows.get(0),
                testRows.subList(1, testRows.size()).toArray(new TestRow[0]))
            .advanceWatermarkToInfinity();

    pipeline
        .apply(rowStream)
        .apply(
            "Write SnowflakeIO",
            SnowflakeIO.<TestRow>write()
                .withDataSourceConfiguration(dc)
                .withUserDataMapper(getTestRowDataMapper())
                .withSnowPipe(options.getSnowPipe())
                .withStorageIntegrationName(storageIntegrationName)
                .withStagingBucketName(stagingBucketName)
                .withFlushTimeLimit(Duration.millis(18000))
                .withFlushRowLimit(50000)
                .withDebugMode(StreamingLogLevel.ERROR));

    pipeline.run(options).waitUntilFinish();
}
Also used : TestRow(org.apache.beam.sdk.io.common.TestRow) PipelineResult(org.apache.beam.sdk.PipelineResult)

Example 84 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class BatchSnowflakeIOIT method testWriteThenRead.

/**
 * Runs the write pipeline and waits for it to finish, then does the same for the read pipeline.
 */
@Test
public void testWriteThenRead() {
    // The read is started only after the write has finished.
    runWrite().waitUntilFinish();
    runRead().waitUntilFinish();
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)

Example 85 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class FhirIOReadIT method testFhirIORead.

/**
 * Reads strings from a Pub/Sub subscription, applies {@code FhirIO.readResources()}, and waits up
 * to five minutes for the success signal, which fires on any output resource. The job is then
 * cancelled if the runner supports it.
 */
@Test
public void testFhirIORead() throws Exception {
    // Let run() return without blocking so the test can wait on the signals below.
    pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

    PCollection<String> fetchedResources =
        pipeline
            .apply(PubsubIO.readStrings().fromSubscription(pubsubSubscription))
            .apply(FhirIO.readResources())
            .getResources();

    // The predicate accepts anything, so any resource counts as success.
    fetchedResources.apply(
        "waitForAnyMessage",
        signal.signalSuccessWhen(fetchedResources.getCoder(), anyResources -> true));

    Supplier<Void> startSignal = signal.waitForStart(Duration.standardMinutes(5));
    pipeline.apply(signal.signalStart());

    PipelineResult runningJob = pipeline.run();
    startSignal.get();
    signal.waitForSuccess(Duration.standardMinutes(5));

    try {
        runningJob.cancel();
    } catch (UnsupportedOperationException ignored) {
        // A runner may not support cancel; nothing to do in that case.
    }
}
Also used : Arrays(java.util.Arrays) TopicPath(org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath) PipelineResult(org.apache.beam.sdk.PipelineResult) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) SecureRandom(java.security.SecureRandom) Supplier(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Supplier) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) After(org.junit.After) DirectOptions(org.apache.beam.runners.direct.DirectOptions) TestPubsubSignal(org.apache.beam.sdk.io.gcp.pubsub.TestPubsubSignal) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) PubsubClient(org.apache.beam.sdk.io.gcp.pubsub.PubsubClient) TestPubsubOptions(org.apache.beam.sdk.io.gcp.pubsub.TestPubsubOptions) Collection(java.util.Collection) SubscriptionPath(org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.SubscriptionPath) IOException(java.io.IOException) PubsubGrpcClient(org.apache.beam.sdk.io.gcp.pubsub.PubsubGrpcClient) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) PubsubIO(org.apache.beam.sdk.io.gcp.pubsub.PubsubIO) Rule(org.junit.Rule) HEALTHCARE_DATASET_TEMPLATE(org.apache.beam.sdk.io.gcp.healthcare.HL7v2IOTestUtil.HEALTHCARE_DATASET_TEMPLATE) PipelineResult(org.apache.beam.sdk.PipelineResult) DirectOptions(org.apache.beam.runners.direct.DirectOptions) Test(org.junit.Test)

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult): 105, Test (org.junit.Test): 66, Pipeline (org.apache.beam.sdk.Pipeline): 29, TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 18, PCollection (org.apache.beam.sdk.values.PCollection): 18, TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor): 14, ArrayList (java.util.ArrayList): 12, Category (org.junit.experimental.categories.Category): 12, KV (org.apache.beam.sdk.values.KV): 11, Rule (org.junit.Rule): 11, IOException (java.io.IOException): 10, ExampleUtils (org.apache.beam.examples.common.ExampleUtils): 10, DoFn (org.apache.beam.sdk.transforms.DoFn): 10, HashingFn (org.apache.beam.sdk.io.common.HashingFn): 9, RunWith (org.junit.runner.RunWith): 9, MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults): 8, ParDo (org.apache.beam.sdk.transforms.ParDo): 8, Duration (org.joda.time.Duration): 8, Map (java.util.Map): 7, TableReference (com.google.api.services.bigquery.model.TableReference): 6