Example 71 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class PubSubCdcToBigQueryTest, method testPubSubCdcToBigQueryApplyJavaScriptUDF:

/**
 * Tests that the {@link PubSubCdcToBigQuery} pipeline applies a JavaScript UDF end-to-end.
 */
@Test
public void testPubSubCdcToBigQueryApplyJavaScriptUDF() throws Exception {
    // Test input
    final String payload = "{\"ticker\": \"GOOGL\", \"price\": 1006.94}";
    final PubsubMessage message = new PubsubMessage(payload.getBytes(), ImmutableMap.of("id", "123", "type", "custom_event"));
    final Instant timestamp = new DateTime(2022, 2, 22, 22, 22, 22, 222, DateTimeZone.UTC).toInstant();
    final FailsafeElementCoder<String, String> coder = FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
    CoderRegistry coderRegistry = pipeline.getCoderRegistry();
    coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);
    // Parameters
    String transformPath = TRANSFORM_FILE_PATH;
    String transformFunction = "transform";
    PubSubCdcToBigQuery.Options options = PipelineOptionsFactory.create().as(PubSubCdcToBigQuery.Options.class);
    options.setJavascriptTextTransformGcsPath(transformPath);
    options.setJavascriptTextTransformFunctionName(transformFunction);
    InputUDFToTableRow<String> deadletterHandler =
        new InputUDFToTableRow<String>(
            options.getJavascriptTextTransformGcsPath(),
            options.getJavascriptTextTransformFunctionName(),
            options.getPythonTextTransformGcsPath(),
            options.getPythonTextTransformFunctionName(),
            options.getRuntimeRetries(),
            coder);
    // Build pipeline
    PCollectionTuple transformOut =
        pipeline
            .apply("CreateInput",
                Create.timestamped(TimestampedValue.of(message, timestamp))
                    .withCoder(PubsubMessageWithAttributesCoder.of()))
            .apply("ConvertPubSubToFailsafe", ParDo.of(new PubSubToFailSafeElement()))
            .apply("ConvertMessageToTableRow", deadletterHandler);
    transformOut.get(deadletterHandler.udfDeadletterOut).setCoder(coder);
    transformOut.get(deadletterHandler.transformDeadletterOut).setCoder(coder);
    // Assert
    PAssert.that(transformOut.get(deadletterHandler.udfDeadletterOut)).empty();
    PAssert.that(transformOut.get(deadletterHandler.transformDeadletterOut)).empty();
    PAssert.that(transformOut.get(deadletterHandler.transformOut)).satisfies(collection -> {
        TableRow result = collection.iterator().next();
        assertThat(result.get("ticker"), is(equalTo("GOOGL")));
        assertThat(result.get("price"), is(equalTo(1006.94)));
        return null;
    });
    // Execute pipeline
    pipeline.run();
}
Also used : Instant(org.joda.time.Instant) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) DateTime(org.joda.time.DateTime) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) PubSubToFailSafeElement(com.google.cloud.teleport.v2.transforms.PubSubToFailSafeElement) TableRow(com.google.api.services.bigquery.model.TableRow) InputUDFToTableRow(com.google.cloud.teleport.v2.transforms.UDFTextTransformer.InputUDFToTableRow) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Test(org.junit.Test)
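The ConvertPubSubToFailsafe step above exists so that the original message text survives any downstream UDF failure. Below is a minimal sketch of that pattern, assuming the real PubSubToFailSafeElement behaves along these lines; the class name suffix and details are illustrative, not the template's actual implementation.

import java.nio.charset.StandardCharsets;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage;
import org.apache.beam.sdk.transforms.DoFn;
import com.google.cloud.teleport.v2.values.FailsafeElement;

public class PubSubToFailSafeElementSketch
    extends DoFn<PubsubMessage, FailsafeElement<String, String>> {

    @ProcessElement
    public void processElement(ProcessContext context) {
        // Decode the Pub/Sub payload once and carry it as both the original
        // and the current value; downstream UDF steps rewrite only the current
        // value, leaving the original intact for deadletter reporting.
        String payload = new String(context.element().getPayload(), StandardCharsets.UTF_8);
        context.output(FailsafeElement.of(payload, payload));
    }
}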

Example 72 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class KafkaToGCSTest, method testFileFormatFactoryInvalid:

/**
 * Tests whether {@link FileFormatFactory} maps the output file format to the transform to be
 * carried out, and throws an {@link IllegalArgumentException} if an invalid file format is
 * passed.
 */
@Test
@Category(NeedsRunner.class)
public void testFileFormatFactoryInvalid() {
    // Create the test input.
    final String key = "Name";
    final String value = "Generic";
    final KV<String, String> message = KV.of(key, value);
    final String outputDirectory = "gs://bucket_name/path/to/output-location";
    final String outputFileFormat = "JSON"; // deliberately not a supported format
    final String outputFilenamePrefix = "output";
    final Integer numShards = 1;
    final String tempOutputDirectory = "gs://bucket_name/path/to/temp-location";
    KafkaToGCSOptions options = PipelineOptionsFactory.create().as(KafkaToGCSOptions.class);
    options.setOutputFileFormat(outputFileFormat);
    options.setOutputDirectory(outputDirectory);
    options.setOutputFilenamePrefix(outputFilenamePrefix);
    options.setNumShards(numShards);
    options.setTempLocation(tempOutputDirectory);
    // Expect the factory to reject the unsupported "JSON" output format.
    exception.expect(IllegalArgumentException.class);
    PCollection<KV<String, String>> records =
        pipeline.apply(
            "CreateInput",
            Create.of(message).withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
    records.apply("WriteToGCS", FileFormatFactory.newBuilder().setOptions(options).build());
    // Run the pipeline.
    pipeline.run();
}
Also used : KafkaToGCSOptions(com.google.cloud.teleport.v2.options.KafkaToGCSOptions) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
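The expected IllegalArgumentException is FileFormatFactory rejecting "JSON" as an output format. A minimal sketch of that validation pattern is below; the enum of supported formats is an assumption for illustration, not the template's actual list.

enum FileFormat {
    TEXT,
    AVRO,
    PARQUET
}

static FileFormat parseOutputFileFormat(String outputFileFormat) {
    try {
        // Enum lookup doubles as validation: unknown names throw.
        return FileFormat.valueOf(outputFileFormat);
    } catch (IllegalArgumentException e) {
        // Rethrow with a message naming the rejected value and the valid set.
        throw new IllegalArgumentException(
            "Invalid output file format: " + outputFileFormat
                + ". Supported formats: TEXT, AVRO, PARQUET.",
            e);
    }
}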

Example 73 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class KafkaToPubsub, method main:

/**
 * Main entry point for pipeline execution.
 *
 * @param args Command line arguments to the pipeline.
 */
public static void main(String[] args) {
    KafkaToPubsubOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(KafkaToPubsubOptions.class);
    run(options);
}
Also used : KafkaToPubsubOptions(com.google.cloud.teleport.v2.options.KafkaToPubsubOptions)
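The run method called here is not shown in the excerpt. A minimal sketch of what such a helper could look like, assuming hypothetical option getters (getBootstrapServers, getInputTopic, getOutputTopic) and a plain read-then-write topology:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.kafka.KafkaIO;
import org.apache.beam.sdk.transforms.Values;
import org.apache.kafka.common.serialization.StringDeserializer;

public static PipelineResult run(KafkaToPubsubOptions options) {
    Pipeline pipeline = Pipeline.create(options);
    pipeline
        // Read key/value string records from Kafka.
        .apply("ReadFromKafka",
            KafkaIO.<String, String>read()
                .withBootstrapServers(options.getBootstrapServers()) // assumed getter
                .withTopic(options.getInputTopic()) // assumed getter
                .withKeyDeserializer(StringDeserializer.class)
                .withValueDeserializer(StringDeserializer.class)
                .withoutMetadata())
        // Keep only the record values and publish them to Pub/Sub.
        .apply("ExtractValues", Values.<String>create())
        .apply("WriteToPubSub",
            PubsubIO.writeStrings().to(options.getOutputTopic())); // assumed getter
    return pipeline.run();
}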

Example 74 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class KafkaToGCS, method main:

/**
 * Main entry point for pipeline execution.
 *
 * @param args Command line arguments to the pipeline.
 */
public static void main(String[] args) {
    KafkaToGCSOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(KafkaToGCSOptions.class);
    run(options);
}
Also used : KafkaToGCSOptions(com.google.cloud.teleport.v2.options.KafkaToGCSOptions)
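One thing the KafkaToGCSOptions fields (numShards, outputDirectory) imply: a streaming Kafka source must be windowed before file writes can fire. A sketch of that windowed-write step is below; the five-minute window is an illustrative choice, not the template's actual setting.

import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

static void writeWindowedText(PCollection<String> records, KafkaToGCSOptions options) {
    records
        // Unbounded collections need a window (or trigger) before file writes.
        .apply("FixedWindows", Window.<String>into(FixedWindows.of(Duration.standardMinutes(5))))
        .apply("WriteToGCS",
            TextIO.write()
                .withWindowedWrites()
                .withNumShards(options.getNumShards())
                .to(options.getOutputDirectory()));
}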

Example 75 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class CsvConvertersTest, method testLineToFailsafeJsonNoHeadersUdfDeadletter:

/**
 * Tests that {@link CsvConverters.LineToFailsafeJson} converts a line to a {@link FailsafeElement}
 * correctly using a JavaScript UDF. UDF processing is handled by {@link
 * JavascriptTextTransformer}. The record should be output to the deadletter tag.
 */
@Test
public void testLineToFailsafeJsonNoHeadersUdfDeadletter() {
    FailsafeElementCoder<String, String> coder = FAILSAFE_ELEMENT_CODER;
    CoderRegistry coderRegistry = pipeline.getCoderRegistry();
    coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);
    PCollection<String> lines = pipeline.apply(Create.of(BAD_JSON_STRING_RECORD).withCoder(StringUtf8Coder.of()));
    PCollectionTuple linesTuple = PCollectionTuple.of(CSV_LINES, lines);
    CsvConverters.CsvPipelineOptions options = PipelineOptionsFactory.create().as(CsvConverters.CsvPipelineOptions.class);
    options.setDelimiter(",");
    options.setJavascriptTextTransformGcsPath(SCRIPT_PARSE_EXCEPTION_FILE_PATH);
    options.setJavascriptTextTransformFunctionName("transform");
    PCollectionTuple failsafe =
        linesTuple.apply(
            "TestLineToFailsafeJsonNoHeadersUdfBad",
            CsvConverters.LineToFailsafeJson.newBuilder()
                .setDelimiter(options.getDelimiter())
                .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
                .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
                // No JSON schema path: force UDF processing for this record.
                .setJsonSchemaPath(null)
                .setHeaderTag(CSV_HEADERS)
                .setLineTag(CSV_LINES)
                .setUdfOutputTag(PROCESSING_OUT)
                .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
                .build());
    PAssert.that(failsafe.get(PROCESSING_OUT)).empty();
    PAssert.that(failsafe.get(PROCESSING_DEADLETTER_OUT)).satisfies(collection -> {
        FailsafeElement result = collection.iterator().next();
        assertThat(result.getPayload(), is(equalTo(BAD_JSON_STRING_RECORD)));
        return null;
    });
    pipeline.run();
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement) Test(org.junit.Test)
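The deadletter assertion above relies on the UDF transform catching the script's exception and rerouting the element instead of failing the pipeline. Below is a sketch of that routing pattern; applyUdf is a hypothetical stand-in, and this is not JavascriptTextTransformer's actual code.

import com.google.cloud.teleport.v2.values.FailsafeElement;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.TupleTag;

public class UdfWithDeadletterSketch
    extends DoFn<FailsafeElement<String, String>, FailsafeElement<String, String>> {

    private final TupleTag<FailsafeElement<String, String>> deadletterTag;

    public UdfWithDeadletterSketch(TupleTag<FailsafeElement<String, String>> deadletterTag) {
        this.deadletterTag = deadletterTag;
    }

    @ProcessElement
    public void processElement(ProcessContext context) {
        FailsafeElement<String, String> element = context.element();
        try {
            // Success path: the UDF result becomes the current payload while
            // the original payload is preserved.
            context.output(
                FailsafeElement.of(element.getOriginalPayload(), applyUdf(element.getPayload())));
        } catch (Exception e) {
            // Failure path: emit the untouched record to the deadletter tag
            // with the error message attached.
            FailsafeElement<String, String> failure =
                FailsafeElement.of(element.getOriginalPayload(), element.getPayload());
            failure.setErrorMessage(e.getMessage());
            context.output(deadletterTag, failure);
        }
    }

    // Hypothetical stand-in for invoking the JavaScript UDF.
    private String applyUdf(String payload) throws Exception {
        throw new UnsupportedOperationException("UDF invocation not shown");
    }
}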

Aggregations

Test (org.junit.Test): 63
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 25
FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement): 20
Pipeline (org.apache.beam.sdk.Pipeline): 19
CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry): 19
BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 15
GenericRecord (org.apache.avro.generic.GenericRecord): 12
Category (org.junit.experimental.categories.Category): 12
Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter): 10
BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 10
PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions): 9
TableRow (com.google.api.services.bigquery.model.TableRow): 8
DataplexClient (com.google.cloud.teleport.v2.clients.DataplexClient): 8
FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions): 8
KV (org.apache.beam.sdk.values.KV): 8
ArrayList (java.util.ArrayList): 7
ElasticsearchWriteOptions (com.google.cloud.teleport.v2.elasticsearch.options.ElasticsearchWriteOptions): 6
GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions): 6
FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions): 6
PubSubProtoToBigQueryOptions (com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions): 6