Example 51 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In the class GCSToSplunkTest, the method testGCSToSplunkReadUdf:

@Test
public void testGCSToSplunkReadUdf() {
    // Arrange
    String stringifiedJsonRecord = "{\"id\":\"007\",\"state\":\"CA\",\"price\":26.23}";
    SplunkEvent expectedSplunkEvent = SplunkEvent.newBuilder().withEvent(stringifiedJsonRecord).create();
    CoderRegistry coderRegistry = pipeline.getCoderRegistry();
    coderRegistry.registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());
    coderRegistry.registerCoderForType(
        FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(), FAILSAFE_ELEMENT_CODER);
    GCSToSplunkOptions options = PipelineOptionsFactory.create().as(GCSToSplunkOptions.class);
    options.setJavascriptTextTransformGcsPath(TRANSFORM_FILE_PATH);
    options.setJavascriptTextTransformFunctionName("transform");
    options.setContainsHeaders(false);
    options.setInputFileSpec(NO_HEADER_CSV_FILE_PATH);
    // Act
    PCollectionTuple readCsvOut = pipeline.apply("Read CSV", readFromCsv(options));
    PCollectionTuple transformedLines =
        readCsvOut.apply("Convert to JSON", convertToFailsafeAndMaybeApplyUdf(options));
    PCollectionTuple splunkEventTuple =
        transformedLines.get(UDF_OUT).apply("Convert to Splunk Event", convertToSplunkEvent());
    // Assert
    PAssert.that(transformedLines.get(UDF_OUT)).satisfies(collection -> {
        FailsafeElement element = collection.iterator().next();
        assertThat(element.getPayload()).isEqualTo(stringifiedJsonRecord);
        return null;
    });
    PAssert.that(transformedLines.get(UDF_ERROR_OUT)).empty();
    PAssert.that(splunkEventTuple.get(SPLUNK_EVENT_OUT)).containsInAnyOrder(expectedSplunkEvent);
    PAssert.that(splunkEventTuple.get(SPLUNK_EVENT_ERROR_OUT)).empty();
    // Execute pipeline
    pipeline.run();
}
Also used : GCSToSplunk.convertToSplunkEvent(com.google.cloud.teleport.v2.templates.GCSToSplunk.convertToSplunkEvent), SplunkEvent(org.apache.beam.sdk.io.splunk.SplunkEvent), CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry), PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple), GCSToSplunkOptions(com.google.cloud.teleport.v2.templates.GCSToSplunk.GCSToSplunkOptions), GCSToSplunk.flattenErrorsAndConvertToString(com.google.cloud.teleport.v2.templates.GCSToSplunk.flattenErrorsAndConvertToString), FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement), Test(org.junit.Test)
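
The constants referenced above (FAILSAFE_ELEMENT_CODER, TRANSFORM_FILE_PATH, NO_HEADER_CSV_FILE_PATH) are declared elsewhere in GCSToSplunkTest and are not part of this excerpt. A minimal sketch of how the coder constant is plausibly defined, assuming the project's FailsafeElementCoder pairs nullable string coders for the original and current payloads (a hypothetical reconstruction, not the verbatim source):

import com.google.cloud.teleport.v2.coders.FailsafeElementCoder;
import org.apache.beam.sdk.coders.NullableCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;

class GCSToSplunkTestFixtures {
    // Hypothetical reconstruction: registering this coder is what lets the
    // pipeline serialize FailsafeElement<String, String> values between steps.
    static final FailsafeElementCoder<String, String> FAILSAFE_ELEMENT_CODER =
        FailsafeElementCoder.of(
            NullableCoder.of(StringUtf8Coder.of()),  // coder for the original CSV line
            NullableCoder.of(StringUtf8Coder.of())); // coder for the current (post-UDF) payload
}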

Example 52 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In the class FileFormatConversionTest, the method testCsvToAvroE2E:

/**
 * Tests if the CSV to Avro pipeline transforms data correctly and stores it in an Avro file.
 */
@Test
public void testCsvToAvroE2E() {
    FileFormatConversionOptions options = PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
    String tempDir = temporaryFolder.getRoot().getAbsolutePath() + "/";
    options.setInputFileFormat(CSV);
    options.setOutputFileFormat(AVRO);
    options.setInputFileSpec(CSV_FILE_PATH);
    options.setOutputBucket(tempDir);
    options.setContainsHeaders(true);
    options.setSchema(SCHEMA_FILE_PATH);
    options.setDelimiter("|");
    Schema schema = SchemaUtils.getAvroSchema(SCHEMA_FILE_PATH);
    GenericRecord genericRecords = new GenericData.Record(schema);
    genericRecords.put("id", "007");
    genericRecords.put("state", "CA");
    genericRecords.put("price", 26.23);
    mainPipeline.apply("TestCsvToAvro", FileFormatConversionFactory.FileFormat.newBuilder().setOptions(options).setInputFileFormat(CSV).setOutputFileFormat(AVRO).build());
    mainPipeline.run();
    PCollection<GenericRecord> readAvroFile =
        readPipeline.apply(
            "ReadAvroFile",
            AvroConverters.ReadAvroFile.newBuilder()
                .withInputFileSpec(tempDir + "*")
                .withSchema(SCHEMA_FILE_PATH)
                .build());
    PAssert.that(readAvroFile).containsInAnyOrder(genericRecords);
    readPipeline.run();
}
Also used : FileFormatConversionOptions(com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions), Schema(org.apache.avro.Schema), GenericRecord(org.apache.avro.generic.GenericRecord), Test(org.junit.Test)
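
The schema file at SCHEMA_FILE_PATH is not shown in this excerpt. Given the three fields the test populates, a plausible reconstruction of its contents, parsed here with Avro's Schema.Parser to show the expected record shape (the field types are inferred from the test values, so treat them as assumptions):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

class AvroSchemaSketch {
    public static void main(String[] args) {
        // Hypothetical schema matching the fields populated in testCsvToAvroE2E.
        String schemaJson =
            "{\"type\":\"record\",\"name\":\"TestRecord\",\"fields\":["
                + "{\"name\":\"id\",\"type\":\"string\"},"
                + "{\"name\":\"state\",\"type\":\"string\"},"
                + "{\"name\":\"price\",\"type\":\"double\"}]}";
        Schema schema = new Schema.Parser().parse(schemaJson);
        GenericRecord record = new GenericData.Record(schema);
        record.put("id", "007");
        record.put("state", "CA");
        record.put("price", 26.23);
        System.out.println(record); // {"id": "007", "state": "CA", "price": 26.23}
    }
}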

Example 53 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In the class FileFormatConversionTest, the method testCsvToAvroWithEmptyField:

/**
 * Tests if the CSV to Avro pipeline can handle empty fields in the CSV file.
 */
@Test
public void testCsvToAvroWithEmptyField() {
    FileFormatConversionOptions options = PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
    String tempDir = temporaryFolder.getRoot().getAbsolutePath() + "/";
    options.setInputFileFormat(CSV);
    options.setOutputFileFormat(AVRO);
    options.setInputFileSpec(CSV_FILE_WITH_MISSING_FIELD_PATH);
    options.setOutputBucket(tempDir);
    options.setContainsHeaders(true);
    options.setSchema(SCHEMA_FILE_TWO_PATH);
    Schema schema = SchemaUtils.getAvroSchema(SCHEMA_FILE_TWO_PATH);
    GenericRecord genericRecords = new GenericData.Record(schema);
    genericRecords.put("id", "007");
    genericRecords.put("state", "CA");
    genericRecords.put("price", null);
    mainPipeline.apply("TestCsvToAvroWithEmptyField", FileFormatConversionFactory.FileFormat.newBuilder().setOptions(options).setInputFileFormat(CSV).setOutputFileFormat(AVRO).build());
    mainPipeline.run();
    PCollection<GenericRecord> readAvroFile =
        readPipeline.apply(
            "ReadAvroFile",
            AvroConverters.ReadAvroFile.newBuilder()
                .withInputFileSpec(tempDir + "*")
                .withSchema(SCHEMA_FILE_TWO_PATH)
                .build());
    PAssert.that(readAvroFile).containsInAnyOrder(genericRecords);
    readPipeline.run();
}
Also used : FileFormatConversionOptions(com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions), Schema(org.apache.avro.Schema), GenericRecord(org.apache.avro.generic.GenericRecord), Test(org.junit.Test)
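
For this test to store null in the price field, the schema at SCHEMA_FILE_TWO_PATH must declare that field as nullable. A sketch of the presumed difference from the previous schema (hypothetical; only the price field changes):

import org.apache.avro.Schema;

class NullableSchemaSketch {
    public static void main(String[] args) {
        // Hypothetical reconstruction of SCHEMA_FILE_TWO_PATH: identical to the
        // earlier schema except that price is a ["null","double"] union, which
        // is what allows the missing CSV field to be stored as null.
        String schemaJson =
            "{\"type\":\"record\",\"name\":\"TestRecord\",\"fields\":["
                + "{\"name\":\"id\",\"type\":\"string\"},"
                + "{\"name\":\"state\",\"type\":\"string\"},"
                + "{\"name\":\"price\",\"type\":[\"null\",\"double\"],\"default\":null}]}";
        Schema schema = new Schema.Parser().parse(schemaJson);
        System.out.println(schema.getField("price").schema()); // ["null","double"]
    }
}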

Example 54 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In the class FileFormatConversionTest, the method testInvalidFileFormat:

/**
 * Tests that {@link FileFormatConversion#run(FileFormatConversionOptions)} throws an exception
 * if an invalid file format is provided.
 */
@Test
public void testInvalidFileFormat() {
    expectedException.expect(RuntimeException.class);
    expectedException.expectMessage("Provide correct input/output file format.");
    FileFormatConversionOptions options = PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
    options.setInputFileFormat("INVALID");
    options.setOutputFileFormat(AVRO);
    FileFormatConversion.run(options);
}
Also used : FileFormatConversionOptions(com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions), Test(org.junit.Test)

Example 55 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In the class FileFormatConversionTest, the method testSameInputAndOutputFileFormat:

/**
 * Tests that {@link FileFormatConversion#run(FileFormatConversionOptions)} throws an exception
 * if the same input and output file formats are provided.
 */
@Test
public void testSameInputAndOutputFileFormat() {
    expectedException.expect(RuntimeException.class);
    expectedException.expectMessage("Provide correct input/output file format.");
    FileFormatConversionOptions options = PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
    options.setInputFileFormat(AVRO);
    options.setOutputFileFormat(AVRO);
    FileFormatConversion.run(options);
}
Also used : FileFormatConversionOptions(com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions), Test(org.junit.Test)
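
Examples 54 and 55 expect the same error message, which suggests a single validation gate inside FileFormatConversion.run. A minimal sketch of that kind of guard, assuming a fixed set of valid format names (the helper below is hypothetical; the real method also builds and runs the conversion pipeline):

import java.util.Set;

class FormatValidationSketch {
    private static final Set<String> VALID_FORMATS = Set.of("CSV", "AVRO", "PARQUET");

    // Hypothetical reconstruction of the guard both tests above exercise; the
    // actual check lives inside FileFormatConversion.run(options).
    static void validateFormats(String inputFileFormat, String outputFileFormat) {
        if (!VALID_FORMATS.contains(inputFileFormat)
            || !VALID_FORMATS.contains(outputFileFormat)
            || inputFileFormat.equals(outputFileFormat)) {
            throw new RuntimeException("Provide correct input/output file format.");
        }
    }

    public static void main(String[] args) {
        validateFormats("CSV", "AVRO");  // passes validation
        validateFormats("AVRO", "AVRO"); // throws, as in testSameInputAndOutputFileFormat
    }
}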

Aggregations

Test (org.junit.Test) 63
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple) 25
FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement) 20
Pipeline (org.apache.beam.sdk.Pipeline) 19
CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry) 19
BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable) 15
GenericRecord (org.apache.avro.generic.GenericRecord) 12
Category (org.junit.experimental.categories.Category) 12
Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) 10
BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition) 10
PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) 9
TableRow (com.google.api.services.bigquery.model.TableRow) 8
DataplexClient (com.google.cloud.teleport.v2.clients.DataplexClient) 8
FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions) 8
KV (org.apache.beam.sdk.values.KV) 8
ArrayList (java.util.ArrayList) 7
ElasticsearchWriteOptions (com.google.cloud.teleport.v2.elasticsearch.options.ElasticsearchWriteOptions) 6
GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions) 6
FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions) 6
PubSubProtoToBigQueryOptions (com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions) 6