Search in sources :

Example 36 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In class SpannerChangeStreamsToGcsTest, method testWriteToGCSAvro:

/**
 * Integration test for the Avro output path: creates a database containing one table and one
 * change stream, writes two inserts, runs the template with {@code FileFormat.AVRO} output into a
 * temporary folder, then reads the written files back and checks them with
 * {@code VerifyDataChangeRecordAvro}.
 *
 * <p>The test database is dropped in a {@code finally} block so that a pipeline failure or a
 * failed assertion does not leave it behind.
 *
 * @throws Exception if creating the temporary folders, preparing the database, or running either
 *     pipeline fails.
 */
// mvn -Dexcluded.spanner.tests="" -Dtest=SpannerChangeStreamsToGcsTest test
@Test
@Category(IntegrationTest.class)
public void testWriteToGCSAvro() throws Exception {
    // Pick a database name and local folders for the output and temp files.
    String testDatabase = generateDatabaseName();
    fakeDir = tmpDir.newFolder("output").getAbsolutePath();
    fakeTempLocation = tmpDir.newFolder("temporaryLocation").getAbsolutePath();
    // Drop first so the create below starts from a clean slate.
    spannerServer.dropDatabase(testDatabase);
    try {
        // Create a table and a change stream.
        final String createTable =
            "CREATE TABLE " + TEST_TABLE
                + " (user_id INT64 NOT NULL,name STRING(MAX) ) PRIMARY KEY(user_id)";
        final String createChangeStream =
            "CREATE CHANGE STREAM " + TEST_CHANGE_STREAM + " FOR Users";
        List<String> statements = new ArrayList<>();
        statements.add(createTable);
        statements.add(createChangeStream);
        spannerServer.createDatabase(testDatabase, statements);

        // The pipeline's read window is bracketed by timestamps taken just before and after
        // the write.
        Timestamp startTimestamp = Timestamp.now();
        // Both inserts go out in a single write call; per the original author's note this is
        // expected to produce 1 data change record.
        List<Mutation> mutations = new ArrayList<>();
        mutations.add(
            Mutation.newInsertBuilder(TEST_TABLE)
                .set("user_id").to(1)
                .set("name").to("Name1")
                .build());
        mutations.add(
            Mutation.newInsertBuilder(TEST_TABLE)
                .set("user_id").to(2)
                .set("name").to("Name2")
                .build());
        spannerServer.getDbClient(testDatabase).write(mutations);
        Timestamp endTimestamp = Timestamp.now();

        SpannerChangeStreamsToGcsOptions options =
            PipelineOptionsFactory.create().as(SpannerChangeStreamsToGcsOptions.class);
        options.setSpannerProjectId(TEST_PROJECT);
        options.setSpannerInstanceId(TEST_INSTANCE);
        options.setSpannerDatabase(testDatabase);
        // The metadata instance/database are pointed at the same test instance/database.
        options.setSpannerMetadataInstanceId(TEST_INSTANCE);
        options.setSpannerMetadataDatabase(testDatabase);
        options.setSpannerChangeStreamName(TEST_CHANGE_STREAM);
        options.setStartTimestamp(startTimestamp.toString());
        options.setEndTimestamp(endTimestamp.toString());
        // Explicitly set an empty experiments list.
        List<String> experiments = new ArrayList<>();
        options.setExperiments(experiments);
        options.setOutputFileFormat(FileFormat.AVRO);
        options.setGcsOutputDirectory(fakeDir);
        options.setOutputFilenamePrefix(AVRO_FILENAME_PREFIX);
        options.setNumShards(NUM_SHARDS);
        options.setTempLocation(fakeTempLocation);

        // Run the pipeline under test and block until it completes.
        PipelineResult result = run(options);
        result.waitUntilFinish();

        // Read the Avro output back with the test pipeline and verify its contents.
        PCollection<com.google.cloud.teleport.v2.DataChangeRecord> dataChangeRecords =
            pipeline.apply(
                "readRecords",
                AvroIO.read(com.google.cloud.teleport.v2.DataChangeRecord.class)
                    .from(fakeDir + "/avro-output-*.avro"));
        PAssert.that(dataChangeRecords).satisfies(new VerifyDataChangeRecordAvro());
        pipeline.run();
    } finally {
        // Always drop the database, even when the pipeline or an assertion fails.
        spannerServer.dropDatabase(testDatabase);
    }
}
Also used : ArrayList(java.util.ArrayList) DataChangeRecord(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.DataChangeRecord) PipelineResult(org.apache.beam.sdk.PipelineResult) Timestamp(com.google.cloud.Timestamp) SpannerChangeStreamsToGcsOptions(com.google.cloud.teleport.v2.options.SpannerChangeStreamsToGcsOptions) Mutation(com.google.cloud.spanner.Mutation) Category(org.junit.experimental.categories.Category) Test(org.junit.Test) IntegrationTest(com.google.cloud.teleport.v2.spanner.IntegrationTest)

Example 37 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In class PubsubProtoToBigQueryTest, method testGetDescriptorWithInvalidMessageName:

/**
 * Checks that {@code PubsubProtoToBigQuery.getDescriptor} throws a {@link RuntimeException} whose
 * message contains both the schema path and the requested message name when that name is
 * {@code "invalid.message.Name"}.
 */
@Test
public void testGetDescriptorWithInvalidMessageName() {
    final String unknownMessageName = "invalid.message.Name";

    PubSubProtoToBigQueryOptions opts = getOptions();
    opts.setProtoSchemaPath(GENERATED_PROTO_SCHEMA_PATH);
    opts.setFullMessageName(unknownMessageName);

    RuntimeException thrown =
        assertThrows(RuntimeException.class, () -> PubsubProtoToBigQuery.getDescriptor(opts));

    // The error message should contain both the schema path and the message name.
    assertThat(thrown).hasMessageThat().contains(GENERATED_PROTO_SCHEMA_PATH);
    assertThat(thrown).hasMessageThat().contains(unknownMessageName);
}
Also used : PubSubProtoToBigQueryOptions(com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions) Test(org.junit.Test)

Example 38 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In class PubsubProtoToBigQueryTest, method testApplyUdfWithPathButNoFunction:

/**
 * Checks that {@code runUdf} throws {@link IllegalArgumentException} when a JavaScript path is
 * configured but the function name is either never set or set to the empty string.
 */
@Test
public void testApplyUdfWithPathButNoFunction() {
    PubSubProtoToBigQueryOptions udfOptions = getOptions();
    udfOptions.setJavascriptTextTransformGcsPath("/some/path.js");
    PCollection<String> emptyStringInput = pipeline.apply(Create.of(""));

    // Case 1: the function name has not been set on the options.
    assertThrows(IllegalArgumentException.class, () -> runUdf(emptyStringInput, udfOptions));

    // Case 2: the function name is set, but to the empty string.
    udfOptions.setJavascriptTextTransformFunctionName("");
    assertThrows(IllegalArgumentException.class, () -> runUdf(emptyStringInput, udfOptions));

    pipeline.run();
}
Also used : PubSubProtoToBigQueryOptions(com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions) Test(org.junit.Test)

Example 39 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In class PubsubProtoToBigQueryTest, method testWriteBigQueryWithInvalidJsonSchemaPath:

/**
 * Checks that {@code PubsubProtoToBigQuery.writeToBigQuery} throws an
 * {@link IllegalArgumentException} mentioning the configured table schema path when that path is
 * {@code "/some/invalid/path.json"}.
 */
@Test
public void testWriteBigQueryWithInvalidJsonSchemaPath() {
    final String bogusSchemaPath = "/some/invalid/path.json";

    PubSubProtoToBigQueryOptions opts = getOptions();
    opts.setBigQueryTableSchemaPath(bogusSchemaPath);

    // Can pass a null descriptor, since it shouldn't be used.
    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class,
            () -> PubsubProtoToBigQuery.writeToBigQuery(opts, null));

    assertThat(thrown).hasMessageThat().contains(bogusSchemaPath);
}
Also used : PubSubProtoToBigQueryOptions(com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions) Test(org.junit.Test)

Example 40 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

In class PubSubCdcToBigQuery, method main:

/**
 * Command-line entry point. Parses and validates {@code args} into {@link Options} and hands them
 * to {@link PubSubCdcToBigQuery#run(Options)}; the value returned by {@code run} is discarded, so
 * this method starts the pipeline without waiting for its execution to finish.
 *
 * <p>Callers that need blocking execution should call {@link PubSubCdcToBigQuery#run(Options)}
 * themselves and invoke {@code result.waitUntilFinish()} on the returned {@link PipelineResult}.
 *
 * @param args The command-line args passed by the executor.
 */
public static void main(String[] args) {
    run(PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class));
}
Also used : InputUDFOptions(com.google.cloud.teleport.v2.transforms.UDFTextTransformer.InputUDFOptions) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions)

Aggregations

Test (org.junit.Test) 63, PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple) 25, FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement) 20, Pipeline (org.apache.beam.sdk.Pipeline) 19, CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry) 19, BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable) 15, GenericRecord (org.apache.avro.generic.GenericRecord) 12, Category (org.junit.experimental.categories.Category) 12, Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) 10, BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition) 10, PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) 9, TableRow (com.google.api.services.bigquery.model.TableRow) 8, DataplexClient (com.google.cloud.teleport.v2.clients.DataplexClient) 8, FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions) 8, KV (org.apache.beam.sdk.values.KV) 8, ArrayList (java.util.ArrayList) 7, ElasticsearchWriteOptions (com.google.cloud.teleport.v2.elasticsearch.options.ElasticsearchWriteOptions) 6, GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions) 6, FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions) 6, PubSubProtoToBigQueryOptions (com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions) 6