Example usage of com.google.cloud.teleport.v2.options.SpannerChangeStreamsToGcsOptions in the GoogleCloudPlatform/DataflowTemplates project — class SpannerChangeStreamsToGcsTest, method testInvalidWindowDuration.
@Test
public void testInvalidWindowDuration() {
  // A window duration string that cannot be parsed must surface as an
  // IllegalArgumentException with this exact message.
  exception.expect(IllegalArgumentException.class);
  exception.expectMessage("The window duration must be greater than 0!");

  SpannerChangeStreamsToGcsOptions options =
      PipelineOptionsFactory.create().as(SpannerChangeStreamsToGcsOptions.class);
  options.setOutputFileFormat(FileFormat.AVRO);
  options.setGcsOutputDirectory(fakeDir);
  options.setOutputFilenamePrefix(FILENAME_PREFIX);
  options.setNumShards(NUM_SHARDS);
  options.setTempLocation(fakeTempLocation);
  options.setWindowDuration("invalidWindowDuration");

  Pipeline testPipeline = Pipeline.create(options);
  Timestamp start = Timestamp.now();
  Timestamp end = Timestamp.now();

  // The Spanner connection details are placeholders; the pipeline is expected
  // to fail while parsing the window duration, before any RPC is made.
  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withProjectId("project")
          .withInstanceId("instance")
          .withDatabaseId("db");

  testPipeline
      .apply(
          SpannerIO.readChangeStream()
              .withSpannerConfig(spannerConfig)
              .withMetadataInstance("instance")
              .withMetadataDatabase("db")
              .withChangeStreamName("changestream")
              .withInclusiveStartAt(start)
              .withInclusiveEndAt(end)
              .withRpcPriority(RpcPriority.HIGH))
      .apply(
          "Creating " + options.getWindowDuration() + " Window",
          Window.into(FixedWindows.of(DurationUtils.parseDuration(options.getWindowDuration()))))
      .apply(
          "Write To GCS",
          FileFormatFactorySpannerChangeStreams.newBuilder().setOptions(options).build());
  testPipeline.run();
}
Example usage of com.google.cloud.teleport.v2.options.SpannerChangeStreamsToGcsOptions in the GoogleCloudPlatform/DataflowTemplates project — class SpannerChangeStreamsToGcsTest, method testWriteToGCSAvro.
@Test
@Category(IntegrationTest.class)
public // mvn -Dexcluded.spanner.tests="" -Dtest=SpannerChangeStreamsToGcsTest test
void testWriteToGCSAvro() throws Exception {
  // Provision a fresh test database and local output/temp directories.
  String dbName = generateDatabaseName();
  fakeDir = tmpDir.newFolder("output").getAbsolutePath();
  fakeTempLocation = tmpDir.newFolder("temporaryLocation").getAbsolutePath();
  spannerServer.dropDatabase(dbName);

  // DDL: one table plus a change stream watching it.
  List<String> ddl = new ArrayList<>();
  final String createTable =
      "CREATE TABLE "
          + TEST_TABLE
          + " ("
          + "user_id INT64 NOT NULL,"
          + "name STRING(MAX) "
          + ") PRIMARY KEY(user_id)";
  final String createChangeStream =
      "CREATE CHANGE STREAM " + TEST_CHANGE_STREAM + " FOR Users";
  ddl.add(createTable);
  ddl.add(createChangeStream);
  spannerServer.createDatabase(dbName, ddl);

  Timestamp startTimestamp = Timestamp.now();

  // Two inserts committed in a single write; since they land in one
  // transaction they are expected to surface as one data change record.
  List<Mutation> rows = new ArrayList<>();
  rows.add(
      Mutation.newInsertBuilder(TEST_TABLE).set("user_id").to(1).set("name").to("Name1").build());
  rows.add(
      Mutation.newInsertBuilder(TEST_TABLE).set("user_id").to(2).set("name").to("Name2").build());
  spannerServer.getDbClient(dbName).write(rows);

  Timestamp endTimestamp = Timestamp.now();

  // Configure the template to stream changes from [startTimestamp, endTimestamp]
  // into Avro files under fakeDir.
  SpannerChangeStreamsToGcsOptions options =
      PipelineOptionsFactory.create().as(SpannerChangeStreamsToGcsOptions.class);
  options.setSpannerProjectId(TEST_PROJECT);
  options.setSpannerInstanceId(TEST_INSTANCE);
  options.setSpannerDatabase(dbName);
  options.setSpannerMetadataInstanceId(TEST_INSTANCE);
  options.setSpannerMetadataDatabase(dbName);
  options.setSpannerChangeStreamName(TEST_CHANGE_STREAM);
  options.setStartTimestamp(startTimestamp.toString());
  options.setEndTimestamp(endTimestamp.toString());
  options.setExperiments(new ArrayList<>());
  options.setOutputFileFormat(FileFormat.AVRO);
  options.setGcsOutputDirectory(fakeDir);
  options.setOutputFilenamePrefix(AVRO_FILENAME_PREFIX);
  options.setNumShards(NUM_SHARDS);
  options.setTempLocation(fakeTempLocation);

  // Run the pipeline and block until it completes.
  PipelineResult result = run(options);
  result.waitUntilFinish();

  // Read the Avro output back and verify the generated data change records.
  PCollection<com.google.cloud.teleport.v2.DataChangeRecord> dataChangeRecords =
      pipeline.apply(
          "readRecords",
          AvroIO.read(com.google.cloud.teleport.v2.DataChangeRecord.class)
              .from(fakeDir + "/avro-output-*.avro"));
  PAssert.that(dataChangeRecords).satisfies(new VerifyDataChangeRecordAvro());
  pipeline.run();

  // Clean up the test database.
  spannerServer.dropDatabase(dbName);
}
Example usage of com.google.cloud.teleport.v2.options.SpannerChangeStreamsToGcsOptions in the GoogleCloudPlatform/DataflowTemplates project — class SpannerChangeStreamsToGcsTest, method testFileFormatFactoryInvalid.
/**
 * Test whether {@link FileFormatFactory} maps the output file format to the transform to be
 * carried out. And throws illegal argument exception if invalid file format is passed.
 */
@Test
public void testFileFormatFactoryInvalid() {
  // PARQUET is not a supported output format; the factory must reject it.
  exception.expect(IllegalArgumentException.class);
  exception.expectMessage("Invalid output format:PARQUET. Supported output formats: TEXT, AVRO");

  SpannerChangeStreamsToGcsOptions options =
      PipelineOptionsFactory.create().as(SpannerChangeStreamsToGcsOptions.class);
  options.setOutputFileFormat(FileFormat.PARQUET);
  options.setGcsOutputDirectory(fakeDir);
  options.setOutputFilenamePrefix(FILENAME_PREFIX);
  options.setNumShards(NUM_SHARDS);
  options.setTempLocation(fakeTempLocation);

  Pipeline testPipeline = Pipeline.create(options);
  Timestamp start = Timestamp.now();
  Timestamp end = Timestamp.now();

  // Placeholder Spanner coordinates; the failure is expected during graph
  // construction when the factory inspects the output format.
  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withProjectId("project")
          .withInstanceId("instance")
          .withDatabaseId("db");

  testPipeline
      .apply(
          SpannerIO.readChangeStream()
              .withSpannerConfig(spannerConfig)
              .withMetadataInstance("instance")
              .withMetadataDatabase("db")
              .withChangeStreamName("changestream")
              .withInclusiveStartAt(start)
              .withInclusiveEndAt(end)
              .withRpcPriority(RpcPriority.HIGH))
      .apply(
          "Creating " + options.getWindowDuration() + " Window",
          Window.into(FixedWindows.of(DurationUtils.parseDuration(options.getWindowDuration()))))
      .apply(
          "Write To GCS",
          FileFormatFactorySpannerChangeStreams.newBuilder().setOptions(options).build());
  testPipeline.run();
}
Example usage of com.google.cloud.teleport.v2.options.SpannerChangeStreamsToGcsOptions in the GoogleCloudPlatform/DataflowTemplates project — class SpannerChangeStreamsToGcsTest, method testWriteToGCSText.
@Test
@Category(IntegrationTest.class)
public // mvn -Dexcluded.spanner.tests="" -Dtest=SpannerChangeStreamsToGcsTest test
void testWriteToGCSText() throws Exception {
  // Provision a fresh test database and local output/temp directories.
  String dbName = generateDatabaseName();
  fakeDir = tmpDir.newFolder("output").getAbsolutePath();
  fakeTempLocation = tmpDir.newFolder("temporaryLocation").getAbsolutePath();
  spannerServer.dropDatabase(dbName);

  // DDL: one table plus a change stream watching it.
  List<String> ddl = new ArrayList<>();
  final String createTable =
      "CREATE TABLE "
          + TEST_TABLE
          + " ("
          + "user_id INT64 NOT NULL,"
          + "name STRING(MAX) "
          + ") PRIMARY KEY(user_id)";
  final String createChangeStream =
      "CREATE CHANGE STREAM " + TEST_CHANGE_STREAM + " FOR Users";
  ddl.add(createTable);
  ddl.add(createChangeStream);
  spannerServer.createDatabase(dbName, ddl);

  Timestamp startTimestamp = Timestamp.now();

  // Two inserts committed in a single write; since they land in one
  // transaction they are expected to surface as one data change record.
  List<Mutation> rows = new ArrayList<>();
  rows.add(
      Mutation.newInsertBuilder(TEST_TABLE).set("user_id").to(1).set("name").to("Name1").build());
  rows.add(
      Mutation.newInsertBuilder(TEST_TABLE).set("user_id").to(2).set("name").to("Name2").build());
  spannerServer.getDbClient(dbName).write(rows);

  Timestamp endTimestamp = Timestamp.now();

  // Configure the template to stream changes from [startTimestamp, endTimestamp]
  // into text files under fakeDir.
  SpannerChangeStreamsToGcsOptions options =
      PipelineOptionsFactory.create().as(SpannerChangeStreamsToGcsOptions.class);
  options.setSpannerProjectId(TEST_PROJECT);
  options.setSpannerInstanceId(TEST_INSTANCE);
  options.setSpannerDatabase(dbName);
  options.setSpannerMetadataInstanceId(TEST_INSTANCE);
  options.setSpannerMetadataDatabase(dbName);
  options.setSpannerChangeStreamName(TEST_CHANGE_STREAM);
  options.setStartTimestamp(startTimestamp.toString());
  options.setEndTimestamp(endTimestamp.toString());
  options.setExperiments(new ArrayList<>());
  options.setOutputFileFormat(FileFormat.TEXT);
  options.setGcsOutputDirectory(fakeDir);
  options.setOutputFilenamePrefix(TEXT_FILENAME_PREFIX);
  options.setNumShards(NUM_SHARDS);
  options.setTempLocation(fakeTempLocation);

  // Run the pipeline and block until it completes.
  PipelineResult result = run(options);
  result.waitUntilFinish();

  // Read the text output back and verify the generated data change records.
  // (Note: this is the TEXT variant; the original comment mentioning an Avro
  // file was a copy-paste slip.)
  PCollection<String> dataChangeRecords =
      pipeline.apply("readRecords", TextIO.read().from(fakeDir + "/text-output-*.txt"));
  PAssert.that(dataChangeRecords).satisfies(new VerifyDataChangeRecordText());
  pipeline.run();

  // Clean up the test database.
  spannerServer.dropDatabase(dbName);
}
Example usage of com.google.cloud.teleport.v2.options.SpannerChangeStreamsToGcsOptions in the GoogleCloudPlatform/DataflowTemplates project — class SpannerChangeStreamsToGcs, method main.
/**
 * Template entry point: parses command-line arguments into
 * {@link SpannerChangeStreamsToGcsOptions} and launches the pipeline.
 */
public static void main(String[] args) {
  LOG.info("Starting Input Files to GCS");
  SpannerChangeStreamsToGcsOptions pipelineOptions =
      PipelineOptionsFactory.fromArgs(args).as(SpannerChangeStreamsToGcsOptions.class);
  run(pipelineOptions);
}
Aggregations