Use of com.google.cloud.teleport.v2.templates.spanner.ProcessInformationSchema in project DataflowTemplates by GoogleCloudPlatform.
From the class ProcessInformationSchemaIntegrationTest, method canCreateShadowTablesForAllDataTables:
@Test
public void canCreateShadowTablesForAllDataTables() throws Exception {
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
  Ddl testDdl = getTestDdlBuilder().build();
  createDb(testDdl);

  testPipeline.apply(
      "Process Information Schema",
      new ProcessInformationSchema(
          sourceConfig, /* shouldCreateShadowTables= */ true, "shadow", "oracle"));
  PipelineResult testResult = testPipeline.run();
  testResult.waitUntilFinish();

  Ddl finalDdl = readDdl(testDb);
  Table shadowTable = finalDdl.table("shadow_Table");
  Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
  assertNotNull(shadowTable);
  assertNotNull(shadowTableInterleaved);
  assertEquals(4, finalDdl.allTables().size());

  assertThat(shadowTable.primaryKeys(), is(testDdl.table("Table").primaryKeys()));
  assertEquals(shadowTable.columns().size(), testDdl.table("Table").primaryKeys().size() + 2);
  assertThat(
      shadowTableInterleaved.primaryKeys(),
      is(testDdl.table("Table_interleaved").primaryKeys()));
  assertEquals(
      shadowTableInterleaved.columns().size(),
      testDdl.table("Table_interleaved").primaryKeys().size() + 2);
}
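The getTestDdlBuilder() helper is not reproduced on this page. A minimal sketch of what it plausibly builds, assuming two data tables named "Table" and "Table_interleaved" (consistent with the assertion that the final schema holds four tables: the two data tables plus their two shadow tables). The payload columns and the interleaving are assumptions; only the table names and key shapes are implied by the asserts above.

// Hypothetical sketch: the real getTestDdlBuilder() lives in the test class and may
// define more columns; only the table names and key shapes are implied by the asserts.
private Ddl.Builder getTestDdlBuilder() {
  return Ddl.builder()
      .createTable("Table")
      .column("ID").int64().endColumn()
      .column("data").string().max().endColumn() // assumed payload column
      .primaryKey().asc("ID").end()
      .endTable()
      .createTable("Table_interleaved")
      .column("ID").int64().endColumn()
      .column("ID2").int64().endColumn()
      .column("data").string().max().endColumn() // assumed payload column
      .primaryKey().asc("ID").asc("ID2").end()
      .interleaveInParent("Table") // assumed from the table's name
      .endTable();
}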
Use of com.google.cloud.teleport.v2.templates.spanner.ProcessInformationSchema in project DataflowTemplates by GoogleCloudPlatform.
From the class ProcessInformationSchemaIntegrationTest, method canCreateMissingShadowTables:
@Test
public void canCreateMissingShadowTables() throws Exception {
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
  // Pre-create the shadow table for "Table" so that only the shadow table
  // for "Table_interleaved" is missing when the transform runs.
  Ddl testDdl =
      getTestDdlBuilder()
          .createTable("shadow_Table")
          .column("ID").int64().endColumn()
          .column("version").int64().endColumn()
          .primaryKey().asc("ID").end()
          .endTable()
          .build();
  createDb(testDdl);

  testPipeline.apply(
      "Process Information Schema",
      new ProcessInformationSchema(
          sourceConfig, /* shouldCreateShadowTables= */ true, "shadow", "oracle"));
  PipelineResult testResult = testPipeline.run();
  testResult.waitUntilFinish();

  Ddl finalDdl = readDdl(testDb);
  assertEquals(4, finalDdl.allTables().size());

  Table shadowTable = finalDdl.table("shadow_Table");
  Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
  assertNotNull(shadowTable);
  assertNotNull(shadowTableInterleaved);
  assertThat(
      shadowTableInterleaved.primaryKeys(),
      is(testDdl.table("Table_interleaved").primaryKeys()));
  assertEquals(
      shadowTableInterleaved.columns().size(),
      testDdl.table("Table_interleaved").primaryKeys().size() + 2);
}
Use of com.google.cloud.teleport.v2.templates.spanner.ProcessInformationSchema in project DataflowTemplates by GoogleCloudPlatform.
From the class ProcessInformationSchemaIntegrationTest, method canFlagProtectShadowTableCreation:
@Test
public void canFlagProtectShadowTableCreation() throws Exception {
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
  Ddl testDdl = getTestDdlBuilder().build();
  createDb(testDdl);

  testPipeline.apply(
      "Read Information Schema",
      new ProcessInformationSchema(
          sourceConfig, /* shouldCreateShadowTables= */ false, "shadow", "oracle"));
  PipelineResult testResult = testPipeline.run();
  testResult.waitUntilFinish();

  Ddl finalDdl = readDdl(testDb);
  Table shadowTable = finalDdl.table("shadow_Table");
  Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
  assertNull(shadowTable);
  assertNull(shadowTableInterleaved);
  assertEquals(2, finalDdl.allTables().size());
}
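All three tests above read the schema back with readDdl(testDb), which is also not shown on this page. A plausible sketch, assuming the helper scans the test database's information schema with the template's InformationSchemaScanner; the getDbClient accessor on spannerServer is a hypothetical name.

// Hypothetical helper: the real readDdl(testDb) may construct the Spanner
// client differently; InformationSchemaScanner is the scanner the template uses.
private Ddl readDdl(String db) {
  DatabaseClient client = spannerServer.getDbClient(db); // hypothetical accessor
  try (ReadOnlyTransaction ctx = client.readOnlyTransaction()) {
    // Walks INFORMATION_SCHEMA and rebuilds the Ddl object for assertions.
    return new InformationSchemaScanner(ctx).scan();
  }
}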
Use of com.google.cloud.teleport.v2.templates.spanner.ProcessInformationSchema in project DataflowTemplates by GoogleCloudPlatform.
From the class DataStreamToSpanner, method run:
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options the execution parameters to the pipeline
 * @return the result of the pipeline execution
 */
public static PipelineResult run(Options options) {
  /*
   * Stages:
   *   1) Ingest and normalize data to FailsafeElement with JSON strings
   *   2) Write JSON strings to Cloud Spanner
   *   3) Write failures to the GCS dead-letter queue
   */
  Pipeline pipeline = Pipeline.create(options);
  DeadLetterQueueManager dlqManager = buildDlqManager(options);

  /*
   * Stage 1: Ingest/normalize data to FailsafeElement with JSON strings and
   * read the Cloud Spanner information schema.
   *   a) Prepare the Spanner config and process the information schema
   *   b) Read DataStream data from GCS into JSON-string FailsafeElements
   *   c) Reconsume dead-letter queue data from GCS into JSON-string FailsafeElements
   *   d) Flatten the DataStream and DLQ streams
   */
  // Prepare the Spanner config.
  SpannerConfig spannerConfig =
      ExposedSpannerConfig.create()
          .withHost(ValueProvider.StaticValueProvider.of(options.getSpannerHost()))
          .withInstanceId(ValueProvider.StaticValueProvider.of(options.getInstanceId()))
          .withDatabaseId(ValueProvider.StaticValueProvider.of(options.getDatabaseId()));

  /*
   * Process the information schema:
   *   1) Read the information schema from the destination Cloud Spanner database
   *   2) Check whether shadow tables are present and create them if necessary
   *   3) Return the new information schema
   */
  PCollection<Ddl> ddl =
      pipeline.apply(
          "Process Information Schema",
          new ProcessInformationSchema(
              spannerConfig,
              options.getShouldCreateShadowTables(),
              options.getShadowTablePrefix(),
              options.getDatastreamSourceType()));
  PCollectionView<Ddl> ddlView = ddl.apply("Cloud Spanner DDL as view", View.asSingleton());

  PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
      pipeline.apply(
          new DataStreamIO(
                  options.getStreamName(),
                  options.getInputFilePattern(),
                  options.getInputFileFormat(),
                  options.getGcsPubSubSubscription(),
                  options.getRfcStartDateTime())
              .withFileReadConcurrency(options.getFileReadConcurrency()));

  // Elements sent to the dead-letter queue are reconsumed here. The DLQManager,
  // built from the pipeline options, is in charge of assembling the pieces of the DLQ.
  PCollectionTuple reconsumedElements =
      dlqManager.getReconsumerDataTransform(
          pipeline.apply(dlqManager.dlqReconsumer(options.getDlqRetryMinutes())));
  PCollection<FailsafeElement<String, String>> dlqJsonRecords =
      reconsumedElements
          .get(DeadLetterQueueManager.RETRYABLE_ERRORS)
          .setCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
  PCollection<FailsafeElement<String, String>> jsonRecords =
      PCollectionList.of(datastreamJsonRecords)
          .and(dlqJsonRecords)
          .apply(Flatten.pCollections())
          .apply("Reshuffle", Reshuffle.viaRandomKey());

  /*
   * Stage 2: Write records to Cloud Spanner.
   */
  SpannerTransactionWriter.Result spannerWriteResults =
      jsonRecords.apply(
          "Write events to Cloud Spanner",
          new SpannerTransactionWriter(
              spannerConfig,
              ddlView,
              options.getShadowTablePrefix(),
              options.getDatastreamSourceType()));

  /*
   * Stage 3: Write failures to the GCS dead-letter queue.
   *   a) Retryable errors are written to the retry DLQ directory
   *   b) Severe errors are written to the severe DLQ directory
   */
  spannerWriteResults
      .retryableErrors()
      .apply(
          "DLQ: Write retryable Failures to GCS",
          MapElements.via(new StringDeadLetterQueueSanitizer()))
      .setCoder(StringUtf8Coder.of())
      .apply(
          "Write To DLQ",
          DLQWriteTransform.WriteDLQ.newBuilder()
              .withDlqDirectory(dlqManager.getRetryDlqDirectoryWithDateTime())
              .withTmpDirectory(dlqManager.getRetryDlqDirectory() + "tmp/")
              .build());

  PCollection<FailsafeElement<String, String>> dlqErrorRecords =
      reconsumedElements
          .get(DeadLetterQueueManager.PERMANENT_ERRORS)
          .setCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
  PCollection<FailsafeElement<String, String>> permanentErrors =
      PCollectionList.of(dlqErrorRecords)
          .and(spannerWriteResults.permanentErrors())
          .apply(Flatten.pCollections())
          .apply("Reshuffle", Reshuffle.viaRandomKey());
  permanentErrors
      .apply(
          "DLQ: Write Severe errors to GCS",
          MapElements.via(new StringDeadLetterQueueSanitizer()))
      .setCoder(StringUtf8Coder.of())
      .apply(
          "Write To DLQ",
          DLQWriteTransform.WriteDLQ.newBuilder()
              .withDlqDirectory(dlqManager.getSevereDlqDirectoryWithDateTime())
              .withTmpDirectory(dlqManager.getSevereDlqDirectory() + "tmp/")
              .build());

  // Execute the pipeline and return the result.
  return pipeline.run();
}
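The surrounding main entry point is not shown on this page. A minimal sketch of how run(Options) is typically invoked, assuming Options is the template's PipelineOptions subinterface and that it extends Beam's StreamingOptions (the setStreaming call is an assumption):

// Hypothetical entry point; the real main() may register coders or validate
// option combinations before handing off to run(Options).
public static void main(String[] args) {
  Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setStreaming(true); // assumed: the template consumes a continuous DataStream feed
  run(options);
}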
Use of com.google.cloud.teleport.v2.templates.spanner.ProcessInformationSchema in project DataflowTemplates by GoogleCloudPlatform.
From the class SpannerStreamingWriteIntegrationTest, method constructAndRunPipeline:
private void constructAndRunPipeline(PCollection<FailsafeElement<String, String>> jsonRecords) {
  String shadowTablePrefix = "shadow";
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
  PCollection<Ddl> ddl =
      testPipeline.apply(
          "Process Information Schema",
          new ProcessInformationSchema(sourceConfig, true, shadowTablePrefix, "oracle"));
  PCollectionView<Ddl> ddlView = ddl.apply("Cloud Spanner DDL as view", View.asSingleton());
  jsonRecords.apply(
      "Write events to Cloud Spanner",
      new SpannerTransactionWriter(sourceConfig, ddlView, shadowTablePrefix, "oracle"));

  PipelineResult testResult = testPipeline.run();
  testResult.waitUntilFinish();
}
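Callers of constructAndRunPipeline supply the jsonRecords input themselves. A sketch of how a test might build that PCollection from an in-memory change event; the payload shape below is illustrative only, not the exact DataStream change-event format the real test uses:

// Hypothetical helper: the event JSON is invented for illustration.
private PCollection<FailsafeElement<String, String>> buildJsonRecords() {
  String event = "{\"_metadata_table\":\"Table\",\"ID\":1,\"data\":\"a\"}";
  return testPipeline.apply(
      "Create events",
      Create.of(FailsafeElement.of(event, event))
          .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
}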