Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.
Class ProcessInformationSchemaIntegrationTest, method canCreateShadowTablesForAllDataTables.
/**
 * Runs {@link ProcessInformationSchema} with shadow-table creation enabled against a database
 * built from the test DDL, then checks the resulting schema.
 *
 * <p>Expectations after the pipeline finishes:
 *
 * <ul>
 *   <li>{@code shadow_Table} and {@code shadow_Table_interleaved} both exist;
 *   <li>the database holds four tables in total;
 *   <li>each shadow table has the same primary keys as its data table and exactly two columns
 *       more than the number of primary-key columns.
 * </ul>
 *
 * @throws Exception if database setup, the pipeline run, or the DDL read-back fails.
 */
@Test
public void canCreateShadowTablesForAllDataTables() throws Exception {
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
  Ddl testDdl = getTestDdlBuilder().build();
  createDb(testDdl);

  testPipeline.apply(
      "Process Information Schema",
      new ProcessInformationSchema(
          sourceConfig, /*shouldCreateShadowTables=*/ true, "shadow", "oracle"));
  PipelineResult testResult = testPipeline.run();
  testResult.waitUntilFinish();

  Ddl finalDdl = readDdl(testDb);
  Table shadowTable = finalDdl.table("shadow_Table");
  Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
  assertNotNull(shadowTable);
  assertNotNull(shadowTableInterleaved);
  assertEquals(4, finalDdl.allTables().size());

  // assertEquals takes (expected, actual): the expected column count is derived from the data
  // table's primary keys, the actual count comes from the shadow table that was created.
  assertThat(shadowTable.primaryKeys(), is(testDdl.table("Table").primaryKeys()));
  assertEquals(testDdl.table("Table").primaryKeys().size() + 2, shadowTable.columns().size());

  assertThat(
      shadowTableInterleaved.primaryKeys(), is(testDdl.table("Table_interleaved").primaryKeys()));
  assertEquals(
      testDdl.table("Table_interleaved").primaryKeys().size() + 2,
      shadowTableInterleaved.columns().size());
}
Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.
Class ProcessInformationSchemaIntegrationTest, method canCreateMissingShadowTables.
/**
 * Runs {@link ProcessInformationSchema} with shadow-table creation enabled against a database
 * in which {@code shadow_Table} already exists, and checks that the remaining shadow table is
 * still created.
 *
 * <p>The starting DDL is the test DDL plus a pre-made {@code shadow_Table} with INT64 columns
 * {@code ID} and {@code version}, keyed on {@code ID}. After the run the database is expected to
 * hold four tables, and {@code shadow_Table_interleaved} must mirror the primary keys of
 * {@code Table_interleaved} and carry exactly two columns beyond them.
 *
 * @throws Exception if database setup, the pipeline run, or the DDL read-back fails.
 */
@Test
public void canCreateMissingShadowTables() throws Exception {
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
  Ddl testDdl =
      getTestDdlBuilder()
          .createTable("shadow_Table")
          .column("ID")
          .int64()
          .endColumn()
          .column("version")
          .int64()
          .endColumn()
          .primaryKey()
          .asc("ID")
          .end()
          .endTable()
          .build();
  createDb(testDdl);

  testPipeline.apply(
      "Process Information Schema",
      new ProcessInformationSchema(
          sourceConfig, /*shouldCreateShadowTables=*/ true, "shadow", "oracle"));
  PipelineResult testResult = testPipeline.run();
  testResult.waitUntilFinish();

  Ddl finalDdl = readDdl(testDb);
  assertEquals(4, finalDdl.allTables().size());
  Table shadowTable = finalDdl.table("shadow_Table");
  Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
  assertNotNull(shadowTable);
  assertNotNull(shadowTableInterleaved);

  assertThat(
      shadowTableInterleaved.primaryKeys(), is(testDdl.table("Table_interleaved").primaryKeys()));
  // assertEquals takes (expected, actual): expected is derived from the data table's keys.
  assertEquals(
      testDdl.table("Table_interleaved").primaryKeys().size() + 2,
      shadowTableInterleaved.columns().size());
}
Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.
Class ProcessInformationSchemaIntegrationTest, method canFlagProtectShadowTableCreation.
/**
 * Runs {@link ProcessInformationSchema} with shadow-table creation switched off and checks that
 * the schema is left alone: neither {@code shadow_Table} nor {@code shadow_Table_interleaved}
 * exists afterwards, and the database still holds two tables.
 *
 * @throws Exception if database setup, the pipeline run, or the DDL read-back fails.
 */
@Test
public void canFlagProtectShadowTableCreation() throws Exception {
  SpannerConfig spannerConfig = spannerServer.getSpannerConfig(testDb);
  Ddl initialDdl = getTestDdlBuilder().build();
  createDb(initialDdl);

  ProcessInformationSchema readOnlyTransform =
      new ProcessInformationSchema(
          spannerConfig, /*shouldCreateShadowTables=*/ false, "shadow", "oracle");
  testPipeline.apply("Read Information Schema", readOnlyTransform);
  testPipeline.run().waitUntilFinish();

  Ddl ddlAfterRun = readDdl(testDb);
  assertNull(ddlAfterRun.table("shadow_Table"));
  assertNull(ddlAfterRun.table("shadow_Table_interleaved"));
  assertEquals(2, ddlAfterRun.allTables().size());
}
Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.
Class ShadowTableCreatorTest, method canConstructShadowTableForMySql.
/**
 * Checks the shadow table that a {@code "mysql"} {@link ShadowTableCreator} with prefix
 * {@code "shadow_"} builds for the {@code Users_interleaved} data table.
 *
 * <p>Verifies that:
 *
 * <ol>
 *   <li>the shadow table is named {@code shadow_Users_interleaved};
 *   <li>its primary keys equal those of the data table;
 *   <li>its column names are the data table's primary-key column names plus
 *       {@code timestamp}, {@code log_file} and {@code log_position}.
 * </ol>
 */
@Test
public void canConstructShadowTableForMySql() {
  Ddl testDdl = ProcessInformationSchemaTest.getTestDdl();
  ShadowTableCreator shadowTableCreator = new ShadowTableCreator("mysql", "shadow_");
  Table shadowTable = shadowTableCreator.constructShadowTable(testDdl, "Users_interleaved");

  // (1) Name of the shadow table. assertEquals takes (expected, actual).
  assertEquals("shadow_Users_interleaved", shadowTable.name());

  // (2) Primary keys match the data table's.
  assertThat(shadowTable.primaryKeys(), is(testDdl.table("Users_interleaved").primaryKeys()));

  // (3) Columns are the primary-key columns plus the MySQL sequence columns.
  Set<String> columns =
      shadowTable.columns().stream().map(c -> c.name()).collect(Collectors.toSet());
  Set<String> expectedColumns =
      testDdl.table("Users_interleaved").primaryKeys().stream()
          .map(c -> c.name())
          .collect(Collectors.toSet());
  expectedColumns.add("timestamp");
  expectedColumns.add("log_file");
  expectedColumns.add("log_position");
  assertThat(columns, is(expectedColumns));
}
Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.
Class GCSToElasticsearch, method run.
/**
 * Assembles the CSV-to-Elasticsearch pipeline from the supplied options and launches it.
 *
 * <p>The pipeline reads CSV input, converts each line to a JSON string, writes the successfully
 * converted documents to Elasticsearch, and routes elements that failed conversion to the
 * configured dead-letter table.
 *
 * @param options The execution options.
 * @return The pipeline result.
 */
private static PipelineResult run(GCSToElasticsearchOptions options) {
  Pipeline pipeline = Pipeline.create(options);

  // The failsafe-element coder must be registered on the pipeline before any transform is applied.
  pipeline
      .getCoderRegistry()
      .registerCoderForType(
          FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(), FAILSAFE_ELEMENT_CODER);

  // Files with a header row cannot be combined with a UDF or a JSON schema.
  if (options.getContainsHeaders()) {
    boolean udfOrSchemaConfigured =
        options.getJavascriptTextTransformGcsPath() != null
            || options.getJsonSchemaPath() != null;
    checkArgument(
        !udfOrSchemaConfigured, "Cannot parse file containing headers with UDF or Json schema.");
  }

  // Retry settings are all-or-nothing: either both are absent or both are present.
  boolean attemptsUnset = options.getMaxRetryAttempts() == null;
  boolean durationUnset = options.getMaxRetryDuration() == null;
  checkArgument(
      attemptsUnset == durationUnset,
      "To specify retry configuration both max attempts and max duration must be set.");

  /*
   * Steps 1 and 2: read the CSV input via {@link CsvConverters.ReadCsv}, then turn each line
   * into a JSON string via {@link CsvConverters.LineToFailsafeJson}. The resulting tuple holds
   * the successful output under PROCESSING_OUT and the failures under
   * PROCESSING_DEADLETTER_OUT.
   */
  PCollectionTuple conversionResults =
      pipeline
          .apply(
              "ReadCsv",
              CsvConverters.ReadCsv.newBuilder()
                  .setCsvFormat(options.getCsvFormat())
                  .setDelimiter(options.getDelimiter())
                  .setHasHeaders(options.getContainsHeaders())
                  .setInputFileSpec(options.getInputFileSpec())
                  .setHeaderTag(CSV_HEADERS)
                  .setLineTag(CSV_LINES)
                  .setFileEncoding(options.getCsvFileEncoding())
                  .build())
          .apply(
              "ConvertLine",
              CsvConverters.LineToFailsafeJson.newBuilder()
                  .setDelimiter(options.getDelimiter())
                  .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
                  .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
                  .setJsonSchemaPath(options.getJsonSchemaPath())
                  .setHeaderTag(CSV_HEADERS)
                  .setLineTag(CSV_LINES)
                  .setUdfOutputTag(PROCESSING_OUT)
                  .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
                  .build());

  /*
   * Step 3a: unwrap the payload of every successfully converted element and write it to
   * Elasticsearch using {@link WriteToElasticsearch}.
   */
  conversionResults
      .get(PROCESSING_OUT)
      .apply(
          "GetJsonDocuments",
          MapElements.into(TypeDescriptors.strings()).via(FailsafeElement::getPayload))
      .apply(
          "WriteToElasticsearch",
          WriteToElasticsearch.newBuilder()
              .setOptions(options.as(GCSToElasticsearchOptions.class))
              .build());

  /*
   * Step 3b: stamp every failed element with a newly constructed Instant and write it to the
   * dead-letter table using {@link WriteStringMessageErrors}.
   */
  conversionResults
      .get(PROCESSING_DEADLETTER_OUT)
      .apply(
          "AddTimestamps",
          WithTimestamps.of((FailsafeElement<String, String> failedElement) -> new Instant()))
      .apply(
          "WriteFailedElementsToBigQuery",
          WriteStringMessageErrors.newBuilder()
              .setErrorRecordsTable(options.getDeadletterTable())
              .setErrorRecordsTableSchema(SchemaUtils.DEADLETTER_SCHEMA)
              .build());

  return pipeline.run();
}
Aggregations