Example 1 with Table

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.

From the class ProcessInformationSchemaIntegrationTest, the method canCreateShadowTablesForAllDataTables:

@Test
public void canCreateShadowTablesForAllDataTables() throws Exception {
    SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
    Ddl testDdl = getTestDdlBuilder().build();
    createDb(testDdl);
    testPipeline.apply(
        "Process Information Schema",
        new ProcessInformationSchema(
            sourceConfig, /* shouldCreateShadowTables= */ true, "shadow", "oracle"));
    PipelineResult testResult = testPipeline.run();
    testResult.waitUntilFinish();
    Ddl finalDdl = readDdl(testDb);
    Table shadowTable = finalDdl.table("shadow_Table");
    Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
    assertNotNull(shadowTable);
    assertNotNull(shadowTableInterleaved);
    assertEquals(4, finalDdl.allTables().size());
    assertThat(shadowTable.primaryKeys(), is(testDdl.table("Table").primaryKeys()));
    assertEquals(shadowTable.columns().size(), testDdl.table("Table").primaryKeys().size() + 2);
    assertThat(shadowTableInterleaved.primaryKeys(), is(testDdl.table("Table_interleaved").primaryKeys()));
    assertEquals(shadowTableInterleaved.columns().size(), testDdl.table("Table_interleaved").primaryKeys().size() + 2);
}
Also used : SpannerConfig(org.apache.beam.sdk.io.gcp.spanner.SpannerConfig) Table(com.google.cloud.teleport.v2.templates.spanner.ddl.Table) PipelineResult(org.apache.beam.sdk.PipelineResult) Ddl(com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl) Test(org.junit.Test) IntegrationTest(com.google.cloud.teleport.v2.spanner.IntegrationTest)
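
The helper getTestDdlBuilder() is not shown on this page. Below is a minimal sketch of what it might build, assuming the two data tables are named "Table" and "Table_interleaved" (the names the assertions use) and that the builder exposes Ddl.builder() and interleaveInParent; both of those, and the "childID" column, are assumptions — only the other builder calls appear elsewhere on this page.

private static Ddl.Builder getTestDdlBuilder() {
    // Hypothetical reconstruction: two data tables, the second interleaved
    // in the first. interleaveInParent and the column set are assumptions.
    return Ddl.builder()
        .createTable("Table")
        .column("ID").int64().endColumn()
        .primaryKey().asc("ID").end()
        .endTable()
        .createTable("Table_interleaved")
        .column("ID").int64().endColumn()
        .column("childID").int64().endColumn()
        .primaryKey().asc("ID").asc("childID").end()
        .interleaveInParent("Table") // assumption, implied by the table name
        .endTable();
}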

Example 2 with Table

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.

From the class ProcessInformationSchemaIntegrationTest, the method canCreateMissingShadowTables:

@Test
public void canCreateMissingShadowTables() throws Exception {
    SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
    Ddl testDdl =
        getTestDdlBuilder()
            .createTable("shadow_Table")
            .column("ID").int64().endColumn()
            .column("version").int64().endColumn()
            .primaryKey().asc("ID").end()
            .endTable()
            .build();
    createDb(testDdl);
    testPipeline.apply(
        "Process Information Schema",
        new ProcessInformationSchema(
            sourceConfig, /* shouldCreateShadowTables= */ true, "shadow", "oracle"));
    PipelineResult testResult = testPipeline.run();
    testResult.waitUntilFinish();
    Ddl finalDdl = readDdl(testDb);
    assertEquals(4, finalDdl.allTables().size());
    Table shadowTable = finalDdl.table("shadow_Table");
    Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
    assertNotNull(shadowTable);
    assertNotNull(shadowTableInterleaved);
    assertThat(shadowTableInterleaved.primaryKeys(), is(testDdl.table("Table_interleaved").primaryKeys()));
    assertEquals(shadowTableInterleaved.columns().size(), testDdl.table("Table_interleaved").primaryKeys().size() + 2);
}
Also used : SpannerConfig(org.apache.beam.sdk.io.gcp.spanner.SpannerConfig) Table(com.google.cloud.teleport.v2.templates.spanner.ddl.Table) PipelineResult(org.apache.beam.sdk.PipelineResult) Ddl(com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl) Test(org.junit.Test) IntegrationTest(com.google.cloud.teleport.v2.spanner.IntegrationTest)
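
The test above only asserts on the interleaved shadow table. A follow-up assertion one could add is sketched below, assuming the pipeline leaves the pre-created shadow_Table untouched rather than recreating it — the test name suggests this, but the page does not assert it (requires java.util.Arrays and java.util.HashSet):

    // Hypothetical extra assertion: the pre-existing shadow table keeps
    // exactly the two columns it was created with.
    Set<String> shadowColumns =
        shadowTable.columns().stream().map(c -> c.name()).collect(Collectors.toSet());
    assertThat(shadowColumns, is(new HashSet<>(Arrays.asList("ID", "version"))));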

Example 3 with Table

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.

From the class ProcessInformationSchemaIntegrationTest, the method canFlagProtectShadowTableCreation:

@Test
public void canFlagProtectShadowTableCreation() throws Exception {
    SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
    Ddl testDdl = getTestDdlBuilder().build();
    createDb(testDdl);
    testPipeline.apply("Read Information Schema", new ProcessInformationSchema(sourceConfig, /*shouldCreateShadowTables=*/
    false, "shadow", "oracle"));
    PipelineResult testResult = testPipeline.run();
    testResult.waitUntilFinish();
    Ddl finalDdl = readDdl(testDb);
    Table shadowTable = finalDdl.table("shadow_Table");
    Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
    assertNull(shadowTable);
    assertNull(shadowTableInterleaved);
    assertEquals(2, finalDdl.allTables().size());
}
Also used : SpannerConfig(org.apache.beam.sdk.io.gcp.spanner.SpannerConfig) Table(com.google.cloud.teleport.v2.templates.spanner.ddl.Table) PipelineResult(org.apache.beam.sdk.PipelineResult) Ddl(com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl) Test(org.junit.Test) IntegrationTest(com.google.cloud.teleport.v2.spanner.IntegrationTest)
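
Compared with Examples 1 and 2, the only difference here is the boolean flag passed to ProcessInformationSchema. A sketch of how a caller might wire that flag from its own configuration follows; the option getter name is invented for illustration and is not necessarily the template's real option:

    // Hypothetical wiring of the guard flag. The constructor signature
    // (SpannerConfig, boolean, String, String) matches the calls in the
    // tests above; getShouldCreateShadowTables() is an invented name.
    boolean createShadowTables = options.getShouldCreateShadowTables();
    pipeline.apply(
        "Process Information Schema",
        new ProcessInformationSchema(spannerConfig, createShadowTables, "shadow", "oracle"));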

Example 4 with Table

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.

From the class ShadowTableCreatorTest, the method canConstructShadowTableForMySql:

@Test
public void canConstructShadowTableForMySql() {
    Ddl testDdl = ProcessInformationSchemaTest.getTestDdl();
    ShadowTableCreator shadowTableCreator = new ShadowTableCreator("mysql", "shadow_");
    Table shadowTable = shadowTableCreator.constructShadowTable(testDdl, "Users_interleaved");
    /* Verify that:
     * (1) the shadow table has the expected name,
     * (2) its primary key columns match the data table's, and
     * (3) it carries the MySQL sequence information (log file and position)
     *     in addition to the primary key columns.
     */
    assertEquals(shadowTable.name(), "shadow_Users_interleaved");
    assertThat(shadowTable.primaryKeys(), is(testDdl.table("Users_interleaved").primaryKeys()));
    Set<String> columns =
        shadowTable.columns().stream().map(c -> c.name()).collect(Collectors.toSet());
    Set<String> expectedColumns =
        testDdl.table("Users_interleaved").primaryKeys().stream()
            .map(c -> c.name())
            .collect(Collectors.toSet());
    expectedColumns.add("timestamp");
    expectedColumns.add("log_file");
    expectedColumns.add("log_position");
    assertThat(columns, is(expectedColumns));
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) Ddl(com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl) Table(com.google.cloud.teleport.v2.templates.spanner.ddl.Table) Set(java.util.Set) Test(org.junit.Test) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Collectors(java.util.stream.Collectors) Assert.assertEquals(org.junit.Assert.assertEquals)
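
Examples 1 through 3 pass "oracle" as the source type, and Example 1 asserts exactly two columns beyond the primary key. By analogy with the MySQL test above, an Oracle variant might look like the sketch below; the "timestamp" and "scn" column names are assumptions inferred from that "+ 2" count, not confirmed by this page:

@Test
public void canConstructShadowTableForOracle() {
    Ddl testDdl = ProcessInformationSchemaTest.getTestDdl();
    ShadowTableCreator shadowTableCreator = new ShadowTableCreator("oracle", "shadow_");
    Table shadowTable = shadowTableCreator.constructShadowTable(testDdl, "Users_interleaved");
    assertEquals(shadowTable.name(), "shadow_Users_interleaved");
    assertThat(shadowTable.primaryKeys(), is(testDdl.table("Users_interleaved").primaryKeys()));
    Set<String> columns =
        shadowTable.columns().stream().map(c -> c.name()).collect(Collectors.toSet());
    Set<String> expectedColumns =
        testDdl.table("Users_interleaved").primaryKeys().stream()
            .map(c -> c.name())
            .collect(Collectors.toSet());
    expectedColumns.add("timestamp"); // assumed Oracle sequence column
    expectedColumns.add("scn"); // assumed Oracle sequence column
    assertThat(columns, is(expectedColumns));
}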

Example 5 with Table

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Table in the DataflowTemplates project by GoogleCloudPlatform.

From the class GCSToElasticsearch, the method run:

/**
 * Runs the pipeline to completion with the specified options.
 *
 * @param options The execution options.
 * @return The pipeline result.
 */
private static PipelineResult run(GCSToElasticsearchOptions options) {
    // Create the pipeline
    Pipeline pipeline = Pipeline.create(options);
    // Register the coder for pipeline
    CoderRegistry coderRegistry = pipeline.getCoderRegistry();
    coderRegistry.registerCoderForType(FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(), FAILSAFE_ELEMENT_CODER);
    // Throw an error if containsHeaders is true and a schema or UDF is also set.
    if (options.getContainsHeaders()) {
        checkArgument(
            options.getJavascriptTextTransformGcsPath() == null
                && options.getJsonSchemaPath() == null,
            "Cannot parse file containing headers with UDF or Json schema.");
    }
    // Throw an error if only one of the two retry configuration parameters is set.
    checkArgument(
        (options.getMaxRetryAttempts() == null && options.getMaxRetryDuration() == null)
            || (options.getMaxRetryAttempts() != null
                && options.getMaxRetryDuration() != null),
        "To specify retry configuration both max attempts and max duration must be set.");
    /*
     * Steps: 1) Read records from CSV(s) via {@link CsvConverters.ReadCsv}.
     *        2) Convert lines to JSON strings via {@link CsvConverters.LineToFailsafeJson}.
     *        3a) Write JSON strings as documents to Elasticsearch via {@link ElasticsearchIO}.
     *        3b) Write elements that failed processing to {@link org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO}.
     */
    PCollectionTuple convertedCsvLines =
        pipeline
            .apply(
                "ReadCsv",
                CsvConverters.ReadCsv.newBuilder()
                    .setCsvFormat(options.getCsvFormat())
                    .setDelimiter(options.getDelimiter())
                    .setHasHeaders(options.getContainsHeaders())
                    .setInputFileSpec(options.getInputFileSpec())
                    .setHeaderTag(CSV_HEADERS)
                    .setLineTag(CSV_LINES)
                    .setFileEncoding(options.getCsvFileEncoding())
                    .build())
            .apply(
                "ConvertLine",
                CsvConverters.LineToFailsafeJson.newBuilder()
                    .setDelimiter(options.getDelimiter())
                    .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
                    .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
                    .setJsonSchemaPath(options.getJsonSchemaPath())
                    .setHeaderTag(CSV_HEADERS)
                    .setLineTag(CSV_LINES)
                    .setUdfOutputTag(PROCESSING_OUT)
                    .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
                    .build());
    /*
     * Step 3a: Write elements that were successfully processed to Elasticsearch using {@link WriteToElasticsearch}.
     */
    convertedCsvLines
        .get(PROCESSING_OUT)
        .apply(
            "GetJsonDocuments",
            MapElements.into(TypeDescriptors.strings()).via(FailsafeElement::getPayload))
        .apply(
            "WriteToElasticsearch",
            WriteToElasticsearch.newBuilder()
                .setOptions(options.as(GCSToElasticsearchOptions.class))
                .build());
    /*
     * Step 3b: Write elements that failed processing to deadletter table via {@link BigQueryIO}.
     */
    convertedCsvLines
        .get(PROCESSING_DEADLETTER_OUT)
        .apply(
            "AddTimestamps",
            WithTimestamps.of((FailsafeElement<String, String> failures) -> new Instant()))
        .apply(
            "WriteFailedElementsToBigQuery",
            WriteStringMessageErrors.newBuilder()
                .setErrorRecordsTable(options.getDeadletterTable())
                .setErrorRecordsTableSchema(SchemaUtils.DEADLETTER_SCHEMA)
                .build());
    return pipeline.run();
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) GCSToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions) Instant(org.joda.time.Instant) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement)
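
The run method above expects already-parsed options. A minimal entry-point sketch using Beam's standard PipelineOptionsFactory follows (import org.apache.beam.sdk.options.PipelineOptionsFactory); the main method body is an assumption, since this page only shows run:

// Hypothetical entry point: parse command-line arguments into the
// template's options class and hand them to run().
public static void main(String[] args) {
    GCSToElasticsearchOptions options =
        PipelineOptionsFactory.fromArgs(args)
            .withValidation()
            .as(GCSToElasticsearchOptions.class);
    run(options).waitUntilFinish();
}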

Aggregations

Test (org.junit.Test): 24
BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 15
BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 12
Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter): 10
Ddl (com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl): 8
Table (com.google.cloud.teleport.v2.templates.spanner.ddl.Table): 8
FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement): 8
Pipeline (org.apache.beam.sdk.Pipeline): 8
ArrayList (java.util.ArrayList): 6
Set (java.util.Set): 6
TableRow (com.google.api.services.bigquery.model.TableRow): 5
PipelineResult (org.apache.beam.sdk.PipelineResult): 5
PCollection (org.apache.beam.sdk.values.PCollection): 5
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 5
Column (com.google.cloud.teleport.v2.templates.spanner.ddl.Column): 4
IndexColumn (com.google.cloud.teleport.v2.templates.spanner.ddl.IndexColumn): 4
IOException (java.io.IOException): 4
Collectors (java.util.stream.Collectors): 4
CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry): 4
WriteResult (org.apache.beam.sdk.io.gcp.bigquery.WriteResult): 4