Search in sources :

Example 6 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

From the class BuildReadFromTableOperations, method expand:

@Override
public PCollection<ReadOperation> expand(PCollection<Ddl> ddl) {
    // Expands each incoming Ddl into one full-table read (ReadOperation) per exported table.
    return ddl.apply(
        "Read from table operations",
        ParDo.of(
            new DoFn<Ddl, ReadOperation>() {

                @ProcessElement
                public void processElement(ProcessContext context) {
                    Ddl database = context.element();
                    // The tables option is a comma-separated list; a blank value means
                    // "no explicit selection" (an empty list is passed to the filter).
                    String requestedTables = tables.get();
                    List<String> tablesList =
                        requestedTables.trim().isEmpty()
                            ? Collections.emptyList()
                            : Arrays.asList(requestedTables.split(",\\s*"));
                    for (Table table : getFilteredTables(database, tablesList)) {
                        // Generated columns are excluded from the exported column list.
                        String columnsListAsString =
                            table.columns().stream()
                                .filter(column -> !column.isGenerated())
                                .map(column -> createColumnExpression(column))
                                .collect(Collectors.joining(","));
                        PartitionOptions partitionOptions =
                            PartitionOptions.newBuilder().setMaxPartitions(MAX_PARTITIONS).build();
                        // The query also selects the table name as a constant column
                        // (_spanner_table) so every row can be attributed to its source table.
                        ReadOperation read;
                        switch (database.dialect()) {
                            case GOOGLE_STANDARD_SQL:
                                // GoogleSQL: double quotes delimit the string literal,
                                // backticks delimit the table identifier.
                                read =
                                    ReadOperation.create()
                                        .withQuery(
                                            String.format(
                                                "SELECT \"%s\" AS _spanner_table, %s FROM `%s` AS t",
                                                table.name(), columnsListAsString, table.name()))
                                        .withPartitionOptions(partitionOptions);
                                break;
                            case POSTGRESQL:
                                // PostgreSQL: single quotes for the string literal,
                                // double quotes for the table identifier.
                                read =
                                    ReadOperation.create()
                                        .withQuery(
                                            String.format(
                                                "SELECT '%s' AS _spanner_table, %s FROM \"%s\" AS t",
                                                table.name(), columnsListAsString, table.name()))
                                        .withPartitionOptions(partitionOptions);
                                break;
                            default:
                                throw new IllegalArgumentException(
                                    String.format("Unrecognized dialect: %s", database.dialect()));
                        }
                        context.output(read);
                    }
                }
            }));
}
Also used : SpannerTableFilter.getFilteredTables(com.google.cloud.teleport.spanner.SpannerTableFilter.getFilteredTables) DoFn(org.apache.beam.sdk.transforms.DoFn) Arrays(java.util.Arrays) Table(com.google.cloud.teleport.spanner.ddl.Table) PCollection(org.apache.beam.sdk.values.PCollection) Collectors(java.util.stream.Collectors) PTransform(org.apache.beam.sdk.transforms.PTransform) ReadOperation(org.apache.beam.sdk.io.gcp.spanner.ReadOperation) List(java.util.List) ParDo(org.apache.beam.sdk.transforms.ParDo) Column(com.google.cloud.teleport.spanner.ddl.Column) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Collections(java.util.Collections) PartitionOptions(com.google.cloud.spanner.PartitionOptions) ValueProvider(org.apache.beam.sdk.options.ValueProvider) PartitionOptions(com.google.cloud.spanner.PartitionOptions) ReadOperation(org.apache.beam.sdk.io.gcp.spanner.ReadOperation) DoFn(org.apache.beam.sdk.transforms.DoFn) Table(com.google.cloud.teleport.spanner.ddl.Table) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl)

Example 7 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

From the class ExportRelatedTablesCheckTest, method exportWithoutTableSelection:

/* Validates behavior of exporting the full db without selecting any tables. */
@Test
public void exportWithoutTableSelection() throws Exception {
    // Three unrelated tables; AllTYPES exercises scalar and array column types.
    // spotless:off
    Ddl schema = Ddl.builder()
        .createTable("Users")
            .column("first_name").string().max().endColumn()
            .column("last_name").string().size(5).endColumn()
            .column("age").int64().endColumn()
            .primaryKey().asc("first_name").desc("last_name").end()
        .endTable()
        .createTable("People")
            .column("id").int64().notNull().endColumn()
            .column("name").string().max().endColumn()
            .column("age").int64().endColumn()
            .primaryKey().asc("id").end()
        .endTable()
        .createTable("AllTYPES")
            .column("first_name").string().max().endColumn()
            .column("last_name").string().size(5).endColumn()
            .column("id").int64().notNull().endColumn()
            .column("bool_field").bool().endColumn()
            .column("int64_field").int64().endColumn()
            .column("float64_field").float64().endColumn()
            .column("string_field").string().max().endColumn()
            .column("bytes_field").bytes().max().endColumn()
            .column("timestamp_field").timestamp().endColumn()
            .column("date_field").date().endColumn()
            .column("arr_bool_field").type(Type.array(Type.bool())).endColumn()
            .column("arr_int64_field").type(Type.array(Type.int64())).endColumn()
            .column("arr_float64_field").type(Type.array(Type.float64())).endColumn()
            .column("arr_string_field").type(Type.array(Type.string())).max().endColumn()
            .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn()
            .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn()
            .column("arr_date_field").type(Type.array(Type.date())).endColumn()
            .primaryKey().asc("first_name").desc("last_name").asc("id").end()
        .endTable()
        .build();
    // spotless:on
    createAndPopulate(schema, 100);

    // Export and import all tables: an empty --tableNames value selects the whole database.
    String destDb = destDbPrefix + fullExportChkpt;
    spannerServer.createDatabase(destDb, Collections.emptyList());
    exportAndImportDb(
        sourceDb,
        destDb,
        fullExportChkpt,
        "",
        /* relatedTables =*/ false,
        exportPipeline,
        importPipeline);

    // All original tables must be re-created during the import...
    compareExpectedTables(destDb, ImmutableList.of(allTypesTable, peopleTable, usersTable));
    // ...and every table must carry data, since nothing was excluded from the export.
    List<String> tablesWithData = ImmutableList.of(allTypesTable, peopleTable, usersTable);
    List<String> tablesWithoutData = Collections.emptyList();
    compareExpectedTableRows(destDb, tablesWithData, tablesWithoutData);
}
Also used : Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Test(org.junit.Test)

Example 8 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

From the class ExportRelatedTablesCheckTest, method exportSelectedAndNecessaryTablesInComplexDdl:

/* Validates that the pipeline performs a table-level export for a complex ddl when
   * --tableNames is provided, --shouldExportRelatedTables is set to true, and additional
   * (interleaved-parent / foreign-key-referenced) tables must be pulled into the export. */
@Test
public void exportSelectedAndNecessaryTablesInComplexDdl() throws Exception {
    // Twelve tables. Standalone: table_a, table_d, table_g, table_k, table_l.
    // Related: table_c interleaved in table_b with an FK to table_b; table_f interleaved
    // in table_e with an FK to table_e; table_i interleaved in table_h; table_j
    // interleaved in table_i (so table_j's ancestors are table_i and table_h).
    // spotless:off
    Ddl ddl = Ddl.builder().createTable("table_a").column("id1").int64().endColumn().column("id2").int64().endColumn().primaryKey().asc("id1").asc("id2").end().endTable().createTable("table_b").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().endTable().createTable("table_c").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().interleaveInParent("table_b").foreignKeys(ImmutableList.of("ALTER TABLE `table_c` ADD CONSTRAINT `fk_table_b` FOREIGN KEY (`id1`)" + " REFERENCES `table_b` (`id1`)")).endTable().createTable("table_d").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().endTable().createTable("table_e").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().endTable().createTable("table_f").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().interleaveInParent("table_e").foreignKeys(ImmutableList.of("ALTER TABLE `table_f` ADD CONSTRAINT `fk_table_f` FOREIGN KEY (`id2`)" + " REFERENCES `table_e` 
(`id2`)")).endTable().createTable("table_g").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().endTable().createTable("table_h").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().endTable().createTable("table_i").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().interleaveInParent("table_h").endTable().createTable("table_j").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().interleaveInParent("table_i").endTable().createTable("table_k").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().endTable().createTable("table_l").column("id1").int64().endColumn().column("id2").int64().endColumn().column("id3").int64().endColumn().primaryKey().asc("id1").asc("id2").asc("id3").end().endTable().build();
    // spotless:on
    createAndPopulate(ddl, /* numBatches = */
    100);
    // Export tables a, c, f, j together with whatever related tables the flag requires.
    spannerServer.createDatabase(destDbPrefix + chkptNine, Collections.emptyList());
    exportAndImportDb(sourceDb, destDbPrefix + chkptNine, chkptNine, String.join(",", ImmutableList.of(tableA, tableC, tableF, tableJ)), /* relatedTables =*/
    true, exportPipeline, importPipeline);
    // Compare the tables in the ddl to ensure all original tables were re-created during the import.
    compareExpectedTables(destDbPrefix + chkptNine, ImmutableList.of(tableA, tableB, tableC, tableD, tableE, tableF, tableG, tableH, tableI, tableJ, tableK, tableL));
    // Selected tables plus their related tables (b for c; e for f; h and i for j) must have
    // exported data, while the remaining unselected tables (d, k, l) must not.
    List<String> exportTables = ImmutableList.of(tableA, tableB, tableC, tableE, tableF, tableH, tableI, tableJ);
    List<String> unselectedTables = ImmutableList.of(tableD, tableK, tableL);
    compareExpectedTableRows(destDbPrefix + chkptNine, exportTables, unselectedTables);
}
Also used : Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Test(org.junit.Test)

Example 9 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

From the class ExportRelatedTablesCheckTest, method exportFullDbWithFlagTrue:

/* Validates that pipeline execution fails when --tableNames is not filled in and
 * --shouldExportRelatedTables is set to true. */
@Test
public void exportFullDbWithFlagTrue() throws Exception {
    // Three tables; table_c is interleaved in table_b.
    // spotless:off
    Ddl schema = Ddl.builder()
        .createTable("table_a")
            .column("id1").int64().endColumn()
            .column("id2").int64().endColumn()
            .primaryKey().asc("id1").asc("id2").end()
        .endTable()
        .createTable("table_b")
            .column("id1").int64().endColumn()
            .column("id2").int64().endColumn()
            .column("id3").int64().endColumn()
            .primaryKey().asc("id1").asc("id2").asc("id3").end()
        .endTable()
        .createTable("table_c")
            .column("id1").int64().endColumn()
            .column("id2").int64().endColumn()
            .column("id3").int64().endColumn()
            .primaryKey().asc("id1").asc("id2").asc("id3").end()
            .interleaveInParent("table_b")
        .endTable()
        .build();
    // spotless:on
    // Add to the referencedTable field (i.e. `table_c` would have a foreign key
    // constraint referencing `table_a`).
    schema.addNewReferencedTable("table_c", "table_a");
    createAndPopulate(schema, /* numBatches = */ 100);

    // Expected PipelineExecutionException: --shouldExportRelatedTables was set to true
    // but no tables were specified for export.
    String destDb = destDbPrefix + chkptFour;
    spannerServer.createDatabase(destDb, Collections.emptyList());
    assertThrows(
        PipelineExecutionException.class,
        () ->
            exportAndImportDb(
                sourceDb,
                destDb,
                chkptFour,
                // --tableNames would not be set, defaults to an empty string
                "",
                /* relatedTables =*/ true,
                exportPipeline,
                importPipeline));
}
Also used : Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Test(org.junit.Test)

Example 10 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

From the class ExportRelatedTablesCheckTest, method exportMultipleTables:

/* Validates behavior of exporting multiple unrelated tables. */
@Test
public void exportMultipleTables() throws Exception {
    // Three unrelated tables; AllTYPES exercises scalar and array column types.
    // spotless:off
    Ddl schema = Ddl.builder()
        .createTable("Users")
            .column("first_name").string().max().endColumn()
            .column("last_name").string().size(5).endColumn()
            .column("age").int64().endColumn()
            .primaryKey().asc("first_name").desc("last_name").end()
        .endTable()
        .createTable("People")
            .column("id").int64().notNull().endColumn()
            .column("name").string().max().endColumn()
            .column("age").int64().endColumn()
            .primaryKey().asc("id").end()
        .endTable()
        .createTable("AllTYPES")
            .column("first_name").string().max().endColumn()
            .column("last_name").string().size(5).endColumn()
            .column("id").int64().notNull().endColumn()
            .column("bool_field").bool().endColumn()
            .column("int64_field").int64().endColumn()
            .column("float64_field").float64().endColumn()
            .column("string_field").string().max().endColumn()
            .column("bytes_field").bytes().max().endColumn()
            .column("timestamp_field").timestamp().endColumn()
            .column("date_field").date().endColumn()
            .column("arr_bool_field").type(Type.array(Type.bool())).endColumn()
            .column("arr_int64_field").type(Type.array(Type.int64())).endColumn()
            .column("arr_float64_field").type(Type.array(Type.float64())).endColumn()
            .column("arr_string_field").type(Type.array(Type.string())).max().endColumn()
            .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn()
            .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn()
            .column("arr_date_field").type(Type.array(Type.date())).endColumn()
            .primaryKey().asc("first_name").desc("last_name").asc("id").end()
        .endTable()
        .build();
    // spotless:on
    createAndPopulate(schema, 100);

    // Export and import two specific tables from the database containing three tables.
    String destDb = destDbPrefix + multiTableChkpt;
    spannerServer.createDatabase(destDb, Collections.emptyList());
    exportAndImportDb(
        sourceDb,
        destDb,
        multiTableChkpt,
        usersTable + "," + allTypesTable,
        /* relatedTables =*/ false,
        exportPipeline,
        importPipeline);

    // All original tables must be re-created during the import...
    compareExpectedTables(destDb, ImmutableList.of(allTypesTable, peopleTable, usersTable));
    // ...but only the selected tables carry exported data; the unselected table must not.
    List<String> tablesWithData = ImmutableList.of(allTypesTable, usersTable);
    List<String> tablesWithoutData = ImmutableList.of(peopleTable);
    compareExpectedTableRows(destDb, tablesWithData, tablesWithoutData);
}
Also used : Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Test(org.junit.Test)

Aggregations

Ddl (com.google.cloud.teleport.spanner.ddl.Ddl)109 Test (org.junit.Test)91 Schema (org.apache.avro.Schema)34 GenericRecord (org.apache.avro.generic.GenericRecord)19 List (java.util.List)18 Struct (com.google.cloud.spanner.Struct)14 Collectors (java.util.stream.Collectors)14 KV (org.apache.beam.sdk.values.KV)14 SpannerTableFilter.getFilteredTables (com.google.cloud.teleport.spanner.SpannerTableFilter.getFilteredTables)12 Type (com.google.cloud.teleport.spanner.common.Type)12 Path (java.nio.file.Path)12 Collections (java.util.Collections)12 ImmutableList (com.google.common.collect.ImmutableList)11 IOException (java.io.IOException)11 Assert.assertEquals (org.junit.Assert.assertEquals)11 ReadImportManifest (com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest)10 ResolveDataFiles (com.google.cloud.teleport.spanner.TextImportTransform.ResolveDataFiles)10 BufferedWriter (java.io.BufferedWriter)10 Charset (java.nio.charset.Charset)10 RunWith (org.junit.runner.RunWith)9