Example 66 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

the class ExportTimestampTest method runExportWithTsTest.

/*
 * Validates database export behavior both without a timestamp specified
 * and with an explicit timestamp.
 */
@Test
public void runExportWithTsTest() throws Exception {
    Ddl ddl =
        Ddl.builder()
            .createTable("Users")
            .column("first_name").string().max().endColumn()
            .column("last_name").string().size(5).endColumn()
            .column("age").int64().endColumn()
            .primaryKey().asc("first_name").desc("last_name").end()
            .endTable()
            .createTable("AllTYPES")
            .column("first_name").string().max().endColumn()
            .column("last_name").string().size(5).endColumn()
            .column("id").int64().notNull().endColumn()
            .column("bool_field").bool().endColumn()
            .column("int64_field").int64().endColumn()
            .column("float64_field").float64().endColumn()
            .column("string_field").string().max().endColumn()
            .column("bytes_field").bytes().max().endColumn()
            .column("timestamp_field").timestamp().endColumn()
            .column("date_field").date().endColumn()
            .column("arr_bool_field").type(Type.array(Type.bool())).endColumn()
            .column("arr_int64_field").type(Type.array(Type.int64())).endColumn()
            .column("arr_float64_field").type(Type.array(Type.float64())).endColumn()
            .column("arr_string_field").type(Type.array(Type.string())).max().endColumn()
            .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn()
            .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn()
            .column("arr_date_field").type(Type.array(Type.date())).endColumn()
            .primaryKey().asc("first_name").desc("last_name").asc("id").end()
            .interleaveInParent("Users")
            .onDeleteCascade()
            .endTable()
            .build();
    // Create initial table and populate
    createAndPopulate(sourceDb, ddl, 100);
    // Export the database and note the timestamp ts1
    spannerServer.createDatabase(destDbPrefix + chkpt1, Collections.emptyList());
    exportAndImportDbAtTime(
        sourceDb, destDbPrefix + chkpt1, chkpt1, /* ts= */ "",
        exportPipeline1, importPipeline1);
    String chkPt1Ts = getCurrentTimestamp();
    Thread.sleep(2000);
    // Sleep for a couple of seconds and note the timestamp ts2
    String chkPt2Ts = getCurrentTimestamp();
    Thread.sleep(2000);
    // Add more records to the table, export the database and note the timestamp ts3
    spannerServer.populateRandomData(sourceDb, ddl, 100);
    spannerServer.createDatabase(destDbPrefix + chkpt3, Collections.emptyList());
    exportAndImportDbAtTime(
        sourceDb, destDbPrefix + chkpt3, chkpt3, /* ts= */ "",
        exportPipeline2, importPipeline2);
    String chkPt3Ts = getCurrentTimestamp();
    // Export and import the database, specifying timestamp ts1
    spannerServer.createDatabase(destDbPrefix + chkPt1WithTs, Collections.emptyList());
    exportAndImportDbAtTime(sourceDb, destDbPrefix + chkPt1WithTs, chkPt1WithTs, chkPt1Ts, exportPipeline3, importPipeline3);
    // Export and import the database, specifying timestamp ts2
    spannerServer.createDatabase(destDbPrefix + chkPt2WithTs, Collections.emptyList());
    exportAndImportDbAtTime(sourceDb, destDbPrefix + chkPt2WithTs, chkPt2WithTs, chkPt2Ts, exportPipeline4, importPipeline4);
    // Export and import the database, specifying timestamp ts3
    spannerServer.createDatabase(destDbPrefix + chkPt3WithTs, Collections.emptyList());
    exportAndImportDbAtTime(sourceDb, destDbPrefix + chkPt3WithTs, chkPt3WithTs, chkPt3Ts, exportPipeline5, importPipeline5);
    // Compare databases exported at ts1 and exported later specifying timestamp ts1
    compareDbs(destDbPrefix + chkpt1, destDbPrefix + chkPt1WithTs, comparePipeline1);
    // Compare databases exported at ts1 and exported later specifying timestamp ts2;
    // no rows changed between ts1 and ts2, so the contents should match
    compareDbs(destDbPrefix + chkpt1, destDbPrefix + chkPt2WithTs, comparePipeline2);
    // Compare databases exported at ts3 and exported later specifying timestamp ts3
    compareDbs(destDbPrefix + chkpt3, destDbPrefix + chkPt3WithTs, comparePipeline3);
}
Also used : Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Test(org.junit.Test)
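
The test above leans on a getCurrentTimestamp() helper to capture the time boundaries between exports. A minimal sketch of what such a helper could look like, assuming com.google.cloud.Timestamp; the body here is an assumption, not necessarily the actual implementation in ExportTimestampTest:

// Hypothetical sketch: capture the current time in the RFC 3339 format
// that Cloud Spanner accepts for timestamp-bound reads.
private String getCurrentTimestamp() {
    return com.google.cloud.Timestamp.now().toString();
}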

Example 67 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

the class ExportTimestampTest method runPgExportWithTsTest.

@Test
public void runPgExportWithTsTest() throws Exception {
    Ddl ddl =
        Ddl.builder(Dialect.POSTGRESQL)
            .createTable("Users")
            .column("first_name").pgVarchar().max().endColumn()
            .column("last_name").pgVarchar().size(5).endColumn()
            .column("age").pgInt8().endColumn()
            .primaryKey().asc("first_name").asc("last_name").end()
            .endTable()
            .createTable("AllTYPES")
            .column("first_name").pgVarchar().max().endColumn()
            .column("last_name").pgVarchar().size(5).endColumn()
            .column("id").pgInt8().notNull().endColumn()
            .column("bool_field").pgBool().endColumn()
            .column("int_field").pgInt8().endColumn()
            .column("float_field").pgFloat8().endColumn()
            .column("string_field").pgText().endColumn()
            .column("bytes_field").pgBytea().endColumn()
            .column("timestamp_field").pgTimestamptz().endColumn()
            .column("numeric_field").pgNumeric().endColumn()
            .column("date_field").pgDate().endColumn()
            .primaryKey().asc("first_name").asc("last_name").asc("id").end()
            .interleaveInParent("Users")
            .onDeleteCascade()
            .endTable()
            .build();
    // Create initial table and populate
    createAndPopulate(sourceDb, ddl, 100);
    // Export the database and note the timestamp ts1
    spannerServer.createPgDatabase(destDbPrefix + chkpt1, Collections.emptyList());
    exportAndImportDbAtTime(
        sourceDb, destDbPrefix + chkpt1, chkpt1, /* ts= */ "",
        exportPipeline1, importPipeline1);
    String chkPt1Ts = getCurrentTimestamp();
    Thread.sleep(2000);
    // Sleep for a couple of seconds and note the timestamp ts2
    String chkPt2Ts = getCurrentTimestamp();
    Thread.sleep(2000);
    // Add more records to the table, export the database and note the timestamp ts3
    spannerServer.populateRandomData(sourceDb, ddl, 100);
    spannerServer.createPgDatabase(destDbPrefix + chkpt3, Collections.emptyList());
    exportAndImportDbAtTime(
        sourceDb, destDbPrefix + chkpt3, chkpt3, /* ts= */ "",
        exportPipeline2, importPipeline2);
    String chkPt3Ts = getCurrentTimestamp();
    // Export and import the database, specifying timestamp ts1
    spannerServer.createPgDatabase(destDbPrefix + chkPt1WithTs, Collections.emptyList());
    exportAndImportDbAtTime(sourceDb, destDbPrefix + chkPt1WithTs, chkPt1WithTs, chkPt1Ts, exportPipeline3, importPipeline3);
    // Export and import the database, specifying timestamp ts2
    spannerServer.createPgDatabase(destDbPrefix + chkPt2WithTs, Collections.emptyList());
    exportAndImportDbAtTime(sourceDb, destDbPrefix + chkPt2WithTs, chkPt2WithTs, chkPt2Ts, exportPipeline4, importPipeline4);
    // Export and import the database, specifying timestamp ts3
    spannerServer.createPgDatabase(destDbPrefix + chkPt3WithTs, Collections.emptyList());
    exportAndImportDbAtTime(sourceDb, destDbPrefix + chkPt3WithTs, chkPt3WithTs, chkPt3Ts, exportPipeline5, importPipeline5);
    // Compare databases exported at ts1 and exported later specifying timestamp ts1
    compareDbs(destDbPrefix + chkpt1, destDbPrefix + chkPt1WithTs, comparePipeline1, Dialect.POSTGRESQL);
    // Compare databases exported at ts1 and exported later specifying timestamp ts2;
    // no rows changed between ts1 and ts2, so the contents should match
    compareDbs(destDbPrefix + chkpt1, destDbPrefix + chkPt2WithTs, comparePipeline2, Dialect.POSTGRESQL);
    // Compare databases exported at ts3 and exported later specifying timestamp ts3
    compareDbs(destDbPrefix + chkpt3, destDbPrefix + chkPt3WithTs, comparePipeline3, Dialect.POSTGRESQL);
}
Also used : Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Test(org.junit.Test)
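
Exporting "at a timestamp" amounts to reading the database with a timestamp-bound read-only transaction. A minimal sketch of how a captured timestamp string such as chkPt1Ts above could be turned into a TimestampBound (an illustration of the concept, not the template's actual wiring; boundFor is a hypothetical helper):

import com.google.cloud.Timestamp;
import com.google.cloud.spanner.TimestampBound;

// Parse an RFC 3339 timestamp captured earlier and pin reads to it,
// so the export sees a consistent snapshot as of that instant.
static TimestampBound boundFor(String rfc3339Ts) {
    return TimestampBound.ofReadTimestamp(Timestamp.parseTimestamp(rfc3339Ts));
}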

Example 68 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

the class ExportTransformTest method buildDatabaseManifestFile.

@Test
public void buildDatabaseManifestFile() throws InvalidProtocolBufferException {
    Map<String, String> tablesAndManifests =
        ImmutableMap.of(
            "table1", "table1 manifest",
            "table2", "table2 manifest",
            "changeStream", "changeStream manifest");
    PCollection<List<Export.Table>> metadataTables =
        pipeline
            .apply("Initialize table manifests", Create.of(tablesAndManifests))
            .apply("Combine table manifests", Combine.globally(new CombineTableMetadata()));
    ImmutableList<Export.DatabaseOption> databaseOptions =
        ImmutableList.of(
            Export.DatabaseOption.newBuilder()
                .setOptionName("version_retention_period")
                .setOptionValue("5d")
                .build());
    Ddl.Builder ddlBuilder = Ddl.builder();
    ddlBuilder.mergeDatabaseOptions(databaseOptions);
    ddlBuilder.createChangeStream("changeStream").endChangeStream();
    Ddl ddl = ddlBuilder.build();
    PCollectionView<Ddl> ddlView = pipeline.apply(Create.of(ddl)).apply(View.asSingleton());
    PCollectionView<Dialect> dialectView = pipeline.apply("CreateSingleton", Create.of(Dialect.GOOGLE_STANDARD_SQL)).apply("As PCollectionView", View.asSingleton());
    PCollection<String> databaseManifest =
        metadataTables.apply(
            "Test adding database option to manifest",
            ParDo.of(new CreateDatabaseManifest(ddlView, dialectView))
                .withSideInputs(ddlView, dialectView));
    // The output JSON may list the tables in any order, so a plain string comparison is not
    // sufficient; the manifest string has to be parsed back into a protobuf. Also, for the
    // checker function to be serializable, it has to be written as a lambda.
    PAssert.thatSingleton(databaseManifest).satisfies((SerializableFunction<String, Void>) input -> {
        Builder builder1 = Export.newBuilder();
        try {
            JsonFormat.parser().merge(input, builder1);
        } catch (InvalidProtocolBufferException e) {
            throw new RuntimeException(e);
        }
        Export manifestProto = builder1.build();
        assertThat(manifestProto.getTablesCount(), is(2));
        assertThat(manifestProto.getDialect(), is(ProtoDialect.GOOGLE_STANDARD_SQL));
        String table1Name = manifestProto.getTables(0).getName();
        assertThat(table1Name, startsWith("table"));
        assertThat(manifestProto.getTables(0).getManifestFile(), is(table1Name + "-manifest.json"));
        Export.DatabaseOption dbOptions = manifestProto.getDatabaseOptions(0);
        String optionName = dbOptions.getOptionName();
        String optionValue = dbOptions.getOptionValue();
        assertThat(optionName, is("version_retention_period"));
        assertThat(optionValue, is("5d"));
        assertThat(manifestProto.getChangeStreamsCount(), is(1));
        assertThat(manifestProto.getChangeStreams(0).getName(), is("changeStream"));
        assertThat(manifestProto.getChangeStreams(0).getManifestFile(), is("changeStream-manifest.json"));
        return null;
    });
    pipeline.run();
}
Also used : Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) KV(org.apache.beam.sdk.values.KV) CombineTableMetadata(com.google.cloud.teleport.spanner.ExportTransform.CombineTableMetadata) Dialect(com.google.cloud.spanner.Dialect) Combine(org.apache.beam.sdk.transforms.Combine) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) View(org.apache.beam.sdk.transforms.View) Timestamp(com.google.cloud.Timestamp) BuildTableManifests(com.google.cloud.teleport.spanner.ExportTransform.BuildTableManifests) Assert.assertThat(org.junit.Assert.assertThat) Builder(com.google.cloud.teleport.spanner.ExportProtos.Export.Builder) ImmutableList(com.google.common.collect.ImmutableList) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TableManifest(com.google.cloud.teleport.spanner.ExportProtos.TableManifest) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Path(java.nio.file.Path) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) ImmutableMap(com.google.common.collect.ImmutableMap) Files(java.nio.file.Files) PAssert(org.apache.beam.sdk.testing.PAssert) ProtoDialect(com.google.cloud.teleport.spanner.ExportProtos.ProtoDialect) Test(org.junit.Test) TimestampBound(com.google.cloud.spanner.TimestampBound) PCollection(org.apache.beam.sdk.values.PCollection) Matchers.startsWith(org.hamcrest.Matchers.startsWith) Export(com.google.cloud.teleport.spanner.ExportProtos.Export) List(java.util.List) Rule(org.junit.Rule) CreateDatabaseManifest(com.google.cloud.teleport.spanner.ExportTransform.CreateDatabaseManifest) JsonFormat(com.google.protobuf.util.JsonFormat) ParDo(org.apache.beam.sdk.transforms.ParDo) PCollectionView(org.apache.beam.sdk.values.PCollectionView) Matchers.is(org.hamcrest.Matchers.is) Assert.assertEquals(org.junit.Assert.assertEquals)
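
The proto-based comparison in the test works because JsonFormat can round-trip an Export message regardless of field ordering in the serialized JSON. A minimal sketch of that round trip (the option values mirror the test; the method wrapper is illustrative):

import com.google.cloud.teleport.spanner.ExportProtos.Export;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.util.JsonFormat;

static void roundTripManifest() throws InvalidProtocolBufferException {
    Export original = Export.newBuilder()
        .addDatabaseOptions(
            Export.DatabaseOption.newBuilder()
                .setOptionName("version_retention_period")
                .setOptionValue("5d"))
        .build();
    // Print to JSON, then parse back; comparing protos side-steps any
    // nondeterministic ordering in the serialized string.
    String json = JsonFormat.printer().print(original);
    Export.Builder parsed = Export.newBuilder();
    JsonFormat.parser().merge(json, parsed);
    assert original.equals(parsed.build());
}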

Example 69 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

the class ImportFromAvroTest method runTest.

private void runTest(Schema avroSchema, String spannerSchema, Iterable<GenericRecord> records, Dialect dialect) throws Exception {
    // Create the Avro file to be imported.
    String fileName = "avroFile.avro";
    ExportProtos.Export exportProto =
        ExportProtos.Export.newBuilder()
            .addTables(
                ExportProtos.Export.Table.newBuilder()
                    .setName("AvroTable")
                    .addDataFiles(fileName)
                    .build())
            .addDatabaseOptions(
                ExportProtos.Export.DatabaseOption.newBuilder()
                    .setOptionName("version_retention_period")
                    .setOptionValue(dialect == Dialect.GOOGLE_STANDARD_SQL ? "\"4d\"" : "'4d'")
                    .build())
            .setDialect(ProtoDialect.valueOf(dialect.name()))
            .build();
    File manifestFile = tmpDir.newFile("spanner-export.json");
    String manifestFileLocation = manifestFile.getParent();
    Files.write(
        manifestFile.toPath(),
        JsonFormat.printer().print(exportProto).getBytes(StandardCharsets.UTF_8));
    File avroFile = tmpDir.newFile(fileName);
    try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(new GenericDatumWriter<>(avroSchema))) {
        fileWriter.create(avroSchema, avroFile);
        for (GenericRecord r : records) {
            fileWriter.append(r);
        }
        fileWriter.flush();
    }
    // Create the target database.
    switch(dialect) {
        case GOOGLE_STANDARD_SQL:
            spannerServer.createDatabase(dbName, Collections.singleton(spannerSchema));
            break;
        case POSTGRESQL:
            spannerServer.createPgDatabase(dbName, Collections.singleton(spannerSchema));
            break;
        default:
            throw new IllegalArgumentException("Unrecognized dialect: " + dialect);
    }
    // Run the import pipeline.
    importPipeline.apply(
        "Import",
        new ImportTransform(
            spannerServer.getSpannerConfig(dbName),
            ValueProvider.StaticValueProvider.of(manifestFileLocation),
            ValueProvider.StaticValueProvider.of(true),
            ValueProvider.StaticValueProvider.of(true),
            ValueProvider.StaticValueProvider.of(true),
            ValueProvider.StaticValueProvider.of(true),
            ValueProvider.StaticValueProvider.of(30)));
    PipelineResult importResult = importPipeline.run();
    importResult.waitUntilFinish();
    Ddl ddl;
    try (ReadOnlyTransaction ctx = spannerServer.getDbClient(dbName).readOnlyTransaction()) {
        ddl = new InformationSchemaScanner(ctx, dialect).scan();
    }
    assertThat(ddl.databaseOptions().size(), is(1));
    ExportProtos.Export.DatabaseOption dbOption = ddl.databaseOptions().get(0);
    assertThat(dbOption.getOptionName(), is("version_retention_period"));
    assertThat(dbOption.getOptionValue(), is("4d"));
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) PipelineResult(org.apache.beam.sdk.PipelineResult) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) InformationSchemaScanner(com.google.cloud.teleport.spanner.ddl.InformationSchemaScanner) ReadOnlyTransaction(com.google.cloud.spanner.ReadOnlyTransaction) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)
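
runTest takes the Avro records as an Iterable<GenericRecord>, so callers build them against the same schema they pass in. A minimal sketch of constructing such a record (the schema, field names, and sampleRecord helper are illustrative assumptions, not the suite's actual fixtures):

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

// Illustrative schema for the "AvroTable" named in the manifest above;
// the real tests pass schemas matching their Spanner DDL.
static GenericRecord sampleRecord() {
    Schema avroSchema =
        SchemaBuilder.record("AvroTable").fields()
            .requiredLong("id")
            .optionalString("name")
            .endRecord();
    GenericRecord record = new GenericData.Record(avroSchema);
    record.put("id", 1L);
    record.put("name", "alice");
    return record;
}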

Example 70 with Ddl

use of com.google.cloud.teleport.spanner.ddl.Ddl in project DataflowTemplates by GoogleCloudPlatform.

the class SpannerRecordConverterTest method pgNulls.

@Test
public void pgNulls() {
    Ddl ddl =
        Ddl.builder(Dialect.POSTGRESQL)
            .createTable("users")
            .column("id").pgInt8().notNull().endColumn()
            .column("age").pgInt8().endColumn()
            .column("name").pgVarchar().max().endColumn()
            .column("bytes").pgBytea().endColumn()
            .column("ts").pgTimestamptz().endColumn()
            .column("date").pgDate().endColumn()
            .primaryKey().asc("id").end()
            .endTable()
            .build();
    Schema schema = converter.convert(ddl).iterator().next();
    SpannerRecordConverter recordConverter = new SpannerRecordConverter(schema, Dialect.POSTGRESQL);
    Struct struct =
        Struct.newBuilder()
            .set("id").to(1L)
            .set("age").to((Long) null)
            .set("name").to((String) null)
            .set("bytes").to((ByteArray) null)
            .set("ts").to((Timestamp) null)
            .set("date").to((Date) null)
            .build();
    GenericRecord avroRecord = recordConverter.convert(struct);
    assertThat(avroRecord.get("id"), equalTo(1L));
    assertThat(avroRecord.get("age"), is((Long) null));
    assertThat(avroRecord.get("name"), is((String) null));
    assertThat(avroRecord.get("bytes"), is((ByteArray) null));
    assertThat(avroRecord.get("ts"), is((String) null));
    assertThat(avroRecord.get("date"), is((String) null));
}
Also used : Schema(org.apache.avro.Schema) ByteArray(com.google.cloud.ByteArray) GenericRecord(org.apache.avro.generic.GenericRecord) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Timestamp(com.google.cloud.Timestamp) Date(com.google.cloud.Date) Struct(com.google.cloud.spanner.Struct) Test(org.junit.Test)
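
For the null round trip above to work, each nullable Spanner column must map to an Avro union of null and the base type. A minimal sketch of checking that shape on the converted schema (assuming the converter follows the standard nullable-union convention; isNullable is a hypothetical helper):

import org.apache.avro.Schema;

// A nullable column's Avro field should be a union such as ["null", "long"],
// so absent Spanner values survive as nulls in the GenericRecord.
static boolean isNullable(Schema recordSchema, String fieldName) {
    Schema fieldSchema = recordSchema.getField(fieldName).schema();
    return fieldSchema.getType() == Schema.Type.UNION
        && fieldSchema.getTypes().stream().anyMatch(s -> s.getType() == Schema.Type.NULL);
}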

Aggregations

Ddl (com.google.cloud.teleport.spanner.ddl.Ddl)109 Test (org.junit.Test)91 Schema (org.apache.avro.Schema)34 GenericRecord (org.apache.avro.generic.GenericRecord)19 List (java.util.List)18 Struct (com.google.cloud.spanner.Struct)14 Collectors (java.util.stream.Collectors)14 KV (org.apache.beam.sdk.values.KV)14 SpannerTableFilter.getFilteredTables (com.google.cloud.teleport.spanner.SpannerTableFilter.getFilteredTables)12 Type (com.google.cloud.teleport.spanner.common.Type)12 Path (java.nio.file.Path)12 Collections (java.util.Collections)12 ImmutableList (com.google.common.collect.ImmutableList)11 IOException (java.io.IOException)11 Assert.assertEquals (org.junit.Assert.assertEquals)11 ReadImportManifest (com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest)10 ResolveDataFiles (com.google.cloud.teleport.spanner.TextImportTransform.ResolveDataFiles)10 BufferedWriter (java.io.BufferedWriter)10 Charset (java.nio.charset.Charset)10 RunWith (org.junit.runner.RunWith)9