
Example 11 with Dialect

use of com.google.cloud.spanner.Dialect in project beam by apache.

the class SpannerWriteIT method testPgFailFast.

@Test
public void testPgFailFast() throws Exception {
    thrown.expect(new StackTraceContainsString("SpannerException"));
    thrown.expect(new StackTraceContainsString("value must not be NULL in table users"));
    int numRecords = 100;
    PCollectionView<Dialect> dialectView = p.apply(Create.of(Dialect.POSTGRESQL)).apply(View.asSingleton());
    p.apply(GenerateSequence.from(0).to(2 * numRecords))
        .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
        .apply(SpannerIO.write()
            .withProjectId(project)
            .withInstanceId(options.getInstanceId())
            .withDatabaseId(pgDatabaseName)
            .withDialectView(dialectView));
    PipelineResult result = p.run();
    result.waitUntilFinish();
}
Also used : Dialect(com.google.cloud.spanner.Dialect) PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)
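
The GenerateMutations DoFn and the DivBy2 predicate referenced above are defined elsewhere in SpannerWriteIT and are not shown here. A minimal sketch of a comparable mutation-generating DoFn is given below, assuming a table with Key and Value columns (hypothetical names) and a rule that leaves Value unset for odd keys; if Value is declared NOT NULL, those writes fail, which is the condition testPgFailFast expects.

// Hedged sketch only; not the actual Beam test code. Assumes
// org.apache.beam.sdk.transforms.DoFn and com.google.cloud.spanner.Mutation.
static class GenerateMutationsSketch extends DoFn<Long, Mutation> {
    private final String table;

    GenerateMutationsSketch(String table) {
        this.table = table;
    }

    @ProcessElement
    public void processElement(ProcessContext c) {
        long key = c.element();
        Mutation.WriteBuilder builder = Mutation.newInsertOrUpdateBuilder(table).set("Key").to(key);
        if (key % 2 == 0) {
            // Even keys get a value; odd keys leave "Value" unset, so a NOT NULL
            // constraint on that column makes the batched write fail.
            builder.set("Value").to("v" + key);
        }
        c.output(builder.build());
    }
}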

Example 12 with Dialect

use of com.google.cloud.spanner.Dialect in project beam by apache.

the class SpannerWriteIT method testSequentialWrite.

@Test
public void testSequentialWrite() throws Exception {
    int numRecords = 100;
    SpannerWriteResult stepOne = p.apply("first step", GenerateSequence.from(0).to(numRecords))
        .apply("Gen mutations1", ParDo.of(new GenerateMutations(options.getTable())))
        .apply("write to table1", SpannerIO.write()
            .withProjectId(project)
            .withInstanceId(options.getInstanceId())
            .withDatabaseId(databaseName));
    p.apply("second step", GenerateSequence.from(numRecords).to(2 * numRecords))
        .apply("Gen mutations2", ParDo.of(new GenerateMutations(options.getTable())))
        .apply("wait", Wait.on(stepOne.getOutput()))
        .apply("write to table2", SpannerIO.write()
            .withProjectId(project)
            .withInstanceId(options.getInstanceId())
            .withDatabaseId(databaseName));
    PCollectionView<Dialect> dialectView = p.apply(Create.of(Dialect.POSTGRESQL)).apply(View.asSingleton());
    SpannerWriteResult pgStepOne = p.apply("pg first step", GenerateSequence.from(0).to(numRecords))
        .apply("Gen pg mutations1", ParDo.of(new GenerateMutations(options.getTable())))
        .apply("write to pg table1", SpannerIO.write()
            .withProjectId(project)
            .withInstanceId(options.getInstanceId())
            .withDatabaseId(pgDatabaseName)
            .withDialectView(dialectView));
    p.apply("pg second step", GenerateSequence.from(numRecords).to(2 * numRecords))
        .apply("Gen pg mutations2", ParDo.of(new GenerateMutations(options.getTable())))
        .apply("pg wait", Wait.on(pgStepOne.getOutput()))
        .apply("write to pg table2", SpannerIO.write()
            .withProjectId(project)
            .withInstanceId(options.getInstanceId())
            .withDatabaseId(pgDatabaseName)
            .withDialectView(dialectView));
    PipelineResult result = p.run();
    result.waitUntilFinish();
    assertThat(result.getState(), is(PipelineResult.State.DONE));
    assertThat(countNumberOfRecords(databaseName), equalTo(2L * numRecords));
    assertThat(countNumberOfRecords(pgDatabaseName), equalTo(2L * numRecords));
}
Also used : Dialect(com.google.cloud.spanner.Dialect) PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)
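
The sequential behaviour in this test comes from Wait.on: the second GenerateSequence branch is held back until the PCollection returned by stepOne.getOutput() (and pgStepOne.getOutput() for the PostgreSQL database) is complete. A stripped-down sketch of that signalling pattern, with illustrative names, looks like this:

// Sketch only; project, instance and database identifiers are placeholders.
SpannerWriteResult first = input1.apply("Write A",
    SpannerIO.write().withProjectId(project).withInstanceId(instanceId).withDatabaseId(databaseId));
input2
    .apply("Wait for A", Wait.on(first.getOutput()))   // defers this branch until the first write finishes
    .apply("Write B",
        SpannerIO.write().withProjectId(project).withInstanceId(instanceId).withDatabaseId(databaseId));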

Example 13 with Dialect

use of com.google.cloud.spanner.Dialect in project DataflowTemplates by GoogleCloudPlatform.

the class ImportTransform method expand.

@Override
public PDone expand(PBegin begin) {
    PCollectionView<Dialect> dialectView = begin.apply("Read Dialect", new ReadDialect(spannerConfig)).apply("Dialect As PCollectionView", View.asSingleton());
    PCollection<Export> manifest = begin.apply("Read manifest", new ReadExportManifestFile(importDirectory, dialectView));
    PCollectionView<Export> manifestView = manifest.apply("Manifest as view", View.asSingleton());
    PCollection<KV<String, String>> allFiles = manifest.apply("Read all manifest files", new ReadManifestFiles(importDirectory));
    PCollection<KV<String, List<String>>> tableFiles = allFiles.apply(Combine.perKey(AsList.fn()));
    PCollection<KV<String, String>> schemas = tableFiles.apply("File per table, view or change stream", ParDo.of(new DoFn<KV<String, List<String>>, KV<String, String>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            KV<String, List<String>> kv = c.element();
            if (!kv.getValue().isEmpty()) {
                c.output(KV.of(kv.getKey(), kv.getValue().get(0)));
            }
        }
    })).apply("Extract avro schemas", ParDo.of(new ReadAvroSchemas()));
    final PCollection<List<KV<String, String>>> avroSchemas = schemas.apply("Build avro DDL", Combine.globally(AsList.fn()));
    PCollectionView<Transaction> tx = begin.apply(SpannerIO.createTransaction().withSpannerConfig(spannerConfig));
    PCollection<Ddl> informationSchemaDdl = begin.apply("Read Information Schema", new ReadInformationSchema(spannerConfig, tx, dialectView));
    final PCollectionView<List<KV<String, String>>> avroDdlView = avroSchemas.apply("Avro ddl view", View.asSingleton());
    final PCollectionView<Ddl> informationSchemaView = informationSchemaDdl.apply("Information schema view", View.asSingleton());
    final PCollectionTuple createTableOutput = begin.apply("Create Cloud Spanner Tables and indexes", new CreateTables(spannerConfig, avroDdlView, informationSchemaView, manifestView, earlyIndexCreateFlag, ddlCreationTimeoutInMinutes));
    final PCollection<Ddl> ddl = createTableOutput.get(CreateTables.getDdlObjectTag());
    final PCollectionView<List<String>> pendingIndexes = createTableOutput.get(CreateTables.getPendingIndexesTag()).apply("As Index view", View.asSingleton());
    final PCollectionView<List<String>> pendingForeignKeys = createTableOutput.get(CreateTables.getPendingForeignKeysTag()).apply("As Foreign keys view", View.asSingleton());
    final PCollectionView<List<String>> pendingChangeStreams = createTableOutput.get(CreateTables.getPendingChangeStreamsTag()).apply("As change streams view", View.asSingleton());
    PCollectionView<Ddl> ddlView = ddl.apply("Cloud Spanner DDL as view", View.asSingleton());
    PCollectionView<HashMultimap<Integer, String>> levelsView = ddl.apply("Group tables by depth", ParDo.of(new DoFn<Ddl, HashMultimap<Integer, String>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            Ddl ddl = c.element();
            c.output(ddl.perLevelView());
        }
    })).apply(View.asSingleton());
    PCollection<HashMultimap<String, String>> acc = tableFiles.apply("Combine table files", Combine.globally(AsList.fn())).apply("As HashMultimap", ParDo.of(new DoFn<List<KV<String, List<String>>>, HashMultimap<String, String>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            HashMultimap<String, String> result = HashMultimap.create();
            for (KV<String, List<String>> kv : c.element()) {
                result.putAll(kv.getKey().toLowerCase(), kv.getValue());
            }
            c.output(result);
        }
    }));
    PCollection<?> previousComputation = ddl;
    for (int i = 0; i < MAX_DEPTH; i++) {
        final int depth = i;
        PCollection<KV<String, String>> levelFiles = acc.apply("Get Avro filenames depth " + depth, ParDo.of(new DoFn<HashMultimap<String, String>, KV<String, String>>() {

            @ProcessElement
            public void processElement(ProcessContext c) {
                HashMultimap<String, String> allFiles = c.element();
                HashMultimap<Integer, String> levels = c.sideInput(levelsView);
                Set<String> tables = levels.get(depth);
                for (String table : tables) {
                    for (String file : allFiles.get(table)) {
                        c.output(KV.of(file, table));
                    }
                }
            }
        }).withSideInputs(levelsView)).apply("Wait for previous depth " + depth, Wait.on(previousComputation));
        PCollection<Mutation> mutations = levelFiles.apply("Avro files as mutations " + depth, new AvroTableFileAsMutations(ddlView));
        SpannerWriteResult result = mutations.apply("Write mutations " + depth,
            SpannerIO.write()
                .withSchemaReadySignal(ddl)
                .withSpannerConfig(spannerConfig)
                .withCommitDeadline(Duration.standardMinutes(1))
                .withMaxCumulativeBackoff(Duration.standardHours(2))
                .withMaxNumMutations(10000)
                .withGroupingFactor(100)
                .withDialectView(dialectView));
        previousComputation = result.getOutput();
    }
    ddl.apply(Wait.on(previousComputation))
        .apply("Create Indexes", new ApplyDDLTransform(spannerConfig, pendingIndexes, waitForIndexes))
        .apply("Add Foreign Keys", new ApplyDDLTransform(spannerConfig, pendingForeignKeys, waitForForeignKeys))
        .apply("Create Change Streams", new ApplyDDLTransform(spannerConfig, pendingChangeStreams, waitForChangeStreams));
    return PDone.in(begin.getPipeline());
}
Also used : Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Dialect(com.google.cloud.spanner.Dialect) ProtoDialect(com.google.cloud.teleport.spanner.ExportProtos.ProtoDialect) Export(com.google.cloud.teleport.spanner.ExportProtos.Export) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PCollectionList(org.apache.beam.sdk.values.PCollectionList) List(java.util.List) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) KV(org.apache.beam.sdk.values.KV) HashMultimap(com.google.common.collect.HashMultimap) DoFn(org.apache.beam.sdk.transforms.DoFn) Transaction(org.apache.beam.sdk.io.gcp.spanner.Transaction) SpannerWriteResult(org.apache.beam.sdk.io.gcp.spanner.SpannerWriteResult) Mutation(com.google.cloud.spanner.Mutation)
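
The for loop over MAX_DEPTH writes the Avro files one interleave level at a time: levelsView groups tables by their depth in the interleaving hierarchy, and each iteration waits on the previous iteration's SpannerWriteResult so parent rows are committed before their interleaved children. Reduced to its skeleton, with a hypothetical filesForDepth helper standing in for the side-input lookup, the chaining looks like this:

// Skeleton sketch of the per-depth chaining above; filesForDepth is a made-up helper
// that would yield the KV<file, table> pairs for one interleave level.
PCollection<?> previous = ddl;
for (int depth = 0; depth < MAX_DEPTH; depth++) {
    SpannerWriteResult result = filesForDepth(depth)
        .apply("Wait for previous depth " + depth, Wait.on(previous))
        .apply("Avro files as mutations " + depth, new AvroTableFileAsMutations(ddlView))
        .apply("Write mutations " + depth, SpannerIO.write()
            .withSpannerConfig(spannerConfig)
            .withDialectView(dialectView));
    previous = result.getOutput();   // next depth waits on this write completing
}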

Example 14 with Dialect

use of com.google.cloud.spanner.Dialect in project DataflowTemplates by GoogleCloudPlatform.

the class ExportTransformTest method buildDatabaseManifestFile.

@Test
public void buildDatabaseManifestFile() throws InvalidProtocolBufferException {
    Map<String, String> tablesAndManifests = ImmutableMap.of("table1", "table1 manifest", "table2", "table2 manifest", "changeStream", "changeStream manifest");
    PCollection<List<Export.Table>> metadataTables = pipeline.apply("Initialize table manifests", Create.of(tablesAndManifests)).apply("Combine table manifests", Combine.globally(new CombineTableMetadata()));
    ImmutableList<Export.DatabaseOption> databaseOptions = ImmutableList.of(Export.DatabaseOption.newBuilder().setOptionName("version_retention_period").setOptionValue("5d").build());
    Ddl.Builder ddlBuilder = Ddl.builder();
    ddlBuilder.mergeDatabaseOptions(databaseOptions);
    ddlBuilder.createChangeStream("changeStream").endChangeStream();
    Ddl ddl = ddlBuilder.build();
    PCollectionView<Ddl> ddlView = pipeline.apply(Create.of(ddl)).apply(View.asSingleton());
    PCollectionView<Dialect> dialectView = pipeline.apply("CreateSingleton", Create.of(Dialect.GOOGLE_STANDARD_SQL)).apply("As PCollectionView", View.asSingleton());
    PCollection<String> databaseManifest = metadataTables.apply(
        "Test adding database option to manifest",
        ParDo.of(new CreateDatabaseManifest(ddlView, dialectView)).withSideInputs(ddlView, dialectView));
    // The output JSON may contain the tables in any order, so a string comparison is not
    // sufficient. Have to convert the manifest string to a protobuf. Also for the checker function
    // to be serializable, it has to be written as a lambda.
    PAssert.thatSingleton(databaseManifest).satisfies((SerializableFunction<String, Void>) input -> {
        Builder builder1 = Export.newBuilder();
        try {
            JsonFormat.parser().merge(input, builder1);
        } catch (InvalidProtocolBufferException e) {
            throw new RuntimeException(e);
        }
        Export manifestProto = builder1.build();
        assertThat(manifestProto.getTablesCount(), is(2));
        assertThat(manifestProto.getDialect(), is(ProtoDialect.GOOGLE_STANDARD_SQL));
        String table1Name = manifestProto.getTables(0).getName();
        assertThat(table1Name, startsWith("table"));
        assertThat(manifestProto.getTables(0).getManifestFile(), is(table1Name + "-manifest.json"));
        Export.DatabaseOption dbOptions = manifestProto.getDatabaseOptions(0);
        String optionName = dbOptions.getOptionName();
        String optionValue = dbOptions.getOptionValue();
        assertThat(optionName, is("version_retention_period"));
        assertThat(optionValue, is("5d"));
        assertThat(manifestProto.getChangeStreamsCount(), is(1));
        assertThat(manifestProto.getChangeStreams(0).getName(), is("changeStream"));
        assertThat(manifestProto.getChangeStreams(0).getManifestFile(), is("changeStream-manifest.json"));
        return null;
    });
    pipeline.run();
}
Also used : Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) KV(org.apache.beam.sdk.values.KV) CombineTableMetadata(com.google.cloud.teleport.spanner.ExportTransform.CombineTableMetadata) Dialect(com.google.cloud.spanner.Dialect) Combine(org.apache.beam.sdk.transforms.Combine) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) View(org.apache.beam.sdk.transforms.View) Timestamp(com.google.cloud.Timestamp) BuildTableManifests(com.google.cloud.teleport.spanner.ExportTransform.BuildTableManifests) Assert.assertThat(org.junit.Assert.assertThat) Builder(com.google.cloud.teleport.spanner.ExportProtos.Export.Builder) ImmutableList(com.google.common.collect.ImmutableList) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TableManifest(com.google.cloud.teleport.spanner.ExportProtos.TableManifest) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) Path(java.nio.file.Path) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) ImmutableMap(com.google.common.collect.ImmutableMap) Files(java.nio.file.Files) PAssert(org.apache.beam.sdk.testing.PAssert) ProtoDialect(com.google.cloud.teleport.spanner.ExportProtos.ProtoDialect) Test(org.junit.Test) TimestampBound(com.google.cloud.spanner.TimestampBound) PCollection(org.apache.beam.sdk.values.PCollection) Matchers.startsWith(org.hamcrest.Matchers.startsWith) Export(com.google.cloud.teleport.spanner.ExportProtos.Export) List(java.util.List) Rule(org.junit.Rule) CreateDatabaseManifest(com.google.cloud.teleport.spanner.ExportTransform.CreateDatabaseManifest) JsonFormat(com.google.protobuf.util.JsonFormat) ParDo(org.apache.beam.sdk.transforms.ParDo) PCollectionView(org.apache.beam.sdk.values.PCollectionView) Matchers.is(org.hamcrest.Matchers.is) Assert.assertEquals(org.junit.Assert.assertEquals)
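
Because the manifest JSON can list the tables in any order, the checker converts the string back into an Export proto before asserting on it. A self-contained sketch of that round trip, using the proto builder and JsonFormat calls seen above and assuming the generated Export builder exposes setDialect for the dialect field used in the assertions, might look like:

// Sketch only; the real manifest is produced by CreateDatabaseManifest, not built by hand.
static Export roundTrip(Export original) throws InvalidProtocolBufferException {
    String json = JsonFormat.printer().print(original);   // proto -> JSON manifest string
    Export.Builder builder = Export.newBuilder();
    JsonFormat.parser().merge(json, builder);              // JSON -> proto again
    return builder.build();
}

// e.g. roundTrip(Export.newBuilder().setDialect(ProtoDialect.GOOGLE_STANDARD_SQL).build())
//      .getDialect() yields ProtoDialect.GOOGLE_STANDARD_SQL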

Aggregations

Dialect (com.google.cloud.spanner.Dialect) 14
Test (org.junit.Test) 10
List (java.util.List) 6
ProtoDialect (com.google.cloud.teleport.spanner.ExportProtos.ProtoDialect) 4
Ddl (com.google.cloud.teleport.spanner.ddl.Ddl) 4
PipelineResult (org.apache.beam.sdk.PipelineResult) 4
KV (org.apache.beam.sdk.values.KV) 4
Mutation (com.google.cloud.spanner.Mutation) 3
ReadOnlyTransaction (com.google.cloud.spanner.ReadOnlyTransaction) 3
Export (com.google.cloud.teleport.spanner.ExportProtos.Export) 3
ArrayList (java.util.ArrayList) 3
TimestampBound (com.google.cloud.spanner.TimestampBound) 2
TableManifest (com.google.cloud.teleport.spanner.ExportProtos.TableManifest) 2
HashMultimap (com.google.common.collect.HashMultimap) 2
InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException) 2
Collections (java.util.Collections) 2
Map (java.util.Map) 2
Transaction (org.apache.beam.sdk.io.gcp.spanner.Transaction) 2
DoFn (org.apache.beam.sdk.transforms.DoFn) 2
Timestamp (com.google.cloud.Timestamp) 1