Use of com.google.cloud.spanner.Dialect in project beam by apache.
The class SpannerWriteIT, method testPgFailFast.
@Test
public void testPgFailFast() throws Exception {
  thrown.expect(new StackTraceContainsString("SpannerException"));
  thrown.expect(new StackTraceContainsString("value must not be NULL in table users"));
  int numRecords = 100;
  PCollectionView<Dialect> dialectView =
      p.apply(Create.of(Dialect.POSTGRESQL)).apply(View.asSingleton());
  p.apply(GenerateSequence.from(0).to(2 * numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(pgDatabaseName)
              .withDialectView(dialectView));
  PipelineResult result = p.run();
  result.waitUntilFinish();
}
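GenerateMutations and DivBy2 are test helpers defined elsewhere in SpannerWriteIT and are not shown on this page. The following is a minimal sketch, assuming an illustrative Key/Value schema, of what a mutation-generating DoFn of this shape could look like; the class name, column names, and predicate type below are assumptions, not the actual helpers.

import com.google.cloud.spanner.Mutation;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.SerializableFunction;

// Hypothetical sketch only: emits one insert-or-update mutation per generated sequence value.
// The real GenerateMutations in SpannerWriteIT may differ; table columns here are assumed.
static class GenerateMutationsSketch extends DoFn<Long, Mutation> {
  private final String table;
  private final SerializableFunction<Long, Boolean> includeValue;

  GenerateMutationsSketch(String table, SerializableFunction<Long, Boolean> includeValue) {
    this.table = table;
    this.includeValue = includeValue;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    long key = c.element();
    Mutation.WriteBuilder builder = Mutation.newInsertOrUpdateBuilder(table).set("Key").to(key);
    // Only populate the non-key column for some elements; rows that omit it can trigger the
    // NOT NULL failure that testPgFailFast expects.
    if (includeValue.apply(key)) {
      builder.set("Value").to(Long.toString(key));
    }
    c.output(builder.build());
  }
}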
Use of com.google.cloud.spanner.Dialect in project beam by apache.
The class SpannerWriteIT, method testSequentialWrite.
@Test
public void testSequentialWrite() throws Exception {
  int numRecords = 100;
  SpannerWriteResult stepOne =
      p.apply("first step", GenerateSequence.from(0).to(numRecords))
          .apply("Gen mutations1", ParDo.of(new GenerateMutations(options.getTable())))
          .apply("write to table1",
              SpannerIO.write().withProjectId(project).withInstanceId(options.getInstanceId()).withDatabaseId(databaseName));
  p.apply("second step", GenerateSequence.from(numRecords).to(2 * numRecords))
      .apply("Gen mutations2", ParDo.of(new GenerateMutations(options.getTable())))
      .apply("wait", Wait.on(stepOne.getOutput()))
      .apply("write to table2",
          SpannerIO.write().withProjectId(project).withInstanceId(options.getInstanceId()).withDatabaseId(databaseName));
  PCollectionView<Dialect> dialectView =
      p.apply(Create.of(Dialect.POSTGRESQL)).apply(View.asSingleton());
  SpannerWriteResult pgStepOne =
      p.apply("pg first step", GenerateSequence.from(0).to(numRecords))
          .apply("Gen pg mutations1", ParDo.of(new GenerateMutations(options.getTable())))
          .apply("write to pg table1",
              SpannerIO.write().withProjectId(project).withInstanceId(options.getInstanceId())
                  .withDatabaseId(pgDatabaseName).withDialectView(dialectView));
  p.apply("pg second step", GenerateSequence.from(numRecords).to(2 * numRecords))
      .apply("Gen pg mutations2", ParDo.of(new GenerateMutations(options.getTable())))
      .apply("pg wait", Wait.on(pgStepOne.getOutput()))
      .apply("write to pg table2",
          SpannerIO.write().withProjectId(project).withInstanceId(options.getInstanceId())
              .withDatabaseId(pgDatabaseName).withDialectView(dialectView));
  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(databaseName), equalTo(2L * numRecords));
  assertThat(countNumberOfRecords(pgDatabaseName), equalTo(2L * numRecords));
}
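countNumberOfRecords is likewise a test helper that is not shown here. A minimal sketch of how such a count could be computed with the Cloud Spanner client, assuming the test exposes a Spanner instance named spanner along with the project, instance, and table options used above (those bindings are assumptions):

import com.google.cloud.spanner.DatabaseClient;
import com.google.cloud.spanner.DatabaseId;
import com.google.cloud.spanner.ResultSet;
import com.google.cloud.spanner.Statement;

// Hypothetical sketch only: counts rows in the test table with a single-use read-only query.
// `spanner`, `project`, and `options` are assumed to come from the test's setup code.
private long countNumberOfRecords(String databaseId) {
  DatabaseClient client =
      spanner.getDatabaseClient(DatabaseId.of(project, options.getInstanceId(), databaseId));
  try (ResultSet rs =
      client.singleUse().executeQuery(Statement.of("SELECT COUNT(*) FROM " + options.getTable()))) {
    rs.next();
    return rs.getLong(0);
  }
}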
Use of com.google.cloud.spanner.Dialect in project DataflowTemplates by GoogleCloudPlatform.
The class ImportTransform, method expand.
@Override
public PDone expand(PBegin begin) {
  PCollectionView<Dialect> dialectView =
      begin
          .apply("Read Dialect", new ReadDialect(spannerConfig))
          .apply("Dialect As PCollectionView", View.asSingleton());
  PCollection<Export> manifest =
      begin.apply("Read manifest", new ReadExportManifestFile(importDirectory, dialectView));
  PCollectionView<Export> manifestView = manifest.apply("Manifest as view", View.asSingleton());
  PCollection<KV<String, String>> allFiles =
      manifest.apply("Read all manifest files", new ReadManifestFiles(importDirectory));
  PCollection<KV<String, List<String>>> tableFiles = allFiles.apply(Combine.perKey(AsList.fn()));
  PCollection<KV<String, String>> schemas =
      tableFiles
          .apply(
              "File per table, view or change stream",
              ParDo.of(
                  new DoFn<KV<String, List<String>>, KV<String, String>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      KV<String, List<String>> kv = c.element();
                      if (!kv.getValue().isEmpty()) {
                        c.output(KV.of(kv.getKey(), kv.getValue().get(0)));
                      }
                    }
                  }))
          .apply("Extract avro schemas", ParDo.of(new ReadAvroSchemas()));
  final PCollection<List<KV<String, String>>> avroSchemas =
      schemas.apply("Build avro DDL", Combine.globally(AsList.fn()));
  PCollectionView<Transaction> tx =
      begin.apply(SpannerIO.createTransaction().withSpannerConfig(spannerConfig));
  PCollection<Ddl> informationSchemaDdl =
      begin.apply(
          "Read Information Schema", new ReadInformationSchema(spannerConfig, tx, dialectView));
  final PCollectionView<List<KV<String, String>>> avroDdlView =
      avroSchemas.apply("Avro ddl view", View.asSingleton());
  final PCollectionView<Ddl> informationSchemaView =
      informationSchemaDdl.apply("Information schema view", View.asSingleton());
  final PCollectionTuple createTableOutput =
      begin.apply(
          "Create Cloud Spanner Tables and indexes",
          new CreateTables(
              spannerConfig,
              avroDdlView,
              informationSchemaView,
              manifestView,
              earlyIndexCreateFlag,
              ddlCreationTimeoutInMinutes));
  final PCollection<Ddl> ddl = createTableOutput.get(CreateTables.getDdlObjectTag());
  final PCollectionView<List<String>> pendingIndexes =
      createTableOutput
          .get(CreateTables.getPendingIndexesTag())
          .apply("As Index view", View.asSingleton());
  final PCollectionView<List<String>> pendingForeignKeys =
      createTableOutput
          .get(CreateTables.getPendingForeignKeysTag())
          .apply("As Foreign keys view", View.asSingleton());
  final PCollectionView<List<String>> pendingChangeStreams =
      createTableOutput
          .get(CreateTables.getPendingChangeStreamsTag())
          .apply("As change streams view", View.asSingleton());
  PCollectionView<Ddl> ddlView = ddl.apply("Cloud Spanner DDL as view", View.asSingleton());
  PCollectionView<HashMultimap<Integer, String>> levelsView =
      ddl.apply(
              "Group tables by depth",
              ParDo.of(
                  new DoFn<Ddl, HashMultimap<Integer, String>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      Ddl ddl = c.element();
                      c.output(ddl.perLevelView());
                    }
                  }))
          .apply(View.asSingleton());
  PCollection<HashMultimap<String, String>> acc =
      tableFiles
          .apply("Combine table files", Combine.globally(AsList.fn()))
          .apply(
              "As HashMultimap",
              ParDo.of(
                  new DoFn<List<KV<String, List<String>>>, HashMultimap<String, String>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      HashMultimap<String, String> result = HashMultimap.create();
                      for (KV<String, List<String>> kv : c.element()) {
                        result.putAll(kv.getKey().toLowerCase(), kv.getValue());
                      }
                      c.output(result);
                    }
                  }));
  PCollection<?> previousComputation = ddl;
  for (int i = 0; i < MAX_DEPTH; i++) {
    final int depth = i;
    PCollection<KV<String, String>> levelFiles =
        acc.apply(
                "Get Avro filenames depth " + depth,
                ParDo.of(
                        new DoFn<HashMultimap<String, String>, KV<String, String>>() {
                          @ProcessElement
                          public void processElement(ProcessContext c) {
                            HashMultimap<String, String> allFiles = c.element();
                            HashMultimap<Integer, String> levels = c.sideInput(levelsView);
                            Set<String> tables = levels.get(depth);
                            for (String table : tables) {
                              for (String file : allFiles.get(table)) {
                                c.output(KV.of(file, table));
                              }
                            }
                          }
                        })
                    .withSideInputs(levelsView))
            .apply("Wait for previous depth " + depth, Wait.on(previousComputation));
    PCollection<Mutation> mutations =
        levelFiles.apply("Avro files as mutations " + depth, new AvroTableFileAsMutations(ddlView));
    SpannerWriteResult result =
        mutations.apply(
            "Write mutations " + depth,
            SpannerIO.write()
                .withSchemaReadySignal(ddl)
                .withSpannerConfig(spannerConfig)
                .withCommitDeadline(Duration.standardMinutes(1))
                .withMaxCumulativeBackoff(Duration.standardHours(2))
                .withMaxNumMutations(10000)
                .withGroupingFactor(100)
                .withDialectView(dialectView));
    previousComputation = result.getOutput();
  }
  ddl.apply(Wait.on(previousComputation))
      .apply("Create Indexes", new ApplyDDLTransform(spannerConfig, pendingIndexes, waitForIndexes))
      .apply("Add Foreign Keys",
          new ApplyDDLTransform(spannerConfig, pendingForeignKeys, waitForForeignKeys))
      .apply("Create Change Streams",
          new ApplyDDLTransform(spannerConfig, pendingChangeStreams, waitForChangeStreams));
  return PDone.in(begin.getPipeline());
}
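The dialect side input consumed by SpannerIO.write().withDialectView(...) above is produced by the template's ReadDialect transform, which is not shown on this page. Below is a rough sketch, not the template's actual implementation, of how the dialect could be read from the target database at execution time using Beam's SpannerAccessor and the client's DatabaseClient.getDialect() call.

import com.google.cloud.spanner.Dialect;
import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor;
import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;

// Hypothetical sketch only: reads the database dialect once at execution time so it can be
// turned into a singleton side input, similar in spirit to the ReadDialect transform above.
class ReadDialectSketch extends PTransform<PBegin, PCollection<Dialect>> {
  private final SpannerConfig spannerConfig;

  ReadDialectSketch(SpannerConfig spannerConfig) {
    this.spannerConfig = spannerConfig;
  }

  @Override
  public PCollection<Dialect> expand(PBegin begin) {
    return begin
        .apply("Seed", Create.of("seed"))
        .apply(
            "Query dialect",
            ParDo.of(
                new DoFn<String, Dialect>() {
                  @ProcessElement
                  public void processElement(ProcessContext c) {
                    SpannerAccessor accessor = SpannerAccessor.getOrCreate(spannerConfig);
                    try {
                      // getDialect() determines the dialect of the configured database.
                      c.output(accessor.getDatabaseClient().getDialect());
                    } finally {
                      accessor.close();
                    }
                  }
                }));
  }
}

The resulting PCollection<Dialect> can then be turned into a singleton view with View.asSingleton() and passed to withDialectView, as the expand() method above does with its own ReadDialect output.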
Use of com.google.cloud.spanner.Dialect in project DataflowTemplates by GoogleCloudPlatform.
The class ExportTransformTest, method buildDatabaseManifestFile.
@Test
public void buildDatabaseManifestFile() throws InvalidProtocolBufferException {
  Map<String, String> tablesAndManifests =
      ImmutableMap.of(
          "table1", "table1 manifest",
          "table2", "table2 manifest",
          "changeStream", "changeStream manifest");
  PCollection<List<Export.Table>> metadataTables =
      pipeline
          .apply("Initialize table manifests", Create.of(tablesAndManifests))
          .apply("Combine table manifests", Combine.globally(new CombineTableMetadata()));
  ImmutableList<Export.DatabaseOption> databaseOptions =
      ImmutableList.of(
          Export.DatabaseOption.newBuilder()
              .setOptionName("version_retention_period")
              .setOptionValue("5d")
              .build());
  Ddl.Builder ddlBuilder = Ddl.builder();
  ddlBuilder.mergeDatabaseOptions(databaseOptions);
  ddlBuilder.createChangeStream("changeStream").endChangeStream();
  Ddl ddl = ddlBuilder.build();
  PCollectionView<Ddl> ddlView = pipeline.apply(Create.of(ddl)).apply(View.asSingleton());
  PCollectionView<Dialect> dialectView =
      pipeline
          .apply("CreateSingleton", Create.of(Dialect.GOOGLE_STANDARD_SQL))
          .apply("As PCollectionView", View.asSingleton());
  PCollection<String> databaseManifest =
      metadataTables.apply(
          "Test adding database option to manifest",
          ParDo.of(new CreateDatabaseManifest(ddlView, dialectView))
              .withSideInputs(ddlView, dialectView));
  // The output JSON may contain the tables in any order, so a string comparison is not
  // sufficient. Have to convert the manifest string to a protobuf. Also for the checker function
  // to be serializable, it has to be written as a lambda.
  PAssert.thatSingleton(databaseManifest)
      .satisfies(
          (SerializableFunction<String, Void>)
              input -> {
                Builder builder1 = Export.newBuilder();
                try {
                  JsonFormat.parser().merge(input, builder1);
                } catch (InvalidProtocolBufferException e) {
                  throw new RuntimeException(e);
                }
                Export manifestProto = builder1.build();
                assertThat(manifestProto.getTablesCount(), is(2));
                assertThat(manifestProto.getDialect(), is(ProtoDialect.GOOGLE_STANDARD_SQL));
                String table1Name = manifestProto.getTables(0).getName();
                assertThat(table1Name, startsWith("table"));
                assertThat(manifestProto.getTables(0).getManifestFile(), is(table1Name + "-manifest.json"));
                Export.DatabaseOption dbOptions = manifestProto.getDatabaseOptions(0);
                String optionName = dbOptions.getOptionName();
                String optionValue = dbOptions.getOptionValue();
                assertThat(optionName, is("version_retention_period"));
                assertThat(optionValue, is("5d"));
                assertThat(manifestProto.getChangeStreamsCount(), is(1));
                assertThat(manifestProto.getChangeStreams(0).getName(), is("changeStream"));
                assertThat(manifestProto.getChangeStreams(0).getManifestFile(), is("changeStream-manifest.json"));
                return null;
              });
  pipeline.run();
}
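The dialect assertion above implies that CreateDatabaseManifest copies the pipeline's Dialect into the export proto. A tiny sketch of what such a mapping could look like, assuming the export proto's ProtoDialect enum also defines a POSTGRESQL value (only GOOGLE_STANDARD_SQL is confirmed by this test):

// Hypothetical sketch only: maps the client library's Dialect onto the export proto's dialect.
// ProtoDialect.POSTGRESQL is assumed to exist; the test above only exercises GOOGLE_STANDARD_SQL.
static ProtoDialect toProtoDialect(Dialect dialect) {
  switch (dialect) {
    case POSTGRESQL:
      return ProtoDialect.POSTGRESQL;
    case GOOGLE_STANDARD_SQL:
    default:
      return ProtoDialect.GOOGLE_STANDARD_SQL;
  }
}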