Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.
The class ProcessInformationSchemaIntegrationTest, method canCreateMissingShadowTables.
@Test
public void canCreateMissingShadowTables() throws Exception {
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
  Ddl testDdl =
      getTestDdlBuilder()
          .createTable("shadow_Table")
          .column("ID").int64().endColumn()
          .column("version").int64().endColumn()
          .primaryKey().asc("ID").end()
          .endTable()
          .build();
  createDb(testDdl);

  testPipeline.apply(
      "Process Information Schema",
      new ProcessInformationSchema(
          sourceConfig, /* shouldCreateShadowTables= */ true, "shadow", "oracle"));
  PipelineResult testResult = testPipeline.run();
  testResult.waitUntilFinish();

  Ddl finalDdl = readDdl(testDb);
  assertEquals(4, finalDdl.allTables().size());
  Table shadowTable = finalDdl.table("shadow_Table");
  Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
  assertNotNull(shadowTable);
  assertNotNull(shadowTableInterleaved);
  assertThat(
      shadowTableInterleaved.primaryKeys(),
      is(testDdl.table("Table_interleaved").primaryKeys()));
  assertEquals(
      shadowTableInterleaved.columns().size(),
      testDdl.table("Table_interleaved").primaryKeys().size() + 2);
}
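For context, the assertions above depend on data tables defined by getTestDdlBuilder(), which is not part of this excerpt. A minimal sketch of such a fixture is shown below, assuming Ddl.builder() exposes the same fluent createTable/column/primaryKey API used in the test; the table and column names are illustrative assumptions, not the actual fixture.

// Hypothetical fixture; the real getTestDdlBuilder() in the test class may differ.
private Ddl.Builder getTestDdlBuilder() {
  return Ddl.builder()
      .createTable("Table")
      .column("ID").int64().endColumn()
      .primaryKey().asc("ID").end()
      .endTable()
      .createTable("Table_interleaved")
      .column("ID").int64().endColumn()
      .column("ID2").int64().endColumn()
      .primaryKey().asc("ID").asc("ID2").end()
      .endTable();
  // Interleaving details (declaring Table_interleaved as a child of Table) are
  // omitted here; the excerpt does not show how the fixture expresses them.
}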
Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.
The class ChangeEventConvertor, method changeEventToPrimaryKey.
static com.google.cloud.spanner.Key changeEventToPrimaryKey(Ddl ddl, JsonNode changeEvent)
    throws ChangeEventConvertorException {
  String tableName = changeEvent.get(DatastreamConstants.EVENT_TABLE_NAME_KEY).asText();
  try {
    Table table = ddl.table(tableName);
    ImmutableList<IndexColumn> keyColumns = table.primaryKeys();
    com.google.cloud.spanner.Key.Builder pk = com.google.cloud.spanner.Key.newBuilder();
    for (IndexColumn keyColumn : keyColumns) {
      Column key = table.column(keyColumn.name());
      Type keyColType = key.type();
      String keyColName = key.name().toLowerCase();
      switch (keyColType.getCode()) {
        case BOOL:
          pk.append(ChangeEventTypeConvertor.toBoolean(changeEvent, keyColName, /* requiredField= */ true));
          break;
        case INT64:
          pk.append(ChangeEventTypeConvertor.toLong(changeEvent, keyColName, /* requiredField= */ true));
          break;
        case FLOAT64:
          pk.append(ChangeEventTypeConvertor.toDouble(changeEvent, keyColName, /* requiredField= */ true));
          break;
        case STRING:
          pk.append(ChangeEventTypeConvertor.toString(changeEvent, keyColName, /* requiredField= */ true));
          break;
        case NUMERIC:
          pk.append(ChangeEventTypeConvertor.toNumericBigDecimal(changeEvent, keyColName, /* requiredField= */ true));
          break;
        case BYTES:
          pk.append(ChangeEventTypeConvertor.toByteArray(changeEvent, keyColName, /* requiredField= */ true));
          break;
        case TIMESTAMP:
          pk.append(ChangeEventTypeConvertor.toTimestamp(changeEvent, keyColName, /* requiredField= */ true));
          break;
        case DATE:
          pk.append(ChangeEventTypeConvertor.toDate(changeEvent, keyColName, /* requiredField= */ true));
          break;
        // TODO(b/179070999) - Add support for other data types.
        default:
          throw new IllegalArgumentException(
              "Column name(" + keyColName + ") has unsupported column type(" + keyColType + ")");
      }
    }
    return pk.build();
  } catch (Exception e) {
    throw new ChangeEventConvertorException(e);
  }
}
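A minimal usage sketch for this helper, assuming a Ddl (here the variable ddl, loaded elsewhere) that defines a table "Users" with a single INT64 key column "ID", and assuming DatastreamConstants.EVENT_TABLE_NAME_KEY resolves to a "_metadata_table" field of the change event; both are assumptions for illustration, the constant's actual value is defined elsewhere in the project.

// Hypothetical change event; note the key column name is lowercased ("id")
// before lookup, matching the toLowerCase() call above.
ObjectMapper mapper = new ObjectMapper();
JsonNode changeEvent = mapper.readTree("{\"_metadata_table\": \"Users\", \"id\": 42}");

// For an INT64 key column the switch routes through toLong(...), so the result
// should be equivalent to com.google.cloud.spanner.Key.of(42L).
com.google.cloud.spanner.Key key = ChangeEventConvertor.changeEventToPrimaryKey(ddl, changeEvent);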
Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.
The class ShadowTableCreator, method constructShadowTable.
/*
 * Constructs a shadow table for a data table in the information schema.
 * Note: Shadow tables for interleaved tables are not interleaved to
 * their shadow parent table.
 */
Table constructShadowTable(Ddl informationSchema, String dataTableName) {
  // Create a new shadow table with the given prefix.
  Table.Builder shadowTableBuilder = Table.builder();
  String shadowTableName = shadowTablePrefix + dataTableName;
  shadowTableBuilder.name(shadowTableName);

  // Add key columns from the data table to the shadow table builder.
  Table dataTable = informationSchema.table(dataTableName);
  Set<String> primaryKeyColNames =
      dataTable.primaryKeys().stream().map(k -> k.name()).collect(Collectors.toSet());
  List<Column> primaryKeyCols =
      dataTable.columns().stream()
          .filter(col -> primaryKeyColNames.contains(col.name()))
          .collect(Collectors.toList());
  for (Column col : primaryKeyCols) {
    shadowTableBuilder.addColumn(col);
  }

  // Add primary key constraints.
  for (IndexColumn keyColumn : dataTable.primaryKeys()) {
    if (keyColumn.order() == IndexColumn.Order.ASC) {
      shadowTableBuilder.primaryKey().asc(keyColumn.name()).end();
    } else if (keyColumn.order() == IndexColumn.Order.DESC) {
      shadowTableBuilder.primaryKey().desc(keyColumn.name()).end();
    }
  }

  // Add extra columns to track ChangeEventSequence information.
  addChangeEventSequenceColumns(shadowTableBuilder);
  return shadowTableBuilder.build();
}
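A hedged sketch of how constructShadowTable might be driven to create only the shadow tables that are still missing; the loop below reuses allTables(), table(), and shadowTablePrefix from the excerpts above, but the actual orchestration inside ProcessInformationSchema is not shown here and may differ.

// Collect shadow tables for data tables that do not yet have one.
List<Table> missingShadowTables = new ArrayList<>();
for (Table dataTable : informationSchema.allTables()) {
  boolean isShadowTable = dataTable.name().startsWith(shadowTablePrefix);
  boolean shadowExists = informationSchema.table(shadowTablePrefix + dataTable.name()) != null;
  if (!isShadowTable && !shadowExists) {
    missingShadowTables.add(constructShadowTable(informationSchema, dataTable.name()));
  }
}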
Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.
The class BigQueryMetadataLoader, method loadTableMetadata.
/**
 * Populates {@code table} builder with additional metadata like partition names and schema.
 *
 * @param filter optional filter to skip a subset of tables
 * @return {@code true} if the table matches all filters and should be included in the results,
 *     {@code false} if it should be skipped
 */
private boolean loadTableMetadata(BigQueryTable.Builder table, Filter filter)
    throws InterruptedException {
  TableReadOptions.Builder readOptions = TableReadOptions.newBuilder();
  if (table.getPartitioningColumn() == null) {
    if (filter != null && filter.shouldSkipUnpartitionedTable(table)) {
      return false;
    }
  } else {
    List<BigQueryTablePartition> partitions = loadTablePartitions(table, filter);
    if (filter != null && filter.shouldSkipPartitionedTable(table, partitions)) {
      return false;
    }
    table.setPartitions(partitions);
    LOG.info(
        "Loaded {} partitions for table {}: {}",
        partitions.size(), table.getTableName(), partitions);
    // Creating a ReadSession without a WHERE clause for a partitioned table that has
    // "require partition filter" param set to true would fail with the error:
    // "Cannot query over table ... without a filter over column(s) ...
    // that can be used for partition elimination".
    // The following is a hack that adds an "is null and is not null" filter over the
    // partitioning column, which shouldn't select any data but should make the query
    // analyzer happy and should be enough to extract the table schema.
    // TODO(an2x): do this only when "require partition filter" = true
    //             or load schema differently?
    readOptions.setRowRestriction(
        String.format(
            "%s is null and %s is not null",
            table.getPartitioningColumn(), table.getPartitioningColumn()));
  }
  ReadSession session =
      BigQueryUtils.createReadSession(
          bqsClient,
          DatasetId.of(table.getProject(), table.getDataset()),
          table.getTableName(),
          readOptions.build());
  table.setSchema(new Schema.Parser().parse(session.getAvroSchema().getSchema()));
  LOG.info("Loaded schema for table {}: {}", table.getTableName(), table.getSchema());
  return true;
}
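To make the workaround concrete: for a table partitioned on a column named, say, ts (a hypothetical name), the row restriction produced above is the self-contradictory predicate below. It can never match a row, yet it satisfies BigQuery's "require partition filter" check long enough for the ReadSession to return the table's Avro schema.

String partitioningColumn = "ts"; // hypothetical partitioning column name
String rowRestriction =
    String.format("%s is null and %s is not null", partitioningColumn, partitioningColumn);
// rowRestriction -> "ts is null and ts is not null"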
Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.
The class DeleteBigQueryDataFnTest, method testTransform_withDeleteSourceDataEnabled_doesntTruncateSpecialPartitions.
/**
 * Test that DeleteBigQueryDataFn doesn't attempt to delete special BigQuery partitions even if
 * {@code deleteSourceData = true}.
 *
 * <p>As per <a
 * href="https://cloud.google.com/bigquery/docs/managing-partitioned-tables#delete_a_partition">
 * this documentation</a>, special partitions "__NULL__" and "__UNPARTITIONED__" cannot be
 * deleted.
 */
@Test
@Category(NeedsRunner.class)
public void testTransform_withDeleteSourceDataEnabled_doesntTruncateSpecialPartitions() {
  Options options = TestPipeline.testingPipelineOptions().as(Options.class);
  options.setDeleteSourceData(true);

  BigQueryTablePartition.Builder builder =
      BigQueryTablePartition.builder().setLastModificationTime(System.currentTimeMillis() * 1000);
  BigQueryTablePartition p1 = builder.setPartitionName("__NULL__").build();
  BigQueryTablePartition p2 = builder.setPartitionName("__UNPARTITIONED__").build();
  BigQueryTablePartition p3 = builder.setPartitionName("NORMAL_PARTITION").build();
  BigQueryTable t1 =
      table.toBuilder()
          .setPartitions(Arrays.asList(p1, p2, p3))
          .setPartitioningColumn("column-name-doesnt-matter")
          .build();

  DeleteBigQueryDataFn fn = new DeleteBigQueryDataFn().withTestBqClientFactory(() -> bqMock);

  testPipeline
      .apply(
          "CreateInput",
          Create.of(KV.of(t1, p1), KV.of(t1, p2), KV.of(t1, p3)).withCoder(fnCoder))
      .apply("TestDeleteBigQueryDataFn", ParDo.of(fn));
  testPipeline.run(options);

  verify(bqMock, times(1)).delete(TableId.of("pr1", "d1", "t1$NORMAL_PARTITION"));
  verifyNoMoreInteractions(bqMock);
}
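The single verified delete also illustrates the partition decorator form BigQuery expects when deleting one partition: the table ID is suffixed with $<partitionName>. A hedged sketch of the guard the test exercises follows; the helper name and the getPartitionName() accessor are assumptions (inferred from the builder's setPartitionName()), not DeleteBigQueryDataFn's actual internals.

// Hypothetical guard; the real check inside DeleteBigQueryDataFn may be structured differently.
private static boolean isSpecialPartition(BigQueryTablePartition partition) {
  String name = partition.getPartitionName();
  return "__NULL__".equals(name) || "__UNPARTITIONED__".equals(name);
}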