
Example 1 with Column

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.

From the class ProcessInformationSchemaIntegrationTest, method canCreateMissingShadowTables:

@Test
public void canCreateMissingShadowTables() throws Exception {
    SpannerConfig sourceConfig = spannerServer.getSpannerConfig(testDb);
    Ddl testDdl =
        getTestDdlBuilder()
            .createTable("shadow_Table")
                .column("ID").int64().endColumn()
                .column("version").int64().endColumn()
                .primaryKey().asc("ID").end()
            .endTable()
            .build();
    createDb(testDdl);
    testPipeline.apply("Process Information Schema", new ProcessInformationSchema(sourceConfig, /*shouldCreateShadowTables=*/
    true, "shadow", "oracle"));
    PipelineResult testResult = testPipeline.run();
    testResult.waitUntilFinish();
    Ddl finalDdl = readDdl(testDb);
    assertEquals(4, finalDdl.allTables().size());
    Table shadowTable = finalDdl.table("shadow_Table");
    Table shadowTableInterleaved = finalDdl.table("shadow_Table_interleaved");
    assertNotNull(shadowTable);
    assertNotNull(shadowTableInterleaved);
    assertThat(shadowTableInterleaved.primaryKeys(), is(testDdl.table("Table_interleaved").primaryKeys()));
    assertEquals(shadowTableInterleaved.columns().size(), testDdl.table("Table_interleaved").primaryKeys().size() + 2);
}
Also used : SpannerConfig(org.apache.beam.sdk.io.gcp.spanner.SpannerConfig) Table(com.google.cloud.teleport.v2.templates.spanner.ddl.Table) PipelineResult(org.apache.beam.sdk.PipelineResult) Ddl(com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl) Test(org.junit.Test) IntegrationTest(com.google.cloud.teleport.v2.spanner.IntegrationTest)
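
The helper getTestDdlBuilder() is not shown on this page. A minimal sketch consistent with the assertions above, which need the base DDL to define "Table_interleaved" and its parent, could look like this; the column names are illustrative assumptions, not the project's actual test fixture:

private Ddl.Builder getTestDdlBuilder() {
    // Hypothetical base DDL: a parent table and an interleaved child.
    // Only the table and key names matter to the assertions above.
    return Ddl.builder()
        .createTable("Table")
            .column("ID").int64().endColumn()
            .primaryKey().asc("ID").end()
        .endTable()
        .createTable("Table_interleaved")
            .column("ID").int64().endColumn()
            .column("ID2").int64().endColumn()
            .primaryKey().asc("ID").asc("ID2").end()
        .endTable();
}

The "+ 2" in the final assertion accounts for the two extra columns the shadow-table creator appends to track change-event ordering (for the "oracle" source, presumably an event timestamp and an SCN).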

Example 2 with Column

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.

From the class ChangeEventConvertor, method changeEventToPrimaryKey:

static com.google.cloud.spanner.Key changeEventToPrimaryKey(Ddl ddl, JsonNode changeEvent) throws ChangeEventConvertorException {
    String tableName = changeEvent.get(DatastreamConstants.EVENT_TABLE_NAME_KEY).asText();
    try {
        Table table = ddl.table(tableName);
        ImmutableList<IndexColumn> keyColumns = table.primaryKeys();
        com.google.cloud.spanner.Key.Builder pk = com.google.cloud.spanner.Key.newBuilder();
        for (IndexColumn keyColumn : keyColumns) {
            Column key = table.column(keyColumn.name());
            Type keyColType = key.type();
            String keyColName = key.name().toLowerCase();
            switch (keyColType.getCode()) {
                case BOOL:
                    pk.append(ChangeEventTypeConvertor.toBoolean(
                        changeEvent, keyColName, /* requiredField= */ true));
                    break;
                case INT64:
                    pk.append(ChangeEventTypeConvertor.toLong(
                        changeEvent, keyColName, /* requiredField= */ true));
                    break;
                case FLOAT64:
                    pk.append(ChangeEventTypeConvertor.toDouble(
                        changeEvent, keyColName, /* requiredField= */ true));
                    break;
                case STRING:
                    pk.append(ChangeEventTypeConvertor.toString(
                        changeEvent, keyColName, /* requiredField= */ true));
                    break;
                case NUMERIC:
                    pk.append(ChangeEventTypeConvertor.toNumericBigDecimal(
                        changeEvent, keyColName, /* requiredField= */ true));
                    break;
                case BYTES:
                    pk.append(ChangeEventTypeConvertor.toByteArray(
                        changeEvent, keyColName, /* requiredField= */ true));
                    break;
                case TIMESTAMP:
                    pk.append(ChangeEventTypeConvertor.toTimestamp(
                        changeEvent, keyColName, /* requiredField= */ true));
                    break;
                case DATE:
                    pk.append(ChangeEventTypeConvertor.toDate(
                        changeEvent, keyColName, /* requiredField= */ true));
                    break;
                // TODO(b/179070999) - Add support for other data types.
                default:
                    throw new IllegalArgumentException(
                        "Column name(" + keyColName + ") has unsupported column type("
                            + keyColType + ")");
            }
        }
        return pk.build();
    } catch (Exception e) {
        throw new ChangeEventConvertorException(e);
    }
}
Also used : Table(com.google.cloud.teleport.v2.templates.spanner.ddl.Table) IndexColumn(com.google.cloud.teleport.v2.templates.spanner.ddl.IndexColumn) Type(com.google.cloud.spanner.Type) Column(com.google.cloud.teleport.v2.templates.spanner.ddl.Column)
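
A hedged usage sketch of the method above, assuming same-package access (the method is package-private). The "Users" table, its "id" column, and the JSON shape are hypothetical; only DatastreamConstants.EVENT_TABLE_NAME_KEY comes from the snippet itself:

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.cloud.teleport.v2.templates.datastream.DatastreamConstants;
import com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl;

// Hypothetical schema: a "Users" table keyed by an INT64 "id" column.
Ddl ddl = Ddl.builder()
    .createTable("Users")
        .column("id").int64().endColumn()
        .primaryKey().asc("id").end()
    .endTable()
    .build();

// A minimal change event. The method lowercases key column names before
// reading them from the event, so the JSON field is lower case here.
ObjectMapper mapper = new ObjectMapper();
ObjectNode changeEvent = mapper.createObjectNode();
changeEvent.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users");
changeEvent.put("id", 42L);

com.google.cloud.spanner.Key key =
    ChangeEventConvertor.changeEventToPrimaryKey(ddl, changeEvent);
// key is equivalent to com.google.cloud.spanner.Key.of(42L).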

Example 3 with Column

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.

From the class ShadowTableCreator, method constructShadowTable:

/*
 * Constructs a shadow table for a data table in the information schema.
 * Note: shadow tables for interleaved data tables are not themselves
 * interleaved in their shadow parent table.
 */
Table constructShadowTable(Ddl informationSchema, String dataTableName) {
    // Create a new shadow table with the given prefix.
    Table.Builder shadowTableBuilder = Table.builder();
    String shadowTableName = shadowTablePrefix + dataTableName;
    shadowTableBuilder.name(shadowTableName);
    // Add key columns from the data table to the shadow table builder.
    Table dataTable = informationSchema.table(dataTableName);
    Set<String> primaryKeyColNames =
        dataTable.primaryKeys().stream()
            .map(k -> k.name())
            .collect(Collectors.toSet());
    List<Column> primaryKeyCols =
        dataTable.columns().stream()
            .filter(col -> primaryKeyColNames.contains(col.name()))
            .collect(Collectors.toList());
    for (Column col : primaryKeyCols) {
        shadowTableBuilder.addColumn(col);
    }
    // Add primary key constraints.
    for (IndexColumn keyColumn : dataTable.primaryKeys()) {
        if (keyColumn.order() == IndexColumn.Order.ASC) {
            shadowTableBuilder.primaryKey().asc(keyColumn.name()).end();
        } else if (keyColumn.order() == IndexColumn.Order.DESC) {
            shadowTableBuilder.primaryKey().desc(keyColumn.name()).end();
        }
    }
    // Add the extra columns used to track ChangeEventSequence information.
    addChangeEventSequenceColumns(shadowTableBuilder);
    return shadowTableBuilder.build();
}
Also used : List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) DatastreamConstants(com.google.cloud.teleport.v2.templates.datastream.DatastreamConstants) Ddl(com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl) IndexColumn(com.google.cloud.teleport.v2.templates.spanner.ddl.IndexColumn) Column(com.google.cloud.teleport.v2.templates.spanner.ddl.Column) Map(java.util.Map) Table(com.google.cloud.teleport.v2.templates.spanner.ddl.Table) Set(java.util.Set) Collectors(java.util.stream.Collectors)
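
A sketch of how constructShadowTable might be invoked. The ShadowTableCreator constructor is not shown on this page, so its arguments (a source type and shadow-table prefix, mirroring the "shadow"/"oracle" values in Example 1) are assumptions:

// Hypothetical construction; treat the constructor signature as assumed.
ShadowTableCreator creator = new ShadowTableCreator("oracle", "shadow_");

// informationSchema is a Ddl read from the target database, as in
// Example 1; "Singers" is an illustrative data table name.
Table shadow = creator.constructShadowTable(informationSchema, "Singers");
// shadow.name() is "shadow_Singers". It carries only the primary-key
// columns of "Singers" plus the change-event sequence columns, and it is
// not interleaved even if "Singers" is.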

Example 4 with Column

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.

From the class BigQueryMetadataLoader, method loadTableMetadata:

/**
 * Populates the {@code table} builder with additional metadata, such as partition names and schema.
 *
 * @param filter optional filter to skip a subset of tables
 * @return {@code true} if the table matches all filters and should be included in the results,
 *     {@code false} if it should be skipped
 */
private boolean loadTableMetadata(BigQueryTable.Builder table, Filter filter) throws InterruptedException {
    TableReadOptions.Builder readOptions = TableReadOptions.newBuilder();
    if (table.getPartitioningColumn() == null) {
        if (filter != null && filter.shouldSkipUnpartitionedTable(table)) {
            return false;
        }
    } else {
        List<BigQueryTablePartition> partitions = loadTablePartitions(table, filter);
        if (filter != null && filter.shouldSkipPartitionedTable(table, partitions)) {
            return false;
        }
        table.setPartitions(partitions);
        LOG.info("Loaded {} partitions for table {}: {}", partitions.size(), table.getTableName(), partitions);
        // Creating a ReadSession without a WHERE clause for a partitioned table that has
        // "require partition filter" param set to true would fail with the error:
        // "Cannot query over table ... without a filter over column(s) ...
        // that can be used for partition elimination".
        // The following is a hack that adds an "is null and is not null" filter over the
        // partitioning column, which shouldn't select any data but should make the query
        // analyzer happy and should be enough to extract the table schema.
        // TODO(an2x): do this only when "require partition filter" = true
        // or load schema differently?
        readOptions.setRowRestriction(
            String.format(
                "%s is null and %s is not null",
                table.getPartitioningColumn(), table.getPartitioningColumn()));
    }
    ReadSession session =
        BigQueryUtils.createReadSession(
            bqsClient,
            DatasetId.of(table.getProject(), table.getDataset()),
            table.getTableName(),
            readOptions.build());
    table.setSchema(new Schema.Parser().parse(session.getAvroSchema().getSchema()));
    LOG.info("Loaded schema for table {}: {}", table.getTableName(), table.getSchema());
    return true;
}
Also used : BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) ReadSession(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession) TableReadOptions(com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions)
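
The workaround described in the comments above is easy to see in isolation: the predicate is always false, so the read session scans no rows, yet it still satisfies BigQuery's "require partition filter" check and returns the table's Avro schema. "event_date" is a hypothetical partitioning column:

import com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions;

// An unsatisfiable restriction over the partitioning column: no row can
// be both null and not null, so no data is read.
String col = "event_date";
TableReadOptions readOptions =
    TableReadOptions.newBuilder()
        .setRowRestriction(String.format("%s is null and %s is not null", col, col))
        .build();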

Example 5 with Column

Use of com.google.cloud.teleport.v2.templates.spanner.ddl.Column in project DataflowTemplates by GoogleCloudPlatform.

From the class DeleteBigQueryDataFnTest, method testTransform_withDeleteSourceDataEnabled_doesntTruncateSpecialPartitions:

/**
 * Test that DeleteBigQueryDataFn doesn't attempt to delete special BigQuery partitions even if
 * {@code deleteSourceData = true}.
 *
 * <p>As per <a
 * href="https://cloud.google.com/bigquery/docs/managing-partitioned-tables#delete_a_partition">
 * this documentation</a>, special partitions "__NULL__" and "__UNPARTITIONED__" cannot be
 * deleted.
 */
@Test
@Category(NeedsRunner.class)
public void testTransform_withDeleteSourceDataEnabled_doesntTruncateSpecialPartitions() {
    Options options = TestPipeline.testingPipelineOptions().as(Options.class);
    options.setDeleteSourceData(true);
    BigQueryTablePartition.Builder builder =
        BigQueryTablePartition.builder()
            .setLastModificationTime(System.currentTimeMillis() * 1000);
    BigQueryTablePartition p1 = builder.setPartitionName("__NULL__").build();
    BigQueryTablePartition p2 = builder.setPartitionName("__UNPARTITIONED__").build();
    BigQueryTablePartition p3 = builder.setPartitionName("NORMAL_PARTITION").build();
    BigQueryTable t1 =
        table.toBuilder()
            .setPartitions(Arrays.asList(p1, p2, p3))
            .setPartitioningColumn("column-name-doesnt-matter")
            .build();
    DeleteBigQueryDataFn fn = new DeleteBigQueryDataFn().withTestBqClientFactory(() -> bqMock);
    testPipeline.apply("CreateInput", Create.of(KV.of(t1, p1), KV.of(t1, p2), KV.of(t1, p3)).withCoder(fnCoder)).apply("TestDeleteBigQueryDataFn", ParDo.of(fn));
    testPipeline.run(options);
    verify(bqMock, times(1)).delete(TableId.of("pr1", "d1", "t1$NORMAL_PARTITION"));
    verifyNoMoreInteractions(bqMock);
}
Also used : Options(com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options) BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) BigQueryTable(com.google.cloud.teleport.v2.values.BigQueryTable) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
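
The single expected delete call relies on BigQuery's partition decorator syntax: appending "$<partition_name>" to a table id addresses exactly one partition. A minimal illustration using the same ids the test mocks:

import com.google.cloud.bigquery.TableId;

// "t1$NORMAL_PARTITION" addresses only that partition of table "t1".
// The special "__NULL__" and "__UNPARTITIONED__" partitions cannot be
// deleted this way, which is why the test expects exactly one call.
TableId partition = TableId.of("pr1", "d1", "t1$NORMAL_PARTITION");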

Aggregations

Ddl (com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl): 4
Table (com.google.cloud.teleport.v2.templates.spanner.ddl.Table): 4
Test (org.junit.Test): 4
Column (com.google.cloud.teleport.v2.templates.spanner.ddl.Column): 2
IndexColumn (com.google.cloud.teleport.v2.templates.spanner.ddl.IndexColumn): 2
Options (com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options): 2
BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 2
BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 2
Set (java.util.Set): 2
Collectors (java.util.stream.Collectors): 2
Category (org.junit.experimental.categories.Category): 2
TableReadOptions (com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions): 1
ReadSession (com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession): 1
Type (com.google.cloud.spanner.Type): 1
IntegrationTest (com.google.cloud.teleport.v2.spanner.IntegrationTest): 1
DatastreamConstants (com.google.cloud.teleport.v2.templates.datastream.DatastreamConstants): 1
List (java.util.List): 1
Map (java.util.Map): 1
PipelineResult (org.apache.beam.sdk.PipelineResult): 1
SpannerConfig (org.apache.beam.sdk.io.gcp.spanner.SpannerConfig): 1