Search in sources :

Example 1 with TrackedSpannerColumn

use of com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn in project DataflowTemplates by GoogleCloudPlatform.

the class SpannerUtils method getSpannerColumnsByTableName.

/**
 * Query INFORMATION_SCHEMA.COLUMNS to construct {@link SpannerColumn} for each Spanner column
 * tracked by change stream.
 */
private Map<String, List<TrackedSpannerColumn>> getSpannerColumnsByTableName(Set<String> spannerTableNames, Map<String, Set<String>> keyColumnNameByTableName, Map<String, Set<String>> spannerColumnNamesExplicitlyTrackedByChangeStreamByTableName) {
    Map<String, List<TrackedSpannerColumn>> result = new HashMap<>();
    StringBuilder sqlStringBuilder = new StringBuilder("SELECT TABLE_NAME, COLUMN_NAME, ORDINAL_POSITION, SPANNER_TYPE " + "FROM INFORMATION_SCHEMA.COLUMNS");
    // Skip the columns of the tables that are not tracked by change stream.
    if (!spannerTableNames.isEmpty()) {
        sqlStringBuilder.append(" WHERE TABLE_NAME IN UNNEST (@tableNames)");
    }
    Statement.Builder statementBuilder = Statement.newBuilder(sqlStringBuilder.toString());
    if (!spannerTableNames.isEmpty()) {
        statementBuilder.bind("tableNames").to(Value.stringArray(new ArrayList<>(spannerTableNames)));
    }
    try (ResultSet columnsResultSet = databaseClient.singleUse().executeQuery(statementBuilder.build())) {
        while (columnsResultSet.next()) {
            String tableName = columnsResultSet.getString(INFORMATION_SCHEMA_TABLE_NAME);
            String columnName = columnsResultSet.getString(INFORMATION_SCHEMA_COLUMN_NAME);
            // tracked. Primary key columns are always tracked.
            if (spannerColumnNamesExplicitlyTrackedByChangeStreamByTableName.containsKey(tableName) && !spannerColumnNamesExplicitlyTrackedByChangeStreamByTableName.get(tableName).contains(columnName) && (!keyColumnNameByTableName.containsKey(tableName) || !keyColumnNameByTableName.get(tableName).contains(columnName))) {
                continue;
            }
            int ordinalPosition = (int) columnsResultSet.getLong(INFORMATION_SCHEMA_ORDINAL_POSITION);
            String spannerType = columnsResultSet.getString(INFORMATION_SCHEMA_SPANNER_TYPE);
            result.putIfAbsent(tableName, new ArrayList<>());
            TrackedSpannerColumn spannerColumn = TrackedSpannerColumn.create(columnName, informationSchemaTypeToSpannerType(spannerType), ordinalPosition);
            result.get(tableName).add(spannerColumn);
        }
    }
    return result;
}
Also used : TrackedSpannerColumn(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn) HashMap(java.util.HashMap) Statement(com.google.cloud.spanner.Statement) ArrayList(java.util.ArrayList) ResultSet(com.google.cloud.spanner.ResultSet) ArrayList(java.util.ArrayList) List(java.util.List)

Example 2 with TrackedSpannerColumn

use of com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn in project DataflowTemplates by GoogleCloudPlatform.

the class SchemaUtilsTest method testChangeStreamTrackAll.

@Test
public void testChangeStreamTrackAll() {
    mockInformationSchemaChangeStreamsQuery(true);
    mockInformationSchemaTablesQuery();
    mockInformationSchemaColumnsQuery();
    mockInformationSchemaKeyColumnUsageQuery();
    String sql = "SELECT TABLE_NAME, COLUMN_NAME FROM INFORMATION_SCHEMA.CHANGE_STREAM_COLUMNS " + "WHERE CHANGE_STREAM_NAME = @changeStreamName";
    when(mockReadContext.executeQuery(Statement.newBuilder(sql).bind("changeStreamName").to(changeStreamName).build())).thenReturn(ResultSets.forRows(Type.struct(Type.StructField.of("TABLE_NAME", Type.string()), Type.StructField.of("COLUMN_NAME", Type.string())), Collections.emptyList()));
    Map<String, TrackedSpannerTable> actualSpannerTableByName = new SpannerUtils(mockDatabaseClient, changeStreamName).getSpannerTableByName();
    List<TrackedSpannerColumn> singersPkColumns = ImmutableList.of(TrackedSpannerColumn.create("SingerId", Type.int64(), 1));
    List<TrackedSpannerColumn> singersNonPkColumns = ImmutableList.of(TrackedSpannerColumn.create("FirstName", Type.string(), 2), TrackedSpannerColumn.create("LastName", Type.string(), 3));
    Map<String, TrackedSpannerTable> expectedSpannerTableByName = new HashMap<>();
    expectedSpannerTableByName.put("Singers", new TrackedSpannerTable("Singers", singersPkColumns, singersNonPkColumns));
    assertThat(actualSpannerTableByName).isEqualTo(expectedSpannerTableByName);
}
Also used : TrackedSpannerTable(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerTable) TrackedSpannerColumn(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn) SpannerUtils(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.schemautils.SpannerUtils) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 3 with TrackedSpannerColumn

use of com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn in project DataflowTemplates by GoogleCloudPlatform.

the class SchemaUtilsTest method testChangeStreamTrackOneColumn.

@Test
public void testChangeStreamTrackOneColumn() {
    mockInformationSchemaChangeStreamsQuery(false);
    mockInformationSchemaChangeStreamTablesQuery();
    mockInformationSchemaColumnsQuery();
    mockInformationSchemaKeyColumnUsageQuery();
    String sql = "SELECT TABLE_NAME, COLUMN_NAME FROM INFORMATION_SCHEMA.CHANGE_STREAM_COLUMNS " + "WHERE CHANGE_STREAM_NAME = @changeStreamName";
    // spotless:off
    when(mockReadContext.executeQuery(Statement.newBuilder(sql).bind("changeStreamName").to(changeStreamName).build())).thenReturn(ResultSets.forRows(Type.struct(Type.StructField.of("TABLE_NAME", Type.string()), Type.StructField.of("COLUMN_NAME", Type.string())), ImmutableList.of(Struct.newBuilder().set("TABLE_NAME").to(Value.string("Singers")).set("COLUMN_NAME").to(Value.string("SingerId")).build(), Struct.newBuilder().set("TABLE_NAME").to(Value.string("Singers")).set("COLUMN_NAME").to(Value.string("FirstName")).build())));
    // spotless:on
    Map<String, TrackedSpannerTable> actualSpannerTableByName = new SpannerUtils(mockDatabaseClient, changeStreamName).getSpannerTableByName();
    List<TrackedSpannerColumn> singersPkColumns = Collections.singletonList(TrackedSpannerColumn.create("SingerId", Type.int64(), 1));
    List<TrackedSpannerColumn> singersNonPkColumns = Collections.singletonList(TrackedSpannerColumn.create("FirstName", Type.string(), 2));
    Map<String, TrackedSpannerTable> expectedSpannerTableByName = new HashMap<>();
    expectedSpannerTableByName.put("Singers", new TrackedSpannerTable("Singers", singersPkColumns, singersNonPkColumns));
    assertThat(actualSpannerTableByName).isEqualTo(expectedSpannerTableByName);
}
Also used : TrackedSpannerTable(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerTable) TrackedSpannerColumn(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn) SpannerUtils(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.schemautils.SpannerUtils) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 4 with TrackedSpannerColumn

use of com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn in project DataflowTemplates by GoogleCloudPlatform.

the class SchemaUtilsTest method testChangeStreamTrackOneTable.

@Test
public void testChangeStreamTrackOneTable() {
    mockInformationSchemaChangeStreamsQuery(false);
    mockInformationSchemaChangeStreamTablesQuery();
    mockInformationSchemaColumnsQuery();
    mockInformationSchemaKeyColumnUsageQuery();
    String sql = "SELECT TABLE_NAME, COLUMN_NAME FROM INFORMATION_SCHEMA.CHANGE_STREAM_COLUMNS " + "WHERE CHANGE_STREAM_NAME = @changeStreamName";
    when(mockReadContext.executeQuery(Statement.newBuilder(sql).bind("changeStreamName").to(changeStreamName).build())).thenReturn(ResultSets.forRows(Type.struct(Type.StructField.of("TABLE_NAME", Type.string()), Type.StructField.of("COLUMN_NAME", Type.string())), Collections.emptyList()));
    Map<String, TrackedSpannerTable> actualSpannerTableByName = new SpannerUtils(mockDatabaseClient, changeStreamName).getSpannerTableByName();
    List<TrackedSpannerColumn> singersPkColumns = ImmutableList.of(TrackedSpannerColumn.create("SingerId", Type.int64(), 1));
    List<TrackedSpannerColumn> singersNonPkColumns = ImmutableList.of(TrackedSpannerColumn.create("FirstName", Type.string(), 2), TrackedSpannerColumn.create("LastName", Type.string(), 3));
    Map<String, TrackedSpannerTable> expectedSpannerTableByName = new HashMap<>();
    expectedSpannerTableByName.put("Singers", new TrackedSpannerTable("Singers", singersPkColumns, singersNonPkColumns));
    assertThat(actualSpannerTableByName).isEqualTo(expectedSpannerTableByName);
}
Also used : TrackedSpannerTable(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerTable) TrackedSpannerColumn(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn) SpannerUtils(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.schemautils.SpannerUtils) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 5 with TrackedSpannerColumn

use of com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn in project DataflowTemplates by GoogleCloudPlatform.

the class SchemaUtilsTest method testAppendToSpannerKey.

@Test
public void testAppendToSpannerKey() {
    JSONObject keysJsonObject = new JSONObject();
    keysJsonObject.put(BOOLEAN_COL, BOOLEAN_VAL.getBool());
    keysJsonObject.put(BYTES_COL, BYTES_VAL.getBytes().toBase64());
    keysJsonObject.put(DATE_COL, DATE_VAL.getDate().toString());
    keysJsonObject.put(FLOAT64_COL, FLOAT64_VAL.getFloat64());
    keysJsonObject.put(INT64_COL, INT64_VAL.getInt64());
    keysJsonObject.put(NUMERIC_COL, NUMERIC_VAL.getNumeric());
    keysJsonObject.put(STRING_COL, STRING_VAL.getString());
    keysJsonObject.put(TIMESTAMP_COL, TIMESTAMP_VAL.toString());
    Key.Builder keyBuilder = com.google.cloud.spanner.Key.newBuilder();
    for (TrackedSpannerColumn spannerColumn : spannerColumnsOfAllTypes) {
        String typeName = spannerColumn.getType().getCode().name();
        // Array and JSON are not valid Spanner key type.
        if (typeName.equals("ARRAY") || typeName.equals("JSON")) {
            continue;
        }
        SpannerUtils.appendToSpannerKey(spannerColumn, keysJsonObject, keyBuilder);
    }
    assertThat(keyBuilder.build().toString()).isEqualTo("[true,NDU2,2022-03-11,2.5,10,10,abc,2022-03-07T01:50:53.972000000Z]");
}
Also used : TrackedSpannerColumn(com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn) JSONObject(org.json.JSONObject) Key(com.google.cloud.spanner.Key) Test(org.junit.Test)

Aggregations

TrackedSpannerColumn (com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerColumn)9 ArrayList (java.util.ArrayList)5 HashMap (java.util.HashMap)5 Test (org.junit.Test)5 ResultSet (com.google.cloud.spanner.ResultSet)4 TrackedSpannerTable (com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.model.TrackedSpannerTable)4 SpannerUtils (com.google.cloud.teleport.v2.templates.spannerchangestreamstobigquery.schemautils.SpannerUtils)3 List (java.util.List)3 JSONObject (org.json.JSONObject)3 TableRow (com.google.api.services.bigquery.model.TableRow)2 Type (com.google.cloud.spanner.Type)2 JSONArray (org.json.JSONArray)2 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)1 Field (com.google.cloud.bigquery.Field)1 StandardSQLTypeName (com.google.cloud.bigquery.StandardSQLTypeName)1 Key (com.google.cloud.spanner.Key)1 Statement (com.google.cloud.spanner.Statement)1 HashSet (java.util.HashSet)1 Objects (java.util.Objects)1 Set (java.util.Set)1