Example 6 with BigQueryTable

Use of com.google.cloud.teleport.v2.values.BigQueryTable in the DataflowTemplates project by GoogleCloudPlatform.

The class DeleteBigQueryDataFnTest, method testTransform_withDeleteSourceDataDisabled_doesntTruncateData:

@Test
@Category(NeedsRunner.class)
public void testTransform_withDeleteSourceDataDisabled_doesntTruncateData() {
    Options options = TestPipeline.testingPipelineOptions().as(Options.class);
    options.setDeleteSourceData(false);
    BigQueryTable partitionedTable =
        table.toBuilder()
            .setPartitions(Collections.singletonList(partition))
            .setPartitioningColumn("column-name-doesnt-matter")
            .build();
    DeleteBigQueryDataFn fn = new DeleteBigQueryDataFn().withTestBqClientFactory(() -> bqMock);
    PCollection<Void> actual =
        testPipeline
            .apply(
                "CreateInput",
                Create.of(
                        KV.of(partitionedTable, partition),
                        KV.of(table, (BigQueryTablePartition) null))
                    .withCoder(fnCoder))
            .apply("TestDeleteBigQueryDataFn", ParDo.of(fn));
    PAssert.that(actual).empty();
    testPipeline.run(options);
    verifyNoMoreInteractions(bqMock);
}
Also used: Options (com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options), BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable), Category (org.junit.experimental.categories.Category), Test (org.junit.Test)
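
For contrast, here is a minimal sketch of the same DoFn wired with source deletion enabled. It is not project code: it reuses the fixtures from the test above (table, fnCoder, bqMock, testPipeline) as assumptions, and the test method name is hypothetical.

@Test
@Category(NeedsRunner.class)
public void testTransform_withDeleteSourceDataEnabled_sketch() {
    // Minimal sketch, assuming the same fixtures (table, fnCoder, bqMock, testPipeline)
    // as the test above; only the deleteSourceData flag is flipped to true.
    Options options = TestPipeline.testingPipelineOptions().as(Options.class);
    options.setDeleteSourceData(true);
    DeleteBigQueryDataFn fn = new DeleteBigQueryDataFn().withTestBqClientFactory(() -> bqMock);
    testPipeline
        .apply(
            "CreateInput",
            Create.of(KV.of(table, (BigQueryTablePartition) null)).withCoder(fnCoder))
        .apply("TestDeleteBigQueryDataFn", ParDo.of(fn));
    testPipeline.run(options);
    // With deletion enabled, the expected calls on bqMock would be verified here instead of
    // verifyNoMoreInteractions(bqMock); the exact client calls depend on
    // DeleteBigQueryDataFn's implementation, which is not shown in this excerpt.
}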

Example 7 with BigQueryTable

Use of com.google.cloud.teleport.v2.values.BigQueryTable in the DataflowTemplates project by GoogleCloudPlatform.

The class BigQueryMetadataLoader, method loadDatasetMetadata:

/**
 * Loads metadata for all tables in the dataset {@code datasetId}, returning only those accepted by
 * the {@code filter}.
 *
 * @param filter if {@code null}, will include all tables and partitions
 */
public List<BigQueryTable> loadDatasetMetadata(DatasetId datasetId, @Nullable Filter filter) throws InterruptedException, ExecutionException {
    String tableSql =
        String.format(
            "select\n"
                + "    table_id,\n"
                + "    timestamp_millis(last_modified_time) as last_modified_time,\n"
                + "    (select column_name from `%s.%s.INFORMATION_SCHEMA.COLUMNS` c\n"
                + "      where c.table_catalog = t.project_id\n"
                + "        and c.table_schema = t.dataset_id\n"
                + "        and c.table_name = t.table_id\n"
                + "        and c.is_partitioning_column = 'YES') as partitioning_column,\n"
                + "  from `%s.%s.__TABLES__` t\n"
                // Tables only (1), not views (2), or external tables (3).
                + " where type = 1",
            datasetId.getProject(),
            datasetId.getDataset(),
            datasetId.getProject(),
            datasetId.getDataset());
    TableResult tableRows = bqClient.query(QueryJobConfiguration.newBuilder(tableSql).build());
    List<Callable<BigQueryTable>> tableQueries = new ArrayList<>();
    tableRows.iterateAll().forEach(row -> tableQueries.add(() -> {
        BigQueryTable.Builder table =
            BigQueryTable.builder()
                .setProject(datasetId.getProject())
                .setDataset(datasetId.getDataset())
                .setTableName(row.get(0).getStringValue())
                .setLastModificationTime(row.get(1).getTimestampValue())
                .setPartitioningColumn(!row.get(2).isNull() ? row.get(2).getStringValue() : null);
        try {
            if (!loadTableMetadata(table, filter)) {
                return null;
            }
        } catch (RuntimeException e) {
            throw new RuntimeException("Error loading table " + table.getTableName() + " metadata.", e);
        }
        return table.build();
    }));
    ExecutorService executor = Executors.newFixedThreadPool(maxParallelRequests);
    List<Future<BigQueryTable>> tableFutures = executor.invokeAll(tableQueries);
    executor.shutdown();
    List<BigQueryTable> tables = new ArrayList<>(tableFutures.size());
    for (Future<BigQueryTable> ft : tableFutures) {
        BigQueryTable t = ft.get();
        if (t != null) {
            tables.add(t);
        }
    }
    return tables;
}
Also used: ArrayList (java.util.ArrayList), Callable (java.util.concurrent.Callable), TableResult (com.google.cloud.bigquery.TableResult), BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable), ExecutorService (java.util.concurrent.ExecutorService), Future (java.util.concurrent.Future)
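
A minimal usage sketch for this method follows. The helper name and the loader parameter are assumptions for illustration; the excerpt above does not show how BigQueryMetadataLoader is constructed, and per the Javadoc a null filter includes all tables and partitions.

// Minimal sketch, not project code: `loader` is assumed to be an already-constructed
// BigQueryMetadataLoader; its constructor is not shown in this excerpt.
List<BigQueryTable> listAllTables(BigQueryMetadataLoader loader, DatasetId datasetId)
    throws InterruptedException, ExecutionException {
    // Passing a null filter includes all tables and partitions (see the Javadoc above).
    return loader.loadDatasetMetadata(datasetId, null);
}

A caller could then pass, for example, DatasetId.of("my-project", "my_dataset"); both identifiers are placeholders, assuming DatasetId here is com.google.cloud.bigquery.DatasetId as in the method signature above.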

Aggregations (number of examples using each class):

BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 7
BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 4
Options (com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options): 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2
ArrayList (java.util.ArrayList): 2
Test (org.junit.Test): 2
Category (org.junit.experimental.categories.Category): 2
Table (com.google.api.services.bigquery.model.Table): 1
TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 1
TableRow (com.google.api.services.bigquery.model.TableRow): 1
TableSchema (com.google.api.services.bigquery.model.TableSchema): 1
TimePartitioning (com.google.api.services.bigquery.model.TimePartitioning): 1
TableId (com.google.cloud.bigquery.TableId): 1
TableResult (com.google.cloud.bigquery.TableResult): 1
DataplexBigQueryToGcsOptions (com.google.cloud.teleport.v2.options.DataplexBigQueryToGcsOptions): 1
BigQueryTableToGcsTransform (com.google.cloud.teleport.v2.transforms.BigQueryTableToGcsTransform): 1
DeleteBigQueryDataFn (com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn): 1
NoopTransform (com.google.cloud.teleport.v2.transforms.NoopTransform): 1
UpdateDataplexBigQueryToGcsExportMetadataTransform (com.google.cloud.teleport.v2.transforms.UpdateDataplexBigQueryToGcsExportMetadataTransform): 1
BigQueryMetadataLoader (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader): 1