Search in sources:

Example 11 with BigQueryTablePartition

use of com.google.cloud.teleport.v2.values.BigQueryTablePartition in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsFilterTest method test_whenTargetFileExistsWithWriteDispositionFail_filterAcceptsTables.

/**
 * With write disposition FAIL and the target files already listed, the filter is expected to throw
 * {@code WriteDispositionException} both for the table and for its partition.
 *
 * <p>The two calls are verified independently. Using {@code @Test(expected = ...)} for this would
 * end the test at the first throw, so the partition check would never execute and could silently
 * regress.
 *
 * <p>NOTE(review): the method name says "filterAcceptsTables" although the test expects a failure;
 * the name is kept unchanged so existing references to the test remain valid.
 */
@Test
public void test_whenTargetFileExistsWithWriteDispositionFail_filterAcceptsTables() {
    BigQueryTable.Builder t = table().setTableName("table1").setPartitioningColumn("p2");
    BigQueryTablePartition p = partition().setPartitionName("partition1").build();
    options.setTables(null);
    options.setExportDataModifiedBeforeDateTime(null);
    options.setFileFormat(FileFormatOptions.AVRO);
    options.setWriteDisposition(WriteDispositionOptions.FAIL);
    Filter f = new com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter(options, Arrays.asList("table1/output-table1.avro", "table1/p2=partition1/output-table1-partition1.avro"));

    // Table-level check: must throw because "table1/output-table1.avro" is in the file list.
    try {
        f.shouldSkipUnpartitionedTable(t);
        throw new AssertionError("Expected WriteDispositionException from shouldSkipUnpartitionedTable");
    } catch (WriteDispositionException expected) {
        // Expected: write disposition FAIL rejects an already existing target file.
    }

    // Partition-level check: must throw as well, independently of the table-level outcome.
    try {
        f.shouldSkipPartition(t, p);
        throw new AssertionError("Expected WriteDispositionException from shouldSkipPartition");
    } catch (WriteDispositionException expected) {
        // Expected: write disposition FAIL rejects an already existing target file.
    }
}
Also used : BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) Filter(com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) BigQueryTable(com.google.cloud.teleport.v2.values.BigQueryTable) Test(org.junit.Test)

Example 12 with BigQueryTablePartition

use of com.google.cloud.teleport.v2.values.BigQueryTablePartition in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsFilterTest method test_whenTargetFileExistsWithWriteDispositionOverwrite_filterAcceptsTables.

/**
 * With write disposition OVERWRITE, neither the table nor its partition may be skipped even though
 * the file paths handed to the filter match both of them.
 */
@Test
public void test_whenTargetFileExistsWithWriteDispositionOverwrite_filterAcceptsTables() {
    BigQueryTable.Builder tableBuilder =
        table().setTableName("table1").setPartitioningColumn("p2");
    BigQueryTablePartition tablePartition =
        partition().setPartitionName("partition1").build();

    // Configure the export: no table allow-list, no date cut-off, AVRO output, overwrite mode.
    options.setTables(null);
    options.setExportDataModifiedBeforeDateTime(null);
    options.setFileFormat(FileFormatOptions.AVRO);
    options.setWriteDisposition(WriteDispositionOptions.OVERWRITE);

    String tableFile = "table1/output-table1.avro";
    String partitionFile = "table1/p2=partition1/output-table1-partition1.avro";
    Filter filter = new DataplexBigQueryToGcsFilter(options, Arrays.asList(tableFile, partitionFile));

    boolean skipTable = filter.shouldSkipUnpartitionedTable(tableBuilder);
    assertThat(skipTable).isFalse();
    boolean skipPartition = filter.shouldSkipPartition(tableBuilder, tablePartition);
    assertThat(skipPartition).isFalse();
}
Also used : BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) Filter(com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) BigQueryTable(com.google.cloud.teleport.v2.values.BigQueryTable) Test(org.junit.Test)

Example 13 with BigQueryTablePartition

use of com.google.cloud.teleport.v2.values.BigQueryTablePartition in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsFilterTest method test_whenBeforeDateSet_filterExcludesTablesAndPartitions.

/**
 * With a "modified before" cut-off of 2021-01-01T15:00:00Z, unpartitioned tables and individual
 * partitions modified after the cut-off are skipped, while partitioned tables themselves are
 * never skipped on account of their own modification time.
 */
@Test
public void test_whenBeforeDateSet_filterExcludesTablesAndPartitions() {
    final long beforeCutoff = TS_MICROS_2021_01_01_15_00_00_UTC - 1000L;
    final long afterCutoff = TS_MICROS_2021_01_01_15_00_00_UTC + 1000L;

    BigQueryTable.Builder staleTable = table().setLastModificationTime(beforeCutoff);
    BigQueryTable.Builder freshTable = table().setLastModificationTime(afterCutoff);
    BigQueryTablePartition stalePartition =
        partition().setPartitionName("p1").setLastModificationTime(beforeCutoff).build();
    BigQueryTablePartition freshPartition =
        partition().setPartitionName("p2").setLastModificationTime(afterCutoff).build();
    List<BigQueryTablePartition> bothPartitions = Arrays.asList(stalePartition, freshPartition);

    options.setTables(null);
    options.setExportDataModifiedBeforeDateTime("2021-01-01T15:00:00Z");

    Filter filter = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

    // Unpartitioned tables are judged by their own modification time.
    assertThat(filter.shouldSkipUnpartitionedTable(freshTable)).isTrue();
    assertThat(filter.shouldSkipUnpartitionedTable(staleTable)).isFalse();

    // Partitioned tables are filtered partition by partition, so the table-level decision must be
    // "do not skip" regardless of when the table itself was last modified.
    assertThat(filter.shouldSkipPartitionedTable(freshTable, bothPartitions)).isFalse();
    assertThat(filter.shouldSkipPartitionedTable(staleTable, bothPartitions)).isFalse();

    // Individual partitions are judged by the partition's modification time.
    assertThat(filter.shouldSkipPartition(staleTable, freshPartition)).isTrue();
    assertThat(filter.shouldSkipPartition(staleTable, stalePartition)).isFalse();
}
Also used : BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) Filter(com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) BigQueryTable(com.google.cloud.teleport.v2.values.BigQueryTable) Test(org.junit.Test)

Example 14 with BigQueryTablePartition

use of com.google.cloud.teleport.v2.values.BigQueryTablePartition in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsFilterTest method test_whenTargetFileExistsWithWriteDispositionSKIP_filterExcludesTables.

/**
 * With write disposition SKIP, both the table and its partition must be skipped when the file
 * paths handed to the filter match them.
 */
@Test
public void test_whenTargetFileExistsWithWriteDispositionSKIP_filterExcludesTables() {
    BigQueryTable.Builder tableBuilder =
        table().setTableName("table1").setPartitioningColumn("p2");
    BigQueryTablePartition tablePartition =
        partition().setPartitionName("partition1").build();

    // Configure the export: no table allow-list, no date cut-off, AVRO output, skip mode.
    options.setTables(null);
    options.setExportDataModifiedBeforeDateTime(null);
    options.setFileFormat(FileFormatOptions.AVRO);
    options.setWriteDisposition(WriteDispositionOptions.SKIP);

    String tableFile = "table1/output-table1.avro";
    String partitionFile = "table1/p2=partition1/output-table1-partition1.avro";
    Filter filter = new DataplexBigQueryToGcsFilter(options, Arrays.asList(tableFile, partitionFile));

    boolean skipTable = filter.shouldSkipUnpartitionedTable(tableBuilder);
    assertThat(skipTable).isTrue();
    boolean skipPartition = filter.shouldSkipPartition(tableBuilder, tablePartition);
    assertThat(skipPartition).isTrue();
}
Also used : BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) Filter(com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) BigQueryTable(com.google.cloud.teleport.v2.values.BigQueryTable) Test(org.junit.Test)

Example 15 with BigQueryTablePartition

use of com.google.cloud.teleport.v2.values.BigQueryTablePartition in project DataflowTemplates by GoogleCloudPlatform.

the class BigQueryMetadataLoader method loadTablePartitions.

/**
 * Queries {@code INFORMATION_SCHEMA.PARTITIONS} of the table's dataset and returns the table's
 * partitions, leaving out those rejected by {@code filter}.
 *
 * @param table builder providing the project, dataset and table name to look up
 * @param filter optional partition filter; when {@code null} every partition is kept
 * @return list of the partitions that were not skipped
 * @throws InterruptedException if thrown by {@code bqClient.query}
 */
private List<BigQueryTablePartition> loadTablePartitions(BigQueryTable.Builder table, Filter filter) throws InterruptedException {
    // Project and dataset are formatted into the query text; the table name is passed as a
    // named query parameter.
    String sql =
        String.format(
            "select partition_id, last_modified_time\n"
                + "from `%s.%s.INFORMATION_SCHEMA.PARTITIONS`\n"
                + "where table_name = @table_name",
            table.getProject(),
            table.getDataset());
    QueryJobConfiguration queryConfig =
        QueryJobConfiguration.newBuilder(sql)
            .addNamedParameter("table_name", QueryParameterValue.string(table.getTableName()))
            .build();
    TableResult rows = bqClient.query(queryConfig);

    List<BigQueryTablePartition> accepted = new ArrayList<>();
    // TODO(an2x): Check we didn't get duplicate partition names.
    rows.iterateAll()
        .forEach(
            row -> {
                // Column 0 is partition_id, column 1 is last_modified_time (see the select list).
                BigQueryTablePartition candidate =
                    BigQueryTablePartition.builder()
                        .setPartitionName(row.get(0).getStringValue())
                        .setLastModificationTime(row.get(1).getTimestampValue())
                        .build();
                if (filter != null && filter.shouldSkipPartition(table, candidate)) {
                    return;
                }
                accepted.add(candidate);
            });
    return accepted;
}
Also used : BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) TableResult(com.google.cloud.bigquery.TableResult) ArrayList(java.util.ArrayList)

Aggregations

BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 13; BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 11; Test (org.junit.Test): 9; Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter): 6; Options (com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options): 3; ArrayList (java.util.ArrayList): 3; Category (org.junit.experimental.categories.Category): 3; PCollection (org.apache.beam.sdk.values.PCollection): 2; Table (com.google.api.services.bigquery.model.Table): 1; TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 1; TableRow (com.google.api.services.bigquery.model.TableRow): 1; TableSchema (com.google.api.services.bigquery.model.TableSchema): 1; TimePartitioning (com.google.api.services.bigquery.model.TimePartitioning): 1; TableId (com.google.cloud.bigquery.TableId): 1; TableResult (com.google.cloud.bigquery.TableResult): 1; TableReadOptions (com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions): 1; ReadSession (com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession): 1; DataplexBigQueryToGcsOptions (com.google.cloud.teleport.v2.options.DataplexBigQueryToGcsOptions): 1; BigQueryTableToGcsTransform (com.google.cloud.teleport.v2.transforms.BigQueryTableToGcsTransform): 1; DeleteBigQueryDataFn (com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn): 1