Use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in the DataflowTemplates project by GoogleCloudPlatform.
From class DataplexBigQueryToGcsFilterTest, method test_whenTargetFileExistsWithWriteDispositionOverwrite_filterAcceptsTables.
/**
 * With the OVERWRITE write disposition, neither the table nor its partition is skipped, even
 * though the file list handed to the filter already names an output file for each of them.
 */
@Test
public void test_whenTargetFileExistsWithWriteDispositionOverwrite_filterAcceptsTables() {
  BigQueryTable.Builder tableBuilder = table().setTableName("table1").setPartitioningColumn("p2");
  BigQueryTablePartition tablePartition = partition().setPartitionName("partition1").build();

  // One file name for the table as a whole and one for partition "partition1"
  // (per the test name, these stand for target files that already exist).
  List<String> existingFiles =
      Arrays.asList(
          "table1/output-table1.avro", "table1/p2=partition1/output-table1-partition1.avro");

  // No table-name filter and no modification-time filter: only the write disposition matters.
  options.setWriteDisposition(WriteDispositionOptions.OVERWRITE);
  options.setFileFormat(FileFormatOptions.AVRO);
  options.setExportDataModifiedBeforeDateTime(null);
  options.setTables(null);

  Filter filter = new DataplexBigQueryToGcsFilter(options, existingFiles);

  assertThat(filter.shouldSkipUnpartitionedTable(tableBuilder)).isFalse();
  assertThat(filter.shouldSkipPartition(tableBuilder, tablePartition)).isFalse();
}
Use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in the DataflowTemplates project by GoogleCloudPlatform.
From class DataplexBigQueryToGcsFilterTest, method test_whenTablesSet_filterExcludesTablesByName.
/**
 * When the {@code tables} option lists table names, tables that are not listed are skipped as a
 * whole (both as unpartitioned and as partitioned tables), while listed tables are kept.
 * Individual partitions are never skipped because of the table name.
 */
@Test
public void test_whenTablesSet_filterExcludesTablesByName() {
  BigQueryTable.Builder firstListed = table().setTableName("includedTable1");
  BigQueryTable.Builder secondListed = table().setTableName("includedTable2");
  BigQueryTable.Builder unlisted = table().setTableName("excludedTable");
  BigQueryTablePartition anyPartition = partition().build();
  List<BigQueryTablePartition> singlePartition = Collections.singletonList(anyPartition);

  options.setTables("includedTable1,includedTable2");
  options.setExportDataModifiedBeforeDateTime(null);

  Filter filter = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

  // Unpartitioned tables: filtered by name.
  assertThat(filter.shouldSkipUnpartitionedTable(firstListed)).isFalse();
  assertThat(filter.shouldSkipUnpartitionedTable(secondListed)).isFalse();
  assertThat(filter.shouldSkipUnpartitionedTable(unlisted)).isTrue();

  // Partitioned tables: filtered by name as well.
  assertThat(filter.shouldSkipPartitionedTable(firstListed, singlePartition)).isFalse();
  assertThat(filter.shouldSkipPartitionedTable(secondListed, singlePartition)).isFalse();
  assertThat(filter.shouldSkipPartitionedTable(unlisted, singlePartition)).isTrue();

  // Partitions: the table name plays no role here, so even the unlisted table's partition
  // must NOT be skipped -- name filtering applies to tables as a whole only.
  assertThat(filter.shouldSkipPartition(firstListed, anyPartition)).isFalse();
  assertThat(filter.shouldSkipPartition(secondListed, anyPartition)).isFalse();
  assertThat(filter.shouldSkipPartition(unlisted, anyPartition)).isFalse();
}
Use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in the DataflowTemplates project by GoogleCloudPlatform.
From class DataplexBigQueryToGcsFilterTest, method test_whenBeforeDateIs1Day3HoursDuration_dateParsedCorrectly.
/**
 * Checks that the relative value {@code "-p1dt3h"} of the "modified before" option is treated as
 * "now minus 1 day 3 hours": a table modified after that point is skipped, while a table modified
 * before it is not.
 */
@Test
public void test_whenBeforeDateIs1Day3HoursDuration_dateParsedCorrectly() {
  // current time in the DEFAULT time zone minus 1 day 3 hours, as microseconds since the epoch:
  long micros = Instant.now().minus(Duration.millis(27 * 60 * 60 * 1000)).getMillis() * 1000L;

  // This test reads the wall clock here, before the filter is constructed. Presumably the filter
  // resolves "now" on its own when it parses the duration, so its cut-off can be later than
  // `micros` by however long construction takes. A 100 ms margin made the test flaky on a slow
  // or cold JVM; 10 seconds is still tiny compared to the 27 hour offset being verified.
  long marginMicros = 10L * 1000L * 1000L;

  BigQueryTable.Builder olderTable = table().setLastModificationTime(micros - marginMicros);
  BigQueryTable.Builder newerTable = table().setLastModificationTime(micros + marginMicros);

  options.setTables(null);
  options.setExportDataModifiedBeforeDateTime("-p1dt3h");

  Filter f = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

  assertThat(f.shouldSkipUnpartitionedTable(newerTable)).isTrue();
  assertThat(f.shouldSkipUnpartitionedTable(olderTable)).isFalse();
}
Use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in the DataflowTemplates project by GoogleCloudPlatform.
From class DataplexBigQueryToGcsFilterTest, method test_whenBeforeDateSet_filterExcludesTablesAndPartitions.
/**
 * With the "modified before" option set to {@code 2021-01-01T15:00:00Z}, unpartitioned tables and
 * individual partitions modified after that time are skipped and older ones are kept, while a
 * partitioned table as a whole is never skipped because of its own modification time.
 */
@Test
public void test_whenBeforeDateSet_filterExcludesTablesAndPartitions() {
  // Modification times just before and just after the configured date.
  final long justBefore = TS_MICROS_2021_01_01_15_00_00_UTC - 1000L;
  final long justAfter = TS_MICROS_2021_01_01_15_00_00_UTC + 1000L;

  BigQueryTable.Builder staleTable = table().setLastModificationTime(justBefore);
  BigQueryTable.Builder freshTable = table().setLastModificationTime(justAfter);
  BigQueryTablePartition stalePartition =
      partition().setPartitionName("p1").setLastModificationTime(justBefore).build();
  BigQueryTablePartition freshPartition =
      partition().setPartitionName("p2").setLastModificationTime(justAfter).build();
  List<BigQueryTablePartition> bothPartitions = Arrays.asList(stalePartition, freshPartition);

  options.setTables(null);
  options.setExportDataModifiedBeforeDateTime("2021-01-01T15:00:00Z");

  Filter filter = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

  // Unpartitioned tables are judged by their own modification time.
  assertThat(filter.shouldSkipUnpartitionedTable(freshTable)).isTrue();
  assertThat(filter.shouldSkipUnpartitionedTable(staleTable)).isFalse();

  // For a partitioned table the modification time is checked per partition, so the table itself
  // must NOT be skipped regardless of its own modification time: expect false for both tables.
  assertThat(filter.shouldSkipPartitionedTable(freshTable, bothPartitions)).isFalse();
  assertThat(filter.shouldSkipPartitionedTable(staleTable, bothPartitions)).isFalse();

  // Partitions are judged by the partition's modification time.
  assertThat(filter.shouldSkipPartition(staleTable, freshPartition)).isTrue();
  assertThat(filter.shouldSkipPartition(staleTable, stalePartition)).isFalse();
}
Use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in the DataflowTemplates project by GoogleCloudPlatform.
From class DataplexBigQueryToGcsFilterTest, method test_whenTargetFileExistsWithWriteDispositionSKIP_filterExcludesTables.
/**
 * With the SKIP write disposition, both the table and its partition are skipped when the file
 * list handed to the filter already names an output file for each of them.
 */
@Test
public void test_whenTargetFileExistsWithWriteDispositionSKIP_filterExcludesTables() {
  BigQueryTable.Builder tableBuilder = table().setTableName("table1").setPartitioningColumn("p2");
  BigQueryTablePartition tablePartition = partition().setPartitionName("partition1").build();

  // One file name for the table as a whole and one for partition "partition1"
  // (per the test name, these stand for target files that already exist).
  List<String> existingFiles =
      Arrays.asList(
          "table1/output-table1.avro", "table1/p2=partition1/output-table1-partition1.avro");

  // No table-name filter and no modification-time filter: only the write disposition matters.
  options.setWriteDisposition(WriteDispositionOptions.SKIP);
  options.setFileFormat(FileFormatOptions.AVRO);
  options.setExportDataModifiedBeforeDateTime(null);
  options.setTables(null);

  Filter filter = new DataplexBigQueryToGcsFilter(options, existingFiles);

  assertThat(filter.shouldSkipUnpartitionedTable(tableBuilder)).isTrue();
  assertThat(filter.shouldSkipPartition(tableBuilder, tablePartition)).isTrue();
}
Aggregations