Use of com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter in the DataflowTemplates project by GoogleCloudPlatform.
From the class DataplexBigQueryToGcsFilterTest, method test_whenPartitionedTableHasNoPartitions_filterExcludesTable:
@Test
public void test_whenPartitionedTableHasNoPartitions_filterExcludesTable() {
  // Disable both table-name and modification-time filtering.
  options.setTables(null);
  options.setExportDataModifiedBeforeDateTime(null);

  Filter filter = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

  // A partitioned table whose partition list is empty must be skipped entirely.
  assertThat(filter.shouldSkipPartitionedTable(table(), Collections.emptyList())).isTrue();
}
Use of com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter in the DataflowTemplates project by GoogleCloudPlatform.
From the class DataplexBigQueryToGcsFilterTest, method test_whenNoFilterOptions_filterAcceptsAllTablesAndPartitions:
@Test
public void test_whenNoFilterOptions_filterAcceptsAllTablesAndPartitions() {
  BigQueryTable.Builder tableBuilder = table();
  BigQueryTablePartition tablePartition = partition().build();

  // With no filtering options configured, nothing should be skipped.
  options.setTables(null);
  options.setExportDataModifiedBeforeDateTime(null);

  Filter filter = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

  assertThat(filter.shouldSkipUnpartitionedTable(tableBuilder)).isFalse();
  assertThat(filter.shouldSkipPartitionedTable(tableBuilder, Collections.singletonList(tablePartition)))
      .isFalse();
  assertThat(filter.shouldSkipPartition(tableBuilder, tablePartition)).isFalse();
}
Use of com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter in the DataflowTemplates project by GoogleCloudPlatform.
From the class DataplexJdbcIngestion, method applyPartitionedWriteDispositionFilter:
/**
 * Drops records that would be written into target files that already exist, honoring the
 * configured write disposition, and logs each pre-existing target file once.
 *
 * @param genericRecords records read from the JDBC source
 * @param options pipeline options (partition column, partitioning scheme, file format,
 *     write disposition)
 * @param targetRootPath root output path the records will be written under
 * @param avroSchema Avro schema of the incoming records
 * @param existingFiles paths of files already present in the output asset bucket
 * @return the records that passed the write-disposition filter
 */
private static PCollection<GenericRecord> applyPartitionedWriteDispositionFilter(
    PCollection<GenericRecord> genericRecords,
    DataplexJdbcIngestionOptions options,
    String targetRootPath,
    org.apache.avro.Schema avroSchema,
    List<String> existingFiles) {
  PCollectionTuple filteredRecordsTuple =
      genericRecords.apply(
          "Filter pre-existing records",
          new DataplexJdbcIngestionFilter(
              targetRootPath,
              Schemas.serialize(avroSchema),
              // NOTE(review): "getParitionColumn" is a typo in the options interface declared
              // elsewhere; it cannot be renamed here without changing that interface.
              options.getParitionColumn(),
              options.getPartitioningScheme(),
              options.getFileFormat().getFileSuffix(),
              options.getWriteDisposition(),
              existingFiles,
              FILTERED_RECORDS_OUT,
              EXISTING_TARGET_FILES_OUT));
  // If no target file pre-exists, this PCollection will be empty and nothing is logged.
  filteredRecordsTuple
      .get(EXISTING_TARGET_FILES_OUT)
      .apply(Distinct.create())
      .apply(
          "Log existing target file names",
          ParDo.of(
              new DoFn<String, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  String filename = c.element();
                  // Fixed message: the old concatenation produced a double space
                  // ("Performing  {}").
                  LOG.info(
                      "Target File {} already exists in the output asset bucket {}. Performing"
                          + " {} writeDisposition strategy.",
                      filename,
                      targetRootPath,
                      options.getWriteDisposition());
                }
              }));
  return filteredRecordsTuple.get(FILTERED_RECORDS_OUT);
}
Use of com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter in the DataflowTemplates project by GoogleCloudPlatform.
From the class DataplexBigQueryToGcsFilterTest, method test_whenBeforeDateHasNoTime_dateParsedCorrectly:
@Test
public void test_whenBeforeDateHasNoTime_dateParsedCorrectly() {
  // 2021-02-15 (midnight) in the DEFAULT time zone, converted to epoch microseconds:
  long micros = Instant.parse("2021-02-15T00:00:00").getMillis() * 1000L;
  // Fix: the names were inverted. A table modified BEFORE the cut-off (micros - 1000L) is the
  // OLDER table; one modified after (micros + 1000L) is the newer table.
  BigQueryTable.Builder olderTable = table().setLastModificationTime(micros - 1000L);
  BigQueryTable.Builder newerTable = table().setLastModificationTime(micros + 1000L);

  options.setTables(null);
  options.setExportDataModifiedBeforeDateTime("2021-02-15");
  Filter f = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

  // Only tables last modified strictly before the cut-off date are exported; the newer table
  // must be skipped.
  assertThat(f.shouldSkipUnpartitionedTable(newerTable)).isTrue();
  assertThat(f.shouldSkipUnpartitionedTable(olderTable)).isFalse();
}
Use of com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter in the DataflowTemplates project by GoogleCloudPlatform.
From the class DataplexBigQueryToGcsFilterTest, method test_whenTargetFileExistsWithWriteDispositionOverwrite_filterAcceptsTables:
@Test
public void test_whenTargetFileExistsWithWriteDispositionOverwrite_filterAcceptsTables() {
  BigQueryTable.Builder tableBuilder = table().setTableName("table1").setPartitioningColumn("p2");
  BigQueryTablePartition tablePartition = partition().setPartitionName("partition1").build();

  options.setTables(null);
  options.setExportDataModifiedBeforeDateTime(null);
  options.setFileFormat(FileFormatOptions.AVRO);
  options.setWriteDisposition(WriteDispositionOptions.OVERWRITE);

  // Both target files already exist, but OVERWRITE means neither table nor partition is skipped.
  Filter filter =
      new DataplexBigQueryToGcsFilter(
          options,
          Arrays.asList(
              "table1/output-table1.avro", "table1/p2=partition1/output-table1-partition1.avro"));

  assertThat(filter.shouldSkipUnpartitionedTable(tableBuilder)).isFalse();
  assertThat(filter.shouldSkipPartition(tableBuilder, tablePartition)).isFalse();
}
Aggregations