Use of com.google.cloud.teleport.v2.values.BigQueryTable in project DataflowTemplates by GoogleCloudPlatform.
Class DataplexBigQueryToGcs, method buildPipeline.
/**
* Builds the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The resulting pipeline.
*/
@VisibleForTesting
static Pipeline buildPipeline(
    DataplexBigQueryToGcsOptions options,
    BigQueryMetadataLoader metadataLoader,
    String targetRootPath,
    DatasetId datasetId)
    throws ExecutionException, InterruptedException {

  Pipeline pipeline = Pipeline.create(options);
  List<String> existingTargetFiles = StorageUtils.getFilesInDirectory(targetRootPath);

  LOG.info("Loading BigQuery metadata...");
  List<BigQueryTable> tables =
      metadataLoader.loadDatasetMetadata(
          datasetId, new DataplexBigQueryToGcsFilter(options, existingTargetFiles));
  LOG.info("Loaded {} table(s).", tables.size());

  if (!tables.isEmpty()) {
    transformPipeline(pipeline, tables, options, targetRootPath, null, null);
  } else {
    pipeline.apply("Nothing to export", new NoopTransform());
  }
  return pipeline;
}
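For orientation, a minimal sketch of how a caller might wire up buildPipeline. This is not the template's actual entry point; the target path, dataset name, and the pre-built BigQuery clients below are assumptions for illustration only.

// Hedged sketch; argument values and client construction are assumptions.
DataplexBigQueryToGcsOptions options =
    PipelineOptionsFactory.fromArgs(args).withValidation().as(DataplexBigQueryToGcsOptions.class);

String targetRootPath = "gs://example-bucket/dataplex-export";                 // hypothetical destination
DatasetId datasetId = DatasetId.of(options.getProject(), "example_dataset");   // hypothetical source dataset

// Mirrors the (client, read client, max parallel requests) construction used in the
// test setUp further down; bigQueryClient and bigQueryReadClient stand in for
// pre-built clients of whatever types BigQueryMetadataLoader actually expects.
BigQueryMetadataLoader metadataLoader =
    new BigQueryMetadataLoader(bigQueryClient, bigQueryReadClient, /* maxParallelRequests= */ 5);

Pipeline pipeline =
    DataplexBigQueryToGcs.buildPipeline(options, metadataLoader, targetRootPath, datasetId);
pipeline.run().waitUntilFinish();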
Use of com.google.cloud.teleport.v2.values.BigQueryTable in project DataflowTemplates by GoogleCloudPlatform.
Class DeleteBigQueryDataFn, method processElement.
@ProcessElement
public void processElement(
    @Element KV<BigQueryTable, BigQueryTablePartition> input, PipelineOptions options) {
  BigQueryTable t = input.getKey();
  BigQueryTablePartition p = input.getValue();

  if (t.isPartitioned() && p == null) {
    throw new IllegalStateException(
        String.format(
            "No partition to delete provided for a partitioned table %s.", t.getTableName()));
  }
  if (!t.isPartitioned() && p != null) {
    throw new IllegalStateException(
        String.format(
            "Got unexpected partition %s to delete for a non-partitioned table %s.",
            p.getPartitionName(), t.getTableName()));
  }

  if (!options.as(Options.class).getDeleteSourceData()) {
    if (t.isPartitioned()) {
      LOG.info(
          "Skipping source BigQuery data deletion for partition {}${}.",
          t.getTableName(), p.getPartitionName());
    } else {
      LOG.info("Skipping source BigQuery data deletion for table {}.", t.getTableName());
    }
    return;
  }

  if (t.isPartitioned()) {
    deletePartition(t, p);
  } else {
    deleteTable(t);
  }
}
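The deletePartition and deleteTable helpers are not part of this snippet. As a hedged sketch of how the deletes might be issued through the BigQuery client (assuming BigQueryTable exposes getProject()/getDataset() getters matching its builder, and that the fn holds a bqClient field), they could look roughly like this; the "table$partition" decorator matches the TableId verified in the test further down.

// Assumed shape of the delete helpers; not necessarily the actual implementation.
private void deletePartition(BigQueryTable t, BigQueryTablePartition p) {
  // BigQuery addresses a single partition with the "table$partition" decorator.
  TableId partitionId =
      TableId.of(t.getProject(), t.getDataset(), t.getTableName() + "$" + p.getPartitionName());
  bqClient.delete(partitionId);
}

private void deleteTable(BigQueryTable t) {
  bqClient.delete(TableId.of(t.getProject(), t.getDataset(), t.getTableName()));
}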
Use of com.google.cloud.teleport.v2.values.BigQueryTable in project DataflowTemplates by GoogleCloudPlatform.
Class DataplexBigQueryToGcsTest, method setUp.
@Before
public void setUp() throws InterruptedException, IOException {
  options = TestPipeline.testingPipelineOptions().as(DataplexBigQueryToGcsOptions.class);
  options.setProject(PROJECT);
  options.setUpdateDataplexMetadata(true);
  options.setEnforceSamePartitionKey(false);
  // Required when using BigQueryIO.withMethod(EXPORT).
  options.setTempLocation(tmpDir.newFolder("bqTmp").getAbsolutePath());

  outDir = tmpDir.newFolder("out");

  bqSchema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("ts").setType("TIMESTAMP"),
                  new TableFieldSchema().setName("s1").setType("STRING"),
                  new TableFieldSchema().setName("d1").setType("DATE"),
                  new TableFieldSchema().setName("i1").setType("INTEGER")));

  avroSchema =
      new Schema.Parser()
          .parse(
              "{\"type\":\"record\",\"name\":\"__root__\",\"fields\":"
                  + "[{\"name\":\"ts\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}]},"
                  + "{\"name\":\"s1\",\"type\":[\"null\",\"string\"]},"
                  + "{\"name\":\"d1\",\"type\":[\"null\",{\"type\":\"int\",\"logicalType\":\"date\"}]},"
                  + "{\"name\":\"i1\",\"type\":[\"null\",\"long\"]}]}");

  long modTime = System.currentTimeMillis() * 1000;

  BigQueryTablePartition p1 =
      BigQueryTablePartition.builder()
          .setPartitionName("p1")
          .setLastModificationTime(modTime)
          .build();
  BigQueryTablePartition p2 =
      BigQueryTablePartition.builder()
          .setPartitionName("p2")
          .setLastModificationTime(modTime)
          .build();

  BigQueryTable t1 =
      BigQueryTable.builder()
          .setTableName("partitioned_table")
          .setProject(PROJECT)
          .setDataset(DATASET)
          .setSchema(avroSchema)
          .setLastModificationTime(modTime)
          .setPartitioningColumn("ts")
          .setPartitions(Arrays.asList(p1, p2))
          .build();
  BigQueryTable t2 =
      BigQueryTable.builder()
          .setTableName("unpartitioned_table")
          .setProject(PROJECT)
          .setDataset(DATASET)
          .setSchema(avroSchema)
          .setLastModificationTime(modTime)
          .build();

  tableByName = new HashMap<>();
  tableByName.put(t1.getTableName(), t1);
  tableByName.put(t2.getTableName(), t2);

  defaultRecords =
      new TableRow[] {
        new TableRow().set("ts", 1L).set("s1", "1001").set("d1", "1970-01-01").set("i1", 2001L),
        new TableRow().set("ts", 2L).set("s1", "1002").set("d1", "1970-01-02").set("i1", 2002L),
        new TableRow().set("ts", 3L).set("s1", "1003").set("d1", "1970-01-03").set("i1", 2003L),
        new TableRow().set("ts", 4L).set("s1", "1004").set("d1", "1970-01-04").set("i1", null),
        new TableRow().set("ts", 5L).set("s1", "1005").set("d1", "1970-01-05").set("i1", 2005L)
      };
  defaultExpectedRecords =
      new String[] {
        "{\"ts\": 1, \"s1\": \"1001\", \"d1\": 0, \"i1\": 2001}",
        "{\"ts\": 2, \"s1\": \"1002\", \"d1\": 1, \"i1\": 2002}",
        "{\"ts\": 3, \"s1\": \"1003\", \"d1\": 2, \"i1\": 2003}",
        "{\"ts\": 4, \"s1\": \"1004\", \"d1\": 3, \"i1\": null}",
        "{\"ts\": 5, \"s1\": \"1005\", \"d1\": 4, \"i1\": 2005}"
      };

  FakeDatasetService.setUp();
  fakeDatasetService = new FakeDatasetService();
  fakeDatasetService.createDataset(PROJECT, DATASET, "", "", null);
  fakeDatasetService.createTable(
      new Table()
          .setTableReference(t1.toTableReference())
          .setSchema(bqSchema)
          .setRequirePartitionFilter(true)
          .setTimePartitioning(new TimePartitioning().setField("ts").setType("DAY")));
  fakeDatasetService.createTable(
      new Table().setTableReference(t2.toTableReference()).setSchema(bqSchema));

  fakeJobService = new CustomFakeJobService();
  bqFakeServices =
      new FakeBigQueryServices()
          .withJobService(fakeJobService)
          .withDatasetService(fakeDatasetService);

  when(tableResultMock.iterateAll())
      .thenReturn(Collections.singleton(fields("unpartitioned_table", "0", null)));
  when(bqMock.query(any())).thenReturn(tableResultMock);
  when(bqMock.delete(any(TableId.class))).thenReturn(true);
  when(bqsMock.createReadSession(any()))
      .thenReturn(
          ReadSession.newBuilder()
              .setAvroSchema(AvroSchema.newBuilder().setSchema(avroSchema.toString()))
              .build());

  metadataLoader = new BigQueryMetadataLoader(bqMock, bqsMock, MAX_PARALLEL_REQUESTS);
}
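With this setUp in place, a test body could feed the fakes into transformPipeline roughly as follows. This is a sketch, not one of the class's actual test methods; it assumes testPipeline is a TestPipeline rule on the test class and reuses the fields initialized above.

// Hedged sketch of a test body using the fakes prepared in setUp().
List<BigQueryTable> tables = new ArrayList<>(tableByName.values());
DataplexBigQueryToGcs.transformPipeline(
    testPipeline,
    tables,
    options,
    outDir.getAbsolutePath(),
    bqFakeServices,   // injected in place of real BigQueryServices
    () -> bqMock);    // injected in place of a real BigQuery client factory
testPipeline.run(options).waitUntilFinish();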
Use of com.google.cloud.teleport.v2.values.BigQueryTable in project DataflowTemplates by GoogleCloudPlatform.
Class DeleteBigQueryDataFnTest, method testTransform_withDeleteSourceDataEnabled_doesntTruncateSpecialPartitions.
/**
* Test that DeleteBigQueryDataFn doesn't attempt to delete special BigQuery partitions even if
* {@code deleteSourceData = true}.
*
* <p>As per <a
* href="https://cloud.google.com/bigquery/docs/managing-partitioned-tables#delete_a_partition">
* this documentation</a>, special partitions "__NULL__" and "__UNPARTITIONED__" cannot be
* deleted.
*/
@Test
@Category(NeedsRunner.class)
public void testTransform_withDeleteSourceDataEnabled_doesntTruncateSpecialPartitions() {
  Options options = TestPipeline.testingPipelineOptions().as(Options.class);
  options.setDeleteSourceData(true);

  BigQueryTablePartition.Builder builder =
      BigQueryTablePartition.builder().setLastModificationTime(System.currentTimeMillis() * 1000);
  BigQueryTablePartition p1 = builder.setPartitionName("__NULL__").build();
  BigQueryTablePartition p2 = builder.setPartitionName("__UNPARTITIONED__").build();
  BigQueryTablePartition p3 = builder.setPartitionName("NORMAL_PARTITION").build();
  BigQueryTable t1 =
      table.toBuilder()
          .setPartitions(Arrays.asList(p1, p2, p3))
          .setPartitioningColumn("column-name-doesnt-matter")
          .build();

  DeleteBigQueryDataFn fn = new DeleteBigQueryDataFn().withTestBqClientFactory(() -> bqMock);

  testPipeline
      .apply(
          "CreateInput",
          Create.of(KV.of(t1, p1), KV.of(t1, p2), KV.of(t1, p3)).withCoder(fnCoder))
      .apply("TestDeleteBigQueryDataFn", ParDo.of(fn));
  testPipeline.run(options);

  verify(bqMock, times(1)).delete(TableId.of("pr1", "d1", "t1$NORMAL_PARTITION"));
  verifyNoMoreInteractions(bqMock);
}
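The skip behavior this test relies on could be implemented with a simple name check before the delete is issued. The guard below is a hedged sketch of such a check, not necessarily the fn's actual code; it only encodes the documented rule that the "__NULL__" and "__UNPARTITIONED__" partitions cannot be deleted.

// Assumed guard: BigQuery's special partitions cannot be deleted, so skip them.
private static boolean isSpecialPartition(BigQueryTablePartition p) {
  String name = p.getPartitionName();
  return "__NULL__".equals(name) || "__UNPARTITIONED__".equals(name);
}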
Use of com.google.cloud.teleport.v2.values.BigQueryTable in project DataflowTemplates by GoogleCloudPlatform.
Class DataplexBigQueryToGcs, method transformPipeline.
@VisibleForTesting
static void transformPipeline(
    Pipeline pipeline,
    List<BigQueryTable> tables,
    DataplexBigQueryToGcsOptions options,
    String targetRootPath,
    BigQueryServices testBqServices,
    BigQueryClientFactory testBqClientFactory) {

  List<PCollection<KV<BigQueryTable, KV<BigQueryTablePartition, String>>>> fileCollections =
      new ArrayList<>(tables.size());
  tables.forEach(
      table -> {
        fileCollections.add(
            pipeline
                .apply(
                    String.format("ExportTable-%s", table.getTableName()),
                    new BigQueryTableToGcsTransform(
                            table,
                            targetRootPath,
                            options.getFileFormat(),
                            options.getFileCompression(),
                            options.getEnforceSamePartitionKey())
                        .withTestServices(testBqServices))
                .apply(
                    String.format("AttachTableKeys-%s", table.getTableName()),
                    WithKeys.of(table)));
      });

  PCollection<KV<BigQueryTable, KV<BigQueryTablePartition, String>>> exportFileResults =
      PCollectionList.of(fileCollections).apply("FlattenTableResults", Flatten.pCollections());

  PCollection<Void> metadataUpdateResults =
      exportFileResults.apply(
          "UpdateDataplexMetadata", new UpdateDataplexBigQueryToGcsExportMetadataTransform());

  exportFileResults
      .apply(
          MapElements.into(
                  TypeDescriptors.kvs(
                      TypeDescriptor.of(BigQueryTable.class),
                      TypeDescriptor.of(BigQueryTablePartition.class)))
              .via(
                  (SerializableFunction<
                          KV<BigQueryTable, KV<BigQueryTablePartition, String>>,
                          KV<BigQueryTable, BigQueryTablePartition>>)
                      input -> KV.of(input.getKey(), input.getValue().getKey())))
      .apply("WaitForMetadataUpdate", Wait.on(metadataUpdateResults))
      .apply(
          "TruncateBigQueryData",
          ParDo.of(new DeleteBigQueryDataFn().withTestBqClientFactory(testBqClientFactory)));
}
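The WaitForMetadataUpdate step relies on Beam's Wait.on, which holds back the main input until the signal PCollection is complete, so source data is only truncated after the Dataplex metadata update has finished. A minimal, self-contained illustration of that primitive, unrelated to the template's types:

// Wait.on sketch: "Deletes" elements are only processed after "Signal" completes.
Pipeline p = Pipeline.create();
PCollection<String> signal = p.apply("Signal", Create.of("metadata-updated"));
PCollection<String> mainInput = p.apply("Deletes", Create.of("table1", "table2"));
mainInput
    .apply("WaitForSignal", Wait.on(signal))
    .apply("Process", MapElements.into(TypeDescriptors.strings()).via((String x) -> "deleted " + x));
p.run().waitUntilFinish();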