Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService in the Apache Beam project.
Class WriteRename, method processElement.
/**
 * Copies the temporary tables of each destination into that destination's final table and then
 * removes the temporary tables.
 *
 * <p>The destination-to-temporary-table mapping is read from the {@code tempTablesView} side
 * input; every value is a JSON string that is parsed into a {@code TableReference}. A
 * destination that has no temporary tables is skipped, and the remaining destinations are still
 * processed.
 *
 * @param c the process context used to read the side inputs and the pipeline options
 * @throws Exception if the copy or the removal of the temporary tables fails
 */
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  Map<TableDestination, Iterable<String>> tempTablesMap =
      Maps.newHashMap(c.sideInput(tempTablesView));
  // Resolve the options view once instead of once per service lookup.
  BigQueryOptions bqOptions = c.getPipelineOptions().as(BigQueryOptions.class);

  // Process each destination table.
  for (Map.Entry<TableDestination, Iterable<String>> entry : tempTablesMap.entrySet()) {
    TableDestination finalTableDestination = entry.getKey();
    List<String> tempTablesJson = Lists.newArrayList(entry.getValue());
    // Do not copy if no temp tables are provided. Only this destination is skipped: returning
    // here would silently drop every destination that has not been visited yet.
    if (tempTablesJson.isEmpty()) {
      continue;
    }

    List<TableReference> tempTables = Lists.newArrayList();
    for (String table : tempTablesJson) {
      tempTables.add(BigQueryHelpers.fromJsonString(table, TableReference.class));
    }

    // Make sure each destination table gets a unique job id.
    String jobIdPrefix =
        BigQueryHelpers.createJobId(c.sideInput(jobIdToken), finalTableDestination, -1);

    // The same dataset service is used for the copy and for the cleanup that follows it.
    DatasetService tableService = bqServices.getDatasetService(bqOptions);
    copy(
        bqServices.getJobService(bqOptions),
        tableService,
        jobIdPrefix,
        finalTableDestination.getTableReference(),
        tempTables,
        writeDisposition,
        createDisposition,
        finalTableDestination.getTableDescription());
    removeTemporaryTables(tableService, tempTables);
  }
}
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService in the Apache Beam project.
Class CreateTableHelpers, method tryCreateTable.
/**
 * Creates the table for {@code tableDestination} when the dataset service reports that it does
 * not exist, then records {@code tableSpec} in {@code createdTables}.
 *
 * <p>The lookup and the creation use a copy of the destination's table reference whose table id
 * has been passed through {@code BigQueryHelpers.stripPartitionDecorator}. Description, time
 * partitioning and KMS key are applied only when present; clustering is applied only when time
 * partitioning is also present.
 *
 * @param context the process context providing the pipeline options
 * @param schemaSupplier supplies the schema for a table that has to be created; it is consulted
 *     only when the table does not exist, and must not return null in that case
 * @param tableDestination the destination describing the table
 * @param createDisposition the create disposition, used only in the error message
 * @param tableSpec the key added to {@code createdTables} once this method succeeds
 * @param kmsKey the KMS key name for the table's encryption configuration, or null for none
 * @param bqServices the factory for the dataset service
 * @throws RuntimeException wrapping any exception raised while looking up or creating the
 *     table, including the {@code IllegalArgumentException} for a null schema
 */
@SuppressWarnings({ "nullness" })
private static void tryCreateTable(
    DoFn<?, ?>.ProcessContext context,
    Supplier<TableSchema> schemaSupplier,
    TableDestination tableDestination,
    CreateDisposition createDisposition,
    String tableSpec,
    String kmsKey,
    BigQueryServices bqServices) {
  TableReference tableReference = tableDestination.getTableReference().clone();
  tableReference.setTableId(
      BigQueryHelpers.stripPartitionDecorator(tableReference.getTableId()));
  try (DatasetService datasetService =
      bqServices.getDatasetService(context.getPipelineOptions().as(BigQueryOptions.class))) {
    if (datasetService.getTable(tableReference) == null) {
      TableSchema tableSchema = schemaSupplier.get();
      // The template must have exactly one %s per argument; the previous text had four
      // placeholders for three arguments, which produced a misleading message.
      checkArgument(
          tableSchema != null,
          "Unless create disposition is %s, a schema must be specified, i.e. "
              + "DynamicDestinations.getSchema() may not return null. "
              + "However, create disposition is %s, and "
              + "the schema supplier returned null for destination %s",
          CreateDisposition.CREATE_NEVER,
          createDisposition,
          tableDestination);
      Table table = new Table().setTableReference(tableReference).setSchema(tableSchema);
      if (tableDestination.getTableDescription() != null) {
        table = table.setDescription(tableDestination.getTableDescription());
      }
      if (tableDestination.getTimePartitioning() != null) {
        table.setTimePartitioning(tableDestination.getTimePartitioning());
        // Clustering is only forwarded together with time partitioning.
        if (tableDestination.getClustering() != null) {
          table.setClustering(tableDestination.getClustering());
        }
      }
      if (kmsKey != null) {
        table.setEncryptionConfiguration(new EncryptionConfiguration().setKmsKeyName(kmsKey));
      }
      datasetService.createTable(table);
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  // Reached only when the lookup (and the creation, if needed) completed without an exception.
  createdTables.add(tableSpec);
}
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService in the Apache Beam project.
Class StorageApiFinalizeWritesDoFn, method finishBundle.
/**
 * Batch-commits, table by table, the write streams collected in {@code commitStreams}.
 *
 * <p>For every table a {@code RetryManager} operation is registered that calls {@code
 * commitWriteStreams} with the streams not yet known to be committed. Streams reported with
 * {@code STREAM_ALREADY_COMMITTED} are remembered so that later attempts leave them out.
 *
 * @param pipelineOptions the options used to obtain the dataset service
 * @throws Exception if running the retry manager fails
 */
@FinishBundle
@SuppressWarnings({ "nullness" })
public void finishBundle(PipelineOptions pipelineOptions) throws Exception {
  DatasetService service = getDatasetService(pipelineOptions);
  for (Map.Entry<String, Collection<String>> tableStreams : commitStreams.entrySet()) {
    final String tableId = tableStreams.getKey();
    final Collection<String> streamNames = tableStreams.getValue();
    // Streams the service has reported as already committed; excluded from later attempts.
    final Set<String> committed = Sets.newHashSet();

    RetryManager<BatchCommitWriteStreamsResponse, Context<BatchCommitWriteStreamsResponse>>
        commitRetries =
            new RetryManager<>(Duration.standardSeconds(1), Duration.standardMinutes(1), 3);

    commitRetries.addOperation(
        // The operation itself: commit whatever is still pending for this table.
        operationContext -> {
          Iterable<String> pending =
              Iterables.filter(streamNames, name -> !committed.contains(name));
          batchCommitOperationsSent.inc();
          return service.commitWriteStreams(tableId, pending);
        },
        // Failure handler: log, count, and ask for a retry of all operations.
        failedContexts -> {
          LOG.error(
              "BatchCommit failed. tableId "
                  + tableId
                  + " streamNames "
                  + streamNames
                  + " error: "
                  + Iterables.getFirst(failedContexts, null).getError());
          batchCommitOperationsFailed.inc();
          return RetryType.RETRY_ALL_OPERATIONS;
        },
        // Success handler.
        succeededContext -> {
          LOG.info(
              "BatchCommit succeeded for tableId "
                  + tableId
                  + " response "
                  + succeededContext.getResult());
          batchCommitOperationsSucceeded.inc();
        },
        // Response check: decides whether the response counts as a success.
        commitResponse -> {
          if (commitResponse.hasCommitTime()) {
            return true;
          }
          for (StorageError streamError : commitResponse.getStreamErrorsList()) {
            if (streamError.getCode() == StorageErrorCode.STREAM_ALREADY_COMMITTED) {
              // Make sure that we don't retry any streams that are already committed.
              committed.add(streamError.getEntity());
            }
          }
          // With nothing left to commit the operation is treated as succeeded; otherwise the
          // response is reported as unsuccessful.
          return Iterables.isEmpty(
              Iterables.filter(streamNames, name -> !committed.contains(name)));
        },
        new Context<>());
    commitRetries.run(true);
  }
}
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService in the Apache Beam project.
Class BigQueryQuerySource, method cleanupTempResource.
/**
 * Deletes the temporary table derived from the job name and {@code stepUuid}, and then the
 * dataset that table belongs to.
 *
 * @param bqOptions the options supplying the project, the job name and the dataset service
 * @throws Exception if deleting the table or the dataset fails
 */
@Override
protected void cleanupTempResource(BigQueryOptions bqOptions) throws Exception {
  TableReference tempTable =
      createTempTableReference(
          bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid));

  DatasetService datasetService = bqServices.getDatasetService(bqOptions);
  // The table is removed first, then the dataset it lived in.
  datasetService.deleteTable(tempTable);
  String projectId = tempTable.getProjectId();
  String datasetId = tempTable.getDatasetId();
  datasetService.deleteDataset(projectId, datasetId);
}
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService in the Apache Beam project.
Class BigQueryQuerySource, method getTableToExtract.
/**
 * Runs the query into a temporary table and returns a reference to that table.
 *
 * <p>The temporary dataset is created in the location of the first table referenced by the
 * query (as reported by the dry run); when the query references no tables, a null location is
 * passed to {@code createDataset}.
 *
 * @param bqOptions the options supplying the project, the job name and the BigQuery services
 * @return the reference of the temporary table that holds the query result
 * @throws IOException if a BigQuery call fails, or if the first table referenced by the query
 *     cannot be found
 * @throws InterruptedException if a BigQuery call is interrupted
 */
@Override
protected TableReference getTableToExtract(BigQueryOptions bqOptions)
    throws IOException, InterruptedException {
  // 1. Find the location of the query.
  String location = null;
  List<TableReference> referencedTables =
      dryRunQueryIfNeeded(bqOptions).getQuery().getReferencedTables();
  DatasetService tableService = bqServices.getDatasetService(bqOptions);
  if (referencedTables != null && !referencedTables.isEmpty()) {
    TableReference queryTable = referencedTables.get(0);
    // getTable may return null for a table that does not exist; fail with a descriptive error
    // instead of a bare NullPointerException. Optional is used so that no additional type has
    // to be imported for the intermediate result.
    location =
        java.util.Optional.ofNullable(tableService.getTable(queryTable))
            .orElseThrow(
                () ->
                    new IOException(
                        "Table referenced by the query was not found: " + queryTable))
            .getLocation();
  }

  // 2. Create the temporary dataset in the query location.
  // The same token names both the temporary table and the query job, so compute it once.
  String tempJobIdToken = createJobIdToken(bqOptions.getJobName(), stepUuid);
  TableReference tableToExtract =
      createTempTableReference(bqOptions.getProject(), tempJobIdToken);
  tableService.createDataset(
      tableToExtract.getProjectId(),
      tableToExtract.getDatasetId(),
      location,
      "Dataset for BigQuery query job temporary table");

  // 3. Execute the query.
  String queryJobId = tempJobIdToken + "-query";
  executeQuery(
      bqOptions.getProject(), queryJobId, tableToExtract, bqServices.getJobService(bqOptions));
  return tableToExtract;
}
Aggregations