Use of com.google.api.services.bigquery.model.EncryptionConfiguration in project beam by apache.
From the class CreateTableHelpers, method tryCreateTable.
@SuppressWarnings({ "nullness" })
private static void tryCreateTable(
    DoFn<?, ?>.ProcessContext context, Supplier<TableSchema> schemaSupplier,
    TableDestination tableDestination, CreateDisposition createDisposition,
    String tableSpec, String kmsKey, BigQueryServices bqServices) {
  TableReference tableReference = tableDestination.getTableReference().clone();
  tableReference.setTableId(
      BigQueryHelpers.stripPartitionDecorator(tableReference.getTableId()));
  try (DatasetService datasetService =
      bqServices.getDatasetService(context.getPipelineOptions().as(BigQueryOptions.class))) {
    if (datasetService.getTable(tableReference) == null) {
      TableSchema tableSchema = schemaSupplier.get();
      checkArgument(
          tableSchema != null,
          "Unless create disposition is %s, a schema must be specified, i.e. "
              + "DynamicDestinations.getSchema() may not return null. "
              + "However, create disposition is %s, and a null schema was returned for "
              + "destination %s",
          CreateDisposition.CREATE_NEVER,
          createDisposition,
          tableDestination);
      Table table = new Table().setTableReference(tableReference).setSchema(tableSchema);
      if (tableDestination.getTableDescription() != null) {
        table = table.setDescription(tableDestination.getTableDescription());
      }
      if (tableDestination.getTimePartitioning() != null) {
        table.setTimePartitioning(tableDestination.getTimePartitioning());
        // Clustering is only applied together with time partitioning here.
        if (tableDestination.getClustering() != null) {
          table.setClustering(tableDestination.getClustering());
        }
      }
      if (kmsKey != null) {
        table.setEncryptionConfiguration(new EncryptionConfiguration().setKmsKeyName(kmsKey));
      }
      datasetService.createTable(table);
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  createdTables.add(tableSpec);
}
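For context, the kmsKey that reaches this helper is normally supplied by the pipeline author via BigQueryIO.Write#withKmsKey. A minimal sketch of that entry point; the project, dataset, table, and key resource names are hypothetical, and rows/schema are assumed to exist:

// Sketch only: rows is an existing PCollection<TableRow>, schema an existing TableSchema.
// All names below are hypothetical placeholders.
rows.apply(
    "WriteWithCmek",
    BigQueryIO.writeTableRows()
        .to("my-project:my_dataset.my_table")
        .withSchema(schema)
        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
        .withKmsKey(
            "projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key"));

Any table created by tryCreateTable then carries the EncryptionConfiguration shown above.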
Use of com.google.api.services.bigquery.model.EncryptionConfiguration in project beam by apache.
From the class WriteRename, method startCopy.
private BigQueryHelpers.PendingJob startCopy(
    JobService jobService, DatasetService datasetService, String jobIdPrefix,
    TableReference ref, List<TableReference> tempTables, WriteDisposition writeDisposition,
    CreateDisposition createDisposition, String kmsKey, ValueProvider<String> loadJobProjectId) {
  JobConfigurationTableCopy copyConfig =
      new JobConfigurationTableCopy()
          .setSourceTables(tempTables)
          .setDestinationTable(ref)
          .setWriteDisposition(writeDisposition.name())
          .setCreateDisposition(createDisposition.name());
  if (kmsKey != null) {
    copyConfig.setDestinationEncryptionConfiguration(
        new EncryptionConfiguration().setKmsKeyName(kmsKey));
  }
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
  String projectId =
      loadJobProjectId == null || loadJobProjectId.get() == null
          ? ref.getProjectId()
          : loadJobProjectId.get();
  BigQueryHelpers.PendingJob retryJob =
      new BigQueryHelpers.PendingJob(
          // Function to start the copy job.
          jobId -> {
            JobReference jobRef =
                new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            LOG.info(
                "Starting copy job for table {} using {}, job id iteration {}",
                ref, jobRef, jobId.getRetryIndex());
            try {
              jobService.startCopyJob(jobRef, copyConfig);
            } catch (IOException | InterruptedException e) {
              LOG.warn("Copy job {} failed.", jobRef, e);
              throw new RuntimeException(e);
            }
            return null;
          },
          // Function to poll the result of a copy job.
          jobId -> {
            JobReference jobRef =
                new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            try {
              return jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
            } catch (InterruptedException e) {
              throw new RuntimeException(e);
            }
          },
          // Function to look up a job.
          jobId -> {
            JobReference jobRef =
                new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            try {
              return jobService.getJob(jobRef);
            } catch (InterruptedException | IOException e) {
              throw new RuntimeException(e);
            }
          },
          maxRetryJobs,
          jobIdPrefix);
  return retryJob;
}
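For reference, the same google-api-services-bigquery model calls can be exercised outside the PendingJob retry plumbing. A sketch mirroring the copyConfig built above; the table and key names are hypothetical, and tempTables is assumed to be a List<TableReference> produced by earlier load jobs:

// Hypothetical names throughout; mirrors the copyConfig built in startCopy above.
TableReference dest =
    new TableReference()
        .setProjectId("my-project")
        .setDatasetId("my_dataset")
        .setTableId("final_table");
JobConfigurationTableCopy copy =
    new JobConfigurationTableCopy()
        .setSourceTables(tempTables)  // assumed: temp tables from earlier load jobs
        .setDestinationTable(dest)
        .setWriteDisposition("WRITE_TRUNCATE")
        .setCreateDisposition("CREATE_IF_NEEDED")
        .setDestinationEncryptionConfiguration(
            new EncryptionConfiguration()
                .setKmsKeyName(
                    "projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key"));

The key name follows the Cloud KMS resource format projects/<project>/locations/<location>/keyRings/<ring>/cryptoKeys/<key>.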
Use of com.google.api.services.bigquery.model.EncryptionConfiguration in project beam by apache.
From the class WriteTables, method startLoad.
private PendingJob startLoad(
    JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref,
    TimePartitioning timePartitioning, Clustering clustering, @Nullable TableSchema schema,
    List<String> gcsUris, WriteDisposition writeDisposition, CreateDisposition createDisposition,
    Set<SchemaUpdateOption> schemaUpdateOptions) {
  JobConfigurationLoad loadConfig =
      new JobConfigurationLoad()
          .setDestinationTable(ref)
          .setSchema(schema)
          .setSourceUris(gcsUris)
          .setWriteDisposition(writeDisposition.name())
          .setCreateDisposition(createDisposition.name())
          .setSourceFormat(sourceFormat)
          .setIgnoreUnknownValues(ignoreUnknownValues)
          .setUseAvroLogicalTypes(useAvroLogicalTypes);
  if (schemaUpdateOptions != null) {
    List<String> options =
        schemaUpdateOptions.stream()
            .map(Enum<SchemaUpdateOption>::name)
            .collect(Collectors.toList());
    loadConfig.setSchemaUpdateOptions(options);
  }
  if (timePartitioning != null) {
    loadConfig.setTimePartitioning(timePartitioning);
    // Only set clustering if timePartitioning is set.
    if (clustering != null) {
      loadConfig.setClustering(clustering);
    }
  }
  if (kmsKey != null) {
    loadConfig.setDestinationEncryptionConfiguration(
        new EncryptionConfiguration().setKmsKeyName(kmsKey));
  }
  String projectId =
      loadJobProjectId == null || loadJobProjectId.get() == null
          ? ref.getProjectId()
          : loadJobProjectId.get();
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
  PendingJob retryJob =
      new PendingJob(
          // Function to load the data.
          jobId -> {
            JobReference jobRef =
                new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            LOG.info(
                "Loading {} files into {} using job {}, job id iteration {}",
                gcsUris.size(), ref, jobRef, jobId.getRetryIndex());
            try {
              jobService.startLoadJob(jobRef, loadConfig);
            } catch (IOException | InterruptedException e) {
              LOG.warn("Load job {} failed with {}", jobRef, e.toString());
              throw new RuntimeException(e);
            }
            return null;
          },
          // Function to poll the result of a load job.
          jobId -> {
            JobReference jobRef =
                new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            try {
              return jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
            } catch (InterruptedException e) {
              throw new RuntimeException(e);
            }
          },
          // Function to look up a job.
          jobId -> {
            JobReference jobRef =
                new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            try {
              return jobService.getJob(jobRef);
            } catch (InterruptedException | IOException e) {
              throw new RuntimeException(e);
            }
          },
          maxRetryJobs,
          jobIdPrefix);
  return retryJob;
}
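As the guard above shows, this path only applies clustering when time partitioning is also set. A sketch of building the corresponding model objects; the field names are hypothetical and kmsKey is assumed to be in scope as above:

// Hypothetical field names; mirrors the guard in startLoad above, where
// clustering is only applied when time partitioning is also set.
TimePartitioning partitioning = new TimePartitioning().setType("DAY").setField("event_ts");
Clustering clusteringSpec = new Clustering().setFields(Arrays.asList("customer_id", "region"));
JobConfigurationLoad config =
    new JobConfigurationLoad()
        .setTimePartitioning(partitioning)
        .setClustering(clusteringSpec)
        .setDestinationEncryptionConfiguration(
            new EncryptionConfiguration().setKmsKeyName(kmsKey));  // kmsKey assumed as above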
Use of com.google.api.services.bigquery.model.EncryptionConfiguration in project beam by apache.
From the class BigQueryQueryHelper, method executeQuery.
public static TableReference executeQuery(
    BigQueryServices bqServices, BigQueryOptions options,
    AtomicReference<JobStatistics> dryRunJobStats, String stepUuid, String query,
    Boolean flattenResults, Boolean useLegacySql, QueryPriority priority,
    @Nullable String location, @Nullable String queryTempDatasetId, @Nullable String kmsKey)
    throws InterruptedException, IOException {
  // Step 1: Find the effective location of the query.
  String effectiveLocation = location;
  DatasetService tableService = bqServices.getDatasetService(options);
  if (effectiveLocation == null) {
    List<TableReference> referencedTables =
        dryRunQueryIfNeeded(
                bqServices, options, dryRunJobStats, query, flattenResults, useLegacySql, location)
            .getQuery()
            .getReferencedTables();
    if (referencedTables != null && !referencedTables.isEmpty()) {
      TableReference referencedTable = referencedTables.get(0);
      effectiveLocation =
          tableService
              .getDataset(referencedTable.getProjectId(), referencedTable.getDatasetId())
              .getLocation();
    }
  }
  // Step 2: Create a temporary dataset in the query location, only if the user has not
  // specified a temp dataset.
  String queryJobId =
      BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY);
  Optional<String> queryTempDatasetOpt = Optional.ofNullable(queryTempDatasetId);
  TableReference queryResultTable =
      createTempTableReference(
          options.getBigQueryProject() == null ? options.getProject() : options.getBigQueryProject(),
          queryJobId,
          queryTempDatasetOpt);
  boolean beamToCreateTempDataset = !queryTempDatasetOpt.isPresent();
  // Create the dataset only if it has not been set by the user.
  if (beamToCreateTempDataset) {
    LOG.info("Creating temporary dataset {} for query results", queryResultTable.getDatasetId());
    tableService.createDataset(
        queryResultTable.getProjectId(),
        queryResultTable.getDatasetId(),
        effectiveLocation,
        "Temporary tables for query results of job " + options.getJobName(),
        TimeUnit.DAYS.toMillis(1));
  } else {
    // If the user specified a temp dataset, check that the destination table does not exist.
    Table destTable = tableService.getTable(queryResultTable);
    checkArgument(
        destTable == null,
        "Refusing to write on existing table %s in the specified temp dataset %s",
        queryResultTable.getTableId(),
        queryResultTable.getDatasetId());
  }
  // Step 3: Execute the query. Generate a transient (random) query job ID, because this code may
  // be retried after the temporary dataset and table have been deleted by a previous attempt --
  // in that case, we want to regenerate the temporary dataset and table, and we'll need a fresh
  // query ID to do that.
  LOG.info(
      "Exporting query results into temporary table {} using job {}",
      queryResultTable, queryJobId);
  JobReference jobReference =
      new JobReference()
          .setProjectId(
              options.getBigQueryProject() == null
                  ? options.getProject()
                  : options.getBigQueryProject())
          .setLocation(effectiveLocation)
          .setJobId(queryJobId);
  JobConfigurationQuery queryConfiguration =
      createBasicQueryConfig(query, flattenResults, useLegacySql)
          .setAllowLargeResults(true)
          .setDestinationTable(queryResultTable)
          .setCreateDisposition("CREATE_IF_NEEDED")
          .setWriteDisposition("WRITE_TRUNCATE")
          .setPriority(priority.name());
  if (kmsKey != null) {
    queryConfiguration.setDestinationEncryptionConfiguration(
        new EncryptionConfiguration().setKmsKeyName(kmsKey));
  }
  JobService jobService = bqServices.getJobService(options);
  jobService.startQueryJob(jobReference, queryConfiguration);
  Job job = jobService.pollJob(jobReference, JOB_POLL_MAX_RETRIES);
  if (BigQueryHelpers.parseStatus(job) != Status.SUCCEEDED) {
    throw new IOException(
        String.format(
            "Query job %s failed, status: %s",
            queryJobId, BigQueryHelpers.statusToPrettyString(job.getStatus())));
  }
  LOG.info("Query job {} completed", queryJobId);
  return queryResultTable;
}
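On the read side, the kmsKey parameter of executeQuery is plumbed through from BigQueryIO's query reads, so the temporary result table is also CMEK-protected. A minimal sketch, assuming hypothetical query, dataset, and key names and an existing Pipeline named pipeline:

// Sketch only: pipeline is an existing Pipeline; all names are hypothetical placeholders.
PCollection<TableRow> rows =
    pipeline.apply(
        BigQueryIO.readTableRows()
            .fromQuery("SELECT customer_id, total FROM `my-project.my_dataset.orders`")
            .usingStandardSql()
            .withQueryTempDataset("my_temp_dataset")  // optional; otherwise Beam creates one
            .withKmsKey(
                "projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key"));

With a user-supplied temp dataset, the existence check in Step 2 above guards against clobbering an existing table; without one, Beam creates (and later cleans up) a temporary dataset in the query's effective location.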