Search in sources :

Example 1 with EncryptionConfiguration

use of com.google.api.services.bigquery.model.EncryptionConfiguration in project beam by apache.

The class CreateTableHelpers, method tryCreateTable.

/**
 * Creates the BigQuery table described by {@code tableDestination} when it does not exist yet,
 * then records {@code tableSpec} in {@code createdTables}.
 *
 * <p>The table id is looked up and created without its partition decorator. The schema is only
 * requested from {@code schemaSupplier} when the table is missing.
 *
 * @param context process context; only its pipeline options are read, as {@code BigQueryOptions}
 * @param schemaSupplier supplies the schema for a table that has to be created; must not yield
 *     {@code null} in that case
 * @param tableDestination destination whose reference, description, time partitioning and
 *     clustering are applied to the new table
 * @param createDisposition only used in the error message of the schema check
 * @param tableSpec key added to {@code createdTables} once the method completes normally
 * @param kmsKey if non-null, set as the KMS key name of the new table's encryption configuration
 * @param bqServices factory for the {@code DatasetService} used for lookup and creation
 * @throws RuntimeException wrapping any exception raised inside the try block, including a failed
 *     schema check and failures while closing the dataset service
 */
@SuppressWarnings({ "nullness" })
private static void tryCreateTable(DoFn<?, ?>.ProcessContext context, Supplier<TableSchema> schemaSupplier, TableDestination tableDestination, CreateDisposition createDisposition, String tableSpec, String kmsKey, BigQueryServices bqServices) {
    // Mutate a clone, not the reference held by tableDestination.
    TableReference tableReference = tableDestination.getTableReference().clone();
    tableReference.setTableId(BigQueryHelpers.stripPartitionDecorator(tableReference.getTableId()));
    try (DatasetService datasetService = bqServices.getDatasetService(context.getPipelineOptions().as(BigQueryOptions.class))) {
        if (datasetService.getTable(tableReference) == null) {
            TableSchema tableSchema = schemaSupplier.get();
            // Exactly three placeholders for the three arguments below; the previous template had a
            // fourth "%s" that was never filled in.
            checkArgument(tableSchema != null, "Unless create disposition is %s, a schema must be specified, i.e. " + "DynamicDestinations.getSchema() may not return null. " + "However, create disposition is %s, and " + "the schema supplier returned null for destination %s", CreateDisposition.CREATE_NEVER, createDisposition, tableDestination);
            Table table = new Table().setTableReference(tableReference).setSchema(tableSchema);
            if (tableDestination.getTableDescription() != null) {
                table = table.setDescription(tableDestination.getTableDescription());
            }
            if (tableDestination.getTimePartitioning() != null) {
                table.setTimePartitioning(tableDestination.getTimePartitioning());
                // Clustering is only applied together with time partitioning.
                if (tableDestination.getClustering() != null) {
                    table.setClustering(tableDestination.getClustering());
                }
            }
            if (kmsKey != null) {
                table.setEncryptionConfiguration(new EncryptionConfiguration().setKmsKeyName(kmsKey));
            }
            datasetService.createTable(table);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Only reached when lookup/creation did not throw.
    createdTables.add(tableSpec);
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) Table(com.google.api.services.bigquery.model.Table) TableSchema(com.google.api.services.bigquery.model.TableSchema) EncryptionConfiguration(com.google.api.services.bigquery.model.EncryptionConfiguration) DatasetService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService)

Example 2 with EncryptionConfiguration

use of com.google.api.services.bigquery.model.EncryptionConfiguration in project beam by apache.

The class WriteRename, method startCopy.

/**
 * Configures a BigQuery table-copy job from {@code tempTables} into {@code ref} and wraps it in a
 * {@code BigQueryHelpers.PendingJob}.
 *
 * <p>The pending job is assembled from three callbacks (start the copy, poll it, look it up)
 * together with {@code maxRetryJobs} and {@code jobIdPrefix}. When the callbacks run is up to
 * {@code PendingJob}; this method only builds them.
 *
 * @param jobService service used by the callbacks to start, poll and fetch the job
 * @param datasetService used to resolve the location of the destination dataset
 * @param jobIdPrefix prefix handed to the pending job
 * @param ref destination table of the copy
 * @param tempTables source tables of the copy
 * @param writeDisposition its {@code name()} becomes the job's write disposition
 * @param createDisposition its {@code name()} becomes the job's create disposition
 * @param kmsKey if non-null, set as the KMS key name for the destination table
 * @param loadJobProjectId project for the job; when it or its value is null, the project of
 *     {@code ref} is used
 * @return the pending copy job
 */
private BigQueryHelpers.PendingJob startCopy(JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref, List<TableReference> tempTables, WriteDisposition writeDisposition, CreateDisposition createDisposition, String kmsKey, ValueProvider<String> loadJobProjectId) {
    JobConfigurationTableCopy copyJobConfig =
        new JobConfigurationTableCopy()
            .setSourceTables(tempTables)
            .setDestinationTable(ref)
            .setWriteDisposition(writeDisposition.name())
            .setCreateDisposition(createDisposition.name());
    if (kmsKey != null) {
        EncryptionConfiguration encryption = new EncryptionConfiguration().setKmsKeyName(kmsKey);
        copyJobConfig.setDestinationEncryptionConfiguration(encryption);
    }

    final String datasetLocation =
        BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());

    // Fall back to the destination table's project when no job project is configured.
    final String jobProject;
    if (loadJobProjectId == null || loadJobProjectId.get() == null) {
        jobProject = ref.getProjectId();
    } else {
        jobProject = loadJobProjectId.get();
    }

    return new BigQueryHelpers.PendingJob(
        // Callback that starts the copy job.
        retryId -> {
            JobReference copyJobRef =
                new JobReference()
                    .setProjectId(jobProject)
                    .setJobId(retryId.getJobId())
                    .setLocation(datasetLocation);
            LOG.info("Starting copy job for table {} using  {}, job id iteration {}", ref, copyJobRef, retryId.getRetryIndex());
            try {
                jobService.startCopyJob(copyJobRef, copyJobConfig);
            } catch (IOException | InterruptedException startFailure) {
                LOG.warn("Copy job {} failed.", copyJobRef, startFailure);
                throw new RuntimeException(startFailure);
            }
            return null;
        },
        // Callback that polls the result of the copy job.
        retryId -> {
            JobReference copyJobRef =
                new JobReference()
                    .setProjectId(jobProject)
                    .setJobId(retryId.getJobId())
                    .setLocation(datasetLocation);
            try {
                return jobService.pollJob(copyJobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
            } catch (InterruptedException pollFailure) {
                throw new RuntimeException(pollFailure);
            }
        },
        // Callback that looks the job up.
        retryId -> {
            JobReference copyJobRef =
                new JobReference()
                    .setProjectId(jobProject)
                    .setJobId(retryId.getJobId())
                    .setLocation(datasetLocation);
            try {
                return jobService.getJob(copyJobRef);
            } catch (InterruptedException | IOException lookupFailure) {
                throw new RuntimeException(lookupFailure);
            }
        },
        maxRetryJobs,
        jobIdPrefix);
}
Also used : JobReference(com.google.api.services.bigquery.model.JobReference) JobConfigurationTableCopy(com.google.api.services.bigquery.model.JobConfigurationTableCopy) EncryptionConfiguration(com.google.api.services.bigquery.model.EncryptionConfiguration) IOException(java.io.IOException)

Example 3 with EncryptionConfiguration

use of com.google.api.services.bigquery.model.EncryptionConfiguration in project beam by apache.

The class WriteTables, method startLoad.

/**
 * Configures a BigQuery load job that reads {@code gcsUris} into {@code ref} and wraps it in a
 * {@code PendingJob}.
 *
 * <p>Besides its parameters, the configuration reads {@code sourceFormat},
 * {@code ignoreUnknownValues}, {@code useAvroLogicalTypes}, {@code kmsKey},
 * {@code loadJobProjectId} and {@code maxRetryJobs}, which are declared outside this method. The
 * pending job is assembled from three callbacks (start the load, poll it, look it up); when they
 * run is up to {@code PendingJob}.
 *
 * @param jobService service used by the callbacks to start, poll and fetch the job
 * @param datasetService used to resolve the location of the destination dataset
 * @param jobIdPrefix prefix handed to the pending job
 * @param ref destination table of the load
 * @param timePartitioning applied to the job when non-null
 * @param clustering applied to the job only when {@code timePartitioning} is also non-null
 * @param schema schema set on the job; may be null
 * @param gcsUris source URIs of the load
 * @param writeDisposition its {@code name()} becomes the job's write disposition
 * @param createDisposition its {@code name()} becomes the job's create disposition
 * @param schemaUpdateOptions when non-null, their names are set as schema update options
 * @return the pending load job
 */
private PendingJob startLoad(JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref, TimePartitioning timePartitioning, Clustering clustering, @Nullable TableSchema schema, List<String> gcsUris, WriteDisposition writeDisposition, CreateDisposition createDisposition, Set<SchemaUpdateOption> schemaUpdateOptions) {
    JobConfigurationLoad loadJobConfig =
        new JobConfigurationLoad()
            .setDestinationTable(ref)
            .setSchema(schema)
            .setSourceUris(gcsUris)
            .setWriteDisposition(writeDisposition.name())
            .setCreateDisposition(createDisposition.name())
            .setSourceFormat(sourceFormat)
            .setIgnoreUnknownValues(ignoreUnknownValues)
            .setUseAvroLogicalTypes(useAvroLogicalTypes);
    if (schemaUpdateOptions != null) {
        loadJobConfig.setSchemaUpdateOptions(
            schemaUpdateOptions.stream().map(SchemaUpdateOption::name).collect(Collectors.toList()));
    }
    if (timePartitioning != null) {
        loadJobConfig.setTimePartitioning(timePartitioning);
        // Clustering is only applied together with time partitioning.
        if (clustering != null) {
            loadJobConfig.setClustering(clustering);
        }
    }
    if (kmsKey != null) {
        EncryptionConfiguration encryption = new EncryptionConfiguration().setKmsKeyName(kmsKey);
        loadJobConfig.setDestinationEncryptionConfiguration(encryption);
    }

    // Fall back to the destination table's project when no job project is configured.
    final String jobProject;
    if (loadJobProjectId == null || loadJobProjectId.get() == null) {
        jobProject = ref.getProjectId();
    } else {
        jobProject = loadJobProjectId.get();
    }
    final String datasetLocation =
        BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());

    return new PendingJob(
        // Callback that starts the load job.
        retryId -> {
            JobReference loadJobRef =
                new JobReference()
                    .setProjectId(jobProject)
                    .setJobId(retryId.getJobId())
                    .setLocation(datasetLocation);
            LOG.info("Loading {} files into {} using job {}, job id iteration {}", gcsUris.size(), ref, loadJobRef, retryId.getRetryIndex());
            try {
                jobService.startLoadJob(loadJobRef, loadJobConfig);
            } catch (IOException | InterruptedException startFailure) {
                LOG.warn("Load job {} failed with {}", loadJobRef, startFailure.toString());
                throw new RuntimeException(startFailure);
            }
            return null;
        },
        // Callback that polls the result of the load job.
        retryId -> {
            JobReference loadJobRef =
                new JobReference()
                    .setProjectId(jobProject)
                    .setJobId(retryId.getJobId())
                    .setLocation(datasetLocation);
            try {
                return jobService.pollJob(loadJobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
            } catch (InterruptedException pollFailure) {
                throw new RuntimeException(pollFailure);
            }
        },
        // Callback that looks the job up.
        retryId -> {
            JobReference loadJobRef =
                new JobReference()
                    .setProjectId(jobProject)
                    .setJobId(retryId.getJobId())
                    .setLocation(datasetLocation);
            try {
                return jobService.getJob(loadJobRef);
            } catch (InterruptedException | IOException lookupFailure) {
                throw new RuntimeException(lookupFailure);
            }
        },
        maxRetryJobs,
        jobIdPrefix);
}
Also used : JobConfigurationLoad(com.google.api.services.bigquery.model.JobConfigurationLoad) JobReference(com.google.api.services.bigquery.model.JobReference) SchemaUpdateOption(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.SchemaUpdateOption) EncryptionConfiguration(com.google.api.services.bigquery.model.EncryptionConfiguration) PendingJob(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob) IOException(java.io.IOException)

Example 4 with EncryptionConfiguration

use of com.google.api.services.bigquery.model.EncryptionConfiguration in project beam by apache.

The class BigQueryQueryHelper, method executeQuery.

/**
 * Runs {@code query} as a BigQuery query job that writes its results into a temporary table and
 * returns a reference to that table.
 *
 * <p>Steps: (1) determine the query location, (2) prepare the destination dataset/table,
 * (3) start the query job and poll it until it finishes.
 *
 * @param bqServices factory for the dataset and job services
 * @param options pipeline options; supply the project, the job name and service configuration
 * @param dryRunJobStats passed through to {@code dryRunQueryIfNeeded} when the location has to be
 *     derived from the tables the query references
 * @param stepUuid used to build the query job id
 * @param query the SQL text
 * @param flattenResults passed to the query configuration
 * @param useLegacySql passed to the query configuration
 * @param priority its {@code name()} becomes the job priority
 * @param location explicit query location; when null, the location of the first referenced
 *     table's dataset is used (and stays null if the query references no tables)
 * @param queryTempDatasetId user-provided dataset for the result table; when null a temporary
 *     dataset is created
 * @param kmsKey if non-null, set as the KMS key name for the destination table
 * @return reference to the table holding the query results
 * @throws IllegalArgumentException if a temp dataset was specified and the result table already
 *     exists in it
 * @throws IOException if the query job does not end in status {@code SUCCEEDED}, or propagated
 *     from the BigQuery services
 * @throws InterruptedException propagated from the BigQuery services
 */
public static TableReference executeQuery(BigQueryServices bqServices, BigQueryOptions options, AtomicReference<JobStatistics> dryRunJobStats, String stepUuid, String query, Boolean flattenResults, Boolean useLegacySql, QueryPriority priority, @Nullable String location, @Nullable String queryTempDatasetId, @Nullable String kmsKey) throws InterruptedException, IOException {
    // Step 1: Find the effective location of the query.
    String effectiveLocation = location;
    DatasetService tableService = bqServices.getDatasetService(options);
    if (effectiveLocation == null) {
        List<TableReference> referencedTables = dryRunQueryIfNeeded(bqServices, options, dryRunJobStats, query, flattenResults, useLegacySql, location).getQuery().getReferencedTables();
        if (referencedTables != null && !referencedTables.isEmpty()) {
            TableReference referencedTable = referencedTables.get(0);
            effectiveLocation = tableService.getDataset(referencedTable.getProjectId(), referencedTable.getDatasetId()).getLocation();
        }
    }
    // Step 2: Create a temporary dataset in the query location only if the user has not specified a
    // temp dataset.
    String queryJobId = BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY);
    Optional<String> queryTempDatasetOpt = Optional.ofNullable(queryTempDatasetId);
    // The dedicated BigQuery project wins over the default project; used for both the result table
    // and the job reference.
    String bigQueryProject = options.getBigQueryProject() == null ? options.getProject() : options.getBigQueryProject();
    TableReference queryResultTable = createTempTableReference(bigQueryProject, queryJobId, queryTempDatasetOpt);
    boolean beamToCreateTempDataset = !queryTempDatasetOpt.isPresent();
    // Create dataset only if it has not been set by the user
    if (beamToCreateTempDataset) {
        LOG.info("Creating temporary dataset {} for query results", queryResultTable.getDatasetId());
        tableService.createDataset(queryResultTable.getProjectId(), queryResultTable.getDatasetId(), effectiveLocation, "Temporary tables for query results of job " + options.getJobName(), TimeUnit.DAYS.toMillis(1));
    } else {
        // If the user specified a temp dataset, check that the destination table does not
        // exist
        Table destTable = tableService.getTable(queryResultTable);
        // checkArgument substitutes "%s" placeholders (not the logger's "{}"), so the template must
        // use "%s" for the table and dataset ids to appear in the message.
        checkArgument(destTable == null, "Refusing to write on existing table %s in the specified temp dataset %s", queryResultTable.getTableId(), queryResultTable.getDatasetId());
    }
    // Step 3: Execute the query under the job id built in Step 2.
    // NOTE(review): an earlier comment here spoke of generating a fresh random job id so that a
    // retry can recreate a deleted temp dataset/table; the code reuses queryJobId instead. Confirm
    // that createJobIdPrefix yields an id that is safe to reuse across retries.
    LOG.info("Exporting query results into temporary table {} using job {}", queryResultTable, queryJobId);
    JobReference jobReference = new JobReference().setProjectId(bigQueryProject).setLocation(effectiveLocation).setJobId(queryJobId);
    JobConfigurationQuery queryConfiguration = createBasicQueryConfig(query, flattenResults, useLegacySql).setAllowLargeResults(true).setDestinationTable(queryResultTable).setCreateDisposition("CREATE_IF_NEEDED").setWriteDisposition("WRITE_TRUNCATE").setPriority(priority.name());
    if (kmsKey != null) {
        queryConfiguration.setDestinationEncryptionConfiguration(new EncryptionConfiguration().setKmsKeyName(kmsKey));
    }
    JobService jobService = bqServices.getJobService(options);
    jobService.startQueryJob(jobReference, queryConfiguration);
    Job job = jobService.pollJob(jobReference, JOB_POLL_MAX_RETRIES);
    if (BigQueryHelpers.parseStatus(job) != Status.SUCCEEDED) {
        throw new IOException(String.format("Query job %s failed, status: %s", queryJobId, BigQueryHelpers.statusToPrettyString(job.getStatus())));
    }
    LOG.info("Query job {} completed", queryJobId);
    return queryResultTable;
}
Also used : Table(com.google.api.services.bigquery.model.Table) JobReference(com.google.api.services.bigquery.model.JobReference) EncryptionConfiguration(com.google.api.services.bigquery.model.EncryptionConfiguration) JobConfigurationQuery(com.google.api.services.bigquery.model.JobConfigurationQuery) DatasetService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService) IOException(java.io.IOException) JobService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService) TableReference(com.google.api.services.bigquery.model.TableReference) BigQueryResourceNaming.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference) Job(com.google.api.services.bigquery.model.Job)

Aggregations

EncryptionConfiguration (com.google.api.services.bigquery.model.EncryptionConfiguration)4 JobReference (com.google.api.services.bigquery.model.JobReference)3 IOException (java.io.IOException)3 Table (com.google.api.services.bigquery.model.Table)2 TableReference (com.google.api.services.bigquery.model.TableReference)2 DatasetService (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService)2 Job (com.google.api.services.bigquery.model.Job)1 JobConfigurationLoad (com.google.api.services.bigquery.model.JobConfigurationLoad)1 JobConfigurationQuery (com.google.api.services.bigquery.model.JobConfigurationQuery)1 JobConfigurationTableCopy (com.google.api.services.bigquery.model.JobConfigurationTableCopy)1 TableSchema (com.google.api.services.bigquery.model.TableSchema)1 PendingJob (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob)1 SchemaUpdateOption (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.SchemaUpdateOption)1 BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference)1 JobService (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService)1