Search in sources :

Example 6 with JobConfigurationQuery

use of com.google.api.services.bigquery.model.JobConfigurationQuery in project beam by apache.

In class BigQueryQuerySource, method executeQuery.

/**
 * Starts a BigQuery query job that writes into {@code destinationTable} and waits for it to
 * finish.
 *
 * <p>The query configuration comes from {@code createBasicQueryConfig()} and is then set to allow
 * large results, create the destination table if needed, run at {@code BATCH} priority, and use
 * the {@code WRITE_EMPTY} write disposition.
 *
 * @param executingProject project id placed on the job reference
 * @param jobId job id placed on the job reference and reported in the failure message
 * @param destinationTable table that receives the query results
 * @param jobService service used to start and poll the job
 * @throws IOException if the polled job's parsed status is anything other than SUCCEEDED
 * @throws InterruptedException declared for the underlying job service calls
 */
private void executeQuery(String executingProject, String jobId, TableReference destinationTable, JobService jobService) throws IOException, InterruptedException {
    JobReference reference = new JobReference();
    reference.setProjectId(executingProject);
    reference.setJobId(jobId);

    JobConfigurationQuery config = createBasicQueryConfig();
    config.setAllowLargeResults(true);
    config.setCreateDisposition("CREATE_IF_NEEDED");
    config.setDestinationTable(destinationTable);
    config.setPriority("BATCH");
    config.setWriteDisposition("WRITE_EMPTY");

    jobService.startQueryJob(reference, config);
    Job finishedJob = jobService.pollJob(reference, JOB_POLL_MAX_RETRIES);

    // Nothing more to do when the job succeeded; every other status is reported as a failure.
    if (BigQueryHelpers.parseStatus(finishedJob) == Status.SUCCEEDED) {
        return;
    }
    String prettyStatus = BigQueryHelpers.statusToPrettyString(finishedJob.getStatus());
    throw new IOException(String.format("Query job %s failed, status: %s.", jobId, prettyStatus));
}
Also used : JobReference(com.google.api.services.bigquery.model.JobReference) JobConfigurationQuery(com.google.api.services.bigquery.model.JobConfigurationQuery) IOException(java.io.IOException) Job(com.google.api.services.bigquery.model.Job)

Example 7 with JobConfigurationQuery

use of com.google.api.services.bigquery.model.JobConfigurationQuery in project beam by apache.

In class BigQueryQueryHelper, method executeQuery.

/**
 * Runs {@code query} as a BigQuery query job that writes its results into a temporary table, and
 * returns a reference to that table once the job has succeeded.
 *
 * <p>The method proceeds in three steps: it determines the location to run in, prepares the
 * dataset that will hold the temporary result table, and then starts and polls the query job.
 *
 * @param bqServices factory for the dataset and job services used here
 * @param options pipeline options supplying the project ids and the job name
 * @param dryRunJobStats holder passed through to {@code dryRunQueryIfNeeded}
 * @param stepUuid identifier mixed into the generated query job id
 * @param query the query text to execute
 * @param flattenResults passed through to the query configuration helpers
 * @param useLegacySql passed through to the query configuration helpers
 * @param priority priority whose {@code name()} is set on the query configuration
 * @param location location to run the job in; when {@code null} it is taken from the dataset of
 *     the first table referenced by the query, if any
 * @param queryTempDatasetId user-supplied dataset for the temporary table; when {@code null} a
 *     temporary dataset is created
 * @param kmsKey when non-null, set as the KMS key name of the destination encryption
 *     configuration
 * @return reference to the table holding the query results
 * @throws IllegalArgumentException if a temp dataset was supplied and the destination table
 *     already exists in it (assuming {@code checkArgument} is the Guava-style precondition)
 * @throws IOException if the query job does not finish with status SUCCEEDED
 * @throws InterruptedException declared for the underlying service calls
 */
public static TableReference executeQuery(BigQueryServices bqServices, BigQueryOptions options, AtomicReference<JobStatistics> dryRunJobStats, String stepUuid, String query, Boolean flattenResults, Boolean useLegacySql, QueryPriority priority, @Nullable String location, @Nullable String queryTempDatasetId, @Nullable String kmsKey) throws InterruptedException, IOException {
    // Step 1: Find the effective location of the query.
    String effectiveLocation = location;
    DatasetService tableService = bqServices.getDatasetService(options);
    if (effectiveLocation == null) {
        List<TableReference> referencedTables = dryRunQueryIfNeeded(bqServices, options, dryRunJobStats, query, flattenResults, useLegacySql, location).getQuery().getReferencedTables();
        if (referencedTables != null && !referencedTables.isEmpty()) {
            // Use the location of the dataset that owns the first referenced table.
            TableReference referencedTable = referencedTables.get(0);
            effectiveLocation = tableService.getDataset(referencedTable.getProjectId(), referencedTable.getDatasetId()).getLocation();
        }
    }
    // Both the temporary table and the query job use the BigQuery-specific project when one is
    // configured, and the default project otherwise. Resolve it once.
    String bigQueryProject = options.getBigQueryProject() == null ? options.getProject() : options.getBigQueryProject();
    // Step 2: Create a temporary dataset in the query location only if the user has not specified a
    // temp dataset.
    String queryJobId = BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY);
    Optional<String> queryTempDatasetOpt = Optional.ofNullable(queryTempDatasetId);
    TableReference queryResultTable = createTempTableReference(bigQueryProject, queryJobId, queryTempDatasetOpt);
    boolean beamToCreateTempDataset = !queryTempDatasetOpt.isPresent();
    // Create dataset only if it has not been set by the user
    if (beamToCreateTempDataset) {
        LOG.info("Creating temporary dataset {} for query results", queryResultTable.getDatasetId());
        tableService.createDataset(queryResultTable.getProjectId(), queryResultTable.getDatasetId(), effectiveLocation, "Temporary tables for query results of job " + options.getJobName(), TimeUnit.DAYS.toMillis(1));
    } else {
        // If the user specified a temp dataset, check that the destination table does not
        // exist
        Table destTable = tableService.getTable(queryResultTable);
        // The precondition template takes %s placeholders (not the logger's {} form), so the table
        // and dataset ids are substituted into the message.
        checkArgument(destTable == null, "Refusing to write on existing table %s in the specified temp dataset %s", queryResultTable.getTableId(), queryResultTable.getDatasetId());
    }
    // Step 3: Execute the query. Generate a transient (random) query job ID, because this code may
    // be retried after the temporary dataset and table have been deleted by a previous attempt --
    // in that case, we want to regenerate the temporary dataset and table, and we'll need a fresh
    // query ID to do that.
    LOG.info("Exporting query results into temporary table {} using job {}", queryResultTable, queryJobId);
    JobReference jobReference = new JobReference().setProjectId(bigQueryProject).setLocation(effectiveLocation).setJobId(queryJobId);
    JobConfigurationQuery queryConfiguration = createBasicQueryConfig(query, flattenResults, useLegacySql).setAllowLargeResults(true).setDestinationTable(queryResultTable).setCreateDisposition("CREATE_IF_NEEDED").setWriteDisposition("WRITE_TRUNCATE").setPriority(priority.name());
    if (kmsKey != null) {
        queryConfiguration.setDestinationEncryptionConfiguration(new EncryptionConfiguration().setKmsKeyName(kmsKey));
    }
    JobService jobService = bqServices.getJobService(options);
    jobService.startQueryJob(jobReference, queryConfiguration);
    Job job = jobService.pollJob(jobReference, JOB_POLL_MAX_RETRIES);
    if (BigQueryHelpers.parseStatus(job) != Status.SUCCEEDED) {
        throw new IOException(String.format("Query job %s failed, status: %s", queryJobId, BigQueryHelpers.statusToPrettyString(job.getStatus())));
    }
    LOG.info("Query job {} completed", queryJobId);
    return queryResultTable;
}
Also used : Table(com.google.api.services.bigquery.model.Table) JobReference(com.google.api.services.bigquery.model.JobReference) EncryptionConfiguration(com.google.api.services.bigquery.model.EncryptionConfiguration) JobConfigurationQuery(com.google.api.services.bigquery.model.JobConfigurationQuery) DatasetService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService) IOException(java.io.IOException) JobService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService) TableReference(com.google.api.services.bigquery.model.TableReference) BigQueryResourceNaming.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference) Job(com.google.api.services.bigquery.model.Job)

Aggregations

JobConfigurationQuery (com.google.api.services.bigquery.model.JobConfigurationQuery)7 Job (com.google.api.services.bigquery.model.Job)6 JobReference (com.google.api.services.bigquery.model.JobReference)4 TableReference (com.google.api.services.bigquery.model.TableReference)4 JobConfiguration (com.google.api.services.bigquery.model.JobConfiguration)3 TableRow (com.google.api.services.bigquery.model.TableRow)3 IOException (java.io.IOException)3 Matchers.containsString (org.hamcrest.Matchers.containsString)3 Test (org.junit.Test)3 Matchers.anyString (org.mockito.Matchers.anyString)3 Dataset (com.google.api.services.bigquery.model.Dataset)2 JobStatistics (com.google.api.services.bigquery.model.JobStatistics)2 JobStatistics2 (com.google.api.services.bigquery.model.JobStatistics2)2 JobStatus (com.google.api.services.bigquery.model.JobStatus)2 Table (com.google.api.services.bigquery.model.Table)2 EncryptionConfiguration (com.google.api.services.bigquery.model.EncryptionConfiguration)1 GetQueryResultsResponse (com.google.api.services.bigquery.model.GetQueryResultsResponse)1 TableSchema (com.google.api.services.bigquery.model.TableSchema)1 SchemaUpdateOption (com.google.cloud.bigquery.JobInfo.SchemaUpdateOption)1 ImmutableList (com.google.common.collect.ImmutableList)1