Search in sources :

Example 1 with PendingJob

Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob in the Apache Beam project.

Method startLoad of the class WriteTables.

/**
 * Builds a {@link PendingJob} that loads the given files into the destination table.
 *
 * <p>The load configuration is assembled from the arguments plus settings held by the enclosing
 * instance (source format, ignore-unknown-values, Avro logical types and, when set, the KMS key).
 * The job is issued in the project supplied by {@code loadJobProjectId} when that yields a
 * non-null value, otherwise in the destination table's project, and in the location returned by
 * {@link BigQueryHelpers#getDatasetLocation} for the destination dataset.
 *
 * @param jobService service used to start, poll and look up the load job
 * @param datasetService service used to resolve the destination dataset's location
 * @param jobIdPrefix job id prefix handed to the {@link PendingJob}
 * @param ref destination table
 * @param timePartitioning time partitioning to set on the load, or {@code null} for none
 * @param clustering clustering to set on the load; ignored unless {@code timePartitioning} is
 *     non-null
 * @param schema schema set on the load configuration; may be {@code null}
 * @param gcsUris source URIs of the files to load
 * @param writeDisposition write disposition whose name is set on the load configuration
 * @param createDisposition create disposition whose name is set on the load configuration
 * @param schemaUpdateOptions schema update options, or {@code null} to leave them unset
 * @return a pending job wrapping the start, poll and lookup callbacks
 */
private PendingJob startLoad(JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref, TimePartitioning timePartitioning, Clustering clustering, @Nullable TableSchema schema, List<String> gcsUris, WriteDisposition writeDisposition, CreateDisposition createDisposition, Set<SchemaUpdateOption> schemaUpdateOptions) {
    JobConfigurationLoad loadConfig = new JobConfigurationLoad()
        .setDestinationTable(ref)
        .setSchema(schema)
        .setSourceUris(gcsUris)
        .setWriteDisposition(writeDisposition.name())
        .setCreateDisposition(createDisposition.name())
        .setSourceFormat(sourceFormat)
        .setIgnoreUnknownValues(ignoreUnknownValues)
        .setUseAvroLogicalTypes(useAvroLogicalTypes);
    if (schemaUpdateOptions != null) {
        List<String> options = schemaUpdateOptions.stream().map(SchemaUpdateOption::name).collect(Collectors.toList());
        loadConfig.setSchemaUpdateOptions(options);
    }
    if (timePartitioning != null) {
        loadConfig.setTimePartitioning(timePartitioning);
        // only set clustering if timePartitioning is set
        if (clustering != null) {
            loadConfig.setClustering(clustering);
        }
    }
    if (kmsKey != null) {
        loadConfig.setDestinationEncryptionConfiguration(new EncryptionConfiguration().setKmsKeyName(kmsKey));
    }
    // Fall back to the destination table's project when no explicit load-job project is provided.
    String projectId = loadJobProjectId == null || loadJobProjectId.get() == null ? ref.getProjectId() : loadJobProjectId.get();
    String bqLocation = BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
    return new PendingJob(
        // Function to load the data.
        jobId -> {
            JobReference jobRef = new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            LOG.info("Loading {} files into {} using job {}, job id iteration {}", gcsUris.size(), ref, jobRef, jobId.getRetryIndex());
            try {
                jobService.startLoadJob(jobRef, loadConfig);
            } catch (IOException | InterruptedException e) {
                if (e instanceof InterruptedException) {
                    // Restore the interrupt flag so callers catching the wrapper still see it.
                    Thread.currentThread().interrupt();
                }
                LOG.warn("Load job {} failed with {}", jobRef, e.toString());
                throw new RuntimeException(e);
            }
            return null;
        },
        // Function to poll the result of a load job.
        jobId -> {
            JobReference jobRef = new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            try {
                return jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers catching the wrapper still see it.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        },
        // Function to lookup a job.
        jobId -> {
            JobReference jobRef = new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
            try {
                return jobService.getJob(jobRef);
            } catch (InterruptedException | IOException e) {
                if (e instanceof InterruptedException) {
                    // Restore the interrupt flag so callers catching the wrapper still see it.
                    Thread.currentThread().interrupt();
                }
                throw new RuntimeException(e);
            }
        },
        maxRetryJobs,
        jobIdPrefix);
}
Also used : JobConfigurationLoad(com.google.api.services.bigquery.model.JobConfigurationLoad) JobReference(com.google.api.services.bigquery.model.JobReference) SchemaUpdateOption(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.SchemaUpdateOption) EncryptionConfiguration(com.google.api.services.bigquery.model.EncryptionConfiguration) PendingJob(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob) IOException(java.io.IOException)

Example 2 with PendingJob

Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob in the Apache Beam project.

Method testPendingJobManager of the class BigQueryHelpersTest.

/**
 * Registers five pending jobs with a {@link PendingJobManager} and checks which job ids are
 * reported to the completion callbacks.
 *
 * <p>Each job's start callback fails at random, its poll callback reports an error result while
 * the retry index is below 5, and its lookup callback returns the job only when the requested id
 * matches the id recorded by the last successful start.
 */
@Test
public void testPendingJobManager() throws Exception {
    PendingJobManager manager =
        new PendingJobManager(
            BackOffAdapter.toGcpBackOff(
                FluentBackoff.DEFAULT
                    .withMaxRetries(Integer.MAX_VALUE)
                    .withInitialBackoff(Duration.millis(10))
                    .withMaxBackoff(Duration.millis(10))
                    .backoff()));
    Set<String> completedIds = Sets.newHashSet();
    for (int i = 0; i < 5; i++) {
        String prefix = "JOB_" + i;
        Job job = new Job();
        job.setKind(" bigquery#job");
        PendingJob pending =
            new PendingJob(
                // Start: fail at random, otherwise record the id the job was started under.
                id -> {
                    if (new Random().nextInt(2) == 0) {
                        throw new RuntimeException("Failing to start.");
                    }
                    JobReference reference = new JobReference().setProjectId("").setLocation("").setJobId(id.getJobId());
                    job.setJobReference(reference);
                    return null;
                },
                // Poll: report an error result until the retry index reaches 5.
                id -> {
                    ErrorProto errorResult = id.getRetryIndex() < 5 ? new ErrorProto() : null;
                    job.setStatus(new JobStatus().setErrorResult(errorResult));
                    return job;
                },
                // Lookup: the job is only known under the id recorded by the start callback.
                id -> id.getJobId().equals(job.getJobReference().getJobId()) ? job : null,
                100,
                prefix);
        manager.addPendingJob(
            pending,
            finished -> {
                completedIds.add(finished.currentJobId.getJobId());
                return null;
            });
    }
    manager.waitForDone();
    Set<String> wantedIds = ImmutableSet.of("JOB_0-5", "JOB_1-5", "JOB_2-5", "JOB_3-5", "JOB_4-5");
    assertEquals(wantedIds, completedIds);
}
Also used : JobStatus(com.google.api.services.bigquery.model.JobStatus) JobReference(com.google.api.services.bigquery.model.JobReference) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) Random(java.util.Random) PendingJob(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob) PendingJobManager(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJobManager) PendingJob(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob) Job(com.google.api.services.bigquery.model.Job) Test(org.junit.Test)

Aggregations

JobReference (com.google.api.services.bigquery.model.JobReference)2 PendingJob (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob)2 EncryptionConfiguration (com.google.api.services.bigquery.model.EncryptionConfiguration)1 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)1 Job (com.google.api.services.bigquery.model.Job)1 JobConfigurationLoad (com.google.api.services.bigquery.model.JobConfigurationLoad)1 JobStatus (com.google.api.services.bigquery.model.JobStatus)1 IOException (java.io.IOException)1 Random (java.util.Random)1 PendingJobManager (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJobManager)1 SchemaUpdateOption (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.SchemaUpdateOption)1 Test (org.junit.Test)1