Usage example of org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob in the Apache Beam project: the startLoad method of the WriteTables class.
/**
 * Builds a {@link PendingJob} that, when run, issues a BigQuery load job copying the files in
 * {@code gcsUris} into the destination table {@code ref}.
 *
 * <p>The returned job is not started here; the {@code PendingJob} wraps three callbacks — one to
 * start the load, one to poll it, and one to look it up by id — which the pending-job machinery
 * invokes with retry (up to {@code maxRetryJobs} attempts).
 *
 * @param jobService service used to start, poll, and look up BigQuery jobs
 * @param datasetService service used to resolve the destination dataset's location
 * @param jobIdPrefix prefix for generated job ids (a retry index is appended per attempt)
 * @param ref destination table
 * @param timePartitioning optional time partitioning for the destination table
 * @param clustering optional clustering; only applied when {@code timePartitioning} is set
 * @param schema optional schema of the destination table
 * @param gcsUris GCS files to load
 * @param writeDisposition BigQuery write disposition
 * @param createDisposition BigQuery create disposition
 * @param schemaUpdateOptions optional schema-update options to pass through to the load config
 * @return a {@link PendingJob} ready to be registered with a {@code PendingJobManager}
 */
private PendingJob startLoad(JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref, TimePartitioning timePartitioning, Clustering clustering, @Nullable TableSchema schema, List<String> gcsUris, WriteDisposition writeDisposition, CreateDisposition createDisposition, Set<SchemaUpdateOption> schemaUpdateOptions) {
JobConfigurationLoad loadConfig = new JobConfigurationLoad().setDestinationTable(ref).setSchema(schema).setSourceUris(gcsUris).setWriteDisposition(writeDisposition.name()).setCreateDisposition(createDisposition.name()).setSourceFormat(sourceFormat).setIgnoreUnknownValues(ignoreUnknownValues).setUseAvroLogicalTypes(useAvroLogicalTypes);
if (schemaUpdateOptions != null) {
// SchemaUpdateOption::name is the idiomatic form of Enum<SchemaUpdateOption>::name.
List<String> options = schemaUpdateOptions.stream().map(SchemaUpdateOption::name).collect(Collectors.toList());
loadConfig.setSchemaUpdateOptions(options);
}
if (timePartitioning != null) {
loadConfig.setTimePartitioning(timePartitioning);
// only set clustering if timePartitioning is set
if (clustering != null) {
loadConfig.setClustering(clustering);
}
}
if (kmsKey != null) {
loadConfig.setDestinationEncryptionConfiguration(new EncryptionConfiguration().setKmsKeyName(kmsKey));
}
// Prefer an explicitly configured billing project for the load job; fall back to the
// destination table's project.
String projectId = loadJobProjectId == null || loadJobProjectId.get() == null ? ref.getProjectId() : loadJobProjectId.get();
String bqLocation = BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
PendingJob retryJob = new PendingJob(// Function to load the data.
jobId -> {
JobReference jobRef = new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
LOG.info("Loading {} files into {} using job {}, job id iteration {}", gcsUris.size(), ref, jobRef, jobId.getRetryIndex());
try {
jobService.startLoadJob(jobRef, loadConfig);
} catch (IOException | InterruptedException e) {
LOG.warn("Load job {} failed with {}", jobRef, e.toString());
// Restore the interrupt flag before wrapping: swallowing interruption status can
// prevent the worker from shutting down promptly.
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
throw new RuntimeException(e);
}
return null;
}, // Function to poll the result of a load job.
jobId -> {
JobReference jobRef = new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
try {
return jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
} catch (InterruptedException e) {
// Preserve the thread's interrupted status for callers above the RuntimeException.
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
}, // Function to lookup a job.
jobId -> {
JobReference jobRef = new JobReference().setProjectId(projectId).setJobId(jobId.getJobId()).setLocation(bqLocation);
try {
return jobService.getJob(jobRef);
} catch (InterruptedException | IOException e) {
// Restore the interrupt flag before wrapping (no-op for IOException).
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
throw new RuntimeException(e);
}
}, maxRetryJobs, jobIdPrefix);
return retryJob;
}
Usage example of org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob in the Apache Beam project: the testPendingJobManager method of the BigQueryHelpersTest class.
@Test
public void testPendingJobManager() throws Exception {
// Effectively unlimited retries with a fixed 10ms backoff so the test completes quickly.
PendingJobManager manager = new PendingJobManager(BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.withMaxRetries(Integer.MAX_VALUE).withInitialBackoff(Duration.millis(10)).withMaxBackoff(Duration.millis(10)).backoff()));
Set<String> succeeded = Sets.newHashSet();
for (int i = 0; i < 5; ++i) {
final Job job = new Job();
job.setKind(" bigquery#job");
PendingJob pending = new PendingJob(
// Start callback: fails randomly about half the time to exercise start retries.
id -> {
if (new Random().nextInt(2) == 0) {
throw new RuntimeException("Failing to start.");
}
job.setJobReference(new JobReference().setProjectId("").setLocation("").setJobId(id.getJobId()));
return null;
},
// Poll callback: reports an error result until the fifth retry, then success.
id -> {
JobStatus status = id.getRetryIndex() < 5 ? new JobStatus().setErrorResult(new ErrorProto()) : new JobStatus().setErrorResult(null);
job.setStatus(status);
return job;
},
// Lookup callback: only finds the job whose id matches the most recent successful start.
id -> id.getJobId().equals(job.getJobReference().getJobId()) ? job : null,
100,
"JOB_" + i);
manager.addPendingJob(pending, done -> {
succeeded.add(done.currentJobId.getJobId());
return null;
});
}
manager.waitForDone();
// Each job should succeed on its fifth retry, producing the "-5" suffixed ids.
Set<String> expectedJobs = ImmutableSet.of("JOB_0-5", "JOB_1-5", "JOB_2-5", "JOB_3-5", "JOB_4-5");
assertEquals(expectedJobs, succeeded);
}
Aggregations