use of com.google.api.services.bigquery.model.JobConfigurationLoad in project beam by apache.
the class WriteTables method startLoad.
/**
 * Builds the load configuration for importing {@code gcsUris} into {@code ref} and wraps the
 * start, poll and lookup operations for that load in a {@link PendingJob}.
 *
 * <p>Besides the arguments, the configuration uses this instance's {@code sourceFormat},
 * {@code ignoreUnknownValues}, {@code useAvroLogicalTypes} and {@code kmsKey} settings. Jobs are
 * addressed in the project supplied by {@code loadJobProjectId} when it yields a non-null value,
 * otherwise in the destination table's project, and in the location returned by
 * {@code BigQueryHelpers.getDatasetLocation} for the destination dataset.
 *
 * <p>Checked exceptions raised inside the three callbacks are rethrown as
 * {@link RuntimeException}. When the cause is an {@link InterruptedException} the thread's
 * interrupt flag is restored first so that callers can still observe the interruption.
 *
 * @param jobService service used to start, poll and look up the load job
 * @param datasetService service used to look up the destination dataset's location
 * @param jobIdPrefix job id prefix handed to the {@link PendingJob}
 * @param ref destination table
 * @param timePartitioning time partitioning to set on the load; skipped when null
 * @param clustering clustering to set on the load; only applied when {@code timePartitioning}
 *     is also non-null
 * @param schema schema set on the load configuration; may be null
 * @param gcsUris source URIs set on the load configuration
 * @param writeDisposition write disposition; its enum name is set on the load configuration
 * @param createDisposition create disposition; its enum name is set on the load configuration
 * @param schemaUpdateOptions schema update options; their enum names are set on the load
 *     configuration, skipped when null
 * @return the {@link PendingJob} wrapping the three callbacks
 */
private PendingJob startLoad(JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref, TimePartitioning timePartitioning, Clustering clustering, @Nullable TableSchema schema, List<String> gcsUris, WriteDisposition writeDisposition, CreateDisposition createDisposition, Set<SchemaUpdateOption> schemaUpdateOptions) {
  JobConfigurationLoad loadConfig =
      new JobConfigurationLoad()
          .setDestinationTable(ref)
          .setSchema(schema)
          .setSourceUris(gcsUris)
          .setWriteDisposition(writeDisposition.name())
          .setCreateDisposition(createDisposition.name())
          .setSourceFormat(sourceFormat)
          .setIgnoreUnknownValues(ignoreUnknownValues)
          .setUseAvroLogicalTypes(useAvroLogicalTypes);
  if (schemaUpdateOptions != null) {
    List<String> options =
        schemaUpdateOptions.stream().map(SchemaUpdateOption::name).collect(Collectors.toList());
    loadConfig.setSchemaUpdateOptions(options);
  }
  if (timePartitioning != null) {
    loadConfig.setTimePartitioning(timePartitioning);
    // only set clustering if timePartitioning is set
    if (clustering != null) {
      loadConfig.setClustering(clustering);
    }
  }
  if (kmsKey != null) {
    loadConfig.setDestinationEncryptionConfiguration(
        new EncryptionConfiguration().setKmsKeyName(kmsKey));
  }
  // Fall back to the destination table's project when no explicit load-job project is supplied.
  String projectId =
      loadJobProjectId == null || loadJobProjectId.get() == null
          ? ref.getProjectId()
          : loadJobProjectId.get();
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
  return new PendingJob(
      // Function to load the data.
      jobId -> {
        JobReference jobRef =
            new JobReference()
                .setProjectId(projectId)
                .setJobId(jobId.getJobId())
                .setLocation(bqLocation);
        LOG.info("Loading {} files into {} using job {}, job id iteration {}", gcsUris.size(), ref, jobRef, jobId.getRetryIndex());
        try {
          jobService.startLoadJob(jobRef, loadConfig);
        } catch (IOException | InterruptedException e) {
          LOG.warn("Load job {} failed with {}", jobRef, e.toString());
          if (e instanceof InterruptedException) {
            // Wrapping would otherwise hide the interruption from code further up the stack.
            Thread.currentThread().interrupt();
          }
          throw new RuntimeException(e);
        }
        return null;
      },
      // Function to poll the result of a load job.
      jobId -> {
        JobReference jobRef =
            new JobReference()
                .setProjectId(projectId)
                .setJobId(jobId.getJobId())
                .setLocation(bqLocation);
        try {
          return jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new RuntimeException(e);
        }
      },
      // Function to lookup a job.
      jobId -> {
        JobReference jobRef =
            new JobReference()
                .setProjectId(projectId)
                .setJobId(jobId.getJobId())
                .setLocation(bqLocation);
        try {
          return jobService.getJob(jobRef);
        } catch (InterruptedException | IOException e) {
          if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
          }
          throw new RuntimeException(e);
        }
      },
      maxRetryJobs,
      jobIdPrefix);
}
use of com.google.api.services.bigquery.model.JobConfigurationLoad in project beam by apache.
the class BigQueryIOWriteTest method schemaUpdateOptionsTest.
/**
 * Writes a single row with the given write method and schema update options, then checks that
 * every job recorded by {@code fakeJobService} carries exactly those options in its load
 * configuration.
 *
 * @param insertMethod write method passed to {@code withMethod}; also used to name the pipeline
 *     steps
 * @param schemaUpdateOptions options passed to {@code withSchemaUpdateOptions} and expected, by
 *     enum name, on each job's load configuration
 * @throws Exception if running the pipeline or inspecting the jobs fails
 */
void schemaUpdateOptionsTest(BigQueryIO.Write.Method insertMethod, Set<SchemaUpdateOption> schemaUpdateOptions) throws Exception {
  TableRow row = new TableRow().set("date", "2019-01-01").set("number", "1");
  // One TableFieldSchema per column: chaining setName/setType twice on a single instance would
  // overwrite "date"/DATE with "number"/INTEGER and leave the schema with only one field.
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withSchemaUpdateOptions(schemaUpdateOptions);
  p.apply("Create" + insertMethod, Create.<TableRow>of(row))
      .apply("Write" + insertMethod, writeTransform);
  p.run();
  List<String> expectedOptions =
      schemaUpdateOptions.stream().map(SchemaUpdateOption::name).collect(Collectors.toList());
  for (Job job : fakeJobService.getAllJobs()) {
    JobConfigurationLoad configuration = job.getConfiguration().getLoad();
    assertEquals(expectedOptions, configuration.getSchemaUpdateOptions());
  }
}
use of com.google.api.services.bigquery.model.JobConfigurationLoad in project google-cloud-java by GoogleCloudPlatform.
the class WriteChannelConfiguration method toPb.
/**
 * Converts this configuration into the API model representation.
 *
 * <p>The destination table, {@code maxBadRecords}, {@code ignoreUnknownValues} and
 * {@code autodetect} are always copied. Dispositions, null marker, CSV options, schema, source
 * format, Datastore backup projection fields and schema update options are copied only when the
 * corresponding value is non-null.
 *
 * @return a new {@code JobConfiguration} whose load section is the populated
 *     {@code JobConfigurationLoad}
 */
com.google.api.services.bigquery.model.JobConfiguration toPb() {
  JobConfigurationLoad loadPb = new JobConfigurationLoad();
  loadPb.setDestinationTable(destinationTable.toPb());
  if (createDisposition != null) {
    loadPb.setCreateDisposition(createDisposition.toString());
  }
  if (writeDisposition != null) {
    loadPb.setWriteDisposition(writeDisposition.toString());
  }
  if (nullMarker != null) {
    loadPb.setNullMarker(nullMarker);
  }

  // CSV-specific settings are only present when getCsvOptions() returns a value.
  CsvOptions csv = getCsvOptions();
  if (csv != null) {
    loadPb.setFieldDelimiter(csv.getFieldDelimiter());
    loadPb.setAllowJaggedRows(csv.allowJaggedRows());
    loadPb.setAllowQuotedNewlines(csv.allowQuotedNewLines());
    loadPb.setEncoding(csv.getEncoding());
    loadPb.setQuote(csv.getQuote());
    if (csv.getSkipLeadingRows() != null) {
      // todo(mziccard) remove checked cast or comment when #1044 is closed
      loadPb.setSkipLeadingRows(Ints.checkedCast(csv.getSkipLeadingRows()));
    }
  }

  if (schema != null) {
    loadPb.setSchema(schema.toPb());
  }
  if (formatOptions != null) {
    loadPb.setSourceFormat(formatOptions.getType());
  }
  loadPb.setMaxBadRecords(maxBadRecords);
  loadPb.setIgnoreUnknownValues(ignoreUnknownValues);

  DatastoreBackupOptions datastoreBackup = getDatastoreBackupOptions();
  if (datastoreBackup != null) {
    loadPb.setProjectionFields(datastoreBackup.getProjectionFields());
  }

  if (schemaUpdateOptions != null) {
    // The API model takes the options as their enum names.
    ImmutableList.Builder<String> optionNames = ImmutableList.builder();
    for (JobInfo.SchemaUpdateOption option : schemaUpdateOptions) {
      optionNames.add(option.name());
    }
    loadPb.setSchemaUpdateOptions(optionNames.build());
  }
  loadPb.setAutodetect(autodetect);

  com.google.api.services.bigquery.model.JobConfiguration configurationPb =
      new com.google.api.services.bigquery.model.JobConfiguration();
  configurationPb.setLoad(loadPb);
  return configurationPb;
}
use of com.google.api.services.bigquery.model.JobConfigurationLoad in project google-cloud-java by GoogleCloudPlatform.
the class LoadJobConfiguration method toPb.
/**
 * Converts this load job configuration into the API model representation.
 *
 * <p>The destination table, {@code maxBadRecords}, {@code ignoreUnknownValues} and
 * {@code autodetect} are always copied. Dispositions, null marker, CSV options, schema, source
 * format, Datastore backup projection fields, source URIs and schema update options are copied
 * only when the corresponding value is non-null.
 *
 * @return a new {@code JobConfiguration} whose load section is the populated
 *     {@code JobConfigurationLoad}
 */
@Override
com.google.api.services.bigquery.model.JobConfiguration toPb() {
  JobConfigurationLoad loadPb = new JobConfigurationLoad();
  loadPb.setDestinationTable(destinationTable.toPb());
  if (createDisposition != null) {
    loadPb.setCreateDisposition(createDisposition.toString());
  }
  if (writeDisposition != null) {
    loadPb.setWriteDisposition(writeDisposition.toString());
  }
  if (nullMarker != null) {
    loadPb.setNullMarker(nullMarker);
  }

  // CSV-specific settings are only present when getCsvOptions() returns a value.
  CsvOptions csv = getCsvOptions();
  if (csv != null) {
    loadPb.setFieldDelimiter(csv.getFieldDelimiter());
    loadPb.setAllowJaggedRows(csv.allowJaggedRows());
    loadPb.setAllowQuotedNewlines(csv.allowQuotedNewLines());
    loadPb.setEncoding(csv.getEncoding());
    loadPb.setQuote(csv.getQuote());
    if (csv.getSkipLeadingRows() != null) {
      // todo(mziccard) remove checked cast or comment when #1044 is closed
      loadPb.setSkipLeadingRows(Ints.checkedCast(csv.getSkipLeadingRows()));
    }
  }

  if (schema != null) {
    loadPb.setSchema(schema.toPb());
  }
  if (formatOptions != null) {
    loadPb.setSourceFormat(formatOptions.getType());
  }
  loadPb.setMaxBadRecords(maxBadRecords);
  loadPb.setIgnoreUnknownValues(ignoreUnknownValues);

  DatastoreBackupOptions datastoreBackup = getDatastoreBackupOptions();
  if (datastoreBackup != null) {
    loadPb.setProjectionFields(datastoreBackup.getProjectionFields());
  }

  if (sourceUris != null) {
    // Hand the API model its own immutable copy of the URIs.
    loadPb.setSourceUris(ImmutableList.copyOf(sourceUris));
  }

  if (schemaUpdateOptions != null) {
    // The API model takes the options as their enum names.
    ImmutableList.Builder<String> optionNames = ImmutableList.builder();
    for (JobInfo.SchemaUpdateOption option : schemaUpdateOptions) {
      optionNames.add(option.name());
    }
    loadPb.setSchemaUpdateOptions(optionNames.build());
  }
  loadPb.setAutodetect(autodetect);

  com.google.api.services.bigquery.model.JobConfiguration configurationPb =
      new com.google.api.services.bigquery.model.JobConfiguration();
  configurationPb.setLoad(loadPb);
  return configurationPb;
}
use of com.google.api.services.bigquery.model.JobConfigurationLoad in project beam by apache.
the class WriteTables method load.
/**
 * Loads {@code gcsUris} into {@code ref} as newline-delimited JSON, starting a fresh load job
 * for each attempt until one succeeds or {@code BatchLoads.MAX_RETRY_JOBS} attempts have failed.
 *
 * <p>Each attempt uses the job id {@code jobIdPrefix + "-" + attemptIndex} in the destination
 * table's project. On success the table description is patched when {@code tableDescription}
 * is non-null.
 *
 * @param jobService service used to start and poll load jobs
 * @param datasetService service used to patch the table description after a successful load
 * @param jobIdPrefix prefix for the per-attempt job ids
 * @param ref destination table
 * @param schema schema set on the load configuration; may be null
 * @param gcsUris source URIs set on the load configuration
 * @param writeDisposition write disposition; its enum name is set on the load configuration
 * @param createDisposition create disposition; its enum name is set on the load configuration
 * @param tableDescription description to patch onto the table after success; skipped when null
 * @throws InterruptedException propagated from the job and dataset service calls
 * @throws IOException propagated from the job and dataset service calls
 * @throws RuntimeException if a job ends in UNKNOWN status or every attempt fails
 * @throws IllegalStateException if a job reports a status other than SUCCEEDED, UNKNOWN or
 *     FAILED
 */
private void load(JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref, @Nullable TableSchema schema, List<String> gcsUris, WriteDisposition writeDisposition, CreateDisposition createDisposition, @Nullable String tableDescription) throws InterruptedException, IOException {
  JobConfigurationLoad loadConfig = new JobConfigurationLoad();
  loadConfig.setDestinationTable(ref);
  loadConfig.setSchema(schema);
  loadConfig.setSourceUris(gcsUris);
  loadConfig.setWriteDisposition(writeDisposition.name());
  loadConfig.setCreateDisposition(createDisposition.name());
  loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");

  String projectId = ref.getProjectId();
  Job mostRecentFailure = null;
  for (int attempt = 0; attempt < BatchLoads.MAX_RETRY_JOBS; ++attempt) {
    // Every attempt gets its own job id derived from the shared prefix.
    String jobId = jobIdPrefix + "-" + attempt;
    JobReference jobRef = new JobReference();
    jobRef.setProjectId(projectId);
    jobRef.setJobId(jobId);

    jobService.startLoadJob(jobRef, loadConfig);
    Job polledJob = jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
    Status outcome = BigQueryHelpers.parseStatus(polledJob);
    switch (outcome) {
      case SUCCEEDED:
        if (tableDescription != null) {
          datasetService.patchTableDescription(ref, tableDescription);
        }
        return;
      case UNKNOWN:
        throw new RuntimeException(String.format("UNKNOWN status of load job [%s]: %s.", jobId, BigQueryHelpers.jobToPrettyString(polledJob)));
      case FAILED:
        // Remember the failure for the final error message and move on to the next attempt.
        mostRecentFailure = polledJob;
        break;
      default:
        throw new IllegalStateException(String.format("Unexpected status [%s] of load job: %s.", outcome, BigQueryHelpers.jobToPrettyString(polledJob)));
    }
  }

  // All attempts failed.
  String exhaustedMessage =
      String.format(
          "Failed to create load job with id prefix %s, "
              + "reached max retries: %d, last failed load job: %s.",
          jobIdPrefix,
          BatchLoads.MAX_RETRY_JOBS,
          BigQueryHelpers.jobToPrettyString(mostRecentFailure));
  throw new RuntimeException(exhaustedMessage);
}
Aggregations