Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.
The class BigQueryIOWriteTest, method testExtendedErrorRetrieval.
@Test
public void testExtendedErrorRetrieval() throws Exception {
  if (useStorageApi) {
    return;
  }
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");
  String tableSpec = "project-id:dataset-id.table-id";
  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  TableDataInsertAllResponse.InsertErrors persistentError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(Lists.newArrayList(new ErrorProto().setReason("invalidQuery")));
  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));
  PCollection<BigQueryInsertError> failedRows =
      p.apply(Create.of(row1, row2, row3))
          .apply(
              BigQueryIO.writeTableRows()
                  .to(tableSpec)
                  .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withTestServices(fakeBqServices)
                  .withoutValidation()
                  .withExtendedErrorInfo())
          .getFailedInsertsWithErr();
  // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
  // failed rows.
  PAssert.that(failedRows)
      .containsInAnyOrder(
          new BigQueryInsertError(
              row2, persistentError, BigQueryHelpers.parseTableSpec(tableSpec)));
  p.run();
  // Only row1 and row3 were successfully inserted.
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row3));
}
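The failed-insert output from getFailedInsertsWithErr() is an ordinary PCollection, so it can be routed to a dead-letter sink. The following is a minimal sketch, not part of the test above: it assumes the BigQueryInsertError accessors getRow(), getError(), and getTable(), plus Beam's MapElements and TypeDescriptors utilities; the step name and output formatting are illustrative.

// Sketch only: turn each failed insert into a human-readable line,
// e.g. to log it or write it to a dead-letter destination.
PCollection<String> deadLetterLines =
    failedRows.apply(
        "FormatFailedInserts",
        MapElements.into(TypeDescriptors.strings())
            .via(
                (BigQueryInsertError err) ->
                    String.format(
                        "table=%s row=%s error=%s",
                        err.getTable(), err.getRow(), err.getError())));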
Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.
The class BigQueryIOWriteTest, method testFailuresNoRetryPolicy.
@Test
public void testFailuresNoRetryPolicy() throws Exception {
  if (useStorageApi || !useStreaming) {
    return;
  }
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");
  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError)));
  p.apply(Create.of(row1, row2, row3))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
}
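No retry policy is supplied in this test, so streaming inserts fall back to the documented default of retrying every failed row, which is why row1 and row2 still reach the table despite their transient "timeout" errors. A sketch of making that default explicit, assuming InsertRetryPolicy.alwaysRetry() matches the default behavior described for BigQueryIO streaming inserts:

// Sketch: roughly equivalent to supplying no policy at all for streaming inserts;
// every failed row is retried until the insert eventually succeeds.
BigQueryIO.writeTableRows()
    .to("project-id:dataset-id.table-id")
    .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
    .withFailedInsertRetryPolicy(InsertRetryPolicy.alwaysRetry());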
Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.
The class BigQueryHelpersTest, method testPendingJobManager.
@Test
public void testPendingJobManager() throws Exception {
  PendingJobManager jobManager =
      new PendingJobManager(
          BackOffAdapter.toGcpBackOff(
              FluentBackoff.DEFAULT
                  .withMaxRetries(Integer.MAX_VALUE)
                  .withInitialBackoff(Duration.millis(10))
                  .withMaxBackoff(Duration.millis(10))
                  .backoff()));
  Set<String> succeeded = Sets.newHashSet();
  for (int i = 0; i < 5; i++) {
    Job currentJob = new Job();
    currentJob.setKind(" bigquery#job");
    PendingJob pendingJob =
        new PendingJob(
            retryId -> {
              if (new Random().nextInt(2) == 0) {
                throw new RuntimeException("Failing to start.");
              }
              currentJob.setJobReference(
                  new JobReference()
                      .setProjectId("")
                      .setLocation("")
                      .setJobId(retryId.getJobId()));
              return null;
            },
            retryId -> {
              if (retryId.getRetryIndex() < 5) {
                currentJob.setStatus(new JobStatus().setErrorResult(new ErrorProto()));
              } else {
                currentJob.setStatus(new JobStatus().setErrorResult(null));
              }
              return currentJob;
            },
            retryId -> {
              if (retryId.getJobId().equals(currentJob.getJobReference().getJobId())) {
                return currentJob;
              } else {
                return null;
              }
            },
            100,
            "JOB_" + i);
    jobManager.addPendingJob(
        pendingJob,
        j -> {
          succeeded.add(j.currentJobId.getJobId());
          return null;
        });
  }
  jobManager.waitForDone();
  Set<String> expectedJobs =
      ImmutableSet.of("JOB_0-5", "JOB_1-5", "JOB_2-5", "JOB_3-5", "JOB_4-5");
  assertEquals(expectedJobs, succeeded);
}
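Each pending job keeps failing while its retry index is below 5 and succeeds at index 5, which is why the expected job ids end in "-5". The backoff handed to PendingJobManager pins both the initial and maximum delay at 10 ms, so the manager polls at an effectively constant cadence and never exhausts its retry budget. A small sketch of that backoff in isolation, using Beam's FluentBackoff utility (variable names are illustrative; nextBackOffMillis() declares IOException, so this belongs in a context that can throw):

// Sketch only: the same backoff configuration the test passes to PendingJobManager.
// With initial == max == 10 ms, every call yields roughly a 10 ms delay;
// a return value of BackOff.STOP would mean the retry budget is exhausted.
org.apache.beam.sdk.util.BackOff backoff =
    FluentBackoff.DEFAULT
        .withMaxRetries(Integer.MAX_VALUE)
        .withInitialBackoff(Duration.millis(10))
        .withMaxBackoff(Duration.millis(10))
        .backoff();
long delayMillis = backoff.nextBackOffMillis(); // ~10 ms for every retry in this configuration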
Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.
The class FakeJobService, method runLoadJob.
private JobStatus runLoadJob(JobReference jobRef, JobConfigurationLoad load)
    throws InterruptedException, IOException {
  TableReference destination = load.getDestinationTable();
  TableSchema schema = load.getSchema();
  List<ResourceId> sourceFiles = filesForLoadJobs.get(jobRef.getProjectId(), jobRef.getJobId());
  WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
  CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());
  Table existingTable = datasetService.getTable(destination);
  if (schema == null) {
    schema = existingTable.getSchema();
  }
  checkArgument(schema != null, "No schema specified");
  if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
    return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
  }
  if (existingTable == null) {
    TableReference strippedDestination =
        destination
            .clone()
            .setTableId(BigQueryHelpers.stripPartitionDecorator(destination.getTableId()));
    existingTable = new Table().setTableReference(strippedDestination).setSchema(schema);
    if (load.getTimePartitioning() != null) {
      existingTable = existingTable.setTimePartitioning(load.getTimePartitioning());
    }
    if (load.getClustering() != null) {
      existingTable = existingTable.setClustering(load.getClustering());
    }
    datasetService.createTable(existingTable);
  }
  List<TableRow> rows = Lists.newArrayList();
  for (ResourceId filename : sourceFiles) {
    if (load.getSourceFormat().equals("NEWLINE_DELIMITED_JSON")) {
      rows.addAll(readJsonTableRows(filename.toString()));
    } else if (load.getSourceFormat().equals("AVRO")) {
      rows.addAll(readAvroTableRows(filename.toString(), schema));
    }
  }
  datasetService.insertAll(destination, rows, null);
  FileSystems.delete(sourceFiles);
  return new JobStatus().setState("DONE");
}
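Both runLoadJob and runCopyJob (below) signal failure by attaching a bare ErrorProto as the error result of a FAILED JobStatus, and success by returning state DONE with no error result. A caller polling the fake service can therefore distinguish outcomes with the standard JobStatus accessors; a minimal sketch, with a hypothetical helper name:

// Sketch only: interpreting a JobStatus as returned by the fake job service above.
// A FAILED state carries an ErrorProto in errorResult; DONE carries none.
static boolean jobSucceeded(JobStatus status) {
  ErrorProto errorResult = status.getErrorResult();
  if (errorResult != null) {
    // The fake service sets an empty ErrorProto, so reason/message may be null here.
    System.err.println(
        "Job failed: reason=" + errorResult.getReason() + " message=" + errorResult.getMessage());
    return false;
  }
  return "DONE".equals(status.getState());
}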
Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.
The class FakeJobService, method runCopyJob.
private JobStatus runCopyJob(JobConfigurationTableCopy copy)
    throws InterruptedException, IOException {
  List<TableReference> sources = copy.getSourceTables();
  TableReference destination = copy.getDestinationTable();
  WriteDisposition writeDisposition = WriteDisposition.valueOf(copy.getWriteDisposition());
  CreateDisposition createDisposition = CreateDisposition.valueOf(copy.getCreateDisposition());
  Table existingTable = datasetService.getTable(destination);
  if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
    return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
  }
  TimePartitioning partitioning = null;
  Clustering clustering = null;
  TableSchema schema = null;
  boolean first = true;
  List<TableRow> allRows = Lists.newArrayList();
  for (TableReference source : sources) {
    Table table = checkNotNull(datasetService.getTable(source));
    if (!first) {
      if (!Objects.equals(partitioning, table.getTimePartitioning())) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
      }
      if (!Objects.equals(clustering, table.getClustering())) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
      }
      if (!Objects.equals(schema, table.getSchema())) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
      }
    }
    partitioning = table.getTimePartitioning();
    clustering = table.getClustering();
    schema = table.getSchema();
    first = false;
    allRows.addAll(
        datasetService.getAllRows(
            source.getProjectId(), source.getDatasetId(), source.getTableId()));
  }
  datasetService.createTable(
      new Table()
          .setTableReference(destination)
          .setSchema(schema)
          .setTimePartitioning(partitioning)
          .setClustering(clustering)
          .setEncryptionConfiguration(copy.getDestinationEncryptionConfiguration()));
  datasetService.insertAll(destination, allRows, null);
  return new JobStatus().setState("DONE");
}
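The failure branches above attach an empty ErrorProto to the FAILED status, which is enough for the tests. If a more descriptive error were wanted, the same ErrorProto model object can carry a reason and message; a sketch of a drop-in replacement for one of those return statements (the reason and message strings are illustrative, not what the real BigQuery service reports):

// Sketch only: a more descriptive failure result for the mismatched-source-tables case.
return new JobStatus()
    .setState("FAILED")
    .setErrorResult(
        new ErrorProto()
            .setReason("invalid")
            .setMessage("Source tables must share schema, partitioning, and clustering"));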