Example 16 with ErrorProto

Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.

From the class BigQueryIOWriteTest, method testExtendedErrorRetrieval:

@Test
public void testExtendedErrorRetrieval() throws Exception {
    if (useStorageApi) {
        return;
    }
    TableRow row1 = new TableRow().set("name", "a").set("number", "1");
    TableRow row2 = new TableRow().set("name", "b").set("number", "2");
    TableRow row3 = new TableRow().set("name", "c").set("number", "3");
    String tableSpec = "project-id:dataset-id.table-id";
    TableDataInsertAllResponse.InsertErrors ephemeralError =
        new TableDataInsertAllResponse.InsertErrors()
            .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
    TableDataInsertAllResponse.InsertErrors persistentError =
        new TableDataInsertAllResponse.InsertErrors()
            .setErrors(Lists.newArrayList(new ErrorProto().setReason("invalidQuery")));
    fakeDatasetService.failOnInsert(
        ImmutableMap.of(
            row1, ImmutableList.of(ephemeralError, ephemeralError),
            row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));
    PCollection<BigQueryInsertError> failedRows =
        p.apply(Create.of(row1, row2, row3))
            .apply(
                BigQueryIO.writeTableRows()
                    .to(tableSpec)
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                    .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                    .withSchema(
                        new TableSchema()
                            .setFields(
                                ImmutableList.of(
                                    new TableFieldSchema().setName("name").setType("STRING"),
                                    new TableFieldSchema().setName("number").setType("INTEGER"))))
                    .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                    .withTestServices(fakeBqServices)
                    .withoutValidation()
                    .withExtendedErrorInfo())
            .getFailedInsertsWithErr();
    // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
    // failed rows.
    PAssert.that(failedRows).containsInAnyOrder(new BigQueryInsertError(row2, persistentError, BigQueryHelpers.parseTableSpec(tableSpec)));
    p.run();
    // Only row1 and row3 were successfully inserted.
    assertThat(fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"), containsInAnyOrder(row1, row3));
}
Also used: ErrorProto (com.google.api.services.bigquery.model.ErrorProto), TableSchema (com.google.api.services.bigquery.model.TableSchema), TableRow (com.google.api.services.bigquery.model.TableRow), TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse), BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString), TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema), Test (org.junit.Test)
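
The failed-insert path above is worth a closer look: getFailedInsertsWithErr() yields a PCollection of BigQueryInsertError, each carrying the offending TableRow, the InsertErrors returned by the service, and the destination TableReference. A minimal downstream consumer might look like the sketch below; it assumes only those public getters, and the DoFn itself is hypothetical rather than part of the Beam test:

import com.google.api.services.bigquery.model.ErrorProto;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError;
import org.apache.beam.sdk.transforms.DoFn;

class LogInsertErrorsFn extends DoFn<BigQueryInsertError, String> {
    @ProcessElement
    public void processElement(@Element BigQueryInsertError err, OutputReceiver<String> out) {
        // Each InsertErrors wraps one or more ErrorProto entries; the "reason"
        // field ("timeout", "invalidQuery", ...) is what retry policies inspect.
        StringBuilder reasons = new StringBuilder();
        for (ErrorProto proto : err.getError().getErrors()) {
            reasons.append(proto.getReason()).append(' ');
        }
        out.output(err.getTable().getTableId() + ": " + reasons.toString().trim());
    }
}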

Example 17 with ErrorProto

Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.

From the class BigQueryIOWriteTest, method testFailuresNoRetryPolicy:

@Test
public void testFailuresNoRetryPolicy() throws Exception {
    if (useStorageApi || !useStreaming) {
        return;
    }
    TableRow row1 = new TableRow().set("name", "a").set("number", "1");
    TableRow row2 = new TableRow().set("name", "b").set("number", "2");
    TableRow row3 = new TableRow().set("name", "c").set("number", "3");
    TableDataInsertAllResponse.InsertErrors ephemeralError =
        new TableDataInsertAllResponse.InsertErrors()
            .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
    fakeDatasetService.failOnInsert(
        ImmutableMap.of(
            row1, ImmutableList.of(ephemeralError, ephemeralError),
            row2, ImmutableList.of(ephemeralError, ephemeralError)));
    p.apply(Create.of(row1, row2, row3))
        .apply(
            BigQueryIO.writeTableRows()
                .to("project-id:dataset-id.table-id")
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                .withSchema(
                    new TableSchema()
                        .setFields(
                            ImmutableList.of(
                                new TableFieldSchema().setName("name").setType("STRING"),
                                new TableFieldSchema().setName("number").setType("INTEGER"))))
                .withTestServices(fakeBqServices)
                .withoutValidation());
    p.run();
    assertThat(fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"), containsInAnyOrder(row1, row2, row3));
}
Also used: ErrorProto (com.google.api.services.bigquery.model.ErrorProto), TableSchema (com.google.api.services.bigquery.model.TableSchema), TableRow (com.google.api.services.bigquery.model.TableRow), TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse), TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema), Test (org.junit.Test)
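
Unlike Example 16, this test sets no withFailedInsertRetryPolicy, and the final assertion shows the consequence: the default behavior keeps retrying the transient "timeout" failures until all three rows land. For comparison, here is a minimal sketch of a custom policy; it assumes only the public InsertRetryPolicy contract (shouldRetry plus Context.getInsertErrors()), and the policy itself is hypothetical:

import com.google.api.services.bigquery.model.ErrorProto;
import org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy;

// Retries only "timeout" failures; any other reason is treated as persistent.
InsertRetryPolicy retryTimeoutsOnly = new InsertRetryPolicy() {
    @Override
    public boolean shouldRetry(Context context) {
        for (ErrorProto error : context.getInsertErrors().getErrors()) {
            if (!"timeout".equals(error.getReason())) {
                return false;
            }
        }
        return true;
    }
};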

Example 18 with ErrorProto

Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.

From the class BigQueryHelpersTest, method testPendingJobManager:

@Test
public void testPendingJobManager() throws Exception {
    PendingJobManager jobManager =
        new PendingJobManager(
            BackOffAdapter.toGcpBackOff(
                FluentBackoff.DEFAULT
                    .withMaxRetries(Integer.MAX_VALUE)
                    .withInitialBackoff(Duration.millis(10))
                    .withMaxBackoff(Duration.millis(10))
                    .backoff()));
    Set<String> succeeded = Sets.newHashSet();
    for (int i = 0; i < 5; i++) {
        Job currentJob = new Job();
        currentJob.setKind("bigquery#job");
        PendingJob pendingJob = new PendingJob(retryId -> {
            if (new Random().nextInt(2) == 0) {
                throw new RuntimeException("Failing to start.");
            }
            currentJob.setJobReference(new JobReference().setProjectId("").setLocation("").setJobId(retryId.getJobId()));
            return null;
        }, retryId -> {
            if (retryId.getRetryIndex() < 5) {
                currentJob.setStatus(new JobStatus().setErrorResult(new ErrorProto()));
            } else {
                currentJob.setStatus(new JobStatus().setErrorResult(null));
            }
            return currentJob;
        }, retryId -> {
            if (retryId.getJobId().equals(currentJob.getJobReference().getJobId())) {
                return currentJob;
            } else {
                return null;
            }
        }, 100, "JOB_" + i);
        jobManager.addPendingJob(pendingJob, j -> {
            succeeded.add(j.currentJobId.getJobId());
            return null;
        });
    }
    jobManager.waitForDone();
    Set<String> expectedJobs = ImmutableSet.of("JOB_0-5", "JOB_1-5", "JOB_2-5", "JOB_3-5", "JOB_4-5");
    assertEquals(expectedJobs, succeeded);
}
Also used: JobStatus (com.google.api.services.bigquery.model.JobStatus), JobReference (com.google.api.services.bigquery.model.JobReference), ErrorProto (com.google.api.services.bigquery.model.ErrorProto), Random (java.util.Random), PendingJob (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJob), PendingJobManager (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.PendingJobManager), Job (com.google.api.services.bigquery.model.Job), Test (org.junit.Test)
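
The three lambdas passed to PendingJob are the start, poll, and lookup actions, and the poll lambda encodes the fake's success convention: a Job whose JobStatus carries a non-null ErrorProto as its errorResult counts as still failing, and clearing the errorResult to null (here, on the fifth retry) counts as done. A minimal helper making that check explicit (hypothetical, not part of BigQueryHelpersTest):

import com.google.api.services.bigquery.model.Job;

// True while the polled job still carries an errorResult.
static boolean jobFailed(Job job) {
    return job.getStatus() != null && job.getStatus().getErrorResult() != null;
}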

Example 19 with ErrorProto

Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.

From the class FakeJobService, method runLoadJob:

private JobStatus runLoadJob(JobReference jobRef, JobConfigurationLoad load) throws InterruptedException, IOException {
    TableReference destination = load.getDestinationTable();
    TableSchema schema = load.getSchema();
    List<ResourceId> sourceFiles = filesForLoadJobs.get(jobRef.getProjectId(), jobRef.getJobId());
    WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
    CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());
    Table existingTable = datasetService.getTable(destination);
    if (schema == null) {
        schema = existingTable.getSchema();
    }
    checkArgument(schema != null, "No schema specified");
    if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
    }
    if (existingTable == null) {
        TableReference strippedDestination = destination.clone().setTableId(BigQueryHelpers.stripPartitionDecorator(destination.getTableId()));
        existingTable = new Table().setTableReference(strippedDestination).setSchema(schema);
        if (load.getTimePartitioning() != null) {
            existingTable = existingTable.setTimePartitioning(load.getTimePartitioning());
        }
        if (load.getClustering() != null) {
            existingTable = existingTable.setClustering(load.getClustering());
        }
        datasetService.createTable(existingTable);
    }
    List<TableRow> rows = Lists.newArrayList();
    for (ResourceId filename : sourceFiles) {
        if (load.getSourceFormat().equals("NEWLINE_DELIMITED_JSON")) {
            rows.addAll(readJsonTableRows(filename.toString()));
        } else if (load.getSourceFormat().equals("AVRO")) {
            rows.addAll(readAvroTableRows(filename.toString(), schema));
        }
    }
    datasetService.insertAll(destination, rows, null);
    FileSystems.delete(sourceFiles);
    return new JobStatus().setState("DONE");
}
Also used: JobStatus (com.google.api.services.bigquery.model.JobStatus), TableReference (com.google.api.services.bigquery.model.TableReference), CreateDisposition (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition), HashBasedTable (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.HashBasedTable), Table (com.google.api.services.bigquery.model.Table), ErrorProto (com.google.api.services.bigquery.model.ErrorProto), TableSchema (com.google.api.services.bigquery.model.TableSchema), ResourceId (org.apache.beam.sdk.io.fs.ResourceId), TableRow (com.google.api.services.bigquery.model.TableRow), WriteDisposition (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition)
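
runLoadJob signals a disposition failure with a bare new ErrorProto(), which is all the callers in these tests inspect. If richer diagnostics were wanted, the same pattern could populate the error via the public ErrorProto setters; a minimal sketch (the helper name is hypothetical):

import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.JobStatus;

// Builds a FAILED status carrying a machine-readable reason and a message.
static JobStatus failedStatus(String reason, String message) {
    return new JobStatus()
        .setState("FAILED")
        .setErrorResult(new ErrorProto().setReason(reason).setMessage(message));
}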

Example 20 with ErrorProto

Use of com.google.api.services.bigquery.model.ErrorProto in project beam by apache.

From the class FakeJobService, method runCopyJob:

private JobStatus runCopyJob(JobConfigurationTableCopy copy) throws InterruptedException, IOException {
    List<TableReference> sources = copy.getSourceTables();
    TableReference destination = copy.getDestinationTable();
    WriteDisposition writeDisposition = WriteDisposition.valueOf(copy.getWriteDisposition());
    CreateDisposition createDisposition = CreateDisposition.valueOf(copy.getCreateDisposition());
    Table existingTable = datasetService.getTable(destination);
    if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
    }
    TimePartitioning partitioning = null;
    Clustering clustering = null;
    TableSchema schema = null;
    boolean first = true;
    List<TableRow> allRows = Lists.newArrayList();
    for (TableReference source : sources) {
        Table table = checkNotNull(datasetService.getTable(source));
        if (!first) {
            if (!Objects.equals(partitioning, table.getTimePartitioning())) {
                return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
            }
            if (!Objects.equals(clustering, table.getClustering())) {
                return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
            }
            if (!Objects.equals(schema, table.getSchema())) {
                return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
            }
        }
        partitioning = table.getTimePartitioning();
        clustering = table.getClustering();
        schema = table.getSchema();
        first = false;
        allRows.addAll(datasetService.getAllRows(source.getProjectId(), source.getDatasetId(), source.getTableId()));
    }
    datasetService.createTable(
        new Table()
            .setTableReference(destination)
            .setSchema(schema)
            .setTimePartitioning(partitioning)
            .setClustering(clustering)
            .setEncryptionConfiguration(copy.getDestinationEncryptionConfiguration()));
    datasetService.insertAll(destination, allRows, null);
    return new JobStatus().setState("DONE");
}
Also used: HashBasedTable (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.HashBasedTable), Table (com.google.api.services.bigquery.model.Table), ErrorProto (com.google.api.services.bigquery.model.ErrorProto), TableSchema (com.google.api.services.bigquery.model.TableSchema), Clustering (com.google.api.services.bigquery.model.Clustering), TimePartitioning (com.google.api.services.bigquery.model.TimePartitioning), JobStatus (com.google.api.services.bigquery.model.JobStatus), TableReference (com.google.api.services.bigquery.model.TableReference), CreateDisposition (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition), TableRow (com.google.api.services.bigquery.model.TableRow), WriteDisposition (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition)
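
The copy loop rejects any source table whose time partitioning, clustering, or schema differs from the first source's, returning a FAILED status with an empty ErrorProto in each case. That three-way comparison could be factored into one helper; a minimal sketch (a hypothetical refactor, not part of FakeJobService):

import java.util.Objects;
import com.google.api.services.bigquery.model.Clustering;
import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.api.services.bigquery.model.TimePartitioning;

// True when the candidate source matches the metadata accumulated so far.
static boolean copyCompatible(
        Table table, TimePartitioning partitioning, Clustering clustering, TableSchema schema) {
    return Objects.equals(partitioning, table.getTimePartitioning())
        && Objects.equals(clustering, table.getClustering())
        && Objects.equals(schema, table.getSchema());
}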

Aggregations

ErrorProto (com.google.api.services.bigquery.model.ErrorProto): 20
TableRow (com.google.api.services.bigquery.model.TableRow): 14
Test (org.junit.Test): 14
TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse): 13
TableReference (com.google.api.services.bigquery.model.TableReference): 11
JobStatus (com.google.api.services.bigquery.model.JobStatus): 8
TableSchema (com.google.api.services.bigquery.model.TableSchema): 8
Table (com.google.api.services.bigquery.model.Table): 6
MockSleeper (com.google.api.client.testing.util.MockSleeper): 5
InsertErrors (com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors): 5
TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 5
DatasetServiceImpl (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl): 5
FailsafeValueInSingleWindow (org.apache.beam.sdk.values.FailsafeValueInSingleWindow): 5
Job (com.google.api.services.bigquery.model.Job): 4
JobReference (com.google.api.services.bigquery.model.JobReference): 4
ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow): 4
TableDataInsertAllRequest (com.google.api.services.bigquery.model.TableDataInsertAllRequest): 3
CreateDisposition (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition): 3
WriteDisposition (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition): 3
RowToInsert (com.google.cloud.bigquery.InsertAllRequest.RowToInsert): 2