Search in sources :

Example 66 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryServicesImplTest method testInsertRetryPolicy.

/**
   * Tests that {@link DatasetServiceImpl#insertAll} uses the supplied {@link InsertRetryPolicy},
   * and returns the list of rows not retried.
   *
   * <p>Two rows are submitted and the mocked service answers three times: first with a
   * "timeout" error for row 0 and an "invalid" error for row 1, then with a "timeout" error for
   * the single remaining row, and finally with no errors. With
   * {@link InsertRetryPolicy#retryTransientErrors()} the test expects exactly one row to be
   * reported in the failed-inserts list.
   */
@Test
public void testInsertRetryPolicy() throws InterruptedException, IOException {
    TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
    List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));
    // First time row0 fails with a retryable error, and row1 fails with a persistent error.
    final TableDataInsertAllResponse firstFailure =
        new TableDataInsertAllResponse()
            .setInsertErrors(
                ImmutableList.of(
                    new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout"))),
                    new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(new ErrorProto().setReason("invalid")))));
    // Second time there is only one row, which fails with a retryable error.
    final TableDataInsertAllResponse secondFailure =
        new TableDataInsertAllResponse()
            .setInsertErrors(
                ImmutableList.of(
                    new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")))));
    // On the final attempt, no failures are returned.
    final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
    // Every response is a 200 carrying JSON; row-level failures are reported only in the payload.
    when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
    when(response.getStatusCode()).thenReturn(200);
    // Payloads are served in order: two partial failures, then full success.
    when(response.getContent())
        .thenReturn(toStream(firstFailure))
        .thenReturn(toStream(secondFailure))
        .thenReturn(toStream(allRowsSucceeded));
    DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    // Filled in by insertAll with the rows it gave up on.
    List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
    dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.retryTransientErrors(), failedInserts);
    assertEquals(1, failedInserts.size());
    expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) TableRow(com.google.api.services.bigquery.model.TableRow) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) InsertErrors(com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors) MockSleeper(com.google.api.client.testing.util.MockSleeper) Test(org.junit.Test)

Example 67 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryServicesImplTest method testInsertDoesNotRetry.

/**
   * Tests that {@link DatasetServiceImpl#insertAll} does not retry non-rate-limited attempts.
   *
   * <p>The mocked service answers with a 403 whose reason is "actually forbidden"; a valid
   * follow-up response is stubbed as well, but the test verifies it is never consumed.
   */
@Test
public void testInsertDoesNotRetry() throws Throwable {
    // The cause rethrown from the catch block below must match these expectations.
    thrown.expect(GoogleJsonResponseException.class);
    thrown.expectMessage("actually forbidden");

    TableReference ref =
        new TableReference()
            .setProjectId("project")
            .setDatasetId("dataset")
            .setTableId("table");
    List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
    rows.add(wrapTableRow(new TableRow()));

    // First response is 403 not-rate-limited, second response has valid payload but should not
    // be invoked.
    when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
    when(response.getStatusCode()).thenReturn(403).thenReturn(200);
    when(response.getContent())
        .thenReturn(toStream(errorWithReasonAndStatus("actually forbidden", 403)))
        .thenReturn(toStream(new TableDataInsertAllResponse()));

    DatasetServiceImpl datasetService =
        new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    try {
        datasetService.insertAll(
            ref,
            rows,
            null,
            BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
            new MockSleeper(),
            InsertRetryPolicy.alwaysRetry(),
            null);
        fail();
    } catch (RuntimeException wrapped) {
        // Each accessor was hit exactly once, i.e. only the first (403) response was read.
        verify(response, times(1)).getStatusCode();
        verify(response, times(1)).getContent();
        verify(response, times(1)).getContentType();
        // Unwrap so the ExpectedException rule sees the underlying exception.
        Throwable cause = wrapped.getCause();
        throw cause;
    }
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) TableRow(com.google.api.services.bigquery.model.TableRow) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) ArrayList(java.util.ArrayList) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) MockSleeper(com.google.api.client.testing.util.MockSleeper) Test(org.junit.Test)

Example 68 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryServicesImplTest method testIsTableEmptySucceeds.

/**
   * Tests that {@code isTableEmpty} reports a non-empty table when the mocked service first
   * answers with a 403 "rateLimitExceeded" error and then with a one-row table listing.
   */
@Test
public void testIsTableEmptySucceeds() throws Exception {
    TableReference table =
        new TableReference()
            .setProjectId("projectId")
            .setDatasetId("datasetId")
            .setTableId("tableId");
    // A listing with a single (empty) row, so the table must not be reported as empty.
    TableDataList oneRowListing = new TableDataList().setRows(ImmutableList.of(new TableRow()));

    // First response is 403 rate limited, second response has valid payload.
    when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
    when(response.getStatusCode()).thenReturn(403).thenReturn(200);
    when(response.getContent())
        .thenReturn(toStream(errorWithReasonAndStatus("rateLimitExceeded", 403)))
        .thenReturn(toStream(oneRowListing));

    DatasetServiceImpl service = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    boolean empty = service.isTableEmpty(table, BackOff.ZERO_BACKOFF, Sleeper.DEFAULT);
    assertFalse(empty);

    // Both stubbed responses must have been read, one per attempt.
    verify(response, times(2)).getStatusCode();
    verify(response, times(2)).getContent();
    verify(response, times(2)).getContentType();
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) TableRow(com.google.api.services.bigquery.model.TableRow) TableDataList(com.google.api.services.bigquery.model.TableDataList) Test(org.junit.Test)

Example 69 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryIOTest method testBuildWriteWithSchema.

/**
   * Builds a {@code BigQueryIO.Write} from a fully-qualified table spec plus an explicit schema
   * and hands it to {@code checkWriteObject} together with the expected project, dataset, table,
   * schema and dispositions.
   */
@Test
public void testBuildWriteWithSchema() {
    TableSchema emptySchema = new TableSchema();
    BigQueryIO.Write<TableRow> writeTransform =
        BigQueryIO.<TableRow>write()
            .to("foo.com:project:somedataset.sometable")
            .withSchema(emptySchema);
    // NOTE(review): the trailing null is presumably the expected table description - confirm
    // against checkWriteObject's signature.
    checkWriteObject(
        writeTransform,
        "foo.com:project",
        "somedataset",
        "sometable",
        emptySchema,
        CreateDisposition.CREATE_IF_NEEDED,
        WriteDisposition.WRITE_EMPTY,
        null);
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) Test(org.junit.Test)

Example 70 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryIOTest method testWrite.

/**
   * Runs a pipeline that writes three table rows through {@code BigQueryIO.writeTableRows()}
   * against fake BigQuery services, then checks the temp location with
   * {@code testNumFiles(tempDir, 0)}.
   */
@Test
public void testWrite() throws Exception {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("defaultproject");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

    // Fake services, with the destination dataset created up front.
    FakeDatasetService datasetService = new FakeDatasetService();
    FakeBigQueryServices fakeBqServices =
        new FakeBigQueryServices()
            .withJobService(new FakeJobService())
            .withDatasetService(datasetService);
    datasetService.createDataset("defaultproject", "dataset-id", "", "");

    Pipeline p = TestPipeline.create(bqOptions);

    // Input rows and the schema describing them.
    TableRow rowA = new TableRow().set("name", "a").set("number", 1);
    TableRow rowB = new TableRow().set("name", "b").set("number", 2);
    TableRow rowC = new TableRow().set("name", "c").set("number", 3);
    TableSchema nameNumberSchema =
        new TableSchema()
            .setFields(
                ImmutableList.of(
                    new TableFieldSchema().setName("name").setType("STRING"),
                    new TableFieldSchema().setName("number").setType("INTEGER")));

    p.apply(Create.of(rowA, rowB, rowC).withCoder(TableRowJsonCoder.of()))
        .apply(
            BigQueryIO.writeTableRows()
                .to("dataset-id.table-id")
                .withTableDescription(null)
                .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                .withSchema(nameNumberSchema)
                .withTestServices(fakeBqServices)
                .withoutValidation());
    p.run();

    // NOTE(review): presumably asserts that no files remain under the temp location - confirm
    // against testNumFiles.
    File tempDir = new File(bqOptions.getTempLocation());
    testNumFiles(tempDir, 0);
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) File(java.io.File) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

TableRow (com.google.api.services.bigquery.model.TableRow) 73, Test (org.junit.Test) 43, TableReference (com.google.api.services.bigquery.model.TableReference) 24, TableSchema (com.google.api.services.bigquery.model.TableSchema) 18, Pipeline (org.apache.beam.sdk.Pipeline) 16, KV (org.apache.beam.sdk.values.KV) 15, TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema) 14, JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) 14, BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) 13, TestPipeline (org.apache.beam.sdk.testing.TestPipeline) 12, BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) 11, Table (com.google.api.services.bigquery.model.Table) 10, HashBasedTable (com.google.common.collect.HashBasedTable) 10, JobStatus (com.google.api.services.bigquery.model.JobStatus) 9, TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse) 8, ArrayList (java.util.ArrayList) 8, List (java.util.List) 8, Map (java.util.Map) 8, ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow) 7, JobStatistics (com.google.api.services.bigquery.model.JobStatistics) 6