Search in sources :

Example 11 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryTableRowIteratorTest method testReadFromQuery.

/**
   * Verifies that when the query runs, the correct data is returned and the temporary dataset and
   * table are both cleaned up.
   */
@Test
public void testReadFromQuery() throws IOException, InterruptedException {
    // Mock job inserting.
    Job dryRunJob = new Job().setStatistics(new JobStatistics().setQuery(new JobStatistics2().setReferencedTables(ImmutableList.of(new TableReference()))));
    Job insertedJob = new Job().setJobReference(new JobReference());
    when(mockJobsInsert.execute()).thenReturn(dryRunJob, insertedJob);
    // Mock job polling.
    JobStatus status = new JobStatus().setState("DONE");
    JobConfigurationQuery resultQueryConfig = new JobConfigurationQuery().setDestinationTable(new TableReference().setProjectId("project").setDatasetId("tempdataset").setTableId("temptable"));
    Job getJob = new Job().setJobReference(new JobReference()).setStatus(status).setConfiguration(new JobConfiguration().setQuery(resultQueryConfig));
    when(mockJobsGet.execute()).thenReturn(getJob);
    // Mock table schema fetch.
    when(mockTablesGet.execute()).thenReturn(tableWithLocation(), tableWithBasicSchema());
    byte[] photoBytes = "photograph".getBytes();
    String photoBytesEncoded = BaseEncoding.base64().encode(photoBytes);
    // Mock table data fetch.
    when(mockTabledataList.execute()).thenReturn(rawDataList(rawRow("Arthur", 42, photoBytesEncoded, "2000-01-01", "2000-01-01 00:00:00.000005", "00:00:00.000005")));
    // Run query and verify
    String query = "SELECT name, count, photo, anniversary_date, " + "anniversary_datetime, anniversary_time from table";
    JobConfigurationQuery queryConfig = new JobConfigurationQuery().setQuery(query);
    try (BigQueryTableRowIterator iterator = BigQueryTableRowIterator.fromQuery(queryConfig, "project", mockClient)) {
        iterator.open();
        assertTrue(iterator.advance());
        TableRow row = iterator.getCurrent();
        assertTrue(row.containsKey("name"));
        assertTrue(row.containsKey("answer"));
        assertTrue(row.containsKey("photo"));
        assertTrue(row.containsKey("anniversary_date"));
        assertTrue(row.containsKey("anniversary_datetime"));
        assertTrue(row.containsKey("anniversary_time"));
        assertEquals("Arthur", row.get("name"));
        assertEquals(42, row.get("answer"));
        assertEquals(photoBytesEncoded, row.get("photo"));
        assertEquals("2000-01-01", row.get("anniversary_date"));
        assertEquals("2000-01-01 00:00:00.000005", row.get("anniversary_datetime"));
        assertEquals("00:00:00.000005", row.get("anniversary_time"));
        assertFalse(iterator.advance());
    }
    // Temp dataset created and later deleted.
    verify(mockClient, times(2)).datasets();
    verify(mockDatasets).insert(anyString(), any(Dataset.class));
    verify(mockDatasetsInsert).execute();
    verify(mockDatasets).delete(anyString(), anyString());
    verify(mockDatasetsDelete).execute();
    // Job inserted to run the query, polled once.
    verify(mockClient, times(3)).jobs();
    verify(mockJobs, times(2)).insert(anyString(), any(Job.class));
    verify(mockJobsInsert, times(2)).execute();
    verify(mockJobs).get(anyString(), anyString());
    verify(mockJobsGet).execute();
    // Temp table get after query finish, deleted after reading.
    verify(mockClient, times(3)).tables();
    verify(mockTables, times(2)).get(anyString(), anyString(), anyString());
    verify(mockTablesGet, times(2)).execute();
    verify(mockTables).delete(anyString(), anyString(), anyString());
    verify(mockTablesDelete).execute();
    // Table data read.
    verify(mockClient).tabledata();
    verify(mockTabledata).list("project", "tempdataset", "temptable");
    verify(mockTabledataList).execute();
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) JobStatistics2(com.google.api.services.bigquery.model.JobStatistics2) JobReference(com.google.api.services.bigquery.model.JobReference) JobConfigurationQuery(com.google.api.services.bigquery.model.JobConfigurationQuery) Dataset(com.google.api.services.bigquery.model.Dataset) Matchers.anyString(org.mockito.Matchers.anyString) Matchers.containsString(org.hamcrest.Matchers.containsString) JobStatus(com.google.api.services.bigquery.model.JobStatus) TableReference(com.google.api.services.bigquery.model.TableReference) TableRow(com.google.api.services.bigquery.model.TableRow) Job(com.google.api.services.bigquery.model.Job) JobConfiguration(com.google.api.services.bigquery.model.JobConfiguration) Test(org.junit.Test)

Example 12 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryServicesImplTest method testInsertRetry.

/**
   * Tests that {@link DatasetServiceImpl#insertAll} retries quota rate limited attempts.
   */
@Test
public void testInsertRetry() throws Exception {
    TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
    List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
    rows.add(wrapTableRow(new TableRow()));
    // First response is 403 rate limited, second response has valid payload.
    when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
    when(response.getStatusCode()).thenReturn(403).thenReturn(200);
    when(response.getContent()).thenReturn(toStream(errorWithReasonAndStatus("rateLimitExceeded", 403))).thenReturn(toStream(new TableDataInsertAllResponse()));
    DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
    verify(response, times(2)).getStatusCode();
    verify(response, times(2)).getContent();
    verify(response, times(2)).getContentType();
    expectedLogs.verifyInfo("BigQuery insertAll exceeded rate limit, retrying");
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) TableRow(com.google.api.services.bigquery.model.TableRow) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) ArrayList(java.util.ArrayList) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) MockSleeper(com.google.api.client.testing.util.MockSleeper) Test(org.junit.Test)

Example 13 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryServicesImplTest method testInsertFailsGracefully.

/**
   * Tests that {@link DatasetServiceImpl#insertAll} fails gracefully when persistent issues.
   */
@Test
public void testInsertFailsGracefully() throws Exception {
    TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
    List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));
    final TableDataInsertAllResponse row1Failed = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(1L)));
    final TableDataInsertAllResponse row0Failed = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(0L)));
    when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
    // Always return 200.
    when(response.getStatusCode()).thenReturn(200);
    // Return row 1 failing, then we retry row 1 as row 0, and row 0 persistently fails.
    when(response.getContent()).thenReturn(toStream(row1Failed)).thenAnswer(new Answer<InputStream>() {

        @Override
        public InputStream answer(InvocationOnMock invocation) throws Throwable {
            return toStream(row0Failed);
        }
    });
    DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    // Expect it to fail.
    try {
        dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
        fail();
    } catch (IOException e) {
        assertThat(e, instanceOf(IOException.class));
        assertThat(e.getMessage(), containsString("Insert failed:"));
        assertThat(e.getMessage(), containsString("[{\"index\":0}]"));
    }
    // Verify the exact number of retries as well as log messages.
    verify(response, times(4)).getStatusCode();
    verify(response, times(4)).getContent();
    verify(response, times(4)).getContentType();
    expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
Also used : DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) IOException(java.io.IOException) TableReference(com.google.api.services.bigquery.model.TableReference) InvocationOnMock(org.mockito.invocation.InvocationOnMock) TableRow(com.google.api.services.bigquery.model.TableRow) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) InsertErrors(com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors) MockSleeper(com.google.api.client.testing.util.MockSleeper) Test(org.junit.Test)

Example 14 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class FakeJobService method readRows.

private List<TableRow> readRows(String filename) throws IOException {
    Coder<TableRow> coder = TableRowJsonCoder.of();
    List<TableRow> tableRows = Lists.newArrayList();
    try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
        String line;
        while ((line = reader.readLine()) != null) {
            TableRow tableRow = coder.decode(new ByteArrayInputStream(line.getBytes(StandardCharsets.UTF_8)), Context.OUTER);
            tableRows.add(tableRow);
        }
    }
    return tableRows;
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) TableRow(com.google.api.services.bigquery.model.TableRow) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader)

Example 15 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class FakeJobService method runLoadJob.

private JobStatus runLoadJob(JobReference jobRef, JobConfigurationLoad load) throws InterruptedException, IOException {
    TableReference destination = load.getDestinationTable();
    TableSchema schema = load.getSchema();
    List<ResourceId> sourceFiles = filesForLoadJobs.get(jobRef.getProjectId(), jobRef.getJobId());
    WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
    CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());
    checkArgument(load.getSourceFormat().equals("NEWLINE_DELIMITED_JSON"));
    Table existingTable = datasetService.getTable(destination);
    if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
    }
    datasetService.createTable(new Table().setTableReference(destination).setSchema(schema));
    List<TableRow> rows = Lists.newArrayList();
    for (ResourceId filename : sourceFiles) {
        rows.addAll(readRows(filename.toString()));
    }
    datasetService.insertAll(destination, rows, null);
    return new JobStatus().setState("DONE");
}
Also used : JobStatus(com.google.api.services.bigquery.model.JobStatus) TableReference(com.google.api.services.bigquery.model.TableReference) CreateDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) TableSchema(com.google.api.services.bigquery.model.TableSchema) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) TableRow(com.google.api.services.bigquery.model.TableRow) WriteDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition)

Aggregations

TableRow (com.google.api.services.bigquery.model.TableRow)73 Test (org.junit.Test)43 TableReference (com.google.api.services.bigquery.model.TableReference)24 TableSchema (com.google.api.services.bigquery.model.TableSchema)18 Pipeline (org.apache.beam.sdk.Pipeline)16 KV (org.apache.beam.sdk.values.KV)15 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)14 JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema)14 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)13 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)12 BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference)11 Table (com.google.api.services.bigquery.model.Table)10 HashBasedTable (com.google.common.collect.HashBasedTable)10 JobStatus (com.google.api.services.bigquery.model.JobStatus)9 TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse)8 ArrayList (java.util.ArrayList)8 List (java.util.List)8 Map (java.util.Map)8 ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow)7 JobStatistics (com.google.api.services.bigquery.model.JobStatistics)6