Search in sources :

Example 76 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQueryTableRowIteratorTest method testReadFromQueryNoTables.

/**
   * Verifies that queries that reference no data can be read.
   */
@Test
public void testReadFromQueryNoTables() throws IOException, InterruptedException {
    // Mock job inserting.
    Job dryRunJob = new Job().setStatistics(new JobStatistics().setQuery(new JobStatistics2()));
    Job insertedJob = new Job().setJobReference(new JobReference());
    when(mockJobsInsert.execute()).thenReturn(dryRunJob, insertedJob);
    // Mock job polling.
    JobStatus status = new JobStatus().setState("DONE");
    JobConfigurationQuery resultQueryConfig = new JobConfigurationQuery().setDestinationTable(new TableReference().setProjectId("project").setDatasetId("tempdataset").setTableId("temptable"));
    Job getJob = new Job().setJobReference(new JobReference()).setStatus(status).setConfiguration(new JobConfiguration().setQuery(resultQueryConfig));
    when(mockJobsGet.execute()).thenReturn(getJob);
    // Mock table schema fetch.
    when(mockTablesGet.execute()).thenReturn(noTableQuerySchema());
    byte[] photoBytes = "photograph".getBytes();
    String photoBytesEncoded = BaseEncoding.base64().encode(photoBytes);
    // Mock table data fetch.
    when(mockTabledataList.execute()).thenReturn(rawDataList(rawRow("Arthur", 42, photoBytesEncoded)));
    // Run query and verify
    String query = String.format("SELECT \"Arthur\" as name, 42 as count, \"%s\" as photo", photoBytesEncoded);
    JobConfigurationQuery queryConfig = new JobConfigurationQuery().setQuery(query);
    try (BigQueryTableRowIterator iterator = BigQueryTableRowIterator.fromQuery(queryConfig, "project", mockClient)) {
        iterator.open();
        assertTrue(iterator.advance());
        TableRow row = iterator.getCurrent();
        assertTrue(row.containsKey("name"));
        assertTrue(row.containsKey("count"));
        assertTrue(row.containsKey("photo"));
        assertEquals("Arthur", row.get("name"));
        assertEquals(42, row.get("count"));
        assertEquals(photoBytesEncoded, row.get("photo"));
        assertFalse(iterator.advance());
    }
    // Temp dataset created and later deleted.
    verify(mockClient, times(2)).datasets();
    verify(mockDatasets).insert(anyString(), any(Dataset.class));
    verify(mockDatasetsInsert).execute();
    verify(mockDatasets).delete(anyString(), anyString());
    verify(mockDatasetsDelete).execute();
    // Job inserted to run the query, polled once.
    verify(mockClient, times(3)).jobs();
    verify(mockJobs, times(2)).insert(anyString(), any(Job.class));
    verify(mockJobsInsert, times(2)).execute();
    verify(mockJobs).get(anyString(), anyString());
    verify(mockJobsGet).execute();
    // Temp table get after query finish, deleted after reading.
    verify(mockClient, times(2)).tables();
    verify(mockTables, times(1)).get(anyString(), anyString(), anyString());
    verify(mockTablesGet, times(1)).execute();
    verify(mockTables).delete(anyString(), anyString(), anyString());
    verify(mockTablesDelete).execute();
    // Table data read.
    verify(mockClient).tabledata();
    verify(mockTabledata).list("project", "tempdataset", "temptable");
    verify(mockTabledataList).execute();
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) JobStatistics2(com.google.api.services.bigquery.model.JobStatistics2) JobReference(com.google.api.services.bigquery.model.JobReference) JobConfigurationQuery(com.google.api.services.bigquery.model.JobConfigurationQuery) Dataset(com.google.api.services.bigquery.model.Dataset) Matchers.anyString(org.mockito.Matchers.anyString) Matchers.containsString(org.hamcrest.Matchers.containsString) JobStatus(com.google.api.services.bigquery.model.JobStatus) TableReference(com.google.api.services.bigquery.model.TableReference) TableRow(com.google.api.services.bigquery.model.TableRow) Job(com.google.api.services.bigquery.model.Job) JobConfiguration(com.google.api.services.bigquery.model.JobConfiguration) Test(org.junit.Test)

Example 77 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class StreamingWriteFn method finishBundle.

/** Writes the accumulated rows into BigQuery with streaming API. */
@FinishBundle
public void finishBundle(FinishBundleContext context) throws Exception {
    List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
    BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
    for (Map.Entry<String, List<ValueInSingleWindow<TableRow>>> entry : tableRows.entrySet()) {
        TableReference tableReference = BigQueryHelpers.parseTableSpec(entry.getKey());
        flushRows(tableReference, entry.getValue(), uniqueIdsForTableRows.get(entry.getKey()), options, failedInserts);
    }
    tableRows.clear();
    uniqueIdsForTableRows.clear();
    for (ValueInSingleWindow<TableRow> row : failedInserts) {
        context.output(failedOutputTag, row.getValue(), row.getTimestamp(), row.getWindow());
    }
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) TableRow(com.google.api.services.bigquery.model.TableRow) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 78 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQueryQuerySource method cleanupTempResource.

@Override
protected void cleanupTempResource(BigQueryOptions bqOptions) throws Exception {
    TableReference tableToRemove = createTempTableReference(bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid));
    DatasetService tableService = bqServices.getDatasetService(bqOptions);
    tableService.deleteTable(tableToRemove);
    tableService.deleteDataset(tableToRemove.getProjectId(), tableToRemove.getDatasetId());
}
Also used : BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) DatasetService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService)

Example 79 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQueryQuerySource method getTableToExtract.

@Override
protected TableReference getTableToExtract(BigQueryOptions bqOptions) throws IOException, InterruptedException {
    // 1. Find the location of the query.
    String location = null;
    List<TableReference> referencedTables = dryRunQueryIfNeeded(bqOptions).getQuery().getReferencedTables();
    DatasetService tableService = bqServices.getDatasetService(bqOptions);
    if (referencedTables != null && !referencedTables.isEmpty()) {
        TableReference queryTable = referencedTables.get(0);
        location = tableService.getTable(queryTable).getLocation();
    }
    // 2. Create the temporary dataset in the query location.
    TableReference tableToExtract = createTempTableReference(bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid));
    tableService.createDataset(tableToExtract.getProjectId(), tableToExtract.getDatasetId(), location, "Dataset for BigQuery query job temporary table");
    // 3. Execute the query.
    String queryJobId = createJobIdToken(bqOptions.getJobName(), stepUuid) + "-query";
    executeQuery(bqOptions.getProject(), queryJobId, tableToExtract, bqServices.getJobService(bqOptions));
    return tableToExtract;
}
Also used : BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) DatasetService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService)

Example 80 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQuerySourceBase method split.

@Override
public List<BoundedSource<TableRow>> split(long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
    // another BigQuery extract job for the repeated split() calls.
    if (cachedSplitResult == null) {
        BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
        TableReference tableToExtract = getTableToExtract(bqOptions);
        JobService jobService = bqServices.getJobService(bqOptions);
        final String extractDestinationDir = resolveTempLocation(bqOptions.getTempLocation(), "BigQueryExtractTemp", stepUuid);
        String extractJobId = getExtractJobId(createJobIdToken(options.getJobName(), stepUuid));
        List<ResourceId> tempFiles = executeExtract(extractJobId, tableToExtract, jobService, bqOptions.getProject(), extractDestinationDir);
        TableSchema tableSchema = bqServices.getDatasetService(bqOptions).getTable(tableToExtract).getSchema();
        cleanupTempResource(bqOptions);
        cachedSplitResult = checkNotNull(createSources(tempFiles, tableSchema));
    }
    return cachedSplitResult;
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) TableSchema(com.google.api.services.bigquery.model.TableSchema) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) JobService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService)

Aggregations

TableReference (com.google.api.services.bigquery.model.TableReference)139 Test (org.junit.Test)75 TableRow (com.google.api.services.bigquery.model.TableRow)68 Table (com.google.api.services.bigquery.model.Table)61 TableSchema (com.google.api.services.bigquery.model.TableSchema)36 DatasetServiceImpl (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl)29 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)22 TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse)19 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)19 MockSleeper (com.google.api.client.testing.util.MockSleeper)17 BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference)17 FailsafeValueInSingleWindow (org.apache.beam.sdk.values.FailsafeValueInSingleWindow)16 JobStatus (com.google.api.services.bigquery.model.JobStatus)15 ReadSession (com.google.cloud.bigquery.storage.v1.ReadSession)15 BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference)15 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)14 JobStatistics (com.google.api.services.bigquery.model.JobStatistics)14 CreateReadSessionRequest (com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest)14 ByteString (com.google.protobuf.ByteString)14 IOException (java.io.IOException)14