Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQueryTableRowIteratorTest, method testReadFromQueryNoTables.
/** Verifies that queries that reference no data can be read. */
@Test
public void testReadFromQueryNoTables() throws IOException, InterruptedException {
  // Mock job inserting.
  Job dryRunJob = new Job().setStatistics(new JobStatistics().setQuery(new JobStatistics2()));
  Job insertedJob = new Job().setJobReference(new JobReference());
  when(mockJobsInsert.execute()).thenReturn(dryRunJob, insertedJob);

  // Mock job polling.
  JobStatus status = new JobStatus().setState("DONE");
  JobConfigurationQuery resultQueryConfig =
      new JobConfigurationQuery()
          .setDestinationTable(
              new TableReference()
                  .setProjectId("project")
                  .setDatasetId("tempdataset")
                  .setTableId("temptable"));
  Job getJob =
      new Job()
          .setJobReference(new JobReference())
          .setStatus(status)
          .setConfiguration(new JobConfiguration().setQuery(resultQueryConfig));
  when(mockJobsGet.execute()).thenReturn(getJob);

  // Mock table schema fetch.
  when(mockTablesGet.execute()).thenReturn(noTableQuerySchema());
  byte[] photoBytes = "photograph".getBytes();
  String photoBytesEncoded = BaseEncoding.base64().encode(photoBytes);

  // Mock table data fetch.
  when(mockTabledataList.execute())
      .thenReturn(rawDataList(rawRow("Arthur", 42, photoBytesEncoded)));

  // Run query and verify.
  String query =
      String.format("SELECT \"Arthur\" as name, 42 as count, \"%s\" as photo", photoBytesEncoded);
  JobConfigurationQuery queryConfig = new JobConfigurationQuery().setQuery(query);
  try (BigQueryTableRowIterator iterator =
      BigQueryTableRowIterator.fromQuery(queryConfig, "project", mockClient)) {
    iterator.open();
    assertTrue(iterator.advance());
    TableRow row = iterator.getCurrent();
    assertTrue(row.containsKey("name"));
    assertTrue(row.containsKey("count"));
    assertTrue(row.containsKey("photo"));
    assertEquals("Arthur", row.get("name"));
    assertEquals(42, row.get("count"));
    assertEquals(photoBytesEncoded, row.get("photo"));
    assertFalse(iterator.advance());
  }

  // Temp dataset created and later deleted.
  verify(mockClient, times(2)).datasets();
  verify(mockDatasets).insert(anyString(), any(Dataset.class));
  verify(mockDatasetsInsert).execute();
  verify(mockDatasets).delete(anyString(), anyString());
  verify(mockDatasetsDelete).execute();

  // Query job inserted twice (a dry run, then the real job), polled once.
  verify(mockClient, times(3)).jobs();
  verify(mockJobs, times(2)).insert(anyString(), any(Job.class));
  verify(mockJobsInsert, times(2)).execute();
  verify(mockJobs).get(anyString(), anyString());
  verify(mockJobsGet).execute();

  // Temp table fetched after the query finishes, deleted after reading.
  verify(mockClient, times(2)).tables();
  verify(mockTables).get(anyString(), anyString(), anyString());
  verify(mockTablesGet).execute();
  verify(mockTables).delete(anyString(), anyString(), anyString());
  verify(mockTablesDelete).execute();

  // Table data read.
  verify(mockClient).tabledata();
  verify(mockTabledata).list("project", "tempdataset", "temptable");
  verify(mockTabledataList).execute();
}
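The test references three helpers from the same test class that are not shown in this snippet: noTableQuerySchema(), rawRow(), and rawDataList(). A plausible reconstruction, assuming the schema matches the three queried columns; the exact bodies in Beam's test class may differ:

// Hypothetical sketch of the unshown test helpers.
private static Table noTableQuerySchema() {
  return new Table()
      .setSchema(
          new TableSchema()
              .setFields(
                  ImmutableList.of(
                      new TableFieldSchema().setName("name").setType("STRING"),
                      new TableFieldSchema().setName("count").setType("INTEGER"),
                      new TableFieldSchema().setName("photo").setType("BYTES"))));
}

// Builds one TableRow in the raw API shape: a list of cells under the "f" field.
private static TableRow rawRow(Object... args) {
  List<TableCell> cells = new ArrayList<>();
  for (Object a : args) {
    cells.add(new TableCell().setV(a));
  }
  return new TableRow().setF(cells);
}

// Wraps rows into the TableDataList shape returned by tabledata().list().
private static TableDataList rawDataList(TableRow... rows) {
  return new TableDataList().setRows(Arrays.asList(rows));
}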
Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class StreamingWriteFn, method finishBundle.
/** Writes the accumulated rows into BigQuery with streaming API. */
@FinishBundle
public void finishBundle(FinishBundleContext context) throws Exception {
  List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
  BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
  for (Map.Entry<String, List<ValueInSingleWindow<TableRow>>> entry : tableRows.entrySet()) {
    TableReference tableReference = BigQueryHelpers.parseTableSpec(entry.getKey());
    flushRows(
        tableReference,
        entry.getValue(),
        uniqueIdsForTableRows.get(entry.getKey()),
        options,
        failedInserts);
  }
  tableRows.clear();
  uniqueIdsForTableRows.clear();
  for (ValueInSingleWindow<TableRow> row : failedInserts) {
    context.output(failedOutputTag, row.getValue(), row.getTimestamp(), row.getWindow());
  }
}
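Here the map keys are table-spec strings, and BigQueryHelpers.parseTableSpec turns each key back into a TableReference. A minimal usage sketch, assuming the standard [project:]dataset.table spec format Beam uses for table strings (the literal values below are illustrative only):

// "project:dataset.table" (the project part is optional) parses into its three components.
TableReference ref = BigQueryHelpers.parseTableSpec("my-project:my_dataset.my_table");
assertEquals("my-project", ref.getProjectId());
assertEquals("my_dataset", ref.getDatasetId());
assertEquals("my_table", ref.getTableId());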
Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQueryQuerySource, method cleanupTempResource.
@Override
protected void cleanupTempResource(BigQueryOptions bqOptions) throws Exception {
  TableReference tableToRemove =
      createTempTableReference(
          bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid));
  DatasetService tableService = bqServices.getDatasetService(bqOptions);
  tableService.deleteTable(tableToRemove);
  tableService.deleteDataset(tableToRemove.getProjectId(), tableToRemove.getDatasetId());
}
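Cleanup can reconstruct the same reference that getTableToExtract created because the temp table name is derived deterministically from the project and the job-id token. A sketch of what createTempTableReference plausibly returns; the exact naming scheme is an assumption, not confirmed by the snippets above:

// Hypothetical sketch: a deterministic temp dataset/table reference derived from the job token.
static TableReference createTempTableReference(String projectId, String jobIdToken) {
  return new TableReference()
      .setProjectId(projectId)
      .setDatasetId("temp_dataset_" + jobIdToken)
      .setTableId("temp_table_" + jobIdToken);
}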
Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQueryQuerySource, method getTableToExtract.
@Override
protected TableReference getTableToExtract(BigQueryOptions bqOptions)
    throws IOException, InterruptedException {
  // 1. Find the location of the query.
  String location = null;
  List<TableReference> referencedTables =
      dryRunQueryIfNeeded(bqOptions).getQuery().getReferencedTables();
  DatasetService tableService = bqServices.getDatasetService(bqOptions);
  if (referencedTables != null && !referencedTables.isEmpty()) {
    TableReference queryTable = referencedTables.get(0);
    location = tableService.getTable(queryTable).getLocation();
  }
  // 2. Create the temporary dataset in the query location.
  TableReference tableToExtract =
      createTempTableReference(
          bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid));
  tableService.createDataset(
      tableToExtract.getProjectId(),
      tableToExtract.getDatasetId(),
      location,
      "Dataset for BigQuery query job temporary table");
  // 3. Execute the query.
  String queryJobId = createJobIdToken(bqOptions.getJobName(), stepUuid) + "-query";
  executeQuery(
      bqOptions.getProject(), queryJobId, tableToExtract, bqServices.getJobService(bqOptions));
  return tableToExtract;
}
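Step 1 works because a BigQuery dry run returns query statistics, including the referenced tables, without executing the query. A minimal sketch of what dryRunQueryIfNeeded boils down to against the raw Jobs API; the variable names (query, projectId, bigqueryClient) and wiring are assumptions for illustration:

// Hypothetical sketch: submit the query with dryRun=true and read the referenced tables.
Job dryRunJob =
    new Job()
        .setConfiguration(
            new JobConfiguration()
                .setQuery(new JobConfigurationQuery().setQuery(query))
                .setDryRun(true));
Job response = bigqueryClient.jobs().insert(projectId, dryRunJob).execute();
List<TableReference> referenced = response.getStatistics().getQuery().getReferencedTables();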
Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQuerySourceBase, method split.
@Override
public List<BoundedSource<TableRow>> split(long desiredBundleSizeBytes, PipelineOptions options)
    throws Exception {
  // split() can be called multiple times, so cache the result to avoid running
  // another BigQuery extract job for the repeated split() calls.
  if (cachedSplitResult == null) {
    BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
    TableReference tableToExtract = getTableToExtract(bqOptions);
    JobService jobService = bqServices.getJobService(bqOptions);
    final String extractDestinationDir =
        resolveTempLocation(bqOptions.getTempLocation(), "BigQueryExtractTemp", stepUuid);
    String extractJobId = getExtractJobId(createJobIdToken(options.getJobName(), stepUuid));
    List<ResourceId> tempFiles =
        executeExtract(
            extractJobId, tableToExtract, jobService, bqOptions.getProject(),
            extractDestinationDir);
    TableSchema tableSchema =
        bqServices.getDatasetService(bqOptions).getTable(tableToExtract).getSchema();
    cleanupTempResource(bqOptions);
    cachedSplitResult = checkNotNull(createSources(tempFiles, tableSchema));
  }
  return cachedSplitResult;
}
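The executeExtract step submits a BigQuery extract job that dumps the table to files under extractDestinationDir, and the sources are then created over those files. A hedged sketch of the job configuration such a step would build; the Avro format and the destination-URI pattern are assumptions, while tableToExtract, extractJobId, and extractDestinationDir refer to the variables above:

// Hypothetical sketch of an extract-job configuration writing Avro files to the temp dir.
JobConfigurationExtract extract =
    new JobConfigurationExtract()
        .setSourceTable(tableToExtract)
        .setDestinationFormat("AVRO")
        .setDestinationUris(Collections.singletonList(extractDestinationDir + "/*.avro"));
Job extractJob =
    new Job()
        .setJobReference(new JobReference().setProjectId(projectId).setJobId(extractJobId))
        .setConfiguration(new JobConfiguration().setExtract(extract));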