use of com.google.api.services.bigquery.model.JobConfiguration in project beam by apache.
the class FakeJobService method startCopyJob.
/**
 * Registers a fake table-copy job in the {@code PENDING} state.
 *
 * <p>The job is stored in {@code allJobs} under the project id and job id taken from
 * {@code jobRef}. The job id is first passed to {@code verifyUniqueJobId}, exactly as
 * {@code startLoadJob} does, so that a reused id is checked before the stored entry for that id
 * would be replaced.
 *
 * @param jobRef reference supplying the project id and job id the job is registered under
 * @param copyConfig copy configuration attached to the new job
 * @throws IOException declared by the overridden method; may be raised by the job id check
 */
@Override
public void startCopyJob(JobReference jobRef, JobConfigurationTableCopy copyConfig) throws IOException, InterruptedException {
  synchronized (allJobs) {
    // Keep copy jobs consistent with load jobs: never overwrite an already-registered job id.
    verifyUniqueJobId(jobRef.getJobId());
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setCopy(copyConfig));
    // NOTE(review): the leading space in the kind string looks unintentional - confirm.
    job.setKind(" bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
use of com.google.api.services.bigquery.model.JobConfiguration in project beam by apache.
the class BigQueryTableRowIteratorTest method testReadFromQuery.
/**
 * Verifies that when the query runs, the correct data is returned and the temporary dataset and
 * table are both cleaned up.
 *
 * <p>All BigQuery calls are mocked: two job inserts (the dry run and the real query), one job
 * poll that immediately reports {@code DONE}, two table fetches, and one table-data read that
 * yields a single row. The trailing {@code verify} calls pin the exact number of requests made
 * against each mocked resource.
 */
@Test
public void testReadFromQuery() throws IOException, InterruptedException {
  // Mock job inserting.
  Job dryRunJob = new Job().setStatistics(new JobStatistics().setQuery(new JobStatistics2().setReferencedTables(ImmutableList.of(new TableReference()))));
  Job insertedJob = new Job().setJobReference(new JobReference());
  when(mockJobsInsert.execute()).thenReturn(dryRunJob, insertedJob);
  // Mock job polling.
  JobStatus status = new JobStatus().setState("DONE");
  JobConfigurationQuery resultQueryConfig = new JobConfigurationQuery().setDestinationTable(new TableReference().setProjectId("project").setDatasetId("tempdataset").setTableId("temptable"));
  Job getJob = new Job().setJobReference(new JobReference()).setStatus(status).setConfiguration(new JobConfiguration().setQuery(resultQueryConfig));
  when(mockJobsGet.execute()).thenReturn(getJob);
  // Mock table schema fetch.
  when(mockTablesGet.execute()).thenReturn(tableWithLocation(), tableWithBasicSchema());
  // Encode with an explicit charset so the fixture does not depend on the platform default.
  byte[] photoBytes = "photograph".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  String photoBytesEncoded = BaseEncoding.base64().encode(photoBytes);
  // Mock table data fetch.
  when(mockTabledataList.execute()).thenReturn(rawDataList(rawRow("Arthur", 42, photoBytesEncoded, "2000-01-01", "2000-01-01 00:00:00.000005", "00:00:00.000005")));
  // Run query and verify
  String query = "SELECT name, count, photo, anniversary_date, " + "anniversary_datetime, anniversary_time from table";
  JobConfigurationQuery queryConfig = new JobConfigurationQuery().setQuery(query);
  try (BigQueryTableRowIterator iterator = BigQueryTableRowIterator.fromQuery(queryConfig, "project", mockClient)) {
    iterator.open();
    assertTrue(iterator.advance());
    TableRow row = iterator.getCurrent();
    assertTrue(row.containsKey("name"));
    assertTrue(row.containsKey("answer"));
    assertTrue(row.containsKey("photo"));
    assertTrue(row.containsKey("anniversary_date"));
    assertTrue(row.containsKey("anniversary_datetime"));
    assertTrue(row.containsKey("anniversary_time"));
    assertEquals("Arthur", row.get("name"));
    assertEquals(42, row.get("answer"));
    assertEquals(photoBytesEncoded, row.get("photo"));
    assertEquals("2000-01-01", row.get("anniversary_date"));
    assertEquals("2000-01-01 00:00:00.000005", row.get("anniversary_datetime"));
    assertEquals("00:00:00.000005", row.get("anniversary_time"));
    // Only one row was mocked, so the iterator must be exhausted now.
    assertFalse(iterator.advance());
  }
  // Temp dataset created and later deleted.
  verify(mockClient, times(2)).datasets();
  verify(mockDatasets).insert(anyString(), any(Dataset.class));
  verify(mockDatasetsInsert).execute();
  verify(mockDatasets).delete(anyString(), anyString());
  verify(mockDatasetsDelete).execute();
  // Two jobs inserted (dry run, then the real query); the real job is polled once.
  verify(mockClient, times(3)).jobs();
  verify(mockJobs, times(2)).insert(anyString(), any(Job.class));
  verify(mockJobsInsert, times(2)).execute();
  verify(mockJobs).get(anyString(), anyString());
  verify(mockJobsGet).execute();
  // Tables fetched twice (matching the two stubbed responses above), temp table deleted once.
  verify(mockClient, times(3)).tables();
  verify(mockTables, times(2)).get(anyString(), anyString(), anyString());
  verify(mockTablesGet, times(2)).execute();
  verify(mockTables).delete(anyString(), anyString(), anyString());
  verify(mockTablesDelete).execute();
  // Table data read.
  verify(mockClient).tabledata();
  verify(mockTabledata).list("project", "tempdataset", "temptable");
  verify(mockTabledataList).execute();
}
use of com.google.api.services.bigquery.model.JobConfiguration in project beam by apache.
the class BigQueryTableRowIterator method executeQueryAndWaitForCompletion.
/**
 * Runs the configured query into a newly created temporary table and waits for the query job to
 * reach the {@code DONE} state.
 *
 * <p>The query is first submitted as a dry run to learn which tables it references; when it
 * references at least one, the location of the first is used for the temporary dataset.
 * The real job is then inserted and polled, pausing {@code QUERY_COMPLETION_POLL_TIME} between
 * polls.
 *
 * @return the destination table recorded in the finished job's query configuration
 * @throws IOException if the finished job carries an error result; the message includes the
 *     query and the error's message
 */
private TableReference executeQueryAndWaitForCompletion() throws IOException, InterruptedException {
  checkState(projectId != null, "Unable to execute a query without a configured project id");
  checkState(queryConfig != null, "Unable to execute a query without a configured query");

  // A dry run reports the tables the query reads without executing it.
  JobConfiguration dryRunConfig = new JobConfiguration().setQuery(queryConfig).setDryRun(true);
  Job dryRunResult = executeWithBackOff(
      client.jobs().insert(projectId, new Job().setConfiguration(dryRunConfig)),
      String.format("Error when trying to dry run query %s.", queryConfig.toPrettyString()));
  @Nullable List<TableReference> referencedTables = dryRunResult.getStatistics().getQuery().getReferencedTables();

  // With no referenced tables the location stays null and BigQuery picks its default.
  String datasetLocation = (referencedTables == null || referencedTables.isEmpty())
      ? null
      : getTable(referencedTables.get(0)).getLocation();

  // The leading "_" keeps the temporary dataset hidden.
  Random idSource = new Random(System.currentTimeMillis());
  temporaryDatasetId = "_beam_temporary_dataset_" + idSource.nextInt(1000000);
  temporaryTableId = "beam_temporary_table_" + idSource.nextInt(1000000);
  createDataset(temporaryDatasetId, datasetLocation);

  // Point the query at the temporary table before wrapping it in the job to submit.
  queryConfig.setDestinationTable(
      new TableReference()
          .setProjectId(projectId)
          .setDatasetId(temporaryDatasetId)
          .setTableId(temporaryTableId));
  queryConfig.setAllowLargeResults(true);
  Job jobToSubmit = new Job().setConfiguration(new JobConfiguration().setQuery(queryConfig));

  Job submittedJob = executeWithBackOff(
      client.jobs().insert(projectId, jobToSubmit),
      String.format("Error when trying to execute the job for query %s.", queryConfig.toPrettyString()));
  JobReference submittedRef = submittedJob.getJobReference();

  for (;;) {
    Job polledJob = executeWithBackOff(
        client.jobs().get(projectId, submittedRef.getJobId()),
        String.format("Error when trying to get status of the job for query %s.", queryConfig.toPrettyString()));
    JobStatus polledStatus = polledJob.getStatus();

    if (!polledStatus.getState().equals("DONE")) {
      // Still running: wait one poll interval and ask again.
      Uninterruptibles.sleepUninterruptibly(QUERY_COMPLETION_POLL_TIME.getMillis(), TimeUnit.MILLISECONDS);
      continue;
    }

    // DONE only means finished; the error result tells whether it succeeded.
    ErrorProto failure = polledStatus.getErrorResult();
    if (failure != null) {
      // No temporary table exists after a failed query, so clear the id to skip its deletion.
      temporaryTableId = null;
      throw new IOException(String.format("Executing query %s failed: %s", queryConfig.toPrettyString(), failure.getMessage()));
    }
    return polledJob.getConfiguration().getQuery().getDestinationTable();
  }
}
use of com.google.api.services.bigquery.model.JobConfiguration in project beam by apache.
the class BigqueryClient method queryUnflattened.
/**
 * Performs a query without flattening results.
 *
 * <p>The query writes into a temporary table inside a temporary dataset. The dataset is deleted
 * once the results have been fetched, and also when table creation, job submission or polling
 * fails, so a failed query does not leave the dataset behind.
 *
 * @param query the query text to run
 * @param projectId project in which the temporary dataset, table and job are created
 * @param typed when {@code true}, every row is converted with {@code getTypedTableRow} using
 *     the fields of the result schema; when {@code false}, rows are returned as received
 * @return the result rows; an empty list when the response contains no rows
 * @throws IOException if a BigQuery request fails
 */
@Nonnull
public List<TableRow> queryUnflattened(String query, String projectId, boolean typed) throws IOException, InterruptedException {
  Random rnd = new Random(System.currentTimeMillis());
  String temporaryDatasetId = "_dataflow_temporary_dataset_" + rnd.nextInt(1000000);
  String temporaryTableId = "dataflow_temporary_table_" + rnd.nextInt(1000000);
  TableReference tempTableReference = new TableReference().setProjectId(projectId).setDatasetId(temporaryDatasetId).setTableId(temporaryTableId);
  createNewDataset(projectId, temporaryDatasetId);

  final TableSchema schema;
  final List<TableRow> rows;
  try {
    createNewTable(projectId, temporaryDatasetId, new Table().setTableReference(tempTableReference));
    JobConfigurationQuery jcQuery = new JobConfigurationQuery().setFlattenResults(false).setAllowLargeResults(true).setDestinationTable(tempTableReference).setQuery(query);
    JobConfiguration jc = new JobConfiguration().setQuery(jcQuery);
    Job job = new Job().setConfiguration(jc);
    Job insertedJob = bqClient.jobs().insert(projectId, job).execute();
    // Keep requesting the results until the response reports the job as complete.
    GetQueryResultsResponse qResponse;
    do {
      qResponse = bqClient.jobs().getQueryResults(projectId, insertedJob.getJobReference().getJobId()).execute();
    } while (!qResponse.getJobComplete());
    schema = qResponse.getSchema();
    rows = qResponse.getRows();
  } finally {
    // Runs on success and on failure, so the temporary dataset never outlives this call.
    deleteDataset(projectId, temporaryDatasetId);
  }

  // The response may carry no row list at all; honor @Nonnull instead of returning null.
  if (rows == null) {
    return java.util.Collections.emptyList();
  }
  return !typed ? rows : rows.stream().map(r -> getTypedTableRow(schema.getFields(), r)).collect(Collectors.toList());
}
use of com.google.api.services.bigquery.model.JobConfiguration in project beam by apache.
the class FakeJobService method startLoadJob.
/**
 * Registers a fake load job in the {@code PENDING} state.
 *
 * <p>The job id is checked with {@code verifyUniqueJobId} before anything is stored. When the
 * load configuration lists source URIs, each source file is copied to a new resource whose name
 * is the original name followed by a random integer, and the copies are recorded in
 * {@code filesForLoadJobs} under the job's project id and job id. A configuration with no
 * source URIs (unset or empty) registers the job without copying anything.
 *
 * @param jobRef reference supplying the project id and job id the job is registered under
 * @param loadConfig load configuration attached to the new job
 * @throws IOException if copying the source files fails
 */
@Override
public void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig) throws IOException {
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setLoad(loadConfig));
    // NOTE(review): the leading space in the kind string looks unintentional - confirm.
    job.setKind(" bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    // Copy the source files to new locations and load from the copies.
    // NOTE(review): presumably because the originals may be deleted by the caller - confirm.
    if (loadConfig.getSourceUris() != null && !loadConfig.getSourceUris().isEmpty()) {
      ImmutableList.Builder<ResourceId> sourceFiles = ImmutableList.builder();
      ImmutableList.Builder<ResourceId> loadFiles = ImmutableList.builder();
      for (String filename : loadConfig.getSourceUris()) {
        sourceFiles.add(FileSystems.matchNewResource(filename, false));
        loadFiles.add(FileSystems.matchNewResource(filename + ThreadLocalRandom.current().nextInt(), false));
      }
      // Build the destination list once so the copy targets and the recorded files are the same.
      ImmutableList<ResourceId> copiedFiles = loadFiles.build();
      FileSystems.copy(sourceFiles.build(), copiedFiles);
      filesForLoadJobs.put(jobRef.getProjectId(), jobRef.getJobId(), copiedFiles);
    }
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
Aggregations