Search in sources:

Example 6 with JobStatistics2

use of com.google.api.services.bigquery.model.JobStatistics2 in project beam by apache.

the class BigQueryIOStorageQueryTest method testQuerySourceInitialSplitWithBigQueryProject_EmptyResult.

/**
 * Splits a query source that is configured through {@code options.getBigQueryProject()} and
 * checks that no sub-sources are produced when the storage client answers with an empty
 * {@link ReadSession}.
 */
@Test
@ProjectOverride
public void testQuerySourceInitialSplitWithBigQueryProject_EmptyResult() throws Exception {
    // Register the table that the dry-run statistics will report as referenced.
    TableReference referencedTable = BigQueryHelpers.parseTableSpec("bigquery-project-id:dataset.table");
    fakeDatasetService.createDataset(
            referencedTable.getProjectId(),
            referencedTable.getDatasetId(),
            "asia-northeast1",
            "Fake plastic tree^H^H^H^Htables",
            null);
    fakeDatasetService.createTable(
            new Table().setTableReference(referencedTable).setLocation("asia-northeast1"));

    // The "query" is an encoded table with a schema but zero bytes of data.
    Table emptyResultTable = new Table().setSchema(TABLE_SCHEMA).setNumBytes(0L);
    String encodedQuery = FakeBigQueryServices.encodeQueryResult(emptyResultTable);

    String bigQueryProject = options.getBigQueryProject();
    JobStatistics2 queryStatistics =
            new JobStatistics2()
                    .setTotalBytesProcessed(1024L * 1024L)
                    .setReferencedTables(ImmutableList.of(referencedTable));
    fakeJobService.expectDryRunQuery(
            bigQueryProject, encodedQuery, new JobStatistics().setQuery(queryStatistics));

    // Build the read-session request the source is expected to issue for the temp table.
    String stepUuid = "testStepUuid";
    String jobIdPrefix =
            BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY);
    TableReference tempTable = createTempTableReference(bigQueryProject, jobIdPrefix, Optional.empty());
    ReadSession.Builder requestedSession =
            ReadSession.newBuilder()
                    .setTable(BigQueryHelpers.toTableResourceName(tempTable))
                    .setDataFormat(DataFormat.AVRO);
    CreateReadSessionRequest expectedRequest =
            CreateReadSessionRequest.newBuilder()
                    .setParent("projects/" + bigQueryProject)
                    .setReadSession(requestedSession)
                    .setMaxStreamCount(10)
                    .build();

    // The mocked storage client replies to that exact request with an empty session.
    StorageClient fakeStorageClient = mock(StorageClient.class);
    when(fakeStorageClient.createReadSession(expectedRequest))
            .thenReturn(ReadSession.newBuilder().build());

    FakeBigQueryServices fakeServices =
            new FakeBigQueryServices()
                    .withDatasetService(fakeDatasetService)
                    .withJobService(fakeJobService)
                    .withStorageClient(fakeStorageClient);
    BigQueryStorageQuerySource<TableRow> querySource =
            BigQueryStorageQuerySource.create(
                    stepUuid,
                    ValueProvider.StaticValueProvider.of(encodedQuery),
                    /* flattenResults = */ true,
                    /* useLegacySql = */ true,
                    /* priority = */ QueryPriority.BATCH,
                    /* location = */ null,
                    /* queryTempDataset = */ null,
                    /* kmsKey = */ null,
                    DataFormat.AVRO,
                    new TableRowParser(),
                    TableRowJsonCoder.of(),
                    fakeServices);

    List<? extends BoundedSource<TableRow>> splits = querySource.split(1024L, options);
    assertTrue(splits.isEmpty());
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) JobStatistics2(com.google.api.services.bigquery.model.JobStatistics2) Table(com.google.api.services.bigquery.model.Table) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) StorageClient(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient) ByteString(com.google.protobuf.ByteString) TableRowParser(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TableRowParser) TableReference(com.google.api.services.bigquery.model.TableReference) BigQueryResourceNaming.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference) TableRow(com.google.api.services.bigquery.model.TableRow) FakeBigQueryServices(org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices) CreateReadSessionRequest(com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest) Test(org.junit.Test)

Example 7 with JobStatistics2

use of com.google.api.services.bigquery.model.JobStatistics2 in project beam by apache.

the class BigQueryIOStorageQueryTest method doQuerySourceInitialSplit.

/**
 * Shared driver for the initial-split tests: sets up the fake services, splits a query source
 * and asserts on the number of resulting sources.
 *
 * @param bundleSize desired bundle size handed to {@code split}
 * @param requestedStreamCount max stream count the source is expected to put in its
 *     {@link CreateReadSessionRequest}
 * @param expectedStreamCount number of streams placed in the fake read session, and the number
 *     of sources the split must return
 */
private void doQuerySourceInitialSplit(long bundleSize, int requestedStreamCount, int expectedStreamCount) throws Exception {
    // Register the table that the dry-run statistics will report as referenced.
    TableReference referencedTable = BigQueryHelpers.parseTableSpec("project:dataset.table");
    fakeDatasetService.createDataset(
            referencedTable.getProjectId(),
            referencedTable.getDatasetId(),
            "asia-northeast1",
            "Fake plastic tree^H^H^H^Htables",
            null);
    fakeDatasetService.createTable(
            new Table().setTableReference(referencedTable).setLocation("asia-northeast1"));

    // The "query" is an encoded two-column table reporting 1 MiB of data.
    TableFieldSchema nameField = new TableFieldSchema().setName("name").setType("STRING");
    TableFieldSchema numberField = new TableFieldSchema().setName("number").setType("INTEGER");
    TableSchema resultSchema = new TableSchema().setFields(ImmutableList.of(nameField, numberField));
    Table queryResultTable = new Table().setSchema(resultSchema).setNumBytes(1024L * 1024L);
    String encodedQuery = FakeBigQueryServices.encodeQueryResult(queryResultTable);

    String project = options.getProject();
    JobStatistics2 queryStatistics =
            new JobStatistics2()
                    .setTotalBytesProcessed(1024L * 1024L)
                    .setReferencedTables(ImmutableList.of(referencedTable));
    fakeJobService.expectDryRunQuery(
            project, encodedQuery, new JobStatistics().setQuery(queryStatistics));

    // Build the read-session request the source is expected to issue for the temp table.
    String stepUuid = "testStepUuid";
    String jobIdPrefix =
            BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY);
    TableReference tempTable = createTempTableReference(project, jobIdPrefix, Optional.empty());
    ReadSession.Builder requestedSession =
            ReadSession.newBuilder().setTable(BigQueryHelpers.toTableResourceName(tempTable));
    CreateReadSessionRequest expectedRequest =
            CreateReadSessionRequest.newBuilder()
                    .setParent("projects/" + project)
                    .setReadSession(requestedSession)
                    .setMaxStreamCount(requestedStreamCount)
                    .build();

    // Fake session returned by the storage client: Avro schema plus expectedStreamCount streams.
    Schema avroSchema =
            SchemaBuilder.record("__root__")
                    .fields()
                    .name("name").type().nullable().stringType().noDefault()
                    .name("number").type().nullable().longType().noDefault()
                    .endRecord();
    ReadSession.Builder sessionBuilder =
            ReadSession.newBuilder()
                    .setAvroSchema(AvroSchema.newBuilder().setSchema(avroSchema.toString()))
                    .setDataFormat(DataFormat.AVRO);
    for (int streamIndex = 0; streamIndex < expectedStreamCount; streamIndex++) {
        sessionBuilder.addStreams(ReadStream.newBuilder().setName("stream-" + streamIndex));
    }

    StorageClient fakeStorageClient = mock(StorageClient.class);
    when(fakeStorageClient.createReadSession(expectedRequest)).thenReturn(sessionBuilder.build());

    FakeBigQueryServices fakeServices =
            new FakeBigQueryServices()
                    .withDatasetService(fakeDatasetService)
                    .withJobService(fakeJobService)
                    .withStorageClient(fakeStorageClient);
    BigQueryStorageQuerySource<TableRow> querySource =
            BigQueryStorageQuerySource.create(
                    stepUuid,
                    ValueProvider.StaticValueProvider.of(encodedQuery),
                    /* flattenResults = */ true,
                    /* useLegacySql = */ true,
                    /* priority = */ QueryPriority.BATCH,
                    /* location = */ null,
                    /* queryTempDataset = */ null,
                    /* kmsKey = */ null,
                    null,
                    new TableRowParser(),
                    TableRowJsonCoder.of(),
                    fakeServices);

    List<? extends BoundedSource<TableRow>> splits = querySource.split(bundleSize, options);
    assertEquals(expectedStreamCount, splits.size());
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) JobStatistics2(com.google.api.services.bigquery.model.JobStatistics2) Table(com.google.api.services.bigquery.model.Table) TableSchema(com.google.api.services.bigquery.model.TableSchema) AvroSchema(com.google.cloud.bigquery.storage.v1.AvroSchema) TableSchema(com.google.api.services.bigquery.model.TableSchema) Schema(org.apache.avro.Schema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) StorageClient(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient) ByteString(com.google.protobuf.ByteString) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TableRowParser(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TableRowParser) TableReference(com.google.api.services.bigquery.model.TableReference) BigQueryResourceNaming.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference) TableRow(com.google.api.services.bigquery.model.TableRow) FakeBigQueryServices(org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices) CreateReadSessionRequest(com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest)

Example 8 with JobStatistics2

use of com.google.api.services.bigquery.model.JobStatistics2 in project beam by apache.

the class BigQueryIOStorageQueryTest method testQuerySourceInitialSplit_EmptyResult.

/**
 * Splits a query source configured through {@code options.getProject()} and checks that no
 * sub-sources are produced when the storage client answers with an empty {@link ReadSession}.
 */
@Test
public void testQuerySourceInitialSplit_EmptyResult() throws Exception {
    // Register the table that the dry-run statistics will report as referenced.
    TableReference referencedTable = BigQueryHelpers.parseTableSpec("project:dataset.table");
    fakeDatasetService.createDataset(
            referencedTable.getProjectId(),
            referencedTable.getDatasetId(),
            "asia-northeast1",
            "Fake plastic tree^H^H^H^Htables",
            null);
    fakeDatasetService.createTable(
            new Table().setTableReference(referencedTable).setLocation("asia-northeast1"));

    // The "query" is an encoded table with a schema but zero bytes of data.
    Table emptyResultTable = new Table().setSchema(TABLE_SCHEMA).setNumBytes(0L);
    String encodedQuery = FakeBigQueryServices.encodeQueryResult(emptyResultTable);

    String project = options.getProject();
    JobStatistics2 queryStatistics =
            new JobStatistics2()
                    .setTotalBytesProcessed(1024L * 1024L)
                    .setReferencedTables(ImmutableList.of(referencedTable));
    fakeJobService.expectDryRunQuery(
            project, encodedQuery, new JobStatistics().setQuery(queryStatistics));

    // Build the read-session request the source is expected to issue for the temp table.
    String stepUuid = "testStepUuid";
    String jobIdPrefix =
            BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY);
    TableReference tempTable = createTempTableReference(project, jobIdPrefix, Optional.empty());
    ReadSession.Builder requestedSession =
            ReadSession.newBuilder().setTable(BigQueryHelpers.toTableResourceName(tempTable));
    CreateReadSessionRequest expectedRequest =
            CreateReadSessionRequest.newBuilder()
                    .setParent("projects/" + project)
                    .setReadSession(requestedSession)
                    .setMaxStreamCount(10)
                    .build();

    // The mocked storage client replies to that exact request with an empty session.
    StorageClient fakeStorageClient = mock(StorageClient.class);
    when(fakeStorageClient.createReadSession(expectedRequest))
            .thenReturn(ReadSession.newBuilder().build());

    FakeBigQueryServices fakeServices =
            new FakeBigQueryServices()
                    .withDatasetService(fakeDatasetService)
                    .withJobService(fakeJobService)
                    .withStorageClient(fakeStorageClient);
    BigQueryStorageQuerySource<TableRow> querySource =
            BigQueryStorageQuerySource.create(
                    stepUuid,
                    ValueProvider.StaticValueProvider.of(encodedQuery),
                    /* flattenResults = */ true,
                    /* useLegacySql = */ true,
                    /* priority = */ QueryPriority.BATCH,
                    /* location = */ null,
                    /* queryTempDataset = */ null,
                    /* kmsKey = */ null,
                    null,
                    new TableRowParser(),
                    TableRowJsonCoder.of(),
                    fakeServices);

    List<? extends BoundedSource<TableRow>> splits = querySource.split(1024L, options);
    assertTrue(splits.isEmpty());
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) JobStatistics2(com.google.api.services.bigquery.model.JobStatistics2) Table(com.google.api.services.bigquery.model.Table) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) StorageClient(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient) ByteString(com.google.protobuf.ByteString) TableRowParser(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TableRowParser) TableReference(com.google.api.services.bigquery.model.TableReference) BigQueryResourceNaming.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference) TableRow(com.google.api.services.bigquery.model.TableRow) FakeBigQueryServices(org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices) CreateReadSessionRequest(com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest) Test(org.junit.Test)

Example 9 with JobStatistics2

use of com.google.api.services.bigquery.model.JobStatistics2 in project beam by apache.

the class BigQueryTableRowIteratorTest method testReadFromQueryNoTables.

/**
 * Verifies that queries that reference no data can be read.
 *
 * <p>The dry-run job returned by the mock carries empty query statistics (a bare
 * {@link JobStatistics2}). The test then reads a single row through
 * {@link BigQueryTableRowIterator} and checks both the row contents and the sequence of client
 * calls recorded on the mocks.
 */
@Test
public void testReadFromQueryNoTables() throws IOException, InterruptedException {
    // Mock job inserting: the first execute() yields the dry-run job, the second the real job.
    Job dryRunJob = new Job().setStatistics(new JobStatistics().setQuery(new JobStatistics2()));
    Job insertedJob = new Job().setJobReference(new JobReference());
    when(mockJobsInsert.execute()).thenReturn(dryRunJob, insertedJob);
    // Mock job polling: the job is already DONE and points at the temp destination table.
    JobStatus status = new JobStatus().setState("DONE");
    JobConfigurationQuery resultQueryConfig = new JobConfigurationQuery().setDestinationTable(new TableReference().setProjectId("project").setDatasetId("tempdataset").setTableId("temptable"));
    Job getJob = new Job().setJobReference(new JobReference()).setStatus(status).setConfiguration(new JobConfiguration().setQuery(resultQueryConfig));
    when(mockJobsGet.execute()).thenReturn(getJob);
    // Mock table schema fetch.
    when(mockTablesGet.execute()).thenReturn(noTableQuerySchema());
    // Encode with an explicit charset so the fixture does not depend on the platform default.
    byte[] photoBytes = "photograph".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    String photoBytesEncoded = BaseEncoding.base64().encode(photoBytes);
    // Mock table data fetch.
    when(mockTabledataList.execute()).thenReturn(rawDataList(rawRow("Arthur", 42, photoBytesEncoded)));
    // Run query and verify
    String query = String.format("SELECT \"Arthur\" as name, 42 as count, \"%s\" as photo", photoBytesEncoded);
    JobConfigurationQuery queryConfig = new JobConfigurationQuery().setQuery(query);
    try (BigQueryTableRowIterator iterator = BigQueryTableRowIterator.fromQuery(queryConfig, "project", mockClient)) {
        iterator.open();
        assertTrue(iterator.advance());
        TableRow row = iterator.getCurrent();
        assertTrue(row.containsKey("name"));
        assertTrue(row.containsKey("count"));
        assertTrue(row.containsKey("photo"));
        assertEquals("Arthur", row.get("name"));
        assertEquals(42, row.get("count"));
        assertEquals(photoBytesEncoded, row.get("photo"));
        // Exactly one row was mocked, so the iterator must now be exhausted.
        assertFalse(iterator.advance());
    }
    // Temp dataset created and later deleted.
    verify(mockClient, times(2)).datasets();
    verify(mockDatasets).insert(anyString(), any(Dataset.class));
    verify(mockDatasetsInsert).execute();
    verify(mockDatasets).delete(anyString(), anyString());
    verify(mockDatasetsDelete).execute();
    // Two job inserts (dry run, then the query itself) and one poll.
    verify(mockClient, times(3)).jobs();
    verify(mockJobs, times(2)).insert(anyString(), any(Job.class));
    verify(mockJobsInsert, times(2)).execute();
    verify(mockJobs).get(anyString(), anyString());
    verify(mockJobsGet).execute();
    // Temp table get after query finish, deleted after reading.
    verify(mockClient, times(2)).tables();
    verify(mockTables, times(1)).get(anyString(), anyString(), anyString());
    verify(mockTablesGet, times(1)).execute();
    verify(mockTables).delete(anyString(), anyString(), anyString());
    verify(mockTablesDelete).execute();
    // Table data read from the destination table named in the polled job's configuration.
    verify(mockClient).tabledata();
    verify(mockTabledata).list("project", "tempdataset", "temptable");
    verify(mockTabledataList).execute();
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) JobStatistics2(com.google.api.services.bigquery.model.JobStatistics2) JobReference(com.google.api.services.bigquery.model.JobReference) JobConfigurationQuery(com.google.api.services.bigquery.model.JobConfigurationQuery) Dataset(com.google.api.services.bigquery.model.Dataset) Matchers.anyString(org.mockito.Matchers.anyString) Matchers.containsString(org.hamcrest.Matchers.containsString) JobStatus(com.google.api.services.bigquery.model.JobStatus) TableReference(com.google.api.services.bigquery.model.TableReference) TableRow(com.google.api.services.bigquery.model.TableRow) Job(com.google.api.services.bigquery.model.Job) JobConfiguration(com.google.api.services.bigquery.model.JobConfiguration) Test(org.junit.Test)

Example 10 with JobStatistics2

use of com.google.api.services.bigquery.model.JobStatistics2 in project beam by apache.

the class BigQueryIOReadTest method testBigQueryQuerySourceEstimatedSize.

/**
 * Checks that the estimated size of a query-backed source equals the total bytes processed
 * reported by the dry-run statistics registered with the fake job service.
 */
@Test
public void testBigQueryQuerySourceEstimatedSize() throws Exception {
    String queryString = "fake query string";
    String stepUuid = "testStepUuid";

    BigQueryOptions bqOptions = PipelineOptionsFactory.create().as(BigQueryOptions.class);
    bqOptions.setProject("project");

    BigQueryQuerySourceDef sourceDef =
            BigQueryQuerySourceDef.create(
                    fakeBqServices,
                    ValueProvider.StaticValueProvider.of(queryString),
                    /* flattenResults */ true,
                    /* useLegacySql */ true,
                    QueryPriority.BATCH,
                    null,
                    null,
                    null);
    BigQuerySourceBase<TableRow> bqSource =
            sourceDef.toSource(
                    stepUuid, TableRowJsonCoder.of(), BigQueryIO.TableRowParser.INSTANCE, false);

    // The dry run for this query reports 100 bytes processed.
    JobStatistics dryRunStatistics =
            new JobStatistics().setQuery(new JobStatistics2().setTotalBytesProcessed(100L));
    fakeJobService.expectDryRunQuery(bqOptions.getProject(), queryString, dryRunStatistics);

    assertEquals(100, bqSource.getEstimatedSizeBytes(bqOptions));
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) JobStatistics2(com.google.api.services.bigquery.model.JobStatistics2) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) TableRow(com.google.api.services.bigquery.model.TableRow) ByteString(com.google.protobuf.ByteString) Test(org.junit.Test)

Aggregations

JobStatistics (com.google.api.services.bigquery.model.JobStatistics)13 JobStatistics2 (com.google.api.services.bigquery.model.JobStatistics2)13 TableRow (com.google.api.services.bigquery.model.TableRow)12 Test (org.junit.Test)11 TableReference (com.google.api.services.bigquery.model.TableReference)10 Table (com.google.api.services.bigquery.model.Table)9 ByteString (com.google.protobuf.ByteString)9 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)6 TableSchema (com.google.api.services.bigquery.model.TableSchema)6 BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference)6 ReadSession (com.google.cloud.bigquery.storage.v1.ReadSession)5 TableRowParser (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TableRowParser)5 StorageClient (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient)5 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)5 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)5 Job (com.google.api.services.bigquery.model.Job)4 JobStatus (com.google.api.services.bigquery.model.JobStatus)4 CreateReadSessionRequest (com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest)4 Dataset (com.google.api.services.bigquery.model.Dataset)2 JobConfiguration (com.google.api.services.bigquery.model.JobConfiguration)2