Search in sources:

Example 31 with ReadSession

use of com.google.cloud.bigquery.storage.v1beta2.ReadSession in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testSimpleRead.

/**
 * Creates a one-stream Avro read session over the {@code shakespeare} table in the
 * {@code bigquery-public-data.samples} dataset, drains that stream, and verifies the total
 * number of rows reported by the responses.
 */
@Test
public void testSimpleRead() {
    String tableResource = BigQueryResource.FormatTableResource(
            /* projectId = */ "bigquery-public-data",
            /* datasetId = */ "samples",
            /* tableId = */ "shakespeare");

    ReadSession sessionTemplate =
            ReadSession.newBuilder().setTable(tableResource).setDataFormat(DataFormat.AVRO).build();
    ReadSession session = client.createReadSession(
            /* parent = */ parentProjectId,
            /* readSession = */ sessionTemplate,
            /* maxStreamCount = */ 1);

    // Exactly one stream was requested, so exactly one must come back.
    String streamCountMessage = String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", tableResource, session.toString());
    assertEquals(streamCountMessage, 1, session.getStreamsCount());

    String streamName = session.getStreams(0).getName();
    ReadRowsRequest request = ReadRowsRequest.newBuilder().setReadStream(streamName).build();

    // Sum the per-response row counts until the server stream is exhausted.
    long totalRows = 0;
    for (ReadRowsResponse batch : client.readRowsCallable().call(request)) {
        totalRows += batch.getRowCount();
    }
    assertEquals(164_656, totalRows);
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1beta2.ReadRowsResponse) ReadSession(com.google.cloud.bigquery.storage.v1beta2.ReadSession) ReadRowsRequest(com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest) Test(org.junit.Test)

Example 32 with ReadSession

use of com.google.cloud.bigquery.storage.v1.ReadSession in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageLongRunningTest method testLongRunningReadSession.

/**
 * Reads the {@code wikipedia} sample table through five parallel streams and verifies the
 * combined row count.
 *
 * <p>This test reads a larger table with the goal of doing a simple validation of timeout
 * settings for a longer running session.
 *
 * @throws InterruptedException if the calling thread is interrupted while waiting for the
 *     per-stream readers to finish
 * @throws ExecutionException if any per-stream reader fails
 */
@Test
public void testLongRunningReadSession() throws InterruptedException, ExecutionException {
    String table = BigQueryResource.FormatTableResource(
            /* projectId = */ "bigquery-public-data",
            /* datasetId = */ "samples",
            /* tableId = */ "wikipedia");
    ReadSession session = client.createReadSession(
            /* parent = */ parentProjectId,
            /* readSession = */ ReadSession.newBuilder().setTable(table).setDataFormat(DataFormat.AVRO).build(),
            /* maxStreamCount = */ 5);
    assertEquals(String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", table, session.toString()), 5, session.getStreamsCount());

    // One reader task per stream so that all streams are consumed concurrently.
    List<Callable<Long>> tasks = new ArrayList<>(session.getStreamsCount());
    for (final ReadStream stream : session.getStreamsList()) {
        tasks.add(new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                return readAllRowsFromStream(stream);
            }
        });
    }

    ExecutorService executor = Executors.newFixedThreadPool(tasks.size());
    long rowCount = 0;
    try {
        // invokeAll blocks until every task has completed (normally or exceptionally).
        List<Future<Long>> results = executor.invokeAll(tasks);
        for (Future<Long> result : results) {
            rowCount += result.get();
        }
    } finally {
        // Release the pool's worker threads even when a reader fails or the wait is
        // interrupted; otherwise the non-daemon threads outlive the test.
        executor.shutdown();
    }
    assertEquals(313_797_035, rowCount);
}
Also used : ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) ArrayList(java.util.ArrayList) Callable(java.util.concurrent.Callable) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ReadStream(com.google.cloud.bigquery.storage.v1.ReadStream) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) Test(org.junit.Test)

Example 33 with ReadSession

use of com.google.cloud.bigquery.storage.v1.ReadSession in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testSimpleRead.

/**
 * Requests a single Avro stream for the {@code shakespeare} sample table, consumes every
 * response on it, and asserts the accumulated row count.
 */
@Test
public void testSimpleRead() {
    final String shakespeareTable = BigQueryResource.FormatTableResource(
            /* projectId = */ "bigquery-public-data",
            /* datasetId = */ "samples",
            /* tableId = */ "shakespeare");

    final ReadSession desiredSession =
            ReadSession.newBuilder().setTable(shakespeareTable).setDataFormat(DataFormat.AVRO).build();
    final ReadSession session = client.createReadSession(
            /* parent = */ parentProjectId,
            /* readSession = */ desiredSession,
            /* maxStreamCount = */ 1);

    // The session must expose the single stream that was asked for.
    final String failureMessage = String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", shakespeareTable, session.toString());
    assertEquals(failureMessage, 1, session.getStreamsCount());

    final ReadRowsRequest readRequest =
            ReadRowsRequest.newBuilder().setReadStream(session.getStreams(0).getName()).build();
    final ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(readRequest);

    // Accumulate the row count reported by each response.
    long rowsRead = 0;
    for (ReadRowsResponse page : responses) {
        rowsRead += page.getRowCount();
    }
    assertEquals(164_656, rowsRead);
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1.ReadRowsResponse) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) ReadRowsRequest(com.google.cloud.bigquery.storage.v1.ReadRowsRequest) Test(org.junit.Test)

Example 34 with ReadSession

use of com.google.cloud.bigquery.storage.v1.ReadSession in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testFilter.

/**
 * Creates a read session restricted to rows with {@code word_count > 100}, decodes every
 * returned Avro row, and checks both that each row satisfies the restriction and that the
 * total number of rows matches the expected count.
 *
 * @throws IOException declared because row decoding may raise it
 */
@Test
public void testFilter() throws IOException {
    String tableResource = BigQueryResource.FormatTableResource(
            /* projectId = */ "bigquery-public-data",
            /* datasetId = */ "samples",
            /* tableId = */ "shakespeare");

    // Build the request in steps: row restriction -> session template -> create request.
    TableReadOptions rowFilter = TableReadOptions.newBuilder().setRowRestriction("word_count > 100").build();
    ReadSession sessionTemplate = ReadSession.newBuilder()
            .setTable(tableResource)
            .setReadOptions(rowFilter)
            .setDataFormat(DataFormat.AVRO)
            .build();
    CreateReadSessionRequest createRequest = CreateReadSessionRequest.newBuilder()
            .setParent(parentProjectId)
            .setMaxStreamCount(1)
            .setReadSession(sessionTemplate)
            .build();

    ReadSession session = client.createReadSession(createRequest);
    String streamCountMessage = String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", tableResource, session.toString());
    assertEquals(streamCountMessage, 1, session.getStreamsCount());

    ReadRowsRequest readRequest = ReadRowsRequest.newBuilder().setReadStream(session.getStreams(0).getName()).build();

    // The reader is constructed from the Avro schema carried by the session.
    Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
    SimpleRowReader rowReader = new SimpleRowReader(avroSchema);

    // Stateless per-row check, shared by every response.
    AvroRowConsumer wordCountCheck = new AvroRowConsumer() {

        @Override
        public void accept(GenericData.Record record) {
            Long wordCount = (Long) record.get("word_count");
            assertWithMessage("Row not matching expectations: %s", record.toString()).that(wordCount).isGreaterThan(100L);
        }
    };

    long matchingRows = 0;
    for (ReadRowsResponse batch : client.readRowsCallable().call(readRequest)) {
        matchingRows += batch.getRowCount();
        rowReader.processRows(batch.getAvroRows(), wordCountCheck);
    }
    assertEquals(1_333, matchingRows);
}
Also used : AvroRowConsumer(com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) ReadRowsRequest(com.google.cloud.bigquery.storage.v1.ReadRowsRequest) GenericData(org.apache.avro.generic.GenericData) ReadRowsResponse(com.google.cloud.bigquery.storage.v1.ReadRowsResponse) TableReadOptions(com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions) CreateReadSessionRequest(com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest) Test(org.junit.Test)

Example 35 with ReadSession

use of com.google.cloud.bigquery.storage.v1.ReadSession in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testSimpleReadAndResume.

/**
 * Reads part of the single stream of a {@code shakespeare} read session, then issues a second
 * read starting at the offset already consumed, and verifies that both reads together account
 * for every row in the table.
 */
@Test
public void testSimpleReadAndResume() {
    String tableResource = BigQueryResource.FormatTableResource(
            /* projectId = */ "bigquery-public-data",
            /* datasetId = */ "samples",
            /* tableId = */ "shakespeare");

    ReadSession sessionTemplate =
            ReadSession.newBuilder().setTable(tableResource).setDataFormat(DataFormat.AVRO).build();
    ReadSession session = client.createReadSession(
            /* parent = */ parentProjectId,
            /* readSession = */ sessionTemplate,
            /* maxStreamCount = */ 1);

    String streamCountMessage = String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", tableResource, session.toString());
    assertEquals(streamCountMessage, 1, session.getStreamsCount());

    // Some rows have to be read first in order to be able to resume from an offset.
    long rowsSeen = ReadStreamToOffset(session.getStreams(0), /* rowOffset = */ 34_846);

    // Resume on the same stream, starting right after the rows already consumed.
    ReadRowsRequest resumeRequest = ReadRowsRequest.newBuilder()
            .setReadStream(session.getStreams(0).getName())
            .setOffset(rowsSeen)
            .build();
    for (ReadRowsResponse batch : client.readRowsCallable().call(resumeRequest)) {
        rowsSeen += batch.getRowCount();
    }

    // Rows read before the resume plus rows read after it must equal the table's total row
    // count.
    assertEquals(164_656, rowsSeen);
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1.ReadRowsResponse) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) ReadRowsRequest(com.google.cloud.bigquery.storage.v1.ReadRowsRequest) Test(org.junit.Test)

Aggregations

ReadSession (com.google.cloud.bigquery.storage.v1.ReadSession): 29, Test (org.junit.Test): 23, ReadRowsRequest (com.google.cloud.bigquery.storage.v1.ReadRowsRequest): 17, ReadRowsResponse (com.google.cloud.bigquery.storage.v1.ReadRowsResponse): 17, CreateReadSessionRequest (com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest): 15, StorageClient (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient): 14, FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices): 13, TableRow (com.google.api.services.bigquery.model.TableRow): 10, TableRowParser (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TableRowParser): 9, Table (com.google.api.services.bigquery.model.Table): 8, TableReference (com.google.api.services.bigquery.model.TableReference): 7, ByteString (com.google.protobuf.ByteString): 7, TableReadOptions (com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions): 6, ReadSession (com.google.cloud.bigquery.storage.v1beta2.ReadSession): 6, GenericRecord (org.apache.avro.generic.GenericRecord): 6, TableInfo (com.google.cloud.bigquery.TableInfo): 5, ReadRowsRequest (com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest): 5, ReadRowsResponse (com.google.cloud.bigquery.storage.v1beta2.ReadRowsResponse): 5, ArrayList (java.util.ArrayList): 5, TableId (com.google.cloud.bigquery.TableId): 4