Search in sources :

Example 6 with ReadStream

use of com.google.cloud.bigquery.storage.v1beta2.ReadStream in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageLongRunningTest method testLongRunningReadSession.

/**
 * Creates a read session over the {@code wikipedia} table with a requested maximum of five
 * streams, reads every stream concurrently, and verifies the combined row count.
 *
 * @throws InterruptedException if the calling thread is interrupted while waiting for the reads
 * @throws ExecutionException if any of the per-stream read tasks fails
 */
@Test
public void testLongRunningReadSession() throws InterruptedException, ExecutionException {
    // This test reads a larger table with the goal of doing a simple validation of timeout settings
    // for a longer running session.
    String table = BigQueryResource.FormatTableResource(
        /* projectId = */ "bigquery-public-data",
        /* datasetId = */ "samples",
        /* tableId = */ "wikipedia");
    ReadSession session = client.createReadSession(
        /* parent = */ parentProjectId,
        /* readSession = */ ReadSession.newBuilder().setTable(table).setDataFormat(DataFormat.AVRO).build(),
        /* maxStreamCount = */ 5);
    assertEquals(String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", table, session.toString()), 5, session.getStreamsCount());

    // One task per stream; each task returns the number of rows it read.
    List<Callable<Long>> tasks = new ArrayList<>(session.getStreamsCount());
    for (final ReadStream stream : session.getStreamsList()) {
        tasks.add(new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                return readAllRowsFromStream(stream);
            }
        });
    }

    // The pool is sized so that every stream is read at the same time.
    ExecutorService executor = Executors.newFixedThreadPool(tasks.size());
    long rowCount = 0;
    try {
        // invokeAll blocks until every task has completed (normally or exceptionally).
        List<Future<Long>> results = executor.invokeAll(tasks);
        for (Future<Long> result : results) {
            rowCount += result.get();
        }
    } finally {
        // Release the worker threads even when a read fails or the test is interrupted;
        // without this the pool's threads are leaked past the end of the test.
        executor.shutdown();
    }
    assertEquals(313_797_035, rowCount);
}
Also used : ReadSession(com.google.cloud.bigquery.storage.v1beta2.ReadSession) ArrayList(java.util.ArrayList) Callable(java.util.concurrent.Callable) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ReadStream(com.google.cloud.bigquery.storage.v1beta2.ReadStream) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) Test(org.junit.Test)

Example 7 with ReadStream

use of com.google.cloud.bigquery.storage.v1beta2.ReadStream in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method ReadStreamToOffset.

/**
 * Reads from the given stream until at least {@code rowOffset} rows have been received. If the
 * stream ends before that many rows arrive, all of its rows are read.
 *
 * @param readStream the stream to read from; its name is used to build the read request
 * @param rowOffset the number of rows to read before stopping
 * @return {@code rowOffset} once at least that many rows have been received, otherwise the total
 *     number of rows read when the stream had fewer rows
 */
private long ReadStreamToOffset(ReadStream readStream, long rowOffset) {
    ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder().setReadStream(readStream.getName()).build();
    long rowCount = 0;
    ServerStream<ReadRowsResponse> serverStream = client.readRowsCallable().call(readRowsRequest);
    Iterator<ReadRowsResponse> responseIterator = serverStream.iterator();
    while (responseIterator.hasNext()) {
        ReadRowsResponse response = responseIterator.next();
        rowCount += response.getRowCount();
        if (rowCount >= rowOffset) {
            // A server stream must be either fully consumed or cancelled. We stop reading
            // early here, so cancel explicitly instead of abandoning the open call.
            serverStream.cancel();
            return rowOffset;
        }
    }
    return rowCount;
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1beta2.ReadRowsResponse) ReadRowsRequest(com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest)

Example 8 with ReadStream

use of com.google.cloud.bigquery.storage.v1beta2.ReadStream in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageLongRunningTest method testLongRunningReadSession.

/**
 * Opens a read session on the {@code wikipedia} table asking for at most five streams, drains
 * all streams in parallel, and checks the sum of the rows read against the expected total.
 *
 * @throws InterruptedException if the calling thread is interrupted while waiting for the reads
 * @throws ExecutionException if any of the per-stream read tasks fails
 */
@Test
public void testLongRunningReadSession() throws InterruptedException, ExecutionException {
    // This test reads a larger table with the goal of doing a simple validation of timeout settings
    // for a longer running session.
    String table = BigQueryResource.FormatTableResource(
        /* projectId = */ "bigquery-public-data",
        /* datasetId = */ "samples",
        /* tableId = */ "wikipedia");
    ReadSession session = client.createReadSession(
        /* parent = */ parentProjectId,
        /* readSession = */ ReadSession.newBuilder().setTable(table).setDataFormat(DataFormat.AVRO).build(),
        /* maxStreamCount = */ 5);
    assertEquals(String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", table, session.toString()), 5, session.getStreamsCount());

    // Build one row-counting task for each stream in the session.
    List<Callable<Long>> tasks = new ArrayList<>(session.getStreamsCount());
    for (final ReadStream stream : session.getStreamsList()) {
        tasks.add(new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                return readAllRowsFromStream(stream);
            }
        });
    }

    // One thread per task so that no stream waits for another to finish.
    ExecutorService executor = Executors.newFixedThreadPool(tasks.size());
    long rowCount = 0;
    try {
        // invokeAll returns only after every task has completed (normally or exceptionally).
        List<Future<Long>> results = executor.invokeAll(tasks);
        for (Future<Long> result : results) {
            rowCount += result.get();
        }
    } finally {
        // Always shut the pool down; otherwise its threads stay alive after the test,
        // including when a read task throws or the test thread is interrupted.
        executor.shutdown();
    }
    assertEquals(313_797_035, rowCount);
}
Also used : ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) ArrayList(java.util.ArrayList) Callable(java.util.concurrent.Callable) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ReadStream(com.google.cloud.bigquery.storage.v1.ReadStream) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) Test(org.junit.Test)

Aggregations

ReadStream (com.google.cloud.bigquery.storage.v1.ReadStream)5 Test (org.junit.Test)4 ReadSession (com.google.cloud.bigquery.storage.v1.ReadSession)3 ArrayList (java.util.ArrayList)3 StorageClient (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient)3 TableRow (com.google.api.services.bigquery.model.TableRow)2 ReadRowsRequest (com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest)2 ReadRowsResponse (com.google.cloud.bigquery.storage.v1beta2.ReadRowsResponse)2 IOException (java.io.IOException)2 Callable (java.util.concurrent.Callable)2 ExecutionException (java.util.concurrent.ExecutionException)2 ExecutorService (java.util.concurrent.ExecutorService)2 Future (java.util.concurrent.Future)2 TableRowParser (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TableRowParser)2 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)2 Table (com.google.api.services.bigquery.model.Table)1 TableSchema (com.google.api.services.bigquery.model.TableSchema)1 Field (com.google.cloud.bigquery.Field)1 Schema (com.google.cloud.bigquery.Schema)1 TableDefinition (com.google.cloud.bigquery.TableDefinition)1