Search in sources :

Example 6 with StreamPosition

use of com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageLongRunningTest method readAllRowsFromStream.

/**
 * Drains the given stream from its beginning and tallies the rows it delivered.
 *
 * @param stream the stream to read to completion
 * @return the sum of the row counts reported by every response received on the stream
 */
private long readAllRowsFromStream(Stream stream) {
    ReadRowsRequest request =
        ReadRowsRequest.newBuilder()
            .setReadPosition(StreamPosition.newBuilder().setStream(stream).build())
            .build();

    long totalRows = 0;
    ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(request);
    for (ReadRowsResponse batch : responses) {
        totalRows += batch.getRowCount();
    }

    String summary =
        String.format("Read total of %d rows from stream '%s'.", totalRows, stream.getName());
    LOG.info(summary);
    return totalRows;
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse) StreamPosition(com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition) ReadRowsRequest(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest)

Example 7 with StreamPosition

use of com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testFilter.

/**
 * Reads the public Shakespeare sample table through a single stream with the row restriction
 * {@code word_count > 100}, checks that every decoded Avro record satisfies the restriction, and
 * checks the total number of rows returned.
 */
@Test
public void testFilter() throws IOException {
    TableReference tableReference =
        TableReference.newBuilder()
            .setProjectId("bigquery-public-data")
            .setDatasetId("samples")
            .setTableId("shakespeare")
            .build();
    TableReadOptions readOptions =
        TableReadOptions.newBuilder().setRowRestriction("word_count > 100").build();
    CreateReadSessionRequest sessionRequest =
        CreateReadSessionRequest.newBuilder()
            .setParent(parentProjectId)
            .setRequestedStreams(1)
            .setTableReference(tableReference)
            .setReadOptions(readOptions)
            .setFormat(DataFormat.AVRO)
            .build();
    ReadSession session = client.createReadSession(sessionRequest);

    String streamCountMessage =
        String.format(
            "Did not receive expected number of streams for table reference '%s' CreateReadSession response:%n%s",
            TextFormat.shortDebugString(tableReference), session.toString());
    assertEquals(streamCountMessage, 1, session.getStreamsCount());

    ReadRowsRequest rowsRequest =
        ReadRowsRequest.newBuilder()
            .setReadPosition(StreamPosition.newBuilder().setStream(session.getStreams(0)).build())
            .build();
    // The reader decodes Avro blocks using the schema advertised by the session.
    Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
    SimpleRowReader rowReader = new SimpleRowReader(avroSchema);

    long totalRows = 0;
    ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(rowsRequest);
    for (ReadRowsResponse batch : responses) {
        totalRows += batch.getRowCount();
        rowReader.processRows(
            batch.getAvroRows(),
            new SimpleRowReader.AvroRowConsumer() {
                @Override
                public void accept(GenericData.Record row) {
                    // Every row that comes back must satisfy the row restriction.
                    Long count = (Long) row.get("word_count");
                    assertWithMessage("Row not matching expectations: %s", row.toString())
                        .that(count)
                        .isGreaterThan(100L);
                }
            });
    }

    assertEquals(1_333, totalRows);
}
Also used : AvroRowConsumer(com.google.cloud.bigquery.storage.v1beta1.it.SimpleRowReader.AvroRowConsumer) ReadSession(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession) StreamPosition(com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition) ReadRowsRequest(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest) GenericData(org.apache.avro.generic.GenericData) TableReference(com.google.cloud.bigquery.storage.v1beta1.TableReferenceProto.TableReference) ReadRowsResponse(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse) TableReadOptions(com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions) CreateReadSessionRequest(com.google.cloud.bigquery.storage.v1beta1.Storage.CreateReadSessionRequest) Test(org.junit.Test)

Example 8 with StreamPosition

use of com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testSimpleReadAndResume.

/**
 * Reads part of the single stream of the public Shakespeare sample table, then issues a second
 * read that resumes at the reached offset, and checks that the skipped rows plus the resumed rows
 * add up to the expected table size.
 */
@Test
public void testSimpleReadAndResume() {
    TableReference tableReference =
        TableReference.newBuilder()
            .setProjectId("bigquery-public-data")
            .setDatasetId("samples")
            .setTableId("shakespeare")
            .build();

    ReadSession session =
        client.createReadSession(
            /* tableReference = */ tableReference,
            /* parent = */ parentProjectId,
            /* requestedStreams = */ 1);
    String streamCountMessage =
        String.format(
            "Did not receive expected number of streams for table reference '%s' CreateReadSession response:%n%s",
            TextFormat.shortDebugString(tableReference), session.toString());
    assertEquals(streamCountMessage, 1, session.getStreamsCount());

    Stream onlyStream = session.getStreams(0);

    // We have to read some number of rows in order to be able to resume. More details:
    // https://cloud.google.com/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1beta1#google.cloud.bigquery.storage.v1beta1.ReadRowsRequest
    long totalRows = ReadStreamToOffset(onlyStream, /* rowOffset = */ 34_846);

    // Resume from the offset reached by the first read.
    ReadRowsRequest resumeRequest =
        ReadRowsRequest.newBuilder()
            .setReadPosition(
                StreamPosition.newBuilder().setStream(onlyStream).setOffset(totalRows).build())
            .build();
    ServerStream<ReadRowsResponse> resumed = client.readRowsCallable().call(resumeRequest);
    for (ReadRowsResponse batch : resumed) {
        totalRows += batch.getRowCount();
    }

    // Verifies that the number of rows skipped plus the number of rows read after resuming equals
    // the total number of rows in the table.
    assertEquals(164_656, totalRows);
}
Also used : TableReference(com.google.cloud.bigquery.storage.v1beta1.TableReferenceProto.TableReference) ReadRowsResponse(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse) ReadSession(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession) StreamPosition(com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition) ReadRowsRequest(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest) Test(org.junit.Test)

Example 9 with StreamPosition

use of com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method ReadStreamToOffset.

/**
 * Reads from the beginning of the stream until at least {@code rowOffset} rows have been
 * received, or until the stream is exhausted, whichever comes first.
 *
 * <p>Rows arrive in batches, so the running total may overshoot {@code rowOffset}; in that case
 * {@code rowOffset} itself is returned rather than the overshot total. When the target is reached
 * before the stream ends, the remainder of the stream is cancelled instead of being left
 * partially consumed.
 *
 * @param stream the stream to read, starting from its first row
 * @param rowOffset the number of rows to read past before stopping
 * @return {@code rowOffset} if the stream delivered at least that many rows; otherwise the total
 *     number of rows the stream delivered
 */
private long ReadStreamToOffset(Stream stream, long rowOffset) {
    StreamPosition readPosition = StreamPosition.newBuilder().setStream(stream).build();
    ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder().setReadPosition(readPosition).build();
    long rowCount = 0;
    ServerStream<ReadRowsResponse> serverStream = client.readRowsCallable().call(readRowsRequest);
    for (ReadRowsResponse response : serverStream) {
        rowCount += response.getRowCount();
        if (rowCount >= rowOffset) {
            // A ServerStream must be either fully consumed or cancelled. Returning early without
            // cancelling would leave the underlying RPC open with unread responses.
            serverStream.cancel();
            return rowOffset;
        }
    }
    return rowCount;
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse) StreamPosition(com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition) ReadRowsRequest(com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest)

Aggregations

ReadRowsRequest (com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest)9 ReadRowsResponse (com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse)9 StreamPosition (com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition)9 Test (org.junit.Test)6 ReadSession (com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession)5 TableReference (com.google.cloud.bigquery.storage.v1beta1.TableReferenceProto.TableReference)4 CreateReadSessionRequest (com.google.cloud.bigquery.storage.v1beta1.Storage.CreateReadSessionRequest)3 MockStreamObserver (com.google.api.gax.grpc.testing.MockStreamObserver)2 TableReadOptions (com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions)2 AvroRowConsumer (com.google.cloud.bigquery.storage.v1beta1.it.SimpleRowReader.AvroRowConsumer)2 GenericData (org.apache.avro.generic.GenericData)2 InvalidArgumentException (com.google.api.gax.rpc.InvalidArgumentException)1 Timestamp (com.google.protobuf.Timestamp)1 StatusRuntimeException (io.grpc.StatusRuntimeException)1 ExecutionException (java.util.concurrent.ExecutionException)1 Schema (org.apache.avro.Schema)1 Utf8 (org.apache.avro.util.Utf8)1