Search in sources :

Example 11 with ReadRowsRequest

use of com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageLongRunningTest method readAllRowsFromStream.

/**
 * Consumes the given read stream to its end and tallies the rows it delivers.
 *
 * @param readStream the stream to drain; its name is used to build the read request.
 * @return the sum of the row counts reported by every response received on the stream.
 */
private long readAllRowsFromStream(ReadStream readStream) {
    final String streamName = readStream.getName();
    ReadRowsRequest request = ReadRowsRequest.newBuilder().setReadStream(streamName).build();

    long totalRows = 0;
    // Iterating the server stream blocks until each response arrives.
    for (ReadRowsResponse batch : client.readRowsCallable().call(request)) {
        totalRows += batch.getRowCount();
    }

    LOG.info(String.format("Read total of %d rows from stream '%s'.", totalRows, streamName));
    return totalRows;
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1beta2.ReadRowsResponse) ReadRowsRequest(com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest)

Example 12 with ReadRowsRequest

use of com.google.cloud.bigquery.storage.v1.ReadRowsRequest in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method ProcessRowsAtSnapshot.

/**
 * Streams every row of the given table through the supplied consumer.
 *
 * <p>A read session limited to a single stream and using the Avro data format is created for
 * the table; each response received on that stream is handed to a {@code SimpleRowReader},
 * which invokes the consumer.
 *
 * @param table the table to read; must not be null.
 * @param snapshotInMillis Optional. If specified, all rows up to this timestamp (expressed in
 *     milliseconds) will be returned.
 * @param filter Optional. If specified and non-empty, it is used as the row restriction that
 *     limits the returned data.
 * @param consumer receives all Avro rows; must not be null.
 * @throws IOException declared by this method; presumably raised while rows are being decoded
 *     (the originating call is not visible here).
 */
private void ProcessRowsAtSnapshot(String table, Long snapshotInMillis, String filter, AvroRowConsumer consumer) throws IOException {
    Preconditions.checkNotNull(table);
    Preconditions.checkNotNull(consumer);

    // Assemble the session description first, then attach it to the request.
    ReadSession.Builder sessionBuilder = ReadSession.newBuilder().setTable(table).setDataFormat(DataFormat.AVRO);

    if (snapshotInMillis != null) {
        // Split the millisecond value into whole seconds plus the remainder in nanoseconds.
        long millis = snapshotInMillis;
        long seconds = millis / 1_000;
        int nanos = (int) ((millis % 1000) * 1000000);
        Timestamp snapshotTime = Timestamp.newBuilder().setSeconds(seconds).setNanos(nanos).build();
        sessionBuilder.setTableModifiers(TableModifiers.newBuilder().setSnapshotTime(snapshotTime).build());
    }

    boolean hasFilter = filter != null && !filter.isEmpty();
    if (hasFilter) {
        sessionBuilder.setReadOptions(TableReadOptions.newBuilder().setRowRestriction(filter).build());
    }

    CreateReadSessionRequest createRequest = CreateReadSessionRequest.newBuilder()
            .setParent(parentProjectId)
            .setMaxStreamCount(1)
            .setReadSession(sessionBuilder.build())
            .build();
    ReadSession session = client.createReadSession(createRequest);

    String streamCountMessage = String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", table, session.toString());
    assertEquals(streamCountMessage, 1, session.getStreamsCount());

    String streamName = session.getStreams(0).getName();
    ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder().setReadStream(streamName).build();

    Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
    SimpleRowReader reader = new SimpleRowReader(avroSchema);

    for (ReadRowsResponse batch : client.readRowsCallable().call(readRowsRequest)) {
        reader.processRows(batch.getAvroRows(), consumer);
    }
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1.ReadRowsResponse) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) ReadRowsRequest(com.google.cloud.bigquery.storage.v1.ReadRowsRequest) Timestamp(com.google.protobuf.Timestamp) CreateReadSessionRequest(com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest)

Example 13 with ReadRowsRequest

use of com.google.cloud.bigquery.storage.v1.ReadRowsRequest in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testColumnSelection.

/**
 * Verifies that a read session restricted to the {@code word} and {@code word_count} columns,
 * with the row restriction {@code word_count > 100}, exposes exactly those two fields in its
 * Avro schema and yields only rows that satisfy the restriction.
 */
@Test
public void testColumnSelection() throws IOException {
    String table = BigQueryResource.FormatTableResource(
            /* projectId = */ "bigquery-public-data",
            /* datasetId = */ "samples",
            /* tableId = */ "shakespeare");

    TableReadOptions options = TableReadOptions.newBuilder()
            .addSelectedFields("word")
            .addSelectedFields("word_count")
            .setRowRestriction("word_count > 100")
            .build();

    ReadSession sessionSpec = ReadSession.newBuilder()
            .setTable(table)
            .setReadOptions(options)
            .setDataFormat(DataFormat.AVRO)
            .build();
    CreateReadSessionRequest request = CreateReadSessionRequest.newBuilder()
            .setParent(parentProjectId)
            .setMaxStreamCount(1)
            .setReadSession(sessionSpec)
            .build();

    ReadSession session = client.createReadSession(request);
    String streamCountMessage = String.format("Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s", table, session.toString());
    assertEquals(streamCountMessage, 1, session.getStreamsCount());

    String streamName = session.getStreams(0).getName();
    ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder().setReadStream(streamName).build();

    // The schema must be a record named "__root__" holding only the two selected columns.
    Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
    String actualSchemaMessage = String.format("Unexpected schema. Actual schema:%n%s", avroSchema.toString(/* pretty = */ true));
    assertEquals(actualSchemaMessage, Schema.Type.RECORD, avroSchema.getType());
    assertEquals(actualSchemaMessage, "__root__", avroSchema.getName());
    assertEquals(actualSchemaMessage, 2, avroSchema.getFields().size());
    assertEquals(actualSchemaMessage, Schema.Type.STRING, avroSchema.getField("word").schema().getType());
    assertEquals(actualSchemaMessage, Schema.Type.LONG, avroSchema.getField("word_count").schema().getType());

    // The per-row check holds no state, so a single consumer serves every response.
    final AvroRowConsumer rowChecker = new AvroRowConsumer() {

        @Override
        public void accept(GenericData.Record record) {
            String rowAssertMessage = String.format("Row not matching expectations: %s", record.toString());

            Long wordCount = (Long) record.get("word_count");
            assertWithMessage(rowAssertMessage).that(wordCount).isGreaterThan(100L);

            Utf8 word = (Utf8) record.get("word");
            assertWithMessage(rowAssertMessage).that(word.length()).isGreaterThan(0);
        }
    };

    SimpleRowReader reader = new SimpleRowReader(avroSchema);
    long totalRows = 0;
    for (ReadRowsResponse batch : client.readRowsCallable().call(readRowsRequest)) {
        totalRows += batch.getRowCount();
        reader.processRows(batch.getAvroRows(), rowChecker);
    }

    assertEquals(1_333, totalRows);
}
Also used : AvroRowConsumer(com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) Schema(org.apache.avro.Schema) ReadRowsRequest(com.google.cloud.bigquery.storage.v1.ReadRowsRequest) GenericData(org.apache.avro.generic.GenericData) ReadRowsResponse(com.google.cloud.bigquery.storage.v1.ReadRowsResponse) Utf8(org.apache.avro.util.Utf8) TableReadOptions(com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions) CreateReadSessionRequest(com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest) Test(org.junit.Test)

Example 14 with ReadRowsRequest

use of com.google.cloud.bigquery.storage.v1.ReadRowsRequest in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method ReadStreamToOffset.

/**
 * Consumes responses from the stream until at least {@code rowOffset} rows have been seen. If
 * the stream ends before that many rows arrive, every row it holds is read.
 *
 * @param readStream the stream to read from; its name is used to build the read request.
 * @param rowOffset the number of rows to advance past.
 * @return {@code rowOffset} when the stream supplied at least that many rows, otherwise the
 *     total number of rows read before the stream ended.
 */
private long ReadStreamToOffset(ReadStream readStream, long rowOffset) {
    ReadRowsRequest request = ReadRowsRequest.newBuilder().setReadStream(readStream.getName()).build();

    long rowsSeen = 0;
    for (ReadRowsResponse batch : client.readRowsCallable().call(request)) {
        rowsSeen += batch.getRowCount();
        // Stop as soon as the running total reaches the requested offset.
        if (rowsSeen >= rowOffset) {
            return rowOffset;
        }
    }

    // The stream ran out first; report how many rows it actually held.
    return rowsSeen;
}
Also used : ReadRowsResponse(com.google.cloud.bigquery.storage.v1.ReadRowsResponse) ReadRowsRequest(com.google.cloud.bigquery.storage.v1.ReadRowsRequest)

Example 15 with ReadRowsRequest

use of com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest in project java-bigquerystorage by googleapis.

the class ReadRowsAttemptCallable method call.

/**
 * Sends the actual RPC. The request being sent will first be transformed by the {@link
 * StreamResumptionStrategy}.
 *
 * <p>The first attempt sends {@code initialRequest} unchanged; every later attempt sends the
 * request produced by {@code resumptionStrategy.getResumeRequest(initialRequest)}. A fresh
 * attempt future is created for each call and handed to the outer retrying future once the RPC
 * has been issued.
 *
 * <p>This method expects to be called by one thread at a time. Furthermore, it expects that the
 * current RPC finished before the next time it's called.
 *
 * @return always {@code null}; the outcome of the attempt is reported through the attempt
 *     future registered on the outer retrying future.
 */
@Override
public Void call() {
    // Guard: this callable must have been started before an attempt can be made.
    Preconditions.checkState(isStarted, "Must be started first");
    // Count this attempt; only the very first one uses the caller's request as-is.
    ReadRowsRequest request = (++numAttempts == 1) ? initialRequest : resumptionStrategy.getResumeRequest(initialRequest);
    // Should never happen. onAttemptError will check if ResumptionStrategy can create a resume
    // request, which the RetryingFuture/StreamResumptionStrategy should respect.
    Preconditions.checkState(request != null, "ResumptionStrategy returned a null request.");
    // Reset the per-attempt state before the RPC is issued.
    innerAttemptFuture = SettableApiFuture.create();
    seenSuccessSinceLastError = false;
    ApiCallContext attemptContext = context;
    // A non-zero RPC timeout in the attempt settings is applied as the stream wait timeout;
    // a zero timeout leaves the context untouched.
    if (!outerRetryingFuture.getAttemptSettings().getRpcTimeout().isZero()) {
        attemptContext = attemptContext.withStreamWaitTimeout(outerRetryingFuture.getAttemptSettings().getRpcTimeout());
    }
    // Tell the tracer which overall attempt is starting.
    attemptContext.getTracer().attemptStarted(outerRetryingFuture.getAttemptSettings().getOverallAttemptCount());
    // Issue the RPC; each observer callback simply forwards to the matching onAttempt* method
    // of this callable.
    innerCallable.call(request, new StateCheckingResponseObserver<ReadRowsResponse>() {

        @Override
        public void onStartImpl(StreamController controller) {
            onAttemptStart(controller);
        }

        @Override
        public void onResponseImpl(ReadRowsResponse response) {
            onAttemptResponse(response);
        }

        @Override
        public void onErrorImpl(Throwable t) {
            onAttemptError(t);
        }

        @Override
        public void onCompleteImpl() {
            onAttemptComplete();
        }
    }, attemptContext);
    // Publish this attempt's future to the outer retrying future.
    outerRetryingFuture.setAttemptFuture(innerAttemptFuture);
    return null;
}
Also used : StreamController(com.google.api.gax.rpc.StreamController) ReadRowsResponse(com.google.cloud.bigquery.storage.v1beta2.ReadRowsResponse) ReadRowsRequest(com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest) ApiCallContext(com.google.api.gax.rpc.ApiCallContext)

Aggregations

Test (org.junit.Test)59 ReadRowsRequest (com.google.cloud.bigquery.storage.v1.ReadRowsRequest)31 ReadRowsRequest (com.google.bigtable.v2.ReadRowsRequest)28 ReadRowsResponse (com.google.cloud.bigquery.storage.v1.ReadRowsResponse)24 ReadSession (com.google.cloud.bigquery.storage.v1.ReadSession)17 ReadRowsRequest (com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest)16 ReadRowsResponse (com.google.cloud.bigquery.storage.v1beta2.ReadRowsResponse)11 StorageClient (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient)10 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)10 ReadRowsResponse (com.google.bigtable.v2.ReadRowsResponse)9 Row (com.google.cloud.bigtable.data.v2.models.Row)9 CreateReadSessionRequest (com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest)8 TableRow (com.google.api.services.bigquery.model.TableRow)7 Query (com.google.cloud.bigtable.data.v2.models.Query)7 ServerStreamingStashCallable (com.google.cloud.bigtable.gaxx.testing.FakeStreamingApi.ServerStreamingStashCallable)7 ByteString (com.google.protobuf.ByteString)7 GenericRecord (org.apache.avro.generic.GenericRecord)6 TableRowParser (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TableRowParser)6 ReadSession (com.google.cloud.bigquery.storage.v1beta2.ReadSession)5 DefaultRowAdapter (com.google.cloud.bigtable.data.v2.models.DefaultRowAdapter)5