use of com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions in project java-bigquerystorage by googleapis.
the class ITBigQueryStorageTest method testColumnSelection.
/**
 * Creates a single-stream AVRO read session over the public Shakespeare sample table that
 * selects only the {@code word} and {@code word_count} columns and restricts rows to
 * {@code word_count > 100}, then checks the returned Avro schema, every decoded row, and the
 * total number of rows reported by the stream.
 *
 * @throws IOException declared for the row-decoding calls made while consuming the stream
 */
@Test
public void testColumnSelection() throws IOException {
  String table =
      BigQueryResource.FormatTableResource(
          /* projectId = */ "bigquery-public-data",
          /* datasetId = */ "samples",
          /* tableId = */ "shakespeare");

  // Project two columns and filter rows on the server side.
  TableReadOptions options =
      TableReadOptions.newBuilder()
          .addSelectedFields("word")
          .addSelectedFields("word_count")
          .setRowRestriction("word_count > 100")
          .build();

  ReadSession requestedSession =
      ReadSession.newBuilder()
          .setTable(table)
          .setReadOptions(options)
          .setDataFormat(DataFormat.AVRO)
          .build();
  CreateReadSessionRequest request =
      CreateReadSessionRequest.newBuilder()
          .setParent(parentProjectId)
          .setMaxStreamCount(1)
          .setReadSession(requestedSession)
          .build();

  ReadSession session = client.createReadSession(request);
  String streamCountMessage =
      String.format(
          "Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s",
          table, session.toString());
  assertEquals(streamCountMessage, 1, session.getStreamsCount());

  String streamName = session.getStreams(0).getName();
  ReadRowsRequest readRowsRequest =
      ReadRowsRequest.newBuilder().setReadStream(streamName).build();

  // The session schema must contain exactly the two projected columns.
  Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
  String actualSchemaMessage =
      String.format(
          "Unexpected schema. Actual schema:%n%s", avroSchema.toString(/* pretty = */ true));
  assertEquals(actualSchemaMessage, Schema.Type.RECORD, avroSchema.getType());
  assertEquals(actualSchemaMessage, "__root__", avroSchema.getName());
  assertEquals(actualSchemaMessage, 2, avroSchema.getFields().size());
  assertEquals(
      actualSchemaMessage, Schema.Type.STRING, avroSchema.getField("word").schema().getType());
  assertEquals(
      actualSchemaMessage,
      Schema.Type.LONG,
      avroSchema.getField("word_count").schema().getType());

  SimpleRowReader reader = new SimpleRowReader(avroSchema);

  // Per-row check: the filter must hold and the projected word must be non-empty.
  AvroRowConsumer rowChecker =
      new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record record) {
          String rowAssertMessage =
              String.format("Row not matching expectations: %s", record.toString());
          Long wordCount = (Long) record.get("word_count");
          assertWithMessage(rowAssertMessage).that(wordCount).isGreaterThan(100L);
          Utf8 word = (Utf8) record.get("word");
          assertWithMessage(rowAssertMessage).that(word.length()).isGreaterThan(0);
        }
      };

  long totalRows = 0;
  ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(readRowsRequest);
  for (ReadRowsResponse batch : responses) {
    totalRows += batch.getRowCount();
    reader.processRows(batch.getAvroRows(), rowChecker);
  }

  assertEquals(1_333, totalRows);
}
use of com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions in project java-bigquerystorage by googleapis.
the class ITBigQueryStorageTest method testFilter.
/**
 * Creates a single-stream AVRO read session over the public Shakespeare sample table with the
 * row restriction {@code word_count > 100}, verifies that every decoded row satisfies the
 * restriction, and checks the total number of rows reported by the stream.
 *
 * @throws IOException declared for the row-decoding calls made while consuming the stream
 */
@Test
public void testFilter() throws IOException {
  String table =
      BigQueryResource.FormatTableResource(
          /* projectId = */ "bigquery-public-data",
          /* datasetId = */ "samples",
          /* tableId = */ "shakespeare");

  // No column projection here; only the row filter is set.
  TableReadOptions options =
      TableReadOptions.newBuilder().setRowRestriction("word_count > 100").build();

  ReadSession requestedSession =
      ReadSession.newBuilder()
          .setTable(table)
          .setReadOptions(options)
          .setDataFormat(DataFormat.AVRO)
          .build();
  CreateReadSessionRequest request =
      CreateReadSessionRequest.newBuilder()
          .setParent(parentProjectId)
          .setMaxStreamCount(1)
          .setReadSession(requestedSession)
          .build();

  ReadSession session = client.createReadSession(request);
  String streamCountMessage =
      String.format(
          "Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s",
          table, session.toString());
  assertEquals(streamCountMessage, 1, session.getStreamsCount());

  String streamName = session.getStreams(0).getName();
  ReadRowsRequest readRowsRequest =
      ReadRowsRequest.newBuilder().setReadStream(streamName).build();

  Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
  SimpleRowReader reader = new SimpleRowReader(avroSchema);

  // Per-row check: every returned row must satisfy the requested restriction.
  AvroRowConsumer rowChecker =
      new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record record) {
          Long wordCount = (Long) record.get("word_count");
          assertWithMessage("Row not matching expectations: %s", record.toString())
              .that(wordCount)
              .isGreaterThan(100L);
        }
      };

  long totalRows = 0;
  ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(readRowsRequest);
  for (ReadRowsResponse batch : responses) {
    totalRows += batch.getRowCount();
    reader.processRows(batch.getAvroRows(), rowChecker);
  }

  assertEquals(1_333, totalRows);
}
use of com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions in project java-bigquerystorage by googleapis.
the class ITBigQueryStorageTest method testColumnSelection.
/**
 * Verifies column projection combined with a row filter: opens one AVRO stream over the public
 * Shakespeare sample table selecting {@code word} and {@code word_count} where
 * {@code word_count > 100}, asserts the shape of the returned Avro schema, validates each
 * decoded record, and compares the accumulated row count against the expected total.
 *
 * @throws IOException declared for the row-decoding calls made while consuming the stream
 */
@Test
public void testColumnSelection() throws IOException {
  String table =
      BigQueryResource.FormatTableResource(
          /* projectId = */ "bigquery-public-data",
          /* datasetId = */ "samples",
          /* tableId = */ "shakespeare");

  TableReadOptions options =
      TableReadOptions.newBuilder()
          .addSelectedFields("word")
          .addSelectedFields("word_count")
          .setRowRestriction("word_count > 100")
          .build();

  // Describe the session first, then wrap it in the creation request.
  ReadSession sessionSpec =
      ReadSession.newBuilder()
          .setTable(table)
          .setReadOptions(options)
          .setDataFormat(DataFormat.AVRO)
          .build();
  CreateReadSessionRequest request =
      CreateReadSessionRequest.newBuilder()
          .setParent(parentProjectId)
          .setMaxStreamCount(1)
          .setReadSession(sessionSpec)
          .build();

  ReadSession session = client.createReadSession(request);
  String wrongStreamCount =
      String.format(
          "Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s",
          table, session.toString());
  assertEquals(wrongStreamCount, 1, session.getStreamsCount());

  String firstStream = session.getStreams(0).getName();
  ReadRowsRequest readRowsRequest =
      ReadRowsRequest.newBuilder().setReadStream(firstStream).build();

  // Schema checks: a record named "__root__" holding only the two selected fields.
  String schemaJson = session.getAvroSchema().getSchema();
  Schema avroSchema = new Schema.Parser().parse(schemaJson);
  String actualSchemaMessage =
      String.format(
          "Unexpected schema. Actual schema:%n%s", avroSchema.toString(/* pretty = */ true));
  assertEquals(actualSchemaMessage, Schema.Type.RECORD, avroSchema.getType());
  assertEquals(actualSchemaMessage, "__root__", avroSchema.getName());
  assertEquals(actualSchemaMessage, 2, avroSchema.getFields().size());
  assertEquals(
      actualSchemaMessage, Schema.Type.STRING, avroSchema.getField("word").schema().getType());
  assertEquals(
      actualSchemaMessage,
      Schema.Type.LONG,
      avroSchema.getField("word_count").schema().getType());

  SimpleRowReader reader = new SimpleRowReader(avroSchema);

  // Shared consumer applied to every decoded record of every response.
  AvroRowConsumer recordVerifier =
      new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record record) {
          String rowAssertMessage =
              String.format("Row not matching expectations: %s", record.toString());
          Long wordCount = (Long) record.get("word_count");
          assertWithMessage(rowAssertMessage).that(wordCount).isGreaterThan(100L);
          Utf8 word = (Utf8) record.get("word");
          assertWithMessage(rowAssertMessage).that(word.length()).isGreaterThan(0);
        }
      };

  long rowsSeen = 0;
  ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
  for (ReadRowsResponse chunk : stream) {
    rowsSeen += chunk.getRowCount();
    reader.processRows(chunk.getAvroRows(), recordVerifier);
  }

  assertEquals(1_333, rowsSeen);
}
use of com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions in project java-bigquerystorage by googleapis.
the class StorageArrowSample method main.
/**
 * Reads the {@code name}, {@code number} and {@code state} columns of the public
 * {@code usa_names.usa_1910_current} table, restricted to rows where {@code state = "WA"},
 * through a single ARROW-format read stream and hands each record batch to a
 * {@code SimpleRowReader}.
 *
 * @param args {@code args[0]} is the Google Cloud project ID used as the session parent;
 *     the optional {@code args[1]} is the snapshot time in milliseconds (converted below to
 *     seconds and nanoseconds for a protobuf {@code Timestamp})
 * @throws NumberFormatException if {@code args[1]} is present but is not a valid {@code long}
 * @throws Exception if creating the client or session, or reading or decoding rows, fails
 */
public static void main(String... args) throws Exception {
  // Sets your Google Cloud Platform project ID.
  // String projectId = "YOUR_PROJECT_ID";
  String projectId = args[0];

  // Millisecond timestamps do not fit in an int (current epoch millis are ~1.7e12), so the
  // value must be parsed and carried as a long.
  Long snapshotMillis = null;
  if (args.length > 1) {
    snapshotMillis = Long.parseLong(args[1]);
  }

  try (BigQueryReadClient client = BigQueryReadClient.create()) {
    String parent = String.format("projects/%s", projectId);

    // This example uses baby name data from the public datasets.
    String srcTable =
        String.format(
            "projects/%s/datasets/%s/tables/%s",
            "bigquery-public-data", "usa_names", "usa_1910_current");

    // We specify the columns to be projected by adding them to the selected fields,
    // and set a simple filter to restrict which rows are transmitted.
    TableReadOptions options =
        TableReadOptions.newBuilder()
            .addSelectedFields("name")
            .addSelectedFields("number")
            .addSelectedFields("state")
            .setRowRestriction("state = \"WA\"")
            .build();

    // Start specifying the read session we want created.
    ReadSession.Builder sessionBuilder =
        ReadSession.newBuilder()
            .setTable(srcTable)
            .setDataFormat(DataFormat.ARROW)
            .setReadOptions(options);

    // Optionally specify the snapshot time. When unspecified, snapshot time is "now".
    if (snapshotMillis != null) {
      // floorDiv/floorMod keep the nanos component non-negative for any input; for
      // non-negative millis they are identical to plain / and %.
      long seconds = Math.floorDiv(snapshotMillis, 1000L);
      int nanos = (int) (Math.floorMod(snapshotMillis, 1000L) * 1_000_000L);
      Timestamp t = Timestamp.newBuilder().setSeconds(seconds).setNanos(nanos).build();
      TableModifiers modifiers = TableModifiers.newBuilder().setSnapshotTime(t).build();
      sessionBuilder.setTableModifiers(modifiers);
    }

    // Begin building the session creation request.
    CreateReadSessionRequest.Builder builder =
        CreateReadSessionRequest.newBuilder()
            .setParent(parent)
            .setReadSession(sessionBuilder)
            .setMaxStreamCount(1);

    ReadSession session = client.createReadSession(builder.build());

    // Setup a simple reader and start a read session.
    try (SimpleRowReader reader = new SimpleRowReader(session.getArrowSchema())) {
      // Assert that there are streams available in the session. An empty table may not have
      // data available. If no sessions are available for an anonymous (cached) table, consider
      // writing results of a query to a named table rather than consuming cached results
      // directly.
      Preconditions.checkState(session.getStreamsCount() > 0);

      // Use the first stream to perform reading.
      String streamName = session.getStreams(0).getName();
      ReadRowsRequest readRowsRequest =
          ReadRowsRequest.newBuilder().setReadStream(streamName).build();

      // Process each block of rows as they arrive and decode using our simple row reader.
      ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
      for (ReadRowsResponse response : stream) {
        Preconditions.checkState(response.hasArrowRecordBatch());
        reader.processRows(response.getArrowRecordBatch());
      }
    }
  }
}
use of com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions in project java-bigquerystorage by googleapis.
the class StorageSample method main.
/**
 * Reads the {@code name}, {@code number} and {@code state} columns of the public
 * {@code usa_names.usa_1910_current} table, restricted to rows where {@code state = "WA"},
 * through a single AVRO-format read stream and hands each block of rows to a
 * {@code SimpleRowReader}.
 *
 * @param args {@code args[0]} is the Google Cloud project ID used as the session parent;
 *     the optional {@code args[1]} is the snapshot time in milliseconds (converted below to
 *     seconds and nanoseconds for a protobuf {@code Timestamp})
 * @throws NumberFormatException if {@code args[1]} is present but is not a valid {@code long}
 * @throws Exception if creating the client or session, or reading or decoding rows, fails
 */
public static void main(String... args) throws Exception {
  // Sets your Google Cloud Platform project ID.
  // String projectId = "YOUR_PROJECT_ID";
  String projectId = args[0];

  // Millisecond timestamps do not fit in an int (current epoch millis are ~1.7e12), so the
  // value must be parsed and carried as a long.
  Long snapshotMillis = null;
  if (args.length > 1) {
    snapshotMillis = Long.parseLong(args[1]);
  }

  try (BigQueryReadClient client = BigQueryReadClient.create()) {
    String parent = String.format("projects/%s", projectId);

    // This example uses baby name data from the public datasets.
    String srcTable =
        String.format(
            "projects/%s/datasets/%s/tables/%s",
            "bigquery-public-data", "usa_names", "usa_1910_current");

    // We specify the columns to be projected by adding them to the selected fields,
    // and set a simple filter to restrict which rows are transmitted.
    TableReadOptions options =
        TableReadOptions.newBuilder()
            .addSelectedFields("name")
            .addSelectedFields("number")
            .addSelectedFields("state")
            .setRowRestriction("state = \"WA\"")
            .build();

    // Start specifying the read session we want created.
    ReadSession.Builder sessionBuilder =
        ReadSession.newBuilder()
            .setTable(srcTable)
            .setDataFormat(DataFormat.AVRO)
            .setReadOptions(options);

    // Optionally specify the snapshot time. When unspecified, snapshot time is "now".
    if (snapshotMillis != null) {
      // floorDiv/floorMod keep the nanos component non-negative for any input; for
      // non-negative millis they are identical to plain / and %.
      long seconds = Math.floorDiv(snapshotMillis, 1000L);
      int nanos = (int) (Math.floorMod(snapshotMillis, 1000L) * 1_000_000L);
      Timestamp t = Timestamp.newBuilder().setSeconds(seconds).setNanos(nanos).build();
      TableModifiers modifiers = TableModifiers.newBuilder().setSnapshotTime(t).build();
      sessionBuilder.setTableModifiers(modifiers);
    }

    // Begin building the session creation request.
    CreateReadSessionRequest.Builder builder =
        CreateReadSessionRequest.newBuilder()
            .setParent(parent)
            .setReadSession(sessionBuilder)
            .setMaxStreamCount(1);

    // Request the session creation.
    ReadSession session = client.createReadSession(builder.build());

    SimpleRowReader reader =
        new SimpleRowReader(new Schema.Parser().parse(session.getAvroSchema().getSchema()));

    // Assert that there are streams available in the session. An empty table may not have
    // data available. If no sessions are available for an anonymous (cached) table, consider
    // writing results of a query to a named table rather than consuming cached results directly.
    Preconditions.checkState(session.getStreamsCount() > 0);

    // Use the first stream to perform reading.
    String streamName = session.getStreams(0).getName();
    ReadRowsRequest readRowsRequest =
        ReadRowsRequest.newBuilder().setReadStream(streamName).build();

    // Process each block of rows as they arrive and decode using our simple row reader.
    ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
    for (ReadRowsResponse response : stream) {
      Preconditions.checkState(response.hasAvroRows());
      reader.processRows(response.getAvroRows());
    }
  }
}
Aggregations