use of com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.
the class ITBigQueryStorageTest method testReadAtSnapshot.
/**
 * Checks that a read pinned to a snapshot time returns only the rows present at that time.
 *
 * <p>Two query jobs are run against a newly created single-column table via {@code
 * RunQueryAppendJobAndExpectSuccess}. Reading at the end time of the first job is expected to
 * yield {@code [1]}; reading at the end time of the second job is expected to yield {@code [1,
 * 2]}.
 */
@Test
public void testReadAtSnapshot() throws InterruptedException, IOException {
  // Table with a single REQUIRED INTEGER column named "col".
  Field colField =
      Field.newBuilder("col", LegacySQLTypeName.INTEGER)
          .setMode(Mode.REQUIRED)
          .setDescription("IntegerDescription")
          .build();
  com.google.cloud.bigquery.Schema bqSchema = com.google.cloud.bigquery.Schema.of(colField);
  TableId snapshotTableId = TableId.of(DATASET, "test_read_snapshot");
  bigquery.create(TableInfo.of(snapshotTableId, StandardTableDefinition.of(bqSchema)));

  TableReference snapshotTableReference =
      TableReference.newBuilder()
          .setTableId(snapshotTableId.getTable())
          .setDatasetId(DATASET)
          .setProjectId(ServiceOptions.getDefaultProjectId())
          .build();

  // Both jobs run before any read so that the first read has to rely on the snapshot time
  // to exclude the second row.
  Job jobWritingOne = RunQueryAppendJobAndExpectSuccess(snapshotTableId, "SELECT 1 AS col");
  Job jobWritingTwo = RunQueryAppendJobAndExpectSuccess(snapshotTableId, "SELECT 2 AS col");

  final List<Long> valuesAtFirstSnapshot = new ArrayList<>();
  AvroRowConsumer firstSnapshotCollector =
      new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record row) {
          valuesAtFirstSnapshot.add((Long) row.get("col"));
        }
      };
  ProcessRowsAtSnapshot(
      snapshotTableReference,
      /* snapshotInMillis = */ jobWritingOne.getStatistics().getEndTime(),
      /* filter = */ null,
      firstSnapshotCollector);
  assertEquals(Arrays.asList(1L), valuesAtFirstSnapshot);

  final List<Long> valuesAtSecondSnapshot = new ArrayList<>();
  AvroRowConsumer secondSnapshotCollector =
      new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record row) {
          valuesAtSecondSnapshot.add((Long) row.get("col"));
        }
      };
  ProcessRowsAtSnapshot(
      snapshotTableReference,
      /* snapshotInMillis = */ jobWritingTwo.getStatistics().getEndTime(),
      /* filter = */ null,
      secondSnapshotCollector);

  // Sort first so the comparison does not depend on the order in which rows were delivered.
  Collections.sort(valuesAtSecondSnapshot);
  assertEquals(Arrays.asList(1L, 2L), valuesAtSecondSnapshot);
}
use of com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.
the class ITBigQueryStorageTest method ReadAllRows.
/**
 * Collects the rows of the specified table reference into a list of generic Avro records.
 *
 * <p>Delegates to {@code ProcessRowsAtSnapshot} without a snapshot time and stores a copy of
 * every record passed to the consumer.
 *
 * @param tableReference the table to read from.
 * @param filter Optional. If specified, it will be used to restrict returned data.
 * @return copies of the records received, in the order the consumer was called.
 * @throws IOException if {@code ProcessRowsAtSnapshot} throws it.
 */
List<GenericData.Record> ReadAllRows(TableReference tableReference, String filter) throws IOException {
  final List<GenericData.Record> collected = new ArrayList<>();
  AvroRowConsumer copyingConsumer =
      new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record row) {
          // The reader reuses the record reference between calls, so keep a copy rather
          // than the instance handed to us.
          GenericData.Record copy = new GenericRecordBuilder(row).build();
          collected.add(copy);
        }
      };
  ProcessRowsAtSnapshot(
      tableReference, /* snapshotInMillis = */ null, /* filter = */ filter, copyingConsumer);
  return collected;
}
use of com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.
the class ITBigQueryStorageTest method testColumnSelection.
/**
 * Reads the public Shakespeare sample table with only the {@code word} and {@code word_count}
 * columns selected and a {@code word_count > 100} row restriction, then checks the returned
 * Avro schema, every returned row, and the total row count.
 */
@Test
public void testColumnSelection() throws IOException {
  String shakespeareTable =
      BigQueryResource.FormatTableResource(
          /* projectId = */ "bigquery-public-data",
          /* datasetId = */ "samples",
          /* tableId = */ "shakespeare");

  TableReadOptions readOptions =
      TableReadOptions.newBuilder()
          .addSelectedFields("word")
          .addSelectedFields("word_count")
          .setRowRestriction("word_count > 100")
          .build();
  ReadSession requestedSession =
      ReadSession.newBuilder()
          .setTable(shakespeareTable)
          .setReadOptions(readOptions)
          .setDataFormat(DataFormat.AVRO)
          .build();
  CreateReadSessionRequest createRequest =
      CreateReadSessionRequest.newBuilder()
          .setParent(parentProjectId)
          .setMaxStreamCount(1)
          .setReadSession(requestedSession)
          .build();

  ReadSession session = client.createReadSession(createRequest);
  String streamCountMessage =
      String.format(
          "Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s",
          shakespeareTable, session.toString());
  assertEquals(streamCountMessage, 1, session.getStreamsCount());

  String streamName = session.getStreams(0).getName();
  ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder().setReadStream(streamName).build();

  // The session schema must contain exactly the two selected columns.
  Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
  String actualSchemaMessage =
      String.format("Unexpected schema. Actual schema:%n%s", avroSchema.toString(/* pretty = */ true));
  assertEquals(actualSchemaMessage, Schema.Type.RECORD, avroSchema.getType());
  assertEquals(actualSchemaMessage, "__root__", avroSchema.getName());
  assertEquals(actualSchemaMessage, 2, avroSchema.getFields().size());
  assertEquals(actualSchemaMessage, Schema.Type.STRING, avroSchema.getField("word").schema().getType());
  assertEquals(actualSchemaMessage, Schema.Type.LONG, avroSchema.getField("word_count").schema().getType());

  // The per-row check keeps no state, so one consumer instance serves every response.
  AvroRowConsumer rowChecker =
      new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record row) {
          String rowAssertMessage = String.format("Row not matching expectations: %s", row.toString());

          Long wordCount = (Long) row.get("word_count");
          assertWithMessage(rowAssertMessage).that(wordCount).isGreaterThan(100L);

          Utf8 word = (Utf8) row.get("word");
          assertWithMessage(rowAssertMessage).that(word.length()).isGreaterThan(0);
        }
      };

  SimpleRowReader reader = new SimpleRowReader(avroSchema);
  long totalRows = 0;
  ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(readRowsRequest);
  for (ReadRowsResponse batch : responses) {
    totalRows += batch.getRowCount();
    reader.processRows(batch.getAvroRows(), rowChecker);
  }

  assertEquals(1_333, totalRows);
}
use of com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.
the class ITBigQueryStorageTest method testFilter.
/**
 * Reads the public Shakespeare sample table with a {@code word_count > 100} row restriction and
 * checks that every returned row satisfies it and that the total row count matches.
 */
@Test
public void testFilter() throws IOException {
  String shakespeareTable =
      BigQueryResource.FormatTableResource(
          /* projectId = */ "bigquery-public-data",
          /* datasetId = */ "samples",
          /* tableId = */ "shakespeare");

  TableReadOptions readOptions =
      TableReadOptions.newBuilder().setRowRestriction("word_count > 100").build();
  ReadSession requestedSession =
      ReadSession.newBuilder()
          .setTable(shakespeareTable)
          .setReadOptions(readOptions)
          .setDataFormat(DataFormat.AVRO)
          .build();
  CreateReadSessionRequest createRequest =
      CreateReadSessionRequest.newBuilder()
          .setParent(parentProjectId)
          .setMaxStreamCount(1)
          .setReadSession(requestedSession)
          .build();

  ReadSession session = client.createReadSession(createRequest);
  String streamCountMessage =
      String.format(
          "Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s",
          shakespeareTable, session.toString());
  assertEquals(streamCountMessage, 1, session.getStreamsCount());

  String streamName = session.getStreams(0).getName();
  ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder().setReadStream(streamName).build();

  Schema sessionSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
  SimpleRowReader reader = new SimpleRowReader(sessionSchema);

  // The per-row check keeps no state, so one consumer instance serves every response.
  AvroRowConsumer rowChecker =
      new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record row) {
          Long wordCount = (Long) row.get("word_count");
          assertWithMessage("Row not matching expectations: %s", row.toString())
              .that(wordCount)
              .isGreaterThan(100L);
        }
      };

  long totalRows = 0;
  ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(readRowsRequest);
  for (ReadRowsResponse batch : responses) {
    totalRows += batch.getRowCount();
    reader.processRows(batch.getAvroRows(), rowChecker);
  }

  assertEquals(1_333, totalRows);
}
use of com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.
the class ITBigQueryStorageTest method ProcessRowsAtSnapshot.
/**
 * Reads the rows of the specified table through a single read stream.
 *
 * <p>A read session in Avro format is created with at most one stream requested, and the
 * method asserts that exactly one stream was returned. Each response's Avro rows are then
 * passed to a {@code SimpleRowReader}, which is given the consumer for processing.
 *
 * @param table the table to read, as passed to {@code ReadSession.Builder#setTable}. Must not
 *     be null.
 * @param snapshotInMillis Optional. If specified, all rows up to timestamp will be returned.
 *     The value is converted to a {@code Timestamp} (whole seconds plus the remaining
 *     milliseconds expressed as nanos) and set as the session's snapshot time.
 * @param filter Optional. If specified, it will be used to restrict returned data. A null or
 *     empty value means no row restriction is set.
 * @param consumer that receives all Avro rows. Must not be null.
 * @throws IOException declared for errors raised while reading or decoding rows.
 */
private void ProcessRowsAtSnapshot(String table, Long snapshotInMillis, String filter, AvroRowConsumer consumer) throws IOException {
  Preconditions.checkNotNull(table);
  Preconditions.checkNotNull(consumer);

  // Configure the session completely before attaching it to the request.
  ReadSession.Builder sessionBuilder =
      ReadSession.newBuilder().setTable(table).setDataFormat(DataFormat.AVRO);

  if (snapshotInMillis != null) {
    long millis = snapshotInMillis;
    long wholeSeconds = millis / 1_000;
    // Sub-second remainder, converted from milliseconds to nanoseconds.
    int nanos = (int) ((millis % 1000) * 1000000);
    Timestamp snapshotTime = Timestamp.newBuilder().setSeconds(wholeSeconds).setNanos(nanos).build();
    sessionBuilder.setTableModifiers(TableModifiers.newBuilder().setSnapshotTime(snapshotTime).build());
  }

  boolean hasFilter = !(filter == null || filter.isEmpty());
  if (hasFilter) {
    sessionBuilder.setReadOptions(TableReadOptions.newBuilder().setRowRestriction(filter).build());
  }

  CreateReadSessionRequest createRequest =
      CreateReadSessionRequest.newBuilder()
          .setParent(parentProjectId)
          .setMaxStreamCount(1)
          .setReadSession(sessionBuilder.build())
          .build();
  ReadSession session = client.createReadSession(createRequest);

  String streamCountMessage =
      String.format(
          "Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s",
          table, session.toString());
  assertEquals(streamCountMessage, 1, session.getStreamsCount());

  String streamName = session.getStreams(0).getName();
  ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder().setReadStream(streamName).build();

  Schema sessionSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
  SimpleRowReader reader = new SimpleRowReader(sessionSchema);

  ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(readRowsRequest);
  for (ReadRowsResponse batch : responses) {
    reader.processRows(batch.getAvroRows(), consumer);
  }
}
Aggregations