Use of com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions in project DataflowTemplates by GoogleCloudPlatform.
The class BigQueryToParquet, method run.
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
private static PipelineResult run(BigQueryToParquetOptions options) {
  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  // Add fields to filter the export on, if any were supplied.
  TableReadOptions.Builder builder = TableReadOptions.newBuilder();
  if (options.getFields() != null) {
    builder.addAllSelectedFields(Arrays.asList(options.getFields().split(",\\s*")));
  }
  TableReadOptions tableReadOptions = builder.build();

  BigQueryStorageClient client = BigQueryStorageClientFactory.create();
  ReadSession session =
      ReadSessionFactory.create(client, options.getTableRef(), tableReadOptions);

  // Extract the Avro schema from the ReadSession, then release the client.
  Schema schema = getTableSchema(session);
  client.close();

  TypedRead<GenericRecord> readFromBQ =
      BigQueryIO.read(SchemaAndRecord::getRecord)
          .from(options.getTableRef())
          .withTemplateCompatibility()
          .withMethod(Method.DIRECT_READ)
          .withCoder(AvroCoder.of(schema));

  if (options.getFields() != null) {
    List<String> selectedFields = Splitter.on(",").splitToList(options.getFields());
    readFromBQ =
        selectedFields.isEmpty() ? readFromBQ : readFromBQ.withSelectedFields(selectedFields);
  }

  /*
   * Steps:
   *  1) Read records from BigQuery via BigQueryIO.
   *  2) Write records to Google Cloud Storage in Parquet format.
   */
  pipeline
      .apply("ReadFromBigQuery", readFromBQ)
      .apply(
          "WriteToParquet",
          FileIO.<GenericRecord>write()
              .via(ParquetIO.sink(schema))
              .to(options.getBucket())
              .withNumShards(options.getNumShards())
              .withSuffix(FILE_SUFFIX));

  // Execute the pipeline and return the result.
  return pipeline.run();
}
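For context, a minimal sketch of how such a pipeline is typically launched, assuming a conventional Beam main method (the template's actual entry point is not shown in this snippet):

public static void main(String[] args) {
  // Parse command-line arguments into the template's options interface and
  // hand them to run(). PipelineOptionsFactory is standard Beam; the rest
  // mirrors the method above.
  BigQueryToParquetOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(BigQueryToParquetOptions.class);
  run(options);
}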
Use of com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions in project java-bigquerystorage by googleapis.
The class ITBigQueryStorageTest, method testColumnSelection.
@Test
public void testColumnSelection() throws IOException {
  TableReference tableReference =
      TableReference.newBuilder()
          .setProjectId("bigquery-public-data")
          .setDatasetId("samples")
          .setTableId("shakespeare")
          .build();
  TableReadOptions options =
      TableReadOptions.newBuilder()
          .addSelectedFields("word")
          .addSelectedFields("word_count")
          .setRowRestriction("word_count > 100")
          .build();
  CreateReadSessionRequest request =
      CreateReadSessionRequest.newBuilder()
          .setParent(parentProjectId)
          .setRequestedStreams(1)
          .setTableReference(tableReference)
          .setReadOptions(options)
          .setFormat(DataFormat.AVRO)
          .build();

  ReadSession session = client.createReadSession(request);
  assertEquals(
      String.format(
          "Did not receive expected number of streams for table reference '%s' "
              + "CreateReadSession response:%n%s",
          TextFormat.shortDebugString(tableReference), session.toString()),
      1,
      session.getStreamsCount());

  StreamPosition readPosition =
      StreamPosition.newBuilder().setStream(session.getStreams(0)).build();
  ReadRowsRequest readRowsRequest =
      ReadRowsRequest.newBuilder().setReadPosition(readPosition).build();

  Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
  String actualSchemaMessage =
      String.format(
          "Unexpected schema. Actual schema:%n%s", avroSchema.toString(/* pretty = */ true));
  assertEquals(actualSchemaMessage, Schema.Type.RECORD, avroSchema.getType());
  assertEquals(actualSchemaMessage, "__root__", avroSchema.getName());
  assertEquals(actualSchemaMessage, 2, avroSchema.getFields().size());
  assertEquals(
      actualSchemaMessage, Schema.Type.STRING, avroSchema.getField("word").schema().getType());
  assertEquals(
      actualSchemaMessage, Schema.Type.LONG, avroSchema.getField("word_count").schema().getType());

  SimpleRowReader reader = new SimpleRowReader(avroSchema);
  long rowCount = 0;
  ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
  for (ReadRowsResponse response : stream) {
    rowCount += response.getRowCount();
    reader.processRows(
        response.getAvroRows(),
        new SimpleRowReader.AvroRowConsumer() {
          @Override
          public void accept(GenericData.Record record) {
            String rowAssertMessage =
                String.format("Row not matching expectations: %s", record.toString());
            Long wordCount = (Long) record.get("word_count");
            assertWithMessage(rowAssertMessage).that(wordCount).isGreaterThan(100L);
            Utf8 word = (Utf8) record.get("word");
            assertWithMessage(rowAssertMessage).that(word.length()).isGreaterThan(0);
          }
        });
  }
  assertEquals(1_333, rowCount);
}
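SimpleRowReader is a helper from the library's test sources, not part of the public API. A minimal sketch of such a reader, assuming the v1beta1 AvroRows message carries each block of rows as concatenated binary-encoded Avro records exposed via getSerializedBinaryRows():

import com.google.cloud.bigquery.storage.v1beta1.AvroProto.AvroRows;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;

/** Decodes the Avro-encoded row blocks returned by the Storage API. Sketch only. */
public class SimpleRowReader {

  /** Callback invoked once per decoded row. */
  public interface AvroRowConsumer {
    void accept(GenericData.Record record);
  }

  private final DatumReader<GenericData.Record> datumReader;
  // Decoder and row holder are reused across calls to avoid reallocation.
  private BinaryDecoder decoder = null;
  private GenericData.Record row = null;

  public SimpleRowReader(Schema schema) {
    this.datumReader = new GenericDatumReader<>(schema);
  }

  public void processRows(AvroRows avroRows, AvroRowConsumer consumer) throws IOException {
    // Each AvroRows message carries one block of rows serialized back to back.
    decoder =
        DecoderFactory.get()
            .binaryDecoder(avroRows.getSerializedBinaryRows().toByteArray(), decoder);
    while (!decoder.isEnd()) {
      row = datumReader.read(row, decoder);
      consumer.accept(row);
    }
  }
}

Reusing the decoder and record instance across blocks avoids per-row allocation, which matters when scanning large tables.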
Use of com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions in project DataflowTemplates by GoogleCloudPlatform.
The class BigQueryToParquetTest, method testReadSessionFactoryBadTable.
/**
 * Tests that {@link ReadSessionFactory} throws an exception when an invalid table reference is
 * provided.
 */
@Test(expected = IllegalArgumentException.class)
public void testReadSessionFactoryBadTable() {
  // Test input: a semicolon where the project/dataset separator belongs.
  final String badTableRef = "fantasmic-999999;great_data.table";
  final TableReadOptions tableReadOptions = TableReadOptions.newBuilder().build();
  ReadSessionFactory trsf = new ReadSessionFactory();
  ReadSession trs = trsf.create(client, badTableRef, tableReadOptions);
}
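The spec above fails because table references are expected in the form project:dataset.table (or dataset.table); the semicolon makes it unparseable. As an illustration only (an assumption about the factory's internals, not confirmed by this snippet), Beam's BigQueryHelpers performs this kind of validation:

import com.google.api.services.bigquery.model.TableReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;

// Accepts "project:dataset.table" and "dataset.table" specs; anything else,
// such as "fantasmic-999999;great_data.table", throws IllegalArgumentException.
TableReference parsed = BigQueryHelpers.parseTableSpec("fantasmic-999999:great_data.table");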
Use of com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions in project java-bigquerystorage by googleapis.
The class ITBigQueryStorageTest, method testFilter.
@Test
public void testFilter() throws IOException {
  TableReference tableReference =
      TableReference.newBuilder()
          .setProjectId("bigquery-public-data")
          .setDatasetId("samples")
          .setTableId("shakespeare")
          .build();
  TableReadOptions options =
      TableReadOptions.newBuilder().setRowRestriction("word_count > 100").build();
  CreateReadSessionRequest request =
      CreateReadSessionRequest.newBuilder()
          .setParent(parentProjectId)
          .setRequestedStreams(1)
          .setTableReference(tableReference)
          .setReadOptions(options)
          .setFormat(DataFormat.AVRO)
          .build();

  ReadSession session = client.createReadSession(request);
  assertEquals(
      String.format(
          "Did not receive expected number of streams for table reference '%s' "
              + "CreateReadSession response:%n%s",
          TextFormat.shortDebugString(tableReference), session.toString()),
      1,
      session.getStreamsCount());

  StreamPosition readPosition =
      StreamPosition.newBuilder().setStream(session.getStreams(0)).build();
  ReadRowsRequest readRowsRequest =
      ReadRowsRequest.newBuilder().setReadPosition(readPosition).build();
  SimpleRowReader reader =
      new SimpleRowReader(new Schema.Parser().parse(session.getAvroSchema().getSchema()));

  long rowCount = 0;
  ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
  for (ReadRowsResponse response : stream) {
    rowCount += response.getRowCount();
    reader.processRows(
        response.getAvroRows(),
        new SimpleRowReader.AvroRowConsumer() {
          @Override
          public void accept(GenericData.Record record) {
            Long wordCount = (Long) record.get("word_count");
            assertWithMessage("Row not matching expectations: %s", record.toString())
                .that(wordCount)
                .isGreaterThan(100L);
          }
        });
  }
  assertEquals(1_333, rowCount);
}
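Both tests request a single stream. For larger scans, a session can be created with more requested streams and each one read independently; a sketch against the same v1beta1 types (sequential here for brevity; real readers would fan out across threads or workers):

// Iterate every stream the server actually granted; the count can be lower
// than the number requested for small tables like this one.
for (Storage.Stream stream : session.getStreamsList()) {
  ReadRowsRequest request =
      ReadRowsRequest.newBuilder()
          .setReadPosition(StreamPosition.newBuilder().setStream(stream))
          .build();
  for (ReadRowsResponse response : client.readRowsCallable().call(request)) {
    // Decode response.getAvroRows() as in the tests above.
  }
}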