Search in sources :

Example 6 with AvroRowConsumer

use of com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testColumnSelection.

/**
 * Reads {@code bigquery-public-data.samples.shakespeare} restricted to the {@code word} and
 * {@code word_count} columns with the row filter {@code word_count > 100}, then checks the
 * Avro schema of the session, each decoded row, and the total number of rows returned.
 *
 * @throws IOException declared because row decoding may fail with an I/O error
 */
@Test
public void testColumnSelection() throws IOException {
    String table = BigQueryResource.FormatTableResource(
            /* projectId = */ "bigquery-public-data",
            /* datasetId = */ "samples",
            /* tableId = */ "shakespeare");

    // Build the request bottom-up: read options -> session template -> create request.
    TableReadOptions readOptions = TableReadOptions.newBuilder()
            .addSelectedFields("word")
            .addSelectedFields("word_count")
            .setRowRestriction("word_count > 100")
            .build();
    ReadSession sessionTemplate = ReadSession.newBuilder()
            .setTable(table)
            .setReadOptions(readOptions)
            .setDataFormat(DataFormat.AVRO)
            .build();
    CreateReadSessionRequest createRequest = CreateReadSessionRequest.newBuilder()
            .setParent(parentProjectId)
            .setMaxStreamCount(1)
            .setReadSession(sessionTemplate)
            .build();

    ReadSession session = client.createReadSession(createRequest);
    String streamCountMessage = String.format(
            "Did not receive expected number of streams for table '%s' CreateReadSession response:%n%s",
            table,
            session.toString());
    assertEquals(streamCountMessage, 1, session.getStreamsCount());

    String streamName = session.getStreams(0).getName();
    ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder().setReadStream(streamName).build();

    // The session schema must contain exactly the two selected columns with their expected types.
    Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema());
    String actualSchemaMessage = String.format(
            "Unexpected schema. Actual schema:%n%s", avroSchema.toString(/* pretty = */ true));
    assertEquals(actualSchemaMessage, Schema.Type.RECORD, avroSchema.getType());
    assertEquals(actualSchemaMessage, "__root__", avroSchema.getName());
    assertEquals(actualSchemaMessage, 2, avroSchema.getFields().size());
    Schema wordSchema = avroSchema.getField("word").schema();
    assertEquals(actualSchemaMessage, Schema.Type.STRING, wordSchema.getType());
    Schema wordCountSchema = avroSchema.getField("word_count").schema();
    assertEquals(actualSchemaMessage, Schema.Type.LONG, wordCountSchema.getType());

    // Stateless per-row check: the filter must hold and the word must be non-empty.
    AvroRowConsumer rowValidator = new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record record) {
            String rowAssertMessage =
                    String.format("Row not matching expectations: %s", record.toString());
            Long wordCount = (Long) record.get("word_count");
            assertWithMessage(rowAssertMessage).that(wordCount).isGreaterThan(100L);
            Utf8 word = (Utf8) record.get("word");
            assertWithMessage(rowAssertMessage).that(word.length()).isGreaterThan(0);
        }
    };

    SimpleRowReader reader = new SimpleRowReader(avroSchema);
    long totalRows = 0;
    ServerStream<ReadRowsResponse> responses = client.readRowsCallable().call(readRowsRequest);
    for (ReadRowsResponse batch : responses) {
        totalRows += batch.getRowCount();
        reader.processRows(batch.getAvroRows(), rowValidator);
    }
    assertEquals(1_333, totalRows);
}
Also used : AvroRowConsumer(com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer) ReadSession(com.google.cloud.bigquery.storage.v1.ReadSession) Schema(org.apache.avro.Schema) ReadRowsRequest(com.google.cloud.bigquery.storage.v1.ReadRowsRequest) GenericData(org.apache.avro.generic.GenericData) ReadRowsResponse(com.google.cloud.bigquery.storage.v1.ReadRowsResponse) Utf8(org.apache.avro.util.Utf8) TableReadOptions(com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions) CreateReadSessionRequest(com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest) Test(org.junit.Test)

Example 7 with AvroRowConsumer

use of com.google.cloud.bigquery.storage.v1beta2.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method ReadAllRows.

/**
 * Collects the rows of the specified table into a list of generic Avro records.
 *
 * <p>Delegates to {@code ProcessRowsAtSnapshot} with a {@code null} snapshot time and a consumer
 * that stores a copy of every record it is handed.
 *
 * @param table the table to read; passed unchanged to {@code ProcessRowsAtSnapshot}
 * @param filter Optional. If specified, it will be used to restrict returned data.
 * @return copies of the records handed to the consumer, in the order they were received
 * @throws IOException declared by this method; presumably raised while reading rows (the
 *     delegate is not visible here)
 */
List<GenericData.Record> ReadAllRows(String table, String filter) throws IOException {
    final List<GenericData.Record> collected = new ArrayList<>();
    AvroRowConsumer copyingConsumer = new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record record) {
            // The reader reuses the record reference it passes in, so keep a copy instead.
            GenericData.Record copy = new GenericRecordBuilder(record).build();
            collected.add(copy);
        }
    };
    ProcessRowsAtSnapshot(
            /* table = */ table,
            /* snapshotInMillis = */ null,
            /* filter = */ filter,
            copyingConsumer);
    return collected;
}
Also used : AvroRowConsumer(com.google.cloud.bigquery.storage.v1beta2.it.SimpleRowReader.AvroRowConsumer) ArrayList(java.util.ArrayList) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericData(org.apache.avro.generic.GenericData)

Example 8 with AvroRowConsumer

use of com.google.cloud.bigquery.storage.v1beta2.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testReadAtSnapshot.

/**
 * Checks that a read pinned to a snapshot time returns the rows expected at that time.
 *
 * <p>Creates a table with a single required integer column {@code col}, runs two query jobs
 * ({@code SELECT 1 AS col}, then {@code SELECT 2 AS col}) against it through
 * {@code RunQueryAppendJobAndExpectSuccess}, and reads the table twice, using the end time of
 * each job as the snapshot time. The first read must yield {@code [1]} and the second
 * {@code [1, 2]}.
 *
 * @throws InterruptedException declared by this method; presumably raised while waiting on a
 *     query job (the helper is not visible here)
 * @throws IOException declared by this method; presumably raised while reading rows
 */
@Test
public void testReadAtSnapshot() throws InterruptedException, IOException {
    Field intFieldSchema = Field.newBuilder("col", LegacySQLTypeName.INTEGER)
            .setMode(Mode.REQUIRED)
            .setDescription("IntegerDescription")
            .build();
    com.google.cloud.bigquery.Schema tableSchema =
            com.google.cloud.bigquery.Schema.of(intFieldSchema);
    TableId testTableId = TableId.of(/* dataset = */ DATASET, /* table = */ "test_read_snapshot");
    bigquery.create(TableInfo.of(testTableId, StandardTableDefinition.of(tableSchema)));

    // Each job's end time is used below as the snapshot time for one read.
    Job firstJob = RunQueryAppendJobAndExpectSuccess(
            /* destinationTableId = */ testTableId, /* query = */ "SELECT 1 AS col");
    Job secondJob = RunQueryAppendJobAndExpectSuccess(
            /* destinationTableId = */ testTableId, /* query = */ "SELECT 2 AS col");

    String table = BigQueryResource.FormatTableResource(
            /* projectId = */ ServiceOptions.getDefaultProjectId(),
            /* datasetId = */ DATASET,
            /* tableId = */ testTableId.getTable());

    // Read at the end of the first job: only the first row is expected.
    final List<Long> rowsAfterFirstSnapshot = new ArrayList<>();
    ProcessRowsAtSnapshot(
            /* table = */ table,
            /* snapshotInMillis = */ firstJob.getStatistics().getEndTime(),
            /* filter = */ null,
            /* consumer = */ new AvroRowConsumer() {
                @Override
                public void accept(GenericData.Record record) {
                    rowsAfterFirstSnapshot.add((Long) record.get("col"));
                }
            });
    assertEquals(Arrays.asList(1L), rowsAfterFirstSnapshot);

    // Read at the end of the second job: both rows are expected.
    final List<Long> rowsAfterSecondSnapshot = new ArrayList<>();
    ProcessRowsAtSnapshot(
            /* table = */ table,
            /* snapshotInMillis = */ secondJob.getStatistics().getEndTime(),
            /* filter = */ null,
            /* consumer = */ new AvroRowConsumer() {
                @Override
                public void accept(GenericData.Record record) {
                    rowsAfterSecondSnapshot.add((Long) record.get("col"));
                }
            });
    // Sort first so the comparison does not depend on the order rows were delivered in.
    Collections.sort(rowsAfterSecondSnapshot);
    assertEquals(Arrays.asList(1L, 2L), rowsAfterSecondSnapshot);
}
Also used : TableId(com.google.cloud.bigquery.TableId) AvroRowConsumer(com.google.cloud.bigquery.storage.v1beta2.it.SimpleRowReader.AvroRowConsumer) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) Field(com.google.cloud.bigquery.Field) Job(com.google.cloud.bigquery.Job) Test(org.junit.Test)

Example 9 with AvroRowConsumer

use of com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method testReadAtSnapshot.

/**
 * Checks that a read pinned to a snapshot time returns the rows expected at that time.
 *
 * <p>Creates a table with a single required integer column {@code col}, runs two query jobs
 * ({@code SELECT 1 AS col}, then {@code SELECT 2 AS col}) against it through
 * {@code RunQueryAppendJobAndExpectSuccess}, and reads the table twice, using the end time of
 * each job as the snapshot time. The first read must yield {@code [1]} and the second
 * {@code [1, 2]}.
 *
 * @throws InterruptedException declared by this method; presumably raised while waiting on a
 *     query job (the helper is not visible here)
 * @throws IOException declared by this method; presumably raised while reading rows
 */
@Test
public void testReadAtSnapshot() throws InterruptedException, IOException {
    Field intFieldSchema = Field.newBuilder("col", LegacySQLTypeName.INTEGER)
            .setMode(Mode.REQUIRED)
            .setDescription("IntegerDescription")
            .build();
    com.google.cloud.bigquery.Schema tableSchema =
            com.google.cloud.bigquery.Schema.of(intFieldSchema);
    TableId testTableId = TableId.of(/* dataset = */ DATASET, /* table = */ "test_read_snapshot");
    bigquery.create(TableInfo.of(testTableId, StandardTableDefinition.of(tableSchema)));

    // Each job's end time is used below as the snapshot time for one read.
    Job firstJob = RunQueryAppendJobAndExpectSuccess(
            /* destinationTableId = */ testTableId, /* query = */ "SELECT 1 AS col");
    Job secondJob = RunQueryAppendJobAndExpectSuccess(
            /* destinationTableId = */ testTableId, /* query = */ "SELECT 2 AS col");

    String table = BigQueryResource.FormatTableResource(
            /* projectId = */ ServiceOptions.getDefaultProjectId(),
            /* datasetId = */ DATASET,
            /* tableId = */ testTableId.getTable());

    // Read at the end of the first job: only the first row is expected.
    final List<Long> rowsAfterFirstSnapshot = new ArrayList<>();
    ProcessRowsAtSnapshot(
            /* table = */ table,
            /* snapshotInMillis = */ firstJob.getStatistics().getEndTime(),
            /* filter = */ null,
            /* consumer = */ new AvroRowConsumer() {
                @Override
                public void accept(GenericData.Record record) {
                    rowsAfterFirstSnapshot.add((Long) record.get("col"));
                }
            });
    assertEquals(Arrays.asList(1L), rowsAfterFirstSnapshot);

    // Read at the end of the second job: both rows are expected.
    final List<Long> rowsAfterSecondSnapshot = new ArrayList<>();
    ProcessRowsAtSnapshot(
            /* table = */ table,
            /* snapshotInMillis = */ secondJob.getStatistics().getEndTime(),
            /* filter = */ null,
            /* consumer = */ new AvroRowConsumer() {
                @Override
                public void accept(GenericData.Record record) {
                    rowsAfterSecondSnapshot.add((Long) record.get("col"));
                }
            });
    // Sort first so the comparison does not depend on the order rows were delivered in.
    Collections.sort(rowsAfterSecondSnapshot);
    assertEquals(Arrays.asList(1L, 2L), rowsAfterSecondSnapshot);
}
Also used : TableId(com.google.cloud.bigquery.TableId) AvroRowConsumer(com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) Field(com.google.cloud.bigquery.Field) Job(com.google.cloud.bigquery.Job) Test(org.junit.Test)

Example 10 with AvroRowConsumer

use of com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method ReadAllRows.

/**
 * Collects the rows of the specified table into a list of generic Avro records.
 *
 * <p>Delegates to {@code ProcessRowsAtSnapshot} with a {@code null} snapshot time and a consumer
 * that stores a copy of every record it is handed.
 *
 * @param table the table to read; passed unchanged to {@code ProcessRowsAtSnapshot}
 * @param filter Optional. If specified, it will be used to restrict returned data.
 * @return copies of the records handed to the consumer, in the order they were received
 * @throws IOException declared by this method; presumably raised while reading rows (the
 *     delegate is not visible here)
 */
List<GenericData.Record> ReadAllRows(String table, String filter) throws IOException {
    final List<GenericData.Record> collected = new ArrayList<>();
    AvroRowConsumer copyingConsumer = new AvroRowConsumer() {
        @Override
        public void accept(GenericData.Record record) {
            // The reader reuses the record reference it passes in, so keep a copy instead.
            GenericData.Record copy = new GenericRecordBuilder(record).build();
            collected.add(copy);
        }
    };
    ProcessRowsAtSnapshot(
            /* table = */ table,
            /* snapshotInMillis = */ null,
            /* filter = */ filter,
            copyingConsumer);
    return collected;
}
Also used : AvroRowConsumer(com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer) ArrayList(java.util.ArrayList) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericData(org.apache.avro.generic.GenericData)

Aggregations

GenericData (org.apache.avro.generic.GenericData)10 Test (org.junit.Test)7 ArrayList (java.util.ArrayList)6 AvroRowConsumer (com.google.cloud.bigquery.storage.v1.it.SimpleRowReader.AvroRowConsumer)4 AvroRowConsumer (com.google.cloud.bigquery.storage.v1beta2.it.SimpleRowReader.AvroRowConsumer)4 Field (com.google.cloud.bigquery.Field)3 Job (com.google.cloud.bigquery.Job)3 TableId (com.google.cloud.bigquery.TableId)3 CreateReadSessionRequest (com.google.cloud.bigquery.storage.v1beta2.CreateReadSessionRequest)3 ReadRowsRequest (com.google.cloud.bigquery.storage.v1beta2.ReadRowsRequest)3 ReadRowsResponse (com.google.cloud.bigquery.storage.v1beta2.ReadRowsResponse)3 ReadSession (com.google.cloud.bigquery.storage.v1beta2.ReadSession)3 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)3 CreateReadSessionRequest (com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest)2 ReadRowsRequest (com.google.cloud.bigquery.storage.v1.ReadRowsRequest)2 ReadRowsResponse (com.google.cloud.bigquery.storage.v1.ReadRowsResponse)2 ReadSession (com.google.cloud.bigquery.storage.v1.ReadSession)2 TableReadOptions (com.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions)2 AvroRowConsumer (com.google.cloud.bigquery.storage.v1beta1.it.SimpleRowReader.AvroRowConsumer)2 TableReadOptions (com.google.cloud.bigquery.storage.v1beta2.ReadSession.TableReadOptions)2