
Example 6 with TableSchema

Use of com.google.cloud.bigquery.storage.v1beta2.TableSchema in project java-bigquerystorage by googleapis.

From class ITBigQueryStorageTest, method testReadAtSnapshot.

@Test
public void testReadAtSnapshot() throws InterruptedException, IOException {
    Field intFieldSchema =
        Field.newBuilder("col", LegacySQLTypeName.INTEGER)
            .setMode(Mode.REQUIRED)
            .setDescription("IntegerDescription")
            .build();
    com.google.cloud.bigquery.Schema tableSchema = com.google.cloud.bigquery.Schema.of(intFieldSchema);
    TableId testTableId = TableId.of(/* dataset = */ DATASET, /* table = */ "test_read_snapshot");
    bigquery.create(TableInfo.of(testTableId, StandardTableDefinition.of(tableSchema)));
    testTableId.toString();

    // Append one row per query job so that each job's end time marks a distinct snapshot.
    Job firstJob =
        RunQueryAppendJobAndExpectSuccess(
            /* destinationTableId = */ testTableId, /* query = */ "SELECT 1 AS col");
    Job secondJob =
        RunQueryAppendJobAndExpectSuccess(
            /* destinationTableId = */ testTableId, /* query = */ "SELECT 2 AS col");

    String table =
        BigQueryResource.FormatTableResource(
            /* projectId = */ ServiceOptions.getDefaultProjectId(),
            /* datasetId = */ DATASET,
            /* tableId = */ testTableId.getTable());

    // Reading at the first job's end time should see only the first row.
    final List<Long> rowsAfterFirstSnapshot = new ArrayList<>();
    ProcessRowsAtSnapshot(
        /* table = */ table,
        /* snapshotInMillis = */ firstJob.getStatistics().getEndTime(),
        /* filter = */ null,
        /* consumer = */ new AvroRowConsumer() {
          @Override
          public void accept(GenericData.Record record) {
            rowsAfterFirstSnapshot.add((Long) record.get("col"));
          }
        });
    assertEquals(Arrays.asList(1L), rowsAfterFirstSnapshot);

    // Reading at the second job's end time should see both rows.
    final List<Long> rowsAfterSecondSnapshot = new ArrayList<>();
    ProcessRowsAtSnapshot(
        /* table = */ table,
        /* snapshotInMillis = */ secondJob.getStatistics().getEndTime(),
        /* filter = */ null,
        /* consumer = */ new AvroRowConsumer() {
          @Override
          public void accept(GenericData.Record record) {
            rowsAfterSecondSnapshot.add((Long) record.get("col"));
          }
        });
    Collections.sort(rowsAfterSecondSnapshot);
    assertEquals(Arrays.asList(1L, 2L), rowsAfterSecondSnapshot);
}
Also used: TableId (com.google.cloud.bigquery.TableId), AvroRowConsumer (com.google.cloud.bigquery.storage.v1beta2.it.SimpleRowReader.AvroRowConsumer), ArrayList (java.util.ArrayList), GenericData (org.apache.avro.generic.GenericData), Field (com.google.cloud.bigquery.Field), Job (com.google.cloud.bigquery.Job), Test (org.junit.Test)
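
The test above relies on two helpers that are not part of this snippet: RunQueryAppendJobAndExpectSuccess, which runs a query job that appends its result to the destination table, and ProcessRowsAtSnapshot, which reads the table as of a given snapshot time with the Storage Read API. Below is a minimal, hypothetical sketch of what ProcessRowsAtSnapshot could look like, assuming it wraps BigQueryReadClient from the same v1beta2 package and decodes the Avro rows inline (the real test delegates decoding to SimpleRowReader); names and structure are illustrative, not the actual helper.

// Hypothetical sketch of the ProcessRowsAtSnapshot helper used in the test above.
// Assumes classes from com.google.cloud.bigquery.storage.v1beta2 (BigQueryReadClient,
// CreateReadSessionRequest, DataFormat, ReadRowsRequest, ReadRowsResponse, ReadSession),
// com.google.protobuf.util.Timestamps, and the org.apache.avro decoding classes.
private static void ProcessRowsAtSnapshot(
    String table, Long snapshotInMillis, String filter, AvroRowConsumer consumer) throws IOException {
    try (BigQueryReadClient client = BigQueryReadClient.create()) {
        // Build a read session pinned to the given snapshot time.
        ReadSession.Builder sessionBuilder =
            ReadSession.newBuilder()
                .setTable(table)
                .setDataFormat(DataFormat.AVRO)
                .setTableModifiers(
                    ReadSession.TableModifiers.newBuilder()
                        .setSnapshotTime(Timestamps.fromMillis(snapshotInMillis)));
        if (filter != null) {
            sessionBuilder.setReadOptions(
                ReadSession.TableReadOptions.newBuilder().setRowRestriction(filter));
        }
        ReadSession session =
            client.createReadSession(
                CreateReadSessionRequest.newBuilder()
                    .setParent("projects/" + ServiceOptions.getDefaultProjectId())
                    .setReadSession(sessionBuilder)
                    .setMaxStreamCount(1)
                    .build());
        // Decode each Avro-encoded block of rows and hand individual records to the consumer.
        org.apache.avro.Schema avroSchema =
            new org.apache.avro.Schema.Parser().parse(session.getAvroSchema().getSchema());
        GenericDatumReader<GenericData.Record> datumReader = new GenericDatumReader<>(avroSchema);
        ReadRowsRequest readRequest =
            ReadRowsRequest.newBuilder().setReadStream(session.getStreams(0).getName()).build();
        for (ReadRowsResponse response : client.readRowsCallable().call(readRequest)) {
            BinaryDecoder decoder =
                DecoderFactory.get()
                    .binaryDecoder(
                        response.getAvroRows().getSerializedBinaryRows().toByteArray(), /* reuse = */ null);
            while (!decoder.isEnd()) {
                consumer.accept(datumReader.read(/* reuse = */ null, decoder));
            }
        }
    }
}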

Example 7 with TableSchema

Use of com.google.cloud.bigquery.storage.v1beta2.TableSchema in project java-bigquerystorage by googleapis.

From class AppendCompleteCallback, method writeToDefaultStream.

// writeToDefaultStream: Writes records from the source file to the destination table.
public static void writeToDefaultStream(String projectId, String datasetName, String tableName, String dataFile)
    throws DescriptorValidationException, InterruptedException, IOException {
    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
    // Get the schema of the destination table and convert it to the equivalent BigQueryStorage type.
    Table table = bigquery.getTable(datasetName, tableName);
    Schema schema = table.getDefinition().getSchema();
    TableSchema tableSchema = BqToBqStorageSchemaConverter.convertTableSchema(schema);
    // Use the JSON stream writer to send records in JSON format.
    TableName parentTable = TableName.of(projectId, datasetName, tableName);
    // Open both the writer and the source-file reader in try-with-resources so they are closed on exit.
    try (JsonStreamWriter writer = JsonStreamWriter.newBuilder(parentTable.toString(), tableSchema).build();
        BufferedReader reader = new BufferedReader(new FileReader(dataFile))) {
        // Read JSON data from the source file and send it to the Write API.
        String line = reader.readLine();
        while (line != null) {
            // As a best practice, send batches of records instead of single records at a time.
            JSONArray jsonArr = new JSONArray();
            for (int i = 0; i < 100; i++) {
                JSONObject record = new JSONObject(line);
                jsonArr.put(record);
                line = reader.readLine();
                if (line == null) {
                    break;
                }
            }
            // Append the batch of up to 100 records.
            ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
            // The append method is asynchronous. Rather than waiting for the method to complete,
            // which can hurt performance, register a completion callback and continue streaming.
            ApiFutures.addCallback(future, new AppendCompleteCallback(), MoreExecutors.directExecutor());
        }
    }
}
Also used: BigQuery (com.google.cloud.bigquery.BigQuery), Table (com.google.cloud.bigquery.Table), TableSchema (com.google.cloud.bigquery.storage.v1.TableSchema), Schema (com.google.cloud.bigquery.Schema), JSONArray (org.json.JSONArray), AppendRowsResponse (com.google.cloud.bigquery.storage.v1.AppendRowsResponse), TableName (com.google.cloud.bigquery.storage.v1.TableName), JSONObject (org.json.JSONObject), BufferedReader (java.io.BufferedReader), FileReader (java.io.FileReader), JsonStreamWriter (com.google.cloud.bigquery.storage.v1.JsonStreamWriter)
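
The AppendCompleteCallback registered with ApiFutures.addCallback above is defined elsewhere in the sample and is not shown in this snippet. A minimal sketch follows, assuming the callback only logs the outcome of each append; it implements com.google.api.core.ApiFutureCallback, and the real class may do more, such as tracking in-flight requests or retrying failures.

// Hypothetical minimal completion callback for asynchronous appends; logging only.
static class AppendCompleteCallback implements ApiFutureCallback<AppendRowsResponse> {

    @Override
    public void onSuccess(AppendRowsResponse response) {
        System.out.println(
            "Append succeeded, offset: " + response.getAppendResult().getOffset().getValue());
    }

    @Override
    public void onFailure(Throwable throwable) {
        System.err.println("Append failed: " + throwable.getMessage());
    }
}

A caller would then invoke writeToDefaultStream(projectId, datasetName, tableName, "/path/to/records.json"), where the data file contains one JSON object per line matching the destination table's schema.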

Aggregations

JSONArray (org.json.JSONArray): 6
JSONObject (org.json.JSONObject): 6
Test (org.junit.Test): 5
FieldValueList (com.google.cloud.bigquery.FieldValueList): 4
TableResult (com.google.cloud.bigquery.TableResult): 4
AppendRowsResponse (com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse): 4
JsonStreamWriter (com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter): 4
TableFieldSchema (com.google.cloud.bigquery.storage.v1beta2.TableFieldSchema): 4
TableName (com.google.cloud.bigquery.storage.v1beta2.TableName): 4
TableSchema (com.google.cloud.bigquery.storage.v1beta2.TableSchema): 4
BigQuery (com.google.cloud.bigquery.BigQuery): 2
Schema (com.google.cloud.bigquery.Schema): 2
Table (com.google.cloud.bigquery.Table): 2
AppendRowsResponse (com.google.cloud.bigquery.storage.v1.AppendRowsResponse): 2
JsonStreamWriter (com.google.cloud.bigquery.storage.v1.JsonStreamWriter): 2
TableName (com.google.cloud.bigquery.storage.v1.TableName): 2
TableSchema (com.google.cloud.bigquery.storage.v1.TableSchema): 2
AppendResult (com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse.AppendResult): 2
BigDecimal (java.math.BigDecimal): 2
Field (com.google.cloud.bigquery.Field): 1