Search in sources :

Example 1 with JSONArray

Use of com.google.cloud.spark.bigquery.repackaged.org.json.JSONArray in project dataproc-templates by GoogleCloudPlatform.

The method writeToBQ of the class PubSubToBQ.

/**
 * Streams every Pub/Sub message of the given DStream into a BigQuery table.
 *
 * <p>For each RDD of the stream, and for each partition of that RDD, the message payloads are
 * parsed as JSON objects, collected into batches of {@code batchSize} rows and appended to the
 * table through a {@link JsonStreamWriter}. Each append is awaited before the next batch is
 * started, and any rows left over at the end of the partition are appended as a final, smaller
 * batch.
 *
 * <p>NOTE(review): the schema lookup via {@code bigquery.getTable(dataset, table)} does not pass
 * {@code outputProjectID}; it presumably resolves against the client's default project, while
 * the write target does use {@code outputProjectID} - confirm both refer to the same table.
 *
 * @param pubSubStream stream of Pub/Sub messages to persist
 * @param outputProjectID project used to build the write-target table name
 * @param pubSubBQOutputDataset dataset that contains the target table
 * @param PubSubBQOutputTable name of the target table
 * @param batchSize number of rows accumulated before an append is issued
 */
public static void writeToBQ(JavaDStream<SparkPubsubMessage> pubSubStream, String outputProjectID, String pubSubBQOutputDataset, String PubSubBQOutputTable, Integer batchSize) {
    pubSubStream.foreachRDD(new VoidFunction<JavaRDD<SparkPubsubMessage>>() {

        @Override
        public void call(JavaRDD<SparkPubsubMessage> sparkPubsubMessageJavaRDD) throws Exception {
            sparkPubsubMessageJavaRDD.foreachPartition(new VoidFunction<Iterator<SparkPubsubMessage>>() {

                @Override
                public void call(Iterator<SparkPubsubMessage> sparkPubsubMessageIterator) throws Exception {
                    // Nothing to write: skip the BigQuery client, table lookup and writer setup.
                    if (!sparkPubsubMessageIterator.hasNext()) {
                        return;
                    }

                    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
                    Table table = bigquery.getTable(pubSubBQOutputDataset, PubSubBQOutputTable);
                    if (table == null) {
                        // Fail with a descriptive message instead of an opaque NullPointerException.
                        throw new IllegalStateException("BigQuery table not found: " + pubSubBQOutputDataset + "." + PubSubBQOutputTable);
                    }
                    TableName parentTable = TableName.of(outputProjectID, pubSubBQOutputDataset, PubSubBQOutputTable);
                    Schema schema = table.getDefinition().getSchema();

                    // try-with-resources guarantees the writer is closed even when parsing or an
                    // append fails part-way through the partition.
                    try (JsonStreamWriter writer = JsonStreamWriter.newBuilder(parentTable.toString(), schema).build()) {
                        JSONArray jsonArr = new JSONArray();
                        while (sparkPubsubMessageIterator.hasNext()) {
                            SparkPubsubMessage message = sparkPubsubMessageIterator.next();
                            // Decode explicitly as UTF-8 rather than the platform default charset.
                            JSONObject record = new JSONObject(new String(message.getData(), java.nio.charset.StandardCharsets.UTF_8));
                            jsonArr.put(record);
                            if (jsonArr.length() == batchSize) {
                                // Block until the batch is acknowledged; a failed append surfaces here.
                                writer.append(jsonArr).get();
                                jsonArr = new JSONArray();
                            }
                        }
                        // Flush the trailing partial batch, if any.
                        if (jsonArr.length() > 0) {
                            writer.append(jsonArr).get();
                        }
                    }
                }
            });
        }
    });
}
Also used : JSONArray(com.google.cloud.spark.bigquery.repackaged.org.json.JSONArray) AppendRowsResponse(com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse) JavaRDD(org.apache.spark.api.java.JavaRDD) TableName(com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1beta2.TableName) SparkPubsubMessage(org.apache.spark.streaming.pubsub.SparkPubsubMessage) JSONObject(com.google.cloud.spark.bigquery.repackaged.org.json.JSONObject) VoidFunction(org.apache.spark.api.java.function.VoidFunction) Iterator(java.util.Iterator) JsonStreamWriter(com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter)

Aggregations

AppendRowsResponse (com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse)1 JsonStreamWriter (com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter)1 TableName (com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1beta2.TableName)1 JSONArray (com.google.cloud.spark.bigquery.repackaged.org.json.JSONArray)1 JSONObject (com.google.cloud.spark.bigquery.repackaged.org.json.JSONObject)1 Iterator (java.util.Iterator)1 JavaRDD (org.apache.spark.api.java.JavaRDD)1 VoidFunction (org.apache.spark.api.java.function.VoidFunction)1 SparkPubsubMessage (org.apache.spark.streaming.pubsub.SparkPubsubMessage)1