Use of com.google.cloud.spark.bigquery.repackaged.org.json.JSONArray in the dataproc-templates project by GoogleCloudPlatform.
Example: the writeToBQ method of the PubSubToBQ class. For each Spark Streaming micro-batch, it iterates over every partition, parses each Pub/Sub message payload as a JSON object, accumulates records into a JSONArray, and appends them to a BigQuery table in batches of batchSize through the Storage Write API's JsonStreamWriter.
import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.Table;
// The Storage Write API classes are assumed to be the versions shaded into the
// spark-bigquery connector, matching the repackaged org.json classes below.
import com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1.AppendRowsResponse;
import com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1.JsonStreamWriter;
import com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1.TableName;
import com.google.cloud.spark.bigquery.repackaged.org.json.JSONArray;
import com.google.cloud.spark.bigquery.repackaged.org.json.JSONObject;
import java.util.Iterator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.pubsub.SparkPubsubMessage;

public static void writeToBQ(JavaDStream<SparkPubsubMessage> pubSubStream, String outputProjectID, String pubSubBQOutputDataset, String PubSubBQOutputTable, Integer batchSize) {
  pubSubStream.foreachRDD(new VoidFunction<JavaRDD<SparkPubsubMessage>>() {
    @Override
    public void call(JavaRDD<SparkPubsubMessage> sparkPubsubMessageJavaRDD) throws Exception {
      sparkPubsubMessageJavaRDD.foreachPartition(new VoidFunction<Iterator<SparkPubsubMessage>>() {
        @Override
        public void call(Iterator<SparkPubsubMessage> sparkPubsubMessageIterator) throws Exception {
          // Look up the destination table's schema so the writer can validate records.
          BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
          Table table = bigquery.getTable(pubSubBQOutputDataset, PubSubBQOutputTable);
          TableName parentTable = TableName.of(outputProjectID, pubSubBQOutputDataset, PubSubBQOutputTable);
          Schema schema = table.getDefinition().getSchema();
          // One Storage Write API writer per partition, targeting the table's default stream.
          JsonStreamWriter writer = JsonStreamWriter.newBuilder(parentTable.toString(), schema).build();
          JSONArray jsonArr = new JSONArray();
          while (sparkPubsubMessageIterator.hasNext()) {
            // Each Pub/Sub payload is expected to be a JSON object matching the table schema.
            SparkPubsubMessage message = sparkPubsubMessageIterator.next();
            JSONObject record = new JSONObject(new String(message.getData()));
            jsonArr.put(record);
            if (jsonArr.length() == batchSize) {
              // Flush a full batch; get() blocks until BigQuery acknowledges the append.
              ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
              AppendRowsResponse response = future.get();
              jsonArr = new JSONArray();
            }
          }
          // Flush any remaining records from a final, partial batch.
          if (jsonArr.length() > 0) {
            ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
            AppendRowsResponse response = future.get();
          }
          writer.close();
        }
      });
    }
  });
}
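
For context, here is a minimal sketch of how writeToBQ might be wired up from a Spark Streaming driver. It assumes the stream is created with Apache Bahir's spark-streaming-pubsub connector (the source of the SparkPubsubMessage type the method consumes); the PubSubToBQExample class name and the project, subscription, dataset, table, and batch-size values are hypothetical placeholders.

import org.apache.spark.SparkConf;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.pubsub.PubsubUtils;
import org.apache.spark.streaming.pubsub.SparkGCPCredentials;
import org.apache.spark.streaming.pubsub.SparkPubsubMessage;

public class PubSubToBQExample {
  public static void main(String[] args) throws InterruptedException {
    // Hypothetical resource names; substitute your own.
    String projectId = "my-project";
    String subscription = "my-subscription";

    SparkConf conf = new SparkConf().setAppName("PubSubToBQExample");
    // 10-second micro-batches.
    JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(10));

    // Receive Pub/Sub messages using application-default credentials.
    JavaDStream<SparkPubsubMessage> stream =
        PubsubUtils.createStream(
            jsc,
            projectId,
            subscription,
            new SparkGCPCredentials.Builder().build(),
            StorageLevel.MEMORY_AND_DISK_SER());

    // Append each micro-batch to BigQuery, flushing every 1000 records.
    PubSubToBQ.writeToBQ(stream, projectId, "my_dataset", "my_table", 1000);

    jsc.start();
    jsc.awaitTermination();
  }
}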