Use of com.google.cloud.bigquery.storage.v1.BatchCommitWriteStreamsRequest in project spark-bigquery-connector by GoogleCloudDataproc.
The class BigQueryDirectDataSourceWriterContext, method commit.
/**
 * Determines what to do based on the WritingMode: in IGNORE_INPUTS mode there is no work to be
 * done; otherwise all streams are batch committed using the BigQuery Storage Write API. Then, in
 * OVERWRITE mode, BigQueryClient's overwriteDestinationWithTemporary is called to replace the
 * destination table with all the data from the temporary table; in ALL_ELSE mode no further work
 * is needed.
 *
 * @see WritingMode
 * @see BigQueryClient#overwriteDestinationWithTemporary(TableId temporaryTableId, TableId
 *     destinationTableId)
 * @param messages the BigQueryWriterCommitMessage array returned by the BigQueryDataWriters
 */
@Override
public void commit(WriterCommitMessageContext[] messages) {
  if (writingMode.equals(WritingMode.IGNORE_INPUTS)) {
    return;
  }
  logger.info(
      "BigQuery DataSource writer {} committed with messages:\n{}",
      writeUUID,
      Arrays.toString(messages));
  BatchCommitWriteStreamsRequest.Builder batchCommitWriteStreamsRequest =
      BatchCommitWriteStreamsRequest.newBuilder().setParent(tablePathForBigQueryStorage);
  for (WriterCommitMessageContext message : messages) {
    batchCommitWriteStreamsRequest.addWriteStreams(
        ((BigQueryDirectWriterCommitMessageContext) message).getWriteStreamName());
  }
  BatchCommitWriteStreamsResponse batchCommitWriteStreamsResponse =
      writeClient.batchCommitWriteStreams(batchCommitWriteStreamsRequest.build());
  if (!batchCommitWriteStreamsResponse.hasCommitTime()) {
    throw new BigQueryConnectorException(
        "DataSource writer failed to batch commit its BigQuery write-streams");
  }
  logger.info(
      "BigQuery DataSource writer has committed at time: {}",
      batchCommitWriteStreamsResponse.getCommitTime());
  if (writingMode.equals(WritingMode.OVERWRITE)) {
    Job overwriteJob =
        bigQueryClient.overwriteDestinationWithTemporary(
            tableToWrite.getTableId(), destinationTableId);
    BigQueryClient.waitForJob(overwriteJob);
    Preconditions.checkState(
        bigQueryClient.deleteTable(tableToWrite.getTableId()),
        new BigQueryConnectorException(
            String.format("Could not delete temporary table %s from BigQuery", tableToWrite)));
  }
}
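Each BigQueryDataWriter running on an executor creates its own pending write stream and reports the stream name back to the driver through its commit message; the commit method above only collects those names and batch-commits them in a single request. Below is a minimal, hypothetical sketch of such a commit-message carrier; the class name and shape are illustrative only, and the connector's real BigQueryDirectWriterCommitMessageContext carries additional writer metadata.

// Hypothetical, simplified stand-in for BigQueryDirectWriterCommitMessageContext.
// The real class in spark-bigquery-connector implements WriterCommitMessageContext
// and records additional metadata about the writer that produced the stream.
public final class SimpleWriterCommitMessage implements java.io.Serializable {
  private final String writeStreamName;

  public SimpleWriterCommitMessage(String writeStreamName) {
    this.writeStreamName = writeStreamName;
  }

  // The driver-side commit() reads this name from every executor's message
  // and batch-commits all pending streams in one BatchCommitWriteStreamsRequest.
  public String getWriteStreamName() {
    return writeStreamName;
  }
}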
Use of com.google.cloud.bigquery.storage.v1.BatchCommitWriteStreamsRequest in project java-bigquerystorage by googleapis.
The class WritePendingStream, method writePendingStream.
public static void writePendingStream(String projectId, String datasetName, String tableName)
    throws DescriptorValidationException, InterruptedException, IOException {
  try (BigQueryWriteClient client = BigQueryWriteClient.create()) {
    // Initialize a write stream for the specified table.
    // For more information on WriteStream.Type, see:
    // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1/WriteStream.Type.html
    WriteStream stream = WriteStream.newBuilder().setType(WriteStream.Type.PENDING).build();
    TableName parentTable = TableName.of(projectId, datasetName, tableName);
    CreateWriteStreamRequest createWriteStreamRequest =
        CreateWriteStreamRequest.newBuilder()
            .setParent(parentTable.toString())
            .setWriteStream(stream)
            .build();
    WriteStream writeStream = client.createWriteStream(createWriteStreamRequest);
    // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1/JsonStreamWriter.html
    try (JsonStreamWriter writer =
        JsonStreamWriter.newBuilder(writeStream.getName(), writeStream.getTableSchema())
            .build()) {
      // Write two batches to the stream, each with 10 JSON records.
      for (int i = 0; i < 2; i++) {
        // Create a JSON object that is compatible with the table schema.
        JSONArray jsonArr = new JSONArray();
        for (int j = 0; j < 10; j++) {
          JSONObject record = new JSONObject();
          record.put("col1", String.format("batch-record %03d-%03d", i, j));
          jsonArr.put(record);
        }
        ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
        // Block until the append is acknowledged; get() throws ExecutionException on failure.
        AppendRowsResponse response = future.get();
      }
      // Finalize the stream so no further rows can be appended before the commit.
      FinalizeWriteStreamResponse finalizeResponse =
          client.finalizeWriteStream(writeStream.getName());
      System.out.println("Rows written: " + finalizeResponse.getRowCount());
    }
    // Commit the streams.
    BatchCommitWriteStreamsRequest commitRequest =
        BatchCommitWriteStreamsRequest.newBuilder()
            .setParent(parentTable.toString())
            .addWriteStreams(writeStream.getName())
            .build();
    BatchCommitWriteStreamsResponse commitResponse =
        client.batchCommitWriteStreams(commitRequest);
    // If the response does not have a commit time, the commit operation failed.
    if (!commitResponse.hasCommitTime()) {
      for (StorageError err : commitResponse.getStreamErrorsList()) {
        System.out.println(err.getErrorMessage());
      }
      throw new RuntimeException("Error committing the streams");
    }
    System.out.println("Appended and committed records successfully.");
  } catch (ExecutionException e) {
    // If the wrapped exception is a StatusRuntimeException, check the state of the operation.
    // If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information, see:
    // https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
    System.out.println(e);
  }
}
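For reference, a minimal way to invoke the sample above; the project, dataset, and table names are placeholders and must point at an existing table whose schema has a STRING column named col1.

// Hypothetical driver for the sample; replace the placeholder values with real resource names.
public static void main(String[] args) throws Exception {
  String projectId = "my-project";   // placeholder
  String datasetName = "my_dataset"; // placeholder
  String tableName = "my_table";     // placeholder
  writePendingStream(projectId, datasetName, tableName);
}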