use of com.google.cloud.bigquery.storage.v1.TableName in project java-bigquerystorage by googleapis.
the class AppendCompleteCallback method writeToDefaultStream.
// writeToDefaultStream: Writes records from the source file to the destination table.
public static void writeToDefaultStream(
    String projectId, String datasetName, String tableName, String dataFile)
    throws DescriptorValidationException, InterruptedException, IOException {
  BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
  // Get the schema of the destination table and convert it to the equivalent BigQuery Storage type.
  Table table = bigquery.getTable(datasetName, tableName);
  Schema schema = table.getDefinition().getSchema();
  TableSchema tableSchema = BqToBqStorageSchemaConverter.convertTableSchema(schema);
  // Use the JSON stream writer to send records in JSON format.
  TableName parentTable = TableName.of(projectId, datasetName, tableName);
  try (JsonStreamWriter writer =
          JsonStreamWriter.newBuilder(parentTable.toString(), tableSchema).build();
      // Close the reader automatically when the method exits.
      BufferedReader reader = new BufferedReader(new FileReader(dataFile))) {
    // Read JSON data from the source file and send it to the Write API.
    String line = reader.readLine();
    while (line != null) {
      // As a best practice, send batches of records instead of single records at a time.
      JSONArray jsonArr = new JSONArray();
      for (int i = 0; i < 100; i++) {
        JSONObject record = new JSONObject(line);
        jsonArr.put(record);
        line = reader.readLine();
        if (line == null) {
          break;
        }
      }
      // Append the batch of records.
      ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
      // The append method is asynchronous. Rather than waiting for the method to complete,
      // which can hurt performance, register a completion callback and continue streaming.
      ApiFutures.addCallback(future, new AppendCompleteCallback(), MoreExecutors.directExecutor());
    }
  }
}
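The snippet registers an AppendCompleteCallback but does not show its body. Below is a minimal sketch of what such a callback might look like, assuming it only logs the outcome; the messages and the offset handling here are illustrative, not the sample's actual implementation.
static class AppendCompleteCallback implements ApiFutureCallback<AppendRowsResponse> {
  @Override
  public void onSuccess(AppendRowsResponse response) {
    // The append succeeded; the response carries the offset assigned to the batch.
    System.out.println(
        "Append succeeded at offset: " + response.getAppendResult().getOffset().getValue());
  }

  @Override
  public void onFailure(Throwable throwable) {
    // Illustrative only: a production callback would typically retry or record the error.
    System.out.format("Append failed: %s%n", throwable.toString());
  }
}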
use of com.google.cloud.bigquery.storage.v1.TableName in project java-bigquerystorage by googleapis.
the class ParallelWriteCommittedStream method writeLoop.
public void writeLoop(
    String projectId, String datasetName, String tableName, BigQueryWriteClient client) {
  LOG.info("Start writeLoop");
  long streamSwitchCount = 0;
  long successRowCount = 0;
  long failureRowCount = 0;
  Throwable loggedError = null;
  long deadlineMillis = System.currentTimeMillis() + TEST_TIME.toMillis();
  while (System.currentTimeMillis() < deadlineMillis) {
    try {
      WriteStream writeStream = createStream(projectId, datasetName, tableName, client);
      writeToStream(client, writeStream, deadlineMillis);
    } catch (Throwable e) {
      LOG.warning("Unexpected error writing to stream: " + e.toString());
    }
    waitForInflightToReachZero(Duration.ofMinutes(1));
    synchronized (this) {
      successRowCount += successCount * BATCH_SIZE;
      failureRowCount += failureCount * BATCH_SIZE;
      if (loggedError == null) {
        loggedError = error;
      }
    }
    if (!SUPPORT_STREAM_SWITCH) {
      // If stream switching is disabled, exit after writing to a single stream.
      break;
    }
    LOG.info("Sleeping before switching stream.");
    sleepIgnoringInterruption(Duration.ofMinutes(1));
    streamSwitchCount++;
  }
  LOG.info(
      "Finish writeLoop. Success row count: " + successRowCount
          + " Failure row count: " + failureRowCount
          + " Logged error: " + loggedError
          + " Stream switch count: " + streamSwitchCount);
  if (successRowCount > 0 && failureRowCount == 0 && loggedError == null) {
    System.out.println("All records are appended successfully.");
  }
}
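The helpers createStream, writeToStream, waitForInflightToReachZero, and sleepIgnoringInterruption are defined elsewhere in ParallelWriteCommittedStream. As a hedged sketch, sleepIgnoringInterruption presumably looks something like the following; the exact implementation in the sample may differ.
private void sleepIgnoringInterruption(Duration duration) {
  try {
    Thread.sleep(duration.toMillis());
  } catch (InterruptedException e) {
    // Swallow the interruption; the loop's deadline check bounds the total run time.
  }
}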
use of com.google.cloud.bigquery.storage.v1.TableName in project java-bigquerystorage by googleapis.
the class WriteBufferedStream method writeBufferedStream.
public static void writeBufferedStream(String projectId, String datasetName, String tableName)
    throws DescriptorValidationException, InterruptedException, IOException {
  try (BigQueryWriteClient client = BigQueryWriteClient.create()) {
    // Initialize a write stream for the specified table.
    // For more information on WriteStream.Type, see:
    // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1/WriteStream.Type.html
    WriteStream stream = WriteStream.newBuilder().setType(WriteStream.Type.BUFFERED).build();
    TableName parentTable = TableName.of(projectId, datasetName, tableName);
    CreateWriteStreamRequest createWriteStreamRequest =
        CreateWriteStreamRequest.newBuilder()
            .setParent(parentTable.toString())
            .setWriteStream(stream)
            .build();
    WriteStream writeStream = client.createWriteStream(createWriteStreamRequest);
    // Use the JSON stream writer to send records in JSON format. For more information, see:
    // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1/JsonStreamWriter.html
    try (JsonStreamWriter writer =
        JsonStreamWriter.newBuilder(writeStream.getName(), writeStream.getTableSchema()).build()) {
      // Write two batches to the stream, each with 10 JSON records.
      for (int i = 0; i < 2; i++) {
        JSONArray jsonArr = new JSONArray();
        for (int j = 0; j < 10; j++) {
          // Create a JSON object that is compatible with the table schema.
          JSONObject record = new JSONObject();
          record.put("col1", String.format("buffered-record %03d", i));
          jsonArr.put(record);
        }
        ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
        AppendRowsResponse response = future.get();
      }
      // Flush the buffer to make the appended rows visible.
      FlushRowsRequest flushRowsRequest =
          FlushRowsRequest.newBuilder()
              .setWriteStream(writeStream.getName())
              // Advance the cursor to the latest record.
              .setOffset(Int64Value.of(10 * 2 - 1))
              .build();
      FlushRowsResponse flushRowsResponse = client.flushRows(flushRowsRequest);
      // You can continue to write to the stream after flushing the buffer.
    }
    // Finalize the stream after use.
    FinalizeWriteStreamRequest finalizeWriteStreamRequest =
        FinalizeWriteStreamRequest.newBuilder().setName(writeStream.getName()).build();
    client.finalizeWriteStream(finalizeWriteStreamRequest);
    System.out.println("Appended and committed records successfully.");
  } catch (ExecutionException e) {
    // If the wrapped exception is a StatusRuntimeException, check the state of the operation.
    // If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information, see:
    // https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
    System.out.println(e);
  }
}
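The catch block above only prints the ExecutionException. A sketch of the retry check its comment describes might look like this, assuming io.grpc.Status and io.grpc.StatusRuntimeException are imported; the helper name isRetryable is hypothetical and not part of the sample.
private static boolean isRetryable(ExecutionException e) {
  if (e.getCause() instanceof StatusRuntimeException) {
    // Retry only on the transient gRPC status codes named in the comment above.
    Status.Code code = ((StatusRuntimeException) e.getCause()).getStatus().getCode();
    return code == Status.Code.INTERNAL
        || code == Status.Code.CANCELLED
        || code == Status.Code.ABORTED;
  }
  return false;
}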
use of com.google.cloud.bigquery.storage.v1.TableName in project debezium-server-batch by memiiso.
the class BatchUtilTest method testSimpleSchema.
@Test
public void testSimpleSchema() throws JsonProcessingException {
  TableName t = TableName.of("gcpProject", "bqDataset", "tableName");
  System.out.println(t.toString());
  StructType s = getEventSparkDfSchema(unwrapWithSchema);
  assertNotNull(s);
  assertTrue(
      s.catalogString()
          .contains("id:int,order_date:int,purchaser:int,quantity:int,product_id:int,__op:string"));
}
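For reference, TableName.of builds the fully qualified BigQuery resource name, so the println above emits:
projects/gcpProject/datasets/bqDataset/tables/tableName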