Search in sources :

Example 46 with TableName

use of com.google.cloud.bigquery.storage.v1.TableName in project java-bigquerystorage by googleapis.

the class AppendCompleteCallback method writeToDefaultStream.

/**
 * Writes records from the source file to the destination table.
 *
 * <p>Each line of {@code dataFile} is parsed as one JSON object. Lines are grouped into batches
 * of at most 100 records, and each batch is handed to a {@code JsonStreamWriter} built for the
 * table. Appends are not awaited; an {@code AppendCompleteCallback} is registered on each
 * returned future instead.
 *
 * @param projectId project used to build the parent table name
 * @param datasetName dataset containing the destination table
 * @param tableName destination table; its schema is fetched and converted for the writer
 * @param dataFile path of the file to read, one JSON object per line
 * @throws DescriptorValidationException declared by the original signature; presumably raised
 *     while building or using the writer (confirm against the JsonStreamWriter API)
 * @throws InterruptedException declared by the original signature
 * @throws IOException if the source file cannot be opened, read or closed
 */
public static void writeToDefaultStream(String projectId, String datasetName, String tableName, String dataFile) throws DescriptorValidationException, InterruptedException, IOException {
    // Upper bound on the number of records sent in a single append call.
    final int maxBatchSize = 100;
    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
    // Get the schema of the destination table and convert to the equivalent BigQueryStorage type.
    Table table = bigquery.getTable(datasetName, tableName);
    Schema schema = table.getDefinition().getSchema();
    TableSchema tableSchema = BqToBqStorageSchemaConverter.convertTableSchema(schema);
    // Use the JSON stream writer to send records in JSON format.
    TableName parentTable = TableName.of(projectId, datasetName, tableName);
    // Both the writer and the file reader are managed here so that the file handle is released
    // even when parsing or appending throws. Resources close in reverse order: reader, then writer.
    try (JsonStreamWriter writer = JsonStreamWriter.newBuilder(parentTable.toString(), tableSchema).build();
            BufferedReader reader = new BufferedReader(new FileReader(dataFile))) {
        // Read JSON data from the source file and send it to the Write API.
        String line = reader.readLine();
        while (line != null) {
            // As a best practice, send batches of records, instead of single records at a time.
            JSONArray jsonArr = new JSONArray();
            for (int i = 0; i < maxBatchSize; i++) {
                JSONObject record = new JSONObject(line);
                jsonArr.put(record);
                line = reader.readLine();
                if (line == null) {
                    // End of file: send whatever has been collected so far.
                    break;
                }
            }
            // Send the batch.
            ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
            // The append method is asynchronous. Rather than waiting for the method to complete,
            // which can hurt performance, register a completion callback and continue streaming.
            ApiFutures.addCallback(future, new AppendCompleteCallback(), MoreExecutors.directExecutor());
        }
    }
}
Also used : BigQuery(com.google.cloud.bigquery.BigQuery) Table(com.google.cloud.bigquery.Table) TableSchema(com.google.cloud.bigquery.storage.v1.TableSchema) Schema(com.google.cloud.bigquery.Schema) JSONArray(org.json.JSONArray) AppendRowsResponse(com.google.cloud.bigquery.storage.v1.AppendRowsResponse) TableName(com.google.cloud.bigquery.storage.v1.TableName) JSONObject(org.json.JSONObject) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) JsonStreamWriter(com.google.cloud.bigquery.storage.v1.JsonStreamWriter)

Example 47 with TableName

use of com.google.cloud.bigquery.storage.v1.TableName in project java-bigquerystorage by googleapis.

the class ParallelWriteCommittedStream method writeLoop.

// writeLoop: Repeatedly creates a stream and writes to it until the test deadline passes,
// accumulating row counts and the first recorded error, then logs a summary.
public void writeLoop(String projectId, String datasetName, String tableName, BigQueryWriteClient client) {
    LOG.info("Start writeLoop");
    long switches = 0;
    long rowsSucceeded = 0;
    long rowsFailed = 0;
    Throwable firstError = null;
    final long deadlineMillis = System.currentTimeMillis() + TEST_TIME.toMillis();
    while (System.currentTimeMillis() < deadlineMillis) {
        try {
            WriteStream currentStream = createStream(projectId, datasetName, tableName, client);
            writeToStream(client, currentStream, deadlineMillis);
        } catch (Throwable e) {
            // Failures are logged and the loop carries on with the bookkeeping below.
            LOG.warning("Unexpected error writing to stream: " + e.toString());
        }
        waitForInflightToReachZero(Duration.ofMinutes(1));
        // Fold the instance-level counters into the local totals while holding this object's monitor.
        synchronized (this) {
            rowsSucceeded += successCount * BATCH_SIZE;
            rowsFailed += failureCount * BATCH_SIZE;
            // Only the first non-null error is retained for the summary.
            if (firstError == null) {
                firstError = error;
            }
        }
        if (!SUPPORT_STREAM_SWITCH) {
            // Stream switching is disabled, so a single pass is all we do.
            break;
        }
        LOG.info("Sleeping before switching stream.");
        sleepIgnoringInterruption(Duration.ofMinutes(1));
        streamSwitchTick:
        switches++;
    }
    LOG.info("Finish writeLoop. Success row count: " + rowsSucceeded
            + " Failure row count: " + rowsFailed
            + " Logged error: " + firstError
            + " Stream switch count: " + switches);
    final boolean wroteSomething = rowsSucceeded > 0;
    final boolean nothingFailed = rowsFailed == 0 && firstError == null;
    if (wroteSomething && nothingFailed) {
        System.out.println("All records are appended successfully.");
    }
}
Also used : WriteStream(com.google.cloud.bigquery.storage.v1.WriteStream)

Example 48 with TableName

use of com.google.cloud.bigquery.storage.v1.TableName in project java-bigquerystorage by googleapis.

the class WriteBufferedStream method writeBufferedStream.

// writeBufferedStream: Creates a BUFFERED write stream on the given table, appends a fixed
// number of JSON batches to it, flushes the rows up to the last appended offset, and then
// finalizes the stream. An ExecutionException from waiting on an append is printed, not rethrown.
public static void writeBufferedStream(String projectId, String datasetName, String tableName) throws DescriptorValidationException, InterruptedException, IOException {
    // Number of append calls and records per call. The flush offset below is derived from
    // these two values so the three cannot drift apart.
    final int batchCount = 2;
    final int recordsPerBatch = 10;
    try (BigQueryWriteClient client = BigQueryWriteClient.create()) {
        // Initialize a write stream for the specified table.
        // For more information on WriteStream.Type, see:
        // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1/WriteStream.Type.html
        WriteStream stream = WriteStream.newBuilder().setType(WriteStream.Type.BUFFERED).build();
        TableName parentTable = TableName.of(projectId, datasetName, tableName);
        CreateWriteStreamRequest createWriteStreamRequest = CreateWriteStreamRequest.newBuilder().setParent(parentTable.toString()).setWriteStream(stream).build();
        WriteStream writeStream = client.createWriteStream(createWriteStreamRequest);
        // Use the JSON stream writer to send records in JSON format. For more information, see:
        // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1/JsonStreamWriter.html
        try (JsonStreamWriter writer = JsonStreamWriter.newBuilder(writeStream.getName(), writeStream.getTableSchema()).build()) {
            // Write batchCount batches to the stream, each with recordsPerBatch JSON records.
            for (int i = 0; i < batchCount; i++) {
                JSONArray jsonArr = new JSONArray();
                for (int j = 0; j < recordsPerBatch; j++) {
                    // Create a JSON object that is compatible with the table schema.
                    // Note: every record in a batch carries the batch index i, not the row index.
                    JSONObject record = new JSONObject();
                    record.put("col1", String.format("buffered-record %03d", i));
                    jsonArr.put(record);
                }
                ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
                // Wait for this append to complete before sending the next batch; the
                // response itself is not needed here.
                future.get();
            }
            // Flush the buffer, advancing the cursor to the last appended record
            // (offsets are zero-based, hence the -1).
            final long lastRecordOffset = (long) batchCount * recordsPerBatch - 1;
            FlushRowsRequest flushRowsRequest = FlushRowsRequest.newBuilder().setWriteStream(writeStream.getName()).setOffset(Int64Value.of(lastRecordOffset)).build();
            client.flushRows(flushRowsRequest);
            // You can continue to write to the stream after flushing the buffer.
        }
        // Finalize the stream after use.
        FinalizeWriteStreamRequest finalizeWriteStreamRequest = FinalizeWriteStreamRequest.newBuilder().setName(writeStream.getName()).build();
        client.finalizeWriteStream(finalizeWriteStreamRequest);
        System.out.println("Appended and committed records successfully.");
    } catch (ExecutionException e) {
        // If the wrapped exception is a StatusRuntimeException, check the state of the operation.
        // If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information, see:
        // https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
        System.out.println(e);
    }
}
Also used : FinalizeWriteStreamRequest(com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamRequest) JSONArray(org.json.JSONArray) AppendRowsResponse(com.google.cloud.bigquery.storage.v1.AppendRowsResponse) WriteStream(com.google.cloud.bigquery.storage.v1.WriteStream) BigQueryWriteClient(com.google.cloud.bigquery.storage.v1.BigQueryWriteClient) TableName(com.google.cloud.bigquery.storage.v1.TableName) CreateWriteStreamRequest(com.google.cloud.bigquery.storage.v1.CreateWriteStreamRequest) JSONObject(org.json.JSONObject) ExecutionException(java.util.concurrent.ExecutionException) JsonStreamWriter(com.google.cloud.bigquery.storage.v1.JsonStreamWriter) FlushRowsRequest(com.google.cloud.bigquery.storage.v1.FlushRowsRequest) FlushRowsResponse(com.google.cloud.bigquery.storage.v1.FlushRowsResponse)

Example 49 with TableName

use of com.google.cloud.bigquery.storage.v1.TableName in project debezium-server-batch by memiiso.

the class BatchUtilTest method testSimpleSchema.

// testSimpleSchema: Prints a BigQuery table name built from fixed parts, then checks that the
// schema returned by getEventSparkDfSchema for unwrapWithSchema is non-null and that its
// catalog string contains the expected column list.
@Test
public void testSimpleSchema() throws JsonProcessingException {
    final TableName tableId = TableName.of("gcpProject", "bqDataset", "tableName");
    System.out.println(tableId.toString());

    final StructType sparkSchema = getEventSparkDfSchema(unwrapWithSchema);
    assertNotNull(sparkSchema);

    final String expectedColumns = "id:int,order_date:int,purchaser:int,quantity:int,product_id:int,__op:string";
    final String catalog = sparkSchema.catalogString();
    assertTrue(catalog.contains(expectedColumns));
}
Also used : TableName(com.google.cloud.bigquery.storage.v1.TableName) StructType(org.apache.spark.sql.types.StructType) Test(org.junit.jupiter.api.Test)

Aggregations

Test (org.junit.Test)33 ByteString (com.google.protobuf.ByteString)28 ArrayList (java.util.ArrayList)20 TableName (com.google.bigtable.v2.TableName)18 TableName (com.google.bigtable.admin.v2.TableName)17 InvalidArgumentException (com.google.api.gax.rpc.InvalidArgumentException)13 AbstractMessage (com.google.protobuf.AbstractMessage)13 StatusRuntimeException (io.grpc.StatusRuntimeException)13 Mutation (com.google.bigtable.v2.Mutation)12 JSONArray (org.json.JSONArray)9 JSONObject (org.json.JSONObject)9 ExecutionException (java.util.concurrent.ExecutionException)8 TableName (com.google.cloud.bigquery.storage.v1.TableName)7 ReadModifyWriteRule (com.google.bigtable.v2.ReadModifyWriteRule)6 RowFilter (com.google.bigtable.v2.RowFilter)6 BaseBigtableDataClient (com.google.cloud.bigtable.data.v2.BaseBigtableDataClient)6 ColumnFamily (com.google.bigtable.admin.v2.ColumnFamily)5 CheckAndMutateRowResponse (com.google.bigtable.v2.CheckAndMutateRowResponse)4 ReadModifyWriteRowResponse (com.google.bigtable.v2.ReadModifyWriteRowResponse)4 FieldValueList (com.google.cloud.bigquery.FieldValueList)4