Search in sources :

Example 1 with ProtoRows

use of com.google.cloud.bigquery.storage.v1.ProtoRows in project spark-bigquery-connector by GoogleCloudDataproc.

In class BigQueryDirectDataWriterHelper, the method commit.

/**
 * Flushes any rows still buffered in {@code protoRows}, waits for 500 milliseconds, and then
 * finalizes the write-stream.
 *
 * @return The finalized row-count of the write-stream.
 * @throws IOException If the row-count returned by the FinalizeWriteStreamResponse does not match
 *     the expected offset (which is equal to the number of rows appended thus far).
 * @see #writeStreamRowCount
 */
public long commit() throws IOException {
    // Push any rows not yet sent before finalizing the stream.
    boolean hasBufferedRows = this.protoRows.getSerializedRowsCount() != 0;
    if (hasBufferedRows) {
        sendAppendRowsRequest();
    }
    waitBeforeFinalization();
    // Finalize the stream and cross-check the server-reported row count
    // against the number of rows we believe we appended.
    FinalizeWriteStreamRequest request =
        FinalizeWriteStreamRequest.newBuilder().setName(writeStreamName).build();
    FinalizeWriteStreamResponse response = retryFinalizeWriteStream(request);
    long expectedRowCount = writeStreamRowCount;
    long actualRowCount = response.getRowCount();
    if (actualRowCount != expectedRowCount) {
        throw new IOException(String.format("On stream %s finalization, expected finalized row count %d but received %d", writeStreamName, expectedRowCount, actualRowCount));
    }
    logger.debug("Write-stream {} finalized with row-count {}", writeStreamName, actualRowCount);
    return actualRowCount;
}
Also used : FinalizeWriteStreamRequest(com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamRequest) FinalizeWriteStreamResponse(com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse) IOException(java.io.IOException)

Example 2 with ProtoRows

use of com.google.cloud.bigquery.storage.v1.ProtoRows in project spark-bigquery-connector by GoogleCloudDataproc.

In class ProtobufUtils, the method toProtoRows.

/**
 * Spark Row --> ProtoRows converter utils: To be used by the DataWriters facing the BigQuery
 * Storage Write API.
 *
 * <p>Each InternalRow is serialized into a DynamicMessage conforming to the descriptor derived
 * from the Spark schema, and the serialized bytes are collected into a single ProtoRows message.
 */
public static ProtoRows toProtoRows(StructType sparkSchema, InternalRow[] rows) {
    try {
        // Derive a protobuf descriptor once; every row shares the same schema.
        Descriptors.Descriptor schemaDescriptor = toDescriptor(sparkSchema);
        ProtoRows.Builder builder = ProtoRows.newBuilder();
        for (InternalRow sparkRow : rows) {
            builder.addSerializedRows(
                buildSingleRowMessage(sparkSchema, schemaDescriptor, sparkRow).toByteString());
        }
        return builder.build();
    } catch (Exception e) {
        // Wrap any conversion failure (descriptor derivation or row serialization) with context.
        throw new RuntimeException("Could not convert Internal Rows to Proto Rows.", e);
    }
}
Also used : ProtoRows(com.google.cloud.bigquery.storage.v1.ProtoRows) Descriptors(com.google.protobuf.Descriptors) DynamicMessage(com.google.protobuf.DynamicMessage) InternalRow(org.apache.spark.sql.catalyst.InternalRow)

Example 3 with ProtoRows

use of com.google.cloud.bigquery.storage.v1.ProtoRows in project spark-bigquery-connector by GoogleCloudDataproc.

In class ProtobufUtilsTest, the method testSettingARequiredFieldAsNull.

/**
 * Verifies that converting a null value fails for a required (nullable = false) field and
 * succeeds for a nullable (nullable = true) field.
 */
@Test
public void testSettingARequiredFieldAsNull() throws Exception {
    try {
        // Required field set to null: the converter must reject this.
        toProtoRows(new StructType().add(new StructField("String", DataTypes.StringType, false, Metadata.empty())), new InternalRow[] { new GenericInternalRow(new Object[] { null }) });
        // fail() throws AssertionError, which the catch (Exception) below does not swallow.
        fail("Convert did not assert field's 'Required' status");
    } catch (Exception ignored) {
        // Expected: null is not allowed for a required field.
    }
    try {
        // Nullable field set to null: the converter must accept this.
        toProtoRows(new StructType().add(new StructField("String", DataTypes.StringType, true, Metadata.empty())), new InternalRow[] { new GenericInternalRow(new Object[] { null }) });
    } catch (Exception e) {
        fail("A nullable field could not be set to null.");
    }
}
Also used : ProtobufUtils.toProtoRows(com.google.cloud.spark.bigquery.ProtobufUtils.toProtoRows) ProtoRows(com.google.cloud.bigquery.storage.v1.ProtoRows) StructField(org.apache.spark.sql.types.StructField) StructType(org.apache.spark.sql.types.StructType) GenericInternalRow(org.apache.spark.sql.catalyst.expressions.GenericInternalRow) AssumptionViolatedException(org.junit.AssumptionViolatedException) Test(org.junit.Test)

Example 4 with ProtoRows

use of com.google.cloud.bigquery.storage.v1.ProtoRows in project spark-bigquery-connector by GoogleCloudDataproc.

In class ProtobufUtilsTest, the method testSparkRowToProtoRow.

@Test
public void testSparkRowToProtoRow() throws Exception {
    // Converts one InternalRow covering every type in BIG_SPARK_SCHEMA (int, string, int array,
    // nested struct, double, boolean, bytes, two timestamp longs, a max-magnitude NUMERIC decimal,
    // and a BIGNUMERIC boundary value as a string) into ProtoRows.
    ProtoRows converted = toProtoRows(BIG_SPARK_SCHEMA, new InternalRow[] { new GenericInternalRow(new Object[] { 1, UTF8String.fromString("A"), ArrayData.toArrayData(new int[] { 0, 1, 2 }), INTERNAL_STRUCT_DATA, 3.14, true, new byte[] { 11, 0x7F }, 1594080000000L, 1594080000000L, Decimal.apply(new BigDecimal("-99999999999999999999999999999.999999999", new MathContext(BQ_NUMERIC_PRECISION)), BQ_NUMERIC_PRECISION, BQ_NUMERIC_SCALE), UTF8String.fromString("-578960446186580977117854925043439539266.34992332820282019728792003956564819968") }) });
    // MY_PROTO_ROWS is the expected pre-built fixture for this exact row.
    ProtoRows expected = MY_PROTO_ROWS;
    // Compare the serialized bytes of the single converted row against the fixture byte-for-byte.
    assertThat(converted.getSerializedRows(0).toByteArray()).isEqualTo(expected.getSerializedRows(0).toByteArray());
}
Also used : ProtobufUtils.toProtoRows(com.google.cloud.spark.bigquery.ProtobufUtils.toProtoRows) ProtoRows(com.google.cloud.bigquery.storage.v1.ProtoRows) GenericInternalRow(org.apache.spark.sql.catalyst.expressions.GenericInternalRow) BigDecimal(java.math.BigDecimal) MathContext(java.math.MathContext) Test(org.junit.Test)

Example 5 with ProtoRows

use of com.google.cloud.bigquery.storage.v1.ProtoRows in project java-bigquerystorage by googleapis.

In class ParallelWriteCommittedStream, the method createAppendRows.

/**
 * Builds a ProtoRows batch of BATCH_SIZE rows, each carrying a random payload of ROW_SIZE bytes
 * in the "col1" string column, serialized against the given descriptor.
 */
private ProtoRows createAppendRows(Descriptor descriptor) {
    ProtoRows.Builder rowsBuilder = ProtoRows.newBuilder();
    for (int i = 0; i < BATCH_SIZE; i++) {
        byte[] payload = new byte[ROW_SIZE];
        ThreadLocalRandom.current().nextBytes(payload);
        JSONObject record = new JSONObject();
        // Use an explicit charset: new String(payload) depends on the platform default charset
        // (pre-Java-18), making the payload non-deterministic across JVMs. ISO-8859-1 maps every
        // byte to a character, so the string length stays exactly ROW_SIZE.
        record.put("col1", new String(payload, java.nio.charset.StandardCharsets.ISO_8859_1));
        Message protoMessage = JsonToProtoMessage.convertJsonToProtoMessage(descriptor, record);
        rowsBuilder.addSerializedRows(protoMessage.toByteString());
    }
    return rowsBuilder.build();
}
Also used : ProtoRows(com.google.cloud.bigquery.storage.v1.ProtoRows) JSONObject(org.json.JSONObject) JsonToProtoMessage(com.google.cloud.bigquery.storage.v1.JsonToProtoMessage) Message(com.google.protobuf.Message)

Aggregations

ProtoRows (com.google.cloud.bigquery.storage.v1.ProtoRows)6 DynamicMessage (com.google.protobuf.DynamicMessage)3 ProtobufUtils.toProtoRows (com.google.cloud.spark.bigquery.ProtobufUtils.toProtoRows)2 ByteString (com.google.protobuf.ByteString)2 GenericInternalRow (org.apache.spark.sql.catalyst.expressions.GenericInternalRow)2 Test (org.junit.Test)2 TableRow (com.google.api.services.bigquery.model.TableRow)1 AppendRowsResponse (com.google.cloud.bigquery.storage.v1.AppendRowsResponse)1 FinalizeWriteStreamRequest (com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamRequest)1 FinalizeWriteStreamResponse (com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse)1 JsonToProtoMessage (com.google.cloud.bigquery.storage.v1.JsonToProtoMessage)1 WriteStream (com.google.cloud.bigquery.storage.v1.WriteStream)1 Descriptors (com.google.protobuf.Descriptors)1 Descriptor (com.google.protobuf.Descriptors.Descriptor)1 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 Message (com.google.protobuf.Message)1 IOException (java.io.IOException)1 BigDecimal (java.math.BigDecimal)1 MathContext (java.math.MathContext)1 Iterator (java.util.Iterator)1