Search in sources :

Example 1 with LegacySQLTypeName

use of com.google.cloud.bigquery.LegacySQLTypeName in project DataflowTemplates by GoogleCloudPlatform.

The following example is from the class SchemaUtils, method convertProtoFieldDescriptorToBigQueryField.

/**
 * Handlers proto field to BigQuery field conversion.
 */
private static TableFieldSchema convertProtoFieldDescriptorToBigQueryField(FieldDescriptor fieldDescriptor, boolean preserveProtoFieldNames, @Nullable FieldDescriptor parent, int nestingLevel) {
    TableFieldSchema schema = new TableFieldSchema();
    String jsonName = fieldDescriptor.getJsonName();
    schema.setName(preserveProtoFieldNames || Strings.isNullOrEmpty(jsonName) ? fieldDescriptor.getName() : jsonName);
    LegacySQLTypeName sqlType = convertProtoTypeToSqlType(fieldDescriptor.getJavaType());
    schema.setType(sqlType.toString());
    if (sqlType == LegacySQLTypeName.RECORD) {
        if (nestingLevel > MAX_BIG_QUERY_RECORD_NESTING) {
            throw new IllegalArgumentException(String.format("Record field `%s.%s` is at BigQuery's nesting limit of %s, but it contains" + " message field `%s` of type `%s`. This could be caused by circular message" + " references, including a self-referential message.", parent.getMessageType().getName(), parent.getName(), MAX_BIG_QUERY_RECORD_NESTING, fieldDescriptor.getName(), fieldDescriptor.getMessageType().getName()));
        }
        List<TableFieldSchema> subFields = fieldDescriptor.getMessageType().getFields().stream().map(fd -> convertProtoFieldDescriptorToBigQueryField(fd, preserveProtoFieldNames, fieldDescriptor, nestingLevel + 1)).collect(toList());
        schema.setFields(subFields);
    }
    if (fieldDescriptor.isRepeated()) {
        schema.setMode("REPEATED");
    } else if (fieldDescriptor.isRequired()) {
        schema.setMode("REQUIRED");
    } else {
        schema.setMode("NULLABLE");
    }
    return schema;
}
Also used : MatchResult(org.apache.beam.sdk.io.fs.MatchResult) Descriptor(com.google.protobuf.Descriptors.Descriptor) LoggerFactory(org.slf4j.LoggerFactory) JavaType(com.google.protobuf.Descriptors.FieldDescriptor.JavaType) Strings(com.google.common.base.Strings) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) TableSchema(com.google.api.services.bigquery.model.TableSchema) LegacySQLTypeName(com.google.cloud.bigquery.LegacySQLTypeName) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) ReadableByteChannel(java.nio.channels.ReadableByteChannel) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.avro.Schema) Logger(org.slf4j.Logger) FileDescriptorSet(com.google.protobuf.DescriptorProtos.FileDescriptorSet) ProtoDomain(org.apache.beam.sdk.extensions.protobuf.ProtoDomain) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Channels(java.nio.channels.Channels) IOException(java.io.IOException) FieldDescriptor(com.google.protobuf.Descriptors.FieldDescriptor) Reader(java.io.Reader) IOUtils(org.apache.commons.io.IOUtils) Collectors.toList(java.util.stream.Collectors.toList) List(java.util.List) FileSystems(org.apache.beam.sdk.io.FileSystems) CharStreams(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.CharStreams) InputStream(java.io.InputStream) LegacySQLTypeName(com.google.cloud.bigquery.LegacySQLTypeName) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 2 with LegacySQLTypeName

use of com.google.cloud.bigquery.LegacySQLTypeName in project DataflowTemplates by GoogleCloudPlatform.

The following example is from the class BigQueryTableRowCleaner, method cleanTableRowField.

/**
 * Cleans the TableRow data for a given rowKey if the value does not match requirements of the
 * BigQuery Table column type.
 *
 * @param row a TableRow object to clean.
 * @param tableFields a FieldList of Bigquery columns.
 * @param rowKey a String with the name of the field to clean.
 */
public static void cleanTableRowField(TableRow row, FieldList tableFields, String rowKey) {
    Field tableField = tableFields.get(rowKey);
    LegacySQLTypeName fieldType = tableField.getType();
    if (fieldType == LegacySQLTypeName.STRING) {
        cleanTableRowFieldStrings(row, tableFields, rowKey);
    } else if (fieldType == LegacySQLTypeName.DATE) {
        cleanTableRowFieldDates(row, tableFields, rowKey);
    }
}
Also used : Field(com.google.cloud.bigquery.Field) LegacySQLTypeName(com.google.cloud.bigquery.LegacySQLTypeName)

Example 3 with LegacySQLTypeName

use of com.google.cloud.bigquery.LegacySQLTypeName in project workbench by all-of-us.

The following example is from the class BigQueryBaseTest, method parseSchema.

private Schema parseSchema(Column[] columns) {
    List<Field> schemaFields = new ArrayList<>();
    for (Column column : columns) {
        String typeString = column.getType();
        LegacySQLTypeName fieldType;
        switch(column.getType().toLowerCase()) {
            case "string":
                fieldType = LegacySQLTypeName.STRING;
                break;
            case "integer":
                fieldType = LegacySQLTypeName.INTEGER;
                break;
            case "timestamp":
                fieldType = LegacySQLTypeName.TIMESTAMP;
                break;
            case "float":
                fieldType = LegacySQLTypeName.FLOAT;
                break;
            case "boolean":
                fieldType = LegacySQLTypeName.BOOLEAN;
                break;
            case "date":
                fieldType = LegacySQLTypeName.DATE;
                break;
            default:
                throw new IllegalArgumentException("Unrecognized field type '" + typeString + "'.");
        }
        schemaFields.add(Field.of(column.getName(), fieldType));
    }
    return Schema.of(schemaFields);
}
Also used : Field(com.google.cloud.bigquery.Field) LegacySQLTypeName(com.google.cloud.bigquery.LegacySQLTypeName) ArrayList(java.util.ArrayList)

Example 4 with LegacySQLTypeName

use of com.google.cloud.bigquery.LegacySQLTypeName in project DataflowTemplates by GoogleCloudPlatform.

The following example is from the class SchemaUtils, method createBigQuerySchema.

/**
 * Infers the proper {@link TableSchema} for a Protobuf message.
 *
 * <p>The name value for each field in the returned schema will default to {@link
 * FieldDescriptor#getJsonName()}. If this value is not present, then it will use {@link
 * FieldDescriptor#getName()}.
 *
 * <p>Type mappings follow the rules:
 *
 * <ul>
 *   <li>Integers and longs map to {@link LegacySQLTypeName#INTEGER}.
 *   <li>Floats and doubles map to {@link LegacySQLTypeName#FLOAT}.
 *   <li>Booleans map to {@link LegacySQLTypeName#BOOLEAN}.
 *   <li>Strings and enums map to {@link LegacySQLTypeName#STRING}.
 *   <li>Byte strings map to {@link LegacySQLTypeName#BYTES}.
 *   <li>Messages and maps map to {@link LegacySQLTypeName#RECORD}.
 * </ul>
 *
 * <p>Fields marked as `oneof` in the proto definition will be expanded out into individual
 * fields. `oneof` fields are incompatible with BigQuery otherwise.
 *
 * <p>An error will be thrown if excessive RECORD nesting is detected. BigQuery has more
 * restrictive RECORD nesting limits than Protobuf has message nesting. Circular message
 * references and self-referential messages are not supported for this reason.
 *
 * <p>Proto repeated fields will be marked as REPEATED in BigQuery. Required and optional fields
 * will be marked as REQUIRED and NULLABLE respectively.
 *
 * <p>No description or policy tags will be set for any of the fields.
 *
 * @param descriptor a proto {@link Descriptor} to be converted into a BigQuery schema
 * @param preserveProtoFieldNames true to keep proto snake_case. False to use lowerCamelCase. If
 *     set to false and {@link FieldDescriptor#getJsonName()} is not set, then snake_case will be
 *     used.
 * @return a full BigQuery schema definition
 */
public static TableSchema createBigQuerySchema(Descriptor descriptor, boolean preserveProtoFieldNames) {
    // TableSchema and TableFieldSchema work better with Beam than Schema and Field.
    List<TableFieldSchema> fields = descriptor.getFields().stream().map(fd -> convertProtoFieldDescriptorToBigQueryField(fd, preserveProtoFieldNames, /* parent= */
    null, /* nestingLevel= */
    1)).collect(toList());
    TableSchema schema = new TableSchema();
    schema.setFields(fields);
    return schema;
}
Also used : MatchResult(org.apache.beam.sdk.io.fs.MatchResult) Descriptor(com.google.protobuf.Descriptors.Descriptor) LoggerFactory(org.slf4j.LoggerFactory) JavaType(com.google.protobuf.Descriptors.FieldDescriptor.JavaType) Strings(com.google.common.base.Strings) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) TableSchema(com.google.api.services.bigquery.model.TableSchema) LegacySQLTypeName(com.google.cloud.bigquery.LegacySQLTypeName) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) ReadableByteChannel(java.nio.channels.ReadableByteChannel) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.avro.Schema) Logger(org.slf4j.Logger) FileDescriptorSet(com.google.protobuf.DescriptorProtos.FileDescriptorSet) ProtoDomain(org.apache.beam.sdk.extensions.protobuf.ProtoDomain) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Channels(java.nio.channels.Channels) IOException(java.io.IOException) FieldDescriptor(com.google.protobuf.Descriptors.FieldDescriptor) Reader(java.io.Reader) IOUtils(org.apache.commons.io.IOUtils) Collectors.toList(java.util.stream.Collectors.toList) List(java.util.List) FileSystems(org.apache.beam.sdk.io.FileSystems) CharStreams(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.CharStreams) InputStream(java.io.InputStream) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 5 with LegacySQLTypeName

use of com.google.cloud.bigquery.LegacySQLTypeName in project java-retail by googleapis.

The following example is from the class SetupCleanup, method getGson.

public static Gson getGson() {
    JsonDeserializer<LegacySQLTypeName> typeDeserializer = (jsonElement, type, deserializationContext) -> {
        return LegacySQLTypeName.valueOf(jsonElement.getAsString());
    };
    JsonDeserializer<FieldList> subFieldsDeserializer = (jsonElement, type, deserializationContext) -> {
        Field[] fields = deserializationContext.deserialize(jsonElement.getAsJsonArray(), Field[].class);
        return FieldList.of(fields);
    };
    return new GsonBuilder().registerTypeAdapter(LegacySQLTypeName.class, typeDeserializer).registerTypeAdapter(FieldList.class, subFieldsDeserializer).create();
}
Also used : FormatOptions(com.google.cloud.bigquery.FormatOptions) Arrays(java.util.Arrays) DatasetDeleteOption(com.google.cloud.bigquery.BigQuery.DatasetDeleteOption) StandardTableDefinition(com.google.cloud.bigquery.StandardTableDefinition) NotFoundException(com.google.api.gax.rpc.NotFoundException) TableId(com.google.cloud.bigquery.TableId) STANDARD(com.google.cloud.storage.StorageClass.STANDARD) BigQueryException(com.google.cloud.bigquery.BigQueryException) BlobId(com.google.cloud.storage.BlobId) StorageOptions(com.google.cloud.storage.StorageOptions) DatasetId(com.google.cloud.bigquery.DatasetId) GsonBuilder(com.google.gson.GsonBuilder) BigQuery(com.google.cloud.bigquery.BigQuery) BigQueryOptions(com.google.cloud.bigquery.BigQueryOptions) Dataset(com.google.cloud.bigquery.Dataset) Blob(com.google.cloud.storage.Blob) Gson(com.google.gson.Gson) Schema(com.google.cloud.bigquery.Schema) FulfillmentInfo(com.google.cloud.retail.v2.FulfillmentInfo) Product(com.google.cloud.retail.v2.Product) Job(com.google.cloud.bigquery.Job) LegacySQLTypeName(com.google.cloud.bigquery.LegacySQLTypeName) Field(com.google.cloud.bigquery.Field) TableDefinition(com.google.cloud.bigquery.TableDefinition) DeleteProductRequest(com.google.cloud.retail.v2.DeleteProductRequest) Type(com.google.cloud.retail.v2.Product.Type) Bucket(com.google.cloud.storage.Bucket) Page(com.google.api.gax.paging.Page) Availability(com.google.cloud.retail.v2.Product.Availability) BlobInfo(com.google.cloud.storage.BlobInfo) JobInfo(com.google.cloud.bigquery.JobInfo) LoadJobConfiguration(com.google.cloud.bigquery.LoadJobConfiguration) Files(java.nio.file.Files) BucketInfo(com.google.cloud.storage.BucketInfo) IOException(java.io.IOException) FieldList(com.google.cloud.bigquery.FieldList) GetProductRequest(com.google.cloud.retail.v2.GetProductRequest) Paths(java.nio.file.Paths) CreateProductRequest(com.google.cloud.retail.v2.CreateProductRequest) ProductServiceClient(com.google.cloud.retail.v2.ProductServiceClient) 
StorageException(com.google.cloud.storage.StorageException) JsonDeserializer(com.google.gson.JsonDeserializer) DatasetInfo(com.google.cloud.bigquery.DatasetInfo) TableInfo(com.google.cloud.bigquery.TableInfo) PriceInfo(com.google.cloud.retail.v2.PriceInfo) Storage(com.google.cloud.storage.Storage) LegacySQLTypeName(com.google.cloud.bigquery.LegacySQLTypeName) GsonBuilder(com.google.gson.GsonBuilder) FieldList(com.google.cloud.bigquery.FieldList)

Aggregations

LegacySQLTypeName (com.google.cloud.bigquery.LegacySQLTypeName)5 Field (com.google.cloud.bigquery.Field)3 IOException (java.io.IOException)3 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)2 TableSchema (com.google.api.services.bigquery.model.TableSchema)2 Page (com.google.api.gax.paging.Page)1 NotFoundException (com.google.api.gax.rpc.NotFoundException)1 BigQuery (com.google.cloud.bigquery.BigQuery)1 DatasetDeleteOption (com.google.cloud.bigquery.BigQuery.DatasetDeleteOption)1 BigQueryException (com.google.cloud.bigquery.BigQueryException)1 BigQueryOptions (com.google.cloud.bigquery.BigQueryOptions)1 Dataset (com.google.cloud.bigquery.Dataset)1 DatasetId (com.google.cloud.bigquery.DatasetId)1 DatasetInfo (com.google.cloud.bigquery.DatasetInfo)1 FieldList (com.google.cloud.bigquery.FieldList)1 FormatOptions (com.google.cloud.bigquery.FormatOptions)1 Job (com.google.cloud.bigquery.Job)1 JobInfo (com.google.cloud.bigquery.JobInfo)1 LoadJobConfiguration (com.google.cloud.bigquery.LoadJobConfiguration)1 Schema (com.google.cloud.bigquery.Schema)1