Search in sources :

Example 11 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryUtils method fromTableFieldSchema.

/**
 * Builds a Beam {@link Schema} from a list of BigQuery field definitions.
 *
 * <p>Each field's type is resolved through {@code fromTableFieldSchemaType}. A field whose mode is
 * {@code REPEATED} is wrapped in an array type unless the resolved type is already a map type. A
 * field is marked nullable when its mode is missing or is {@code NULLABLE}. Non-empty descriptions
 * are carried over to the resulting field.
 *
 * @param tableFieldSchemas the BigQuery field definitions to convert, in output order
 * @param options conversion options, passed through unchanged to the type resolution helper
 * @return a schema containing one field per entry of {@code tableFieldSchemas}
 */
private static Schema fromTableFieldSchema(List<TableFieldSchema> tableFieldSchemas, SchemaConversionOptions options) {
    Schema.Builder builder = Schema.builder();
    for (TableFieldSchema bqField : tableFieldSchemas) {
        FieldType beamType = fromTableFieldSchemaType(bqField.getType(), bqField.getFields(), options);

        // A missing mode string is represented as null rather than as an empty Optional.
        String rawMode = bqField.getMode();
        Mode mode = (rawMode == null) ? null : Mode.valueOf(rawMode);

        // Maps are left as-is even when repeated; everything else repeated becomes an array.
        if (mode == Mode.REPEATED && !beamType.getTypeName().isMapType()) {
            beamType = FieldType.array(beamType);
        }

        // No mode at all is treated the same way as an explicit NULLABLE.
        boolean isNullable = (mode == null) || (mode == Mode.NULLABLE);
        Field beamField = Field.of(bqField.getName(), beamType).withNullable(isNullable);

        String description = bqField.getDescription();
        if (description != null && !description.isEmpty()) {
            beamField = beamField.withDescription(description);
        }
        builder.addField(beamField);
    }
    return builder.build();
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.beam.sdk.schemas.Schema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 12 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryUtils method toTableFieldSchema.

/**
 * Converts every field of a Beam {@link Schema} into a BigQuery {@link TableFieldSchema}.
 *
 * <p>Mode handling: a non-nullable field is marked {@code REQUIRED}; a collection field is marked
 * {@code REPEATED} and described by its element type; a map field is marked {@code REPEATED} and
 * described as a row holding the map's key and value fields. Row types (including rows synthesized
 * for maps) are converted recursively into nested fields.
 *
 * @param schema the Beam schema to convert
 * @return the converted fields, in the same order as {@code schema.getFields()}
 * @throws IllegalArgumentException if a collection's element type is itself a collection or a map
 */
private static List<TableFieldSchema> toTableFieldSchema(Schema schema) {
    List<TableFieldSchema> converted = new ArrayList<>(schema.getFieldCount());
    for (Field beamField : schema.getFields()) {
        FieldType effectiveType = beamField.getType();
        TableFieldSchema bqField = new TableFieldSchema().setName(beamField.getName());

        String description = beamField.getDescription();
        if (description != null && !description.isEmpty()) {
            bqField.setDescription(description);
        }

        // Nullability is read from the declared (outer) type, before any unwrapping below.
        if (!effectiveType.getNullable()) {
            bqField.setMode(Mode.REQUIRED.toString());
        }

        if (effectiveType.getTypeName().isCollectionType()) {
            // Describe the field by its element type; REPEATED replaces any REQUIRED set above.
            effectiveType = effectiveType.getCollectionElementType();
            TypeName elementTypeName = effectiveType.getTypeName();
            if (elementTypeName.isCollectionType() || elementTypeName.isMapType()) {
                throw new IllegalArgumentException("Array of collection is not supported in BigQuery.");
            }
            bqField.setMode(Mode.REPEATED.toString());
        }

        TypeName typeName = effectiveType.getTypeName();
        if (TypeName.ROW == typeName) {
            bqField.setFields(toTableFieldSchema(effectiveType.getRowSchema()));
        } else if (TypeName.MAP == typeName) {
            // A map is expressed as a repeated row of (key, value) entries.
            Schema entrySchema =
                Schema.builder()
                    .addField(BIGQUERY_MAP_KEY_FIELD_NAME, effectiveType.getMapKeyType())
                    .addField(BIGQUERY_MAP_VALUE_FIELD_NAME, effectiveType.getMapValueType())
                    .build();
            effectiveType = FieldType.row(entrySchema);
            bqField.setFields(toTableFieldSchema(entrySchema));
            bqField.setMode(Mode.REPEATED.toString());
        }

        bqField.setType(toStandardSQLTypeName(effectiveType).toString());
        converted.add(bqField);
    }
    return converted;
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.beam.sdk.schemas.Schema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 13 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigqueryClient method getTypedTableRow.

/**
 * Produces a {@link TableRow} whose values are keyed by field name and converted according to the
 * given field schemas.
 *
 * <p>When {@code rawRow} is already a {@link TableRow}, that same instance is reused and returned:
 * its cell list is detached (set to {@code null}) and the named values are written into it.
 * Otherwise a new {@link TableRow} is created and the cells are read from the {@code "f"} entry of
 * the map. Cells whose converted value is {@code null} are not written to the result.
 *
 * @param fields the schema of each cell, in cell order
 * @param rawRow the raw row, either a {@link TableRow} or a plain map with an {@code "f"} entry
 * @return the row populated with converted values by field name
 * @throws IllegalStateException if the number of cells differs from the number of fields
 * @throws IllegalArgumentException if a field name is contained in {@code RESERVED_FIELD_NAMES}
 */
private TableRow getTypedTableRow(List<TableFieldSchema> fields, Map<String, Object> rawRow) {
    final TableRow typedRow;
    final List<? extends Map<String, Object>> rawCells;
    if (!(rawRow instanceof TableRow)) {
        // Plain map input: cells live under the "f" key, and each cell's value under "v".
        typedRow = new TableRow();
        @SuppressWarnings("unchecked") List<? extends Map<String, Object>> untypedCells = (List<? extends Map<String, Object>>) rawRow.get("f");
        rawCells = untypedCells;
    } else {
        // TableRow input: reuse the instance, grab its cells, then clear them so that getF()
        // returns null on the result.
        typedRow = (TableRow) rawRow;
        rawCells = typedRow.getF();
        typedRow.setF(null);
    }

    checkState(rawCells.size() == fields.size(), "Expected that the row has the same number of cells %s as fields in the schema %s", rawCells.size(), fields.size());

    // Walk cells and schemas in lockstep; the size check above guarantees they line up.
    Iterator<TableFieldSchema> schemaIt = fields.iterator();
    for (Map<String, Object> rawCell : rawCells) {
        TableFieldSchema cellSchema = schemaIt.next();
        Object typedValue = getTypedCellValue(cellSchema, rawCell.get("v"));
        String columnName = cellSchema.getName();
        checkArgument(!RESERVED_FIELD_NAMES.contains(columnName), "BigQueryIO does not support records with columns named %s", columnName);
        // Columns with null values are deliberately left out of the row.
        if (typedValue != null) {
            typedRow.set(columnName, typedValue);
        }
    }
    return typedRow;
}
Also used : TableRow(com.google.api.services.bigquery.model.TableRow) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) TableList(com.google.api.services.bigquery.model.TableList) Map(java.util.Map) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 14 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigqueryClient method getTypedCellValue.

/**
 * Converts a single raw cell value to the Java representation implied by its field schema.
 *
 * <ul>
 *   <li>A value for which {@code Data.isNull} is true yields {@code null}.
 *   <li>A {@code REPEATED} field yields an immutable list, each element converted with a clone of
 *       the schema whose mode is set to {@code REQUIRED}.
 *   <li>{@code RECORD} yields a nested row via {@code getTypedTableRow}.
 *   <li>{@code FLOAT} and {@code BOOLEAN} are parsed from their string form.
 *   <li>{@code TIMESTAMP} is returned as the string it already is.
 *   <li>Any other type (or a missing type) is returned unchanged.
 * </ul>
 *
 * @param fieldSchema the schema describing the cell
 * @param v the raw cell value
 * @return the converted value, or {@code null} for a null cell
 */
@Nullable
private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) {
    if (Data.isNull(v)) {
        return null;
    }

    if ("REPEATED".equals(fieldSchema.getMode())) {
        // Elements share the field's type but are converted as individual (non-repeated) values.
        TableFieldSchema perElementSchema = fieldSchema.clone().setMode("REQUIRED");
        @SuppressWarnings("unchecked") List<Map<String, Object>> elementCells = (List<Map<String, Object>>) v;
        ImmutableList.Builder<Object> converted = ImmutableList.builder();
        for (Map<String, Object> elementCell : elementCells) {
            Object rawElement = elementCell.get("v");
            converted.add(getTypedCellValue(perElementSchema, rawElement));
        }
        return converted.build();
    }

    String bqType = fieldSchema.getType();
    if (bqType == null) {
        // No declared type: fall through to the pass-through behavior.
        return v;
    }
    switch (bqType) {
        case "RECORD": {
            @SuppressWarnings("unchecked") Map<String, Object> nestedRow = (Map<String, Object>) v;
            return getTypedTableRow(fieldSchema.getFields(), nestedRow);
        }
        case "FLOAT":
            return Double.parseDouble((String) v);
        case "BOOLEAN":
            return Boolean.parseBoolean((String) v);
        case "TIMESTAMP":
            return (String) v;
        default:
            // Remaining cases are returned as-is: 1. String, 2. base64 encoded BYTES,
            // 3. DATE, DATETIME, TIME strings.
            return v;
    }
}
Also used : ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) TableList(com.google.api.services.bigquery.model.TableList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Map(java.util.Map) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 15 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class TableRowToStorageApiProto method fieldDescriptorFromTableField.

/**
 * Adds a proto field descriptor for one BigQuery field to the given message descriptor builder.
 *
 * <p>The proto field is named after the lower-cased BigQuery field name and numbered with
 * {@code fieldNumber}. {@code STRUCT}/{@code RECORD} fields get a nested message type, built by
 * {@code descriptorSchemaFromTableFieldSchemas} and registered on {@code descriptorBuilder}. All
 * other types are looked up in {@code PRIMITIVE_TYPES}. The label is {@code REPEATED} for repeated
 * fields, {@code OPTIONAL} when the mode is missing or {@code NULLABLE}, and {@code REQUIRED}
 * otherwise.
 *
 * @param fieldSchema the BigQuery field to describe
 * @param fieldNumber the proto field number to assign
 * @param descriptorBuilder the message descriptor being built; receives the new field and, for
 *     struct fields, the nested message type
 * @throws UnsupportedOperationException if the field's type is not a struct and has no entry in
 *     {@code PRIMITIVE_TYPES}
 */
private static void fieldDescriptorFromTableField(TableFieldSchema fieldSchema, int fieldNumber, DescriptorProto.Builder descriptorBuilder) {
    FieldDescriptorProto.Builder fieldDescriptorBuilder = FieldDescriptorProto.newBuilder();
    // Lower-case with a fixed locale: the default-locale overload is locale-sensitive (e.g. a
    // Turkish default locale maps 'I' to a dotless 'ı'), which would make the generated
    // descriptor's field names depend on the JVM the code happens to run on.
    fieldDescriptorBuilder = fieldDescriptorBuilder.setName(fieldSchema.getName().toLowerCase(java.util.Locale.ROOT));
    fieldDescriptorBuilder = fieldDescriptorBuilder.setNumber(fieldNumber);
    switch(fieldSchema.getType()) {
        case "STRUCT":
        case "RECORD":
            // Nested records become a nested message type referenced by name.
            DescriptorProto nested = descriptorSchemaFromTableFieldSchemas(fieldSchema.getFields());
            descriptorBuilder.addNestedType(nested);
            fieldDescriptorBuilder = fieldDescriptorBuilder.setType(Type.TYPE_MESSAGE).setTypeName(nested.getName());
            break;
        default:
            @Nullable Type type = PRIMITIVE_TYPES.get(fieldSchema.getType());
            if (type == null) {
                throw new UnsupportedOperationException("Converting BigQuery type " + fieldSchema.getType() + " to Beam type is unsupported");
            }
            fieldDescriptorBuilder = fieldDescriptorBuilder.setType(type);
    }
    Optional<Mode> fieldMode = Optional.ofNullable(fieldSchema.getMode()).map(Mode::valueOf);
    if (fieldMode.filter(m -> m == Mode.REPEATED).isPresent()) {
        fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_REPEATED);
    } else if (!fieldMode.isPresent() || fieldMode.filter(m -> m == Mode.NULLABLE).isPresent()) {
        // A missing mode is treated the same way as an explicit NULLABLE.
        fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_OPTIONAL);
    } else {
        fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_REQUIRED);
    }
    descriptorBuilder.addField(fieldDescriptorBuilder.build());
}
Also used : Type(com.google.protobuf.DescriptorProtos.FieldDescriptorProto.Type) Descriptor(com.google.protobuf.Descriptors.Descriptor) DynamicMessage(com.google.protobuf.DynamicMessage) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) TableCell(com.google.api.services.bigquery.model.TableCell) DescriptorValidationException(com.google.protobuf.Descriptors.DescriptorValidationException) Function(java.util.function.Function) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) TableRow(com.google.api.services.bigquery.model.TableRow) TableSchema(com.google.api.services.bigquery.model.TableSchema) FileDescriptor(com.google.protobuf.Descriptors.FileDescriptor) Nullable(javax.annotation.Nullable) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) FieldDescriptorProto(com.google.protobuf.DescriptorProtos.FieldDescriptorProto) FieldDescriptor(com.google.protobuf.Descriptors.FieldDescriptor) UUID(java.util.UUID) ByteString(com.google.protobuf.ByteString) Collectors.toList(java.util.stream.Collectors.toList) AbstractMap(java.util.AbstractMap) List(java.util.List) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) FileDescriptorProto(com.google.protobuf.DescriptorProtos.FileDescriptorProto) Message(com.google.protobuf.Message) Optional(java.util.Optional) Label(com.google.protobuf.DescriptorProtos.FieldDescriptorProto.Label) BaseEncoding(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.BaseEncoding) DescriptorProto(com.google.protobuf.DescriptorProtos.DescriptorProto) Collections(java.util.Collections) Type(com.google.protobuf.DescriptorProtos.FieldDescriptorProto.Type) FieldDescriptorProto(com.google.protobuf.DescriptorProtos.FieldDescriptorProto) FileDescriptorProto(com.google.protobuf.DescriptorProtos.FileDescriptorProto) DescriptorProto(com.google.protobuf.DescriptorProtos.DescriptorProto) 
FieldDescriptorProto(com.google.protobuf.DescriptorProtos.FieldDescriptorProto) Nullable(javax.annotation.Nullable)

Aggregations

TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)80 TableSchema (com.google.api.services.bigquery.model.TableSchema)71 TableRow (com.google.api.services.bigquery.model.TableRow)56 Test (org.junit.Test)45 Table (com.google.api.services.bigquery.model.Table)25 TableReference (com.google.api.services.bigquery.model.TableReference)23 ArrayList (java.util.ArrayList)17 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)16 List (java.util.List)15 Map (java.util.Map)15 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)14 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)13 Pipeline (org.apache.beam.sdk.Pipeline)12 ByteString (com.google.protobuf.ByteString)10 JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema)10 Write (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write)10 Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method)10 BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference)9 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)9 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)8