Search in sources :

Example 1 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class BigQueryUtils method fromBeamField.

/**
 * Converts a single Beam field value into the object stored for it in a BigQuery row.
 *
 * <p>Collections become a list of converted elements, maps become a list of key/value
 * {@code TableRow}s, nested rows are delegated to {@code toTableRow}, byte arrays are
 * base64-encoded, and every other supported type is rendered as a string.
 *
 * @param fieldType Beam type describing {@code fieldValue}
 * @param fieldValue value to convert; {@code null} is accepted only for nullable types
 * @return the converted value, or {@code null} when a nullable field holds {@code null}
 * @throws IllegalArgumentException if {@code fieldValue} is {@code null} and the type is not
 *     nullable
 */
@Nullable
private static Object fromBeamField(FieldType fieldType, Object fieldValue) {
    if (fieldValue == null) {
        if (fieldType.getNullable()) {
            return null;
        }
        throw new IllegalArgumentException("Field is not nullable.");
    }
    switch (fieldType.getTypeName()) {
        case ARRAY:
        case ITERABLE: {
            // Convert each element recursively using the collection's element type.
            FieldType itemType = fieldType.getCollectionElementType();
            Iterable<?> source = (Iterable<?>) fieldValue;
            List<Object> converted = Lists.newArrayListWithCapacity(Iterables.size(source));
            for (Object element : source) {
                converted.add(fromBeamField(itemType, element));
            }
            return converted;
        }
        case MAP: {
            // Each map entry is emitted as its own TableRow holding a key field and a value field.
            FieldType keyType = fieldType.getMapKeyType();
            FieldType valueType = fieldType.getMapValueType();
            Map<?, ?> entries = (Map<?, ?>) fieldValue;
            List<Object> entryRows = Lists.newArrayListWithCapacity(entries.size());
            for (Map.Entry<?, ?> entry : entries.entrySet()) {
                TableRow entryRow = new TableRow();
                entryRow.set(BIGQUERY_MAP_KEY_FIELD_NAME, fromBeamField(keyType, entry.getKey()));
                entryRow.set(BIGQUERY_MAP_VALUE_FIELD_NAME, fromBeamField(valueType, entry.getValue()));
                entryRows.add(entryRow);
            }
            return entryRows;
        }
        case ROW:
            return toTableRow((Row) fieldValue);
        case DATETIME: {
            // Rendered in UTC with the BigQuery timestamp printer.
            Instant instant = (Instant) fieldValue;
            return instant.toDateTime(DateTimeZone.UTC).toString(BIGQUERY_TIMESTAMP_PRINTER);
        }
        case INT16:
        case INT32:
        case INT64:
        case FLOAT:
        case DOUBLE:
        case STRING:
        case BOOLEAN:
        case DECIMAL:
            return fieldValue.toString();
        case BYTES:
            return BaseEncoding.base64().encode((byte[]) fieldValue);
        case LOGICAL_TYPE: {
            // For the JSON formats of DATE/DATETIME/TIME/TIMESTAMP types that BigQuery accepts, see
            // https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json#details_of_loading_json_data
            String logicalId = fieldType.getLogicalType().getIdentifier();
            if (SqlTypes.DATE.getIdentifier().equals(logicalId)) {
                return fieldValue.toString();
            }
            if (SqlTypes.TIME.getIdentifier().equals(logicalId)) {
                // LocalTime.toString() drops seconds if it is zero (see
                // https://docs.oracle.com/javase/8/docs/api/java/time/LocalTime.html#toString--),
                // but BigQuery TIME requires seconds
                // (https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type),
                // so an explicit formatter is chosen based on whether the nano part is zero.
                LocalTime time = (LocalTime) fieldValue;
                // Suppression is justified because seconds are always outputted.
                @SuppressWarnings("JavaLocalTimeGetNano")
                boolean timeHasNoNanos = time.getNano() == 0;
                java.time.format.DateTimeFormatter timeFormatter;
                if (timeHasNoNanos) {
                    timeFormatter = ISO_LOCAL_TIME;
                } else {
                    timeFormatter = BIGQUERY_TIME_FORMATTER;
                }
                return timeFormatter.format(time);
            }
            if (SqlTypes.DATETIME.getIdentifier().equals(logicalId)) {
                // Same rationale as SqlTypes.TIME.
                LocalDateTime dateTime = (LocalDateTime) fieldValue;
                @SuppressWarnings("JavaLocalDateTimeGetNano")
                boolean dateTimeHasNoNanos = dateTime.getNano() == 0;
                java.time.format.DateTimeFormatter dateTimeFormatter;
                if (dateTimeHasNoNanos) {
                    dateTimeFormatter = ISO_LOCAL_DATE_TIME;
                } else {
                    dateTimeFormatter = BIGQUERY_DATETIME_FORMATTER;
                }
                return dateTimeFormatter.format(dateTime);
            }
            if ("Enum".equals(logicalId)) {
                return fieldType.getLogicalType(EnumerationType.class).toString((EnumerationType.Value) fieldValue);
            }
            // Any other logical type is rendered the same way as the default case.
            return fieldValue.toString();
        }
        default:
            return fieldValue.toString();
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) LocalTime(java.time.LocalTime) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) TableRow(com.google.api.services.bigquery.model.TableRow) AutoValue(com.google.auto.value.AutoValue) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map) HashMap(java.util.HashMap) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 2 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class BigQueryUtils method fromTableFieldSchema.

/**
 * Builds a Beam {@code Schema} from a list of BigQuery field schemas.
 *
 * <p>Each field's type is resolved through {@code fromTableFieldSchemaType}. A field whose mode
 * is REPEATED is wrapped in an array type unless the resolved type is already a map type. A
 * field is nullable when its mode is absent or NULLABLE. Non-empty descriptions are copied over.
 *
 * @param tableFieldSchemas BigQuery field schemas to convert, in order
 * @param options conversion options forwarded to {@code fromTableFieldSchemaType}
 * @return the Beam schema containing one field per input field schema
 */
private static Schema fromTableFieldSchema(List<TableFieldSchema> tableFieldSchemas, SchemaConversionOptions options) {
    Schema.Builder builder = Schema.builder();
    for (TableFieldSchema bqField : tableFieldSchemas) {
        FieldType beamType = fromTableFieldSchemaType(bqField.getType(), bqField.getFields(), options);
        Optional<Mode> mode = Optional.ofNullable(bqField.getMode()).map(Mode::valueOf);

        boolean repeated = mode.isPresent() && mode.get() == Mode.REPEATED;
        if (repeated && !beamType.getTypeName().isMapType()) {
            beamType = FieldType.array(beamType);
        }

        // An undefined mode, or an explicit NULLABLE mode, makes the field nullable.
        boolean nullable = mode.map(m -> m == Mode.NULLABLE).orElse(true);
        Field beamField = Field.of(bqField.getName(), beamType).withNullable(nullable);

        String description = bqField.getDescription();
        if (description != null && !description.isEmpty()) {
            beamField = beamField.withDescription(description);
        }
        builder.addField(beamField);
    }
    return builder.build();
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.beam.sdk.schemas.Schema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 3 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class BigQueryUtils method toBeamValue.

/**
 * Converts a value from BigQuery's JSON representation into the corresponding Beam value.
 *
 * <p>Strings, numbers and booleans are turned into text and parsed either by the parser
 * registered in {@code JSON_VALUE_PARSERS} for the field's type name, or as one of the
 * DATETIME/DATE/TIME logical types. Lists are converted element by element, reading each
 * element's {@code "v"} entry. Maps are copied into a {@code TableRow} and converted with
 * {@code toBeamRow}.
 *
 * @param fieldType Beam type the value should be converted to
 * @param jsonBQValue value as read from BigQuery JSON
 * @return the converted Beam value
 * @throws UnsupportedOperationException if no conversion applies to the value/type pair
 * @throws NullPointerException if {@code jsonBQValue} is {@code null}, because the error
 *     message dereferences it
 */
private static Object toBeamValue(FieldType fieldType, Object jsonBQValue) {
    boolean scalar = jsonBQValue instanceof String || jsonBQValue instanceof Number || jsonBQValue instanceof Boolean;
    if (scalar) {
        String text = jsonBQValue.toString();
        if (JSON_VALUE_PARSERS.containsKey(fieldType.getTypeName())) {
            return JSON_VALUE_PARSERS.get(fieldType.getTypeName()).apply(text);
        }
        if (fieldType.isLogicalType(SqlTypes.DATETIME.getIdentifier())) {
            return LocalDateTime.parse(text, BIGQUERY_DATETIME_FORMATTER);
        }
        if (fieldType.isLogicalType(SqlTypes.DATE.getIdentifier())) {
            return LocalDate.parse(text);
        }
        if (fieldType.isLogicalType(SqlTypes.TIME.getIdentifier())) {
            return LocalTime.parse(text);
        }
        // No scalar conversion matched; continue so the unsupported-type error below is raised.
    }
    if (jsonBQValue instanceof List) {
        List<Object> cells = (List<Object>) jsonBQValue;
        return cells.stream().map(cell -> {
            // Each list element is a map whose "v" entry carries the actual value.
            Object inner = ((Map<String, Object>) cell).get("v");
            return toBeamValue(fieldType.getCollectionElementType(), inner);
        }).collect(toList());
    }
    if (jsonBQValue instanceof Map) {
        TableRow nestedRow = new TableRow();
        nestedRow.putAll((Map<String, Object>) jsonBQValue);
        return toBeamRow(fieldType.getRowSchema(), nestedRow);
    }
    throw new UnsupportedOperationException("Converting BigQuery type '" + jsonBQValue.getClass() + "' to '" + fieldType + "' is not supported");
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ByteBuffer(java.nio.ByteBuffer) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) ISO_LOCAL_DATE_TIME(java.time.format.DateTimeFormatter.ISO_LOCAL_DATE_TIME) BigDecimal(java.math.BigDecimal) Matcher(java.util.regex.Matcher) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) TableRow(com.google.api.services.bigquery.model.TableRow) LocalTime(java.time.LocalTime) EnumerationType(org.apache.beam.sdk.schemas.logicaltypes.EnumerationType) TableSchema(com.google.api.services.bigquery.model.TableSchema) Conversions(org.apache.avro.Conversions) Utf8(org.apache.avro.util.Utf8) Set(java.util.Set) ReadableInstant(org.joda.time.ReadableInstant) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Serializable(java.io.Serializable) List(java.util.List) LocalDate(java.time.LocalDate) AutoValue(com.google.auto.value.AutoValue) ISOChronology(org.joda.time.chrono.ISOChronology) Optional(java.util.Optional) ISO_LOCAL_TIME(java.time.format.DateTimeFormatter.ISO_LOCAL_TIME) Pattern(java.util.regex.Pattern) BaseEncoding(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.BaseEncoding) IntStream(java.util.stream.IntStream) DateTimeFormatterBuilder(org.joda.time.format.DateTimeFormatterBuilder) Experimental(org.apache.beam.sdk.annotations.Experimental) LocalDateTime(java.time.LocalDateTime) HashMap(java.util.HashMap) Function(java.util.function.Function) SerializableFunctions(org.apache.beam.sdk.transforms.SerializableFunctions) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) SqlTypes(org.apache.beam.sdk.schemas.logicaltypes.SqlTypes) LogicalTypes(org.apache.avro.LogicalTypes) 
Kind(org.apache.beam.sdk.annotations.Experimental.Kind) MonitoringInfoConstants(org.apache.beam.runners.core.metrics.MonitoringInfoConstants) ServiceCallMetric(org.apache.beam.runners.core.metrics.ServiceCallMetric) Row(org.apache.beam.sdk.values.Row) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Field(org.apache.beam.sdk.schemas.Schema.Field) GcpResourceIdentifiers(org.apache.beam.runners.core.metrics.GcpResourceIdentifiers) TableReference(com.google.api.services.bigquery.model.TableReference) GenericRecord(org.apache.avro.generic.GenericRecord) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) DateTime(org.joda.time.DateTime) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) Schema(org.apache.beam.sdk.schemas.Schema) TypeName(org.apache.beam.sdk.schemas.Schema.TypeName) Row.toRow(org.apache.beam.sdk.values.Row.toRow) Collectors.toList(java.util.stream.Collectors.toList) Instant(org.joda.time.Instant) TableRow(com.google.api.services.bigquery.model.TableRow) List(java.util.List) ArrayList(java.util.ArrayList) Collectors.toList(java.util.stream.Collectors.toList) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map) HashMap(java.util.HashMap)

Example 4 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class BigQueryUtils method toTableFieldSchema.

/**
 * Converts the fields of a Beam {@code Schema} into BigQuery {@code TableFieldSchema}s.
 *
 * <p>Non-nullable fields are marked REQUIRED. Collection fields are marked REPEATED and
 * described by their element type. Row fields get their sub-fields converted recursively. Map
 * fields are represented as REPEATED records with a key field and a value field.
 *
 * @param schema Beam schema whose fields are converted
 * @return one BigQuery field schema per Beam field, in schema order
 * @throws IllegalArgumentException if a collection's element type is itself a collection or map
 */
private static List<TableFieldSchema> toTableFieldSchema(Schema schema) {
    List<TableFieldSchema> result = new ArrayList<>(schema.getFieldCount());
    for (Field beamField : schema.getFields()) {
        FieldType beamType = beamField.getType();
        TableFieldSchema bqField = new TableFieldSchema().setName(beamField.getName());

        String description = beamField.getDescription();
        if (description != null && !description.isEmpty()) {
            bqField.setDescription(description);
        }

        if (!beamType.getNullable()) {
            bqField.setMode(Mode.REQUIRED.toString());
        }

        if (beamType.getTypeName().isCollectionType()) {
            // From here on the element type describes the field; REPEATED replaces any earlier mode.
            beamType = beamType.getCollectionElementType();
            boolean nestedContainer = beamType.getTypeName().isCollectionType() || beamType.getTypeName().isMapType();
            if (nestedContainer) {
                throw new IllegalArgumentException("Array of collection is not supported in BigQuery.");
            }
            bqField.setMode(Mode.REPEATED.toString());
        }

        if (beamType.getTypeName() == TypeName.ROW) {
            bqField.setFields(toTableFieldSchema(beamType.getRowSchema()));
        } else if (beamType.getTypeName() == TypeName.MAP) {
            // A map is modelled as a repeated row made of a key field and a value field.
            Schema entrySchema = Schema.builder().addField(BIGQUERY_MAP_KEY_FIELD_NAME, beamType.getMapKeyType()).addField(BIGQUERY_MAP_VALUE_FIELD_NAME, beamType.getMapValueType()).build();
            beamType = FieldType.row(entrySchema);
            bqField.setFields(toTableFieldSchema(entrySchema));
            bqField.setMode(Mode.REPEATED.toString());
        }

        bqField.setType(toStandardSQLTypeName(beamType).toString());
        result.add(bqField);
    }
    return result;
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.beam.sdk.schemas.Schema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 5 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class BeamRowToStorageApiProto method fieldDescriptorFromBeamField.

/**
 * Builds the proto field descriptor for a Beam schema field.
 *
 * <p>The descriptor name is the lower-cased Beam field name and its number is
 * {@code fieldNumber}. Row fields register a nested message descriptor in {@code nestedTypes}
 * and reference it by name. Array and iterable fields are described by their element type and
 * labelled REPEATED. Logical and primitive types are looked up in {@code LOGICAL_TYPES} and
 * {@code PRIMITIVE_TYPES}. All non-repeated fields are labelled OPTIONAL or REQUIRED according
 * to the field type's nullability.
 *
 * @param field Beam field to describe
 * @param fieldNumber proto field number to assign
 * @param nestedTypes accumulator that receives descriptors generated for nested row types
 * @return the populated field descriptor builder
 * @throws RuntimeException if the field is a map, uses an unsupported primitive or logical
 *     type, or reports a null row schema, element type or logical type
 */
private static FieldDescriptorProto.Builder fieldDescriptorFromBeamField(Field field, int fieldNumber, List<DescriptorProto> nestedTypes) {
    FieldDescriptorProto.Builder fieldDescriptorBuilder = FieldDescriptorProto.newBuilder();
    // Lower-case with a fixed locale so the generated name does not depend on the JVM's default
    // locale (e.g. under a Turkish locale "ID" would otherwise become a dotless-i name).
    fieldDescriptorBuilder = fieldDescriptorBuilder.setName(field.getName().toLowerCase(java.util.Locale.ROOT));
    fieldDescriptorBuilder = fieldDescriptorBuilder.setNumber(fieldNumber);
    switch(field.getType().getTypeName()) {
        case ROW:
            @Nullable Schema rowSchema = field.getType().getRowSchema();
            if (rowSchema == null) {
                throw new RuntimeException("Unexpected null schema!");
            }
            // The nested row becomes its own message type, referenced here by name.
            DescriptorProto nested = descriptorSchemaFromBeamSchema(rowSchema);
            nestedTypes.add(nested);
            fieldDescriptorBuilder = fieldDescriptorBuilder.setType(Type.TYPE_MESSAGE).setTypeName(nested.getName());
            break;
        case ARRAY:
        case ITERABLE:
            @Nullable FieldType elementType = field.getType().getCollectionElementType();
            if (elementType == null) {
                throw new RuntimeException("Unexpected null element type!");
            }
            Preconditions.checkState(!Preconditions.checkNotNull(elementType.getTypeName()).isCollectionType(), "Nested arrays not supported by BigQuery.");
            // Describe the element type under the same name/number, then mark it repeated. This
            // returns early, so the OPTIONAL/REQUIRED labelling below is not applied.
            return fieldDescriptorFromBeamField(Field.of(field.getName(), elementType), fieldNumber, nestedTypes).setLabel(Label.LABEL_REPEATED);
        case LOGICAL_TYPE:
            @Nullable LogicalType<?, ?> logicalType = field.getType().getLogicalType();
            if (logicalType == null) {
                throw new RuntimeException("Unexpected null logical type " + field.getType());
            }
            @Nullable Type type = LOGICAL_TYPES.get(logicalType.getIdentifier());
            if (type == null) {
                throw new RuntimeException("Unsupported logical type " + field.getType());
            }
            fieldDescriptorBuilder = fieldDescriptorBuilder.setType(type);
            break;
        case MAP:
            throw new RuntimeException("Map types not supported by BigQuery.");
        default:
            @Nullable Type primitiveType = PRIMITIVE_TYPES.get(field.getType().getTypeName());
            if (primitiveType == null) {
                throw new RuntimeException("Unsupported type " + field.getType());
            }
            fieldDescriptorBuilder = fieldDescriptorBuilder.setType(primitiveType);
    }
    if (field.getType().getNullable()) {
        fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_OPTIONAL);
    } else {
        fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_REQUIRED);
    }
    return fieldDescriptorBuilder;
}
Also used : Type(com.google.protobuf.DescriptorProtos.FieldDescriptorProto.Type) EnumerationType(org.apache.beam.sdk.schemas.logicaltypes.EnumerationType) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) LogicalType(org.apache.beam.sdk.schemas.Schema.LogicalType) Schema(org.apache.beam.sdk.schemas.Schema) FieldDescriptorProto(com.google.protobuf.DescriptorProtos.FieldDescriptorProto) FileDescriptorProto(com.google.protobuf.DescriptorProtos.FileDescriptorProto) DescriptorProto(com.google.protobuf.DescriptorProtos.DescriptorProto) FieldDescriptorProto(com.google.protobuf.DescriptorProtos.FieldDescriptorProto) Nullable(javax.annotation.Nullable) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Aggregations

FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)58 Schema (org.apache.beam.sdk.schemas.Schema)24 Field (org.apache.beam.sdk.schemas.Schema.Field)20 Row (org.apache.beam.sdk.values.Row)15 Test (org.junit.Test)15 Map (java.util.Map)10 List (java.util.List)9 ArrayList (java.util.ArrayList)7 Nullable (org.checkerframework.checker.nullness.qual.Nullable)7 FieldDescriptor (com.google.protobuf.Descriptors.FieldDescriptor)6 BigDecimal (java.math.BigDecimal)6 Schema.toSchema (org.apache.beam.sdk.schemas.Schema.toSchema)6 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)6 Collectors (java.util.stream.Collectors)5 EnumerationType (org.apache.beam.sdk.schemas.logicaltypes.EnumerationType)5 LocalDateTime (java.time.LocalDateTime)4 LocalTime (java.time.LocalTime)4 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)3 TableSchema (com.google.api.services.bigquery.model.TableSchema)3 AutoValue (com.google.auto.value.AutoValue)3