Search in sources :

Example 46 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class AvroUtils method toBeamField.

/**
 * Converts an Avro field into a Beam schema field.
 *
 * <p>The Avro field's schema is wrapped in a {@code TypeWithNullability}, translated with
 * {@code toFieldType}, and paired with the Avro field's name.
 *
 * @param field the Avro field to convert
 * @return a Beam field carrying the Avro field's name and the translated type
 */
public static Schema.Field toBeamField(org.apache.avro.Schema.Field field) {
    FieldType convertedType = toFieldType(new TypeWithNullability(field.schema()));
    return Field.of(field.name(), convertedType);
}
Also used : FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 47 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class AvroUtils method genericFromBeamField.

/**
 * Translates one Beam value into the object used to represent it under the given Avro schema.
 *
 * <p>The Beam field type and the Avro schema must agree on nullability. A {@code null} value is
 * returned as-is once that check has passed. Otherwise the conversion is chosen by the Beam type
 * name, recursing for collection elements, map values and one-of payloads.
 *
 * @param fieldType Beam type describing {@code value}
 * @param avroSchema Avro schema the result must conform to
 * @param value the Beam value to translate; may be {@code null}
 * @return the translated value, or {@code null}
 * @throws IllegalArgumentException if nullability differs, a DATETIME is mapped onto an Avro type
 *     other than INT or LONG, a fixed-bytes value has the wrong length, or the Beam type is not
 *     handled
 * @throws RuntimeException if the logical type identifier is not one of the handled ones
 */
@Nullable
private static Object genericFromBeamField(Schema.FieldType fieldType, org.apache.avro.Schema avroSchema, @Nullable Object value) {
    TypeWithNullability avroInfo = new TypeWithNullability(avroSchema);
    if (!fieldType.getNullable().equals(avroInfo.nullable)) {
        throw new IllegalArgumentException("FieldType " + fieldType + " and AVRO schema " + avroSchema + " don't have matching nullability");
    }
    if (value == null) {
        return null;
    }
    switch (fieldType.getTypeName()) {
        case BYTE:
        case INT16:
        case INT32:
        case INT64:
        case FLOAT:
        case DOUBLE:
        case BOOLEAN:
            // These values are passed through untouched.
            return value;
        case STRING:
            return new Utf8((String) value);
        case DECIMAL: {
            BigDecimal asDecimal = (BigDecimal) value;
            LogicalType avroLogicalType = avroInfo.type.getLogicalType();
            return new Conversions.DecimalConversion().toBytes(asDecimal, null, avroLogicalType);
        }
        case DATETIME: {
            // The Avro primitive decides the unit: INT -> days since epoch, LONG -> millis since epoch.
            Type avroKind = avroInfo.type.getType();
            if (avroKind == Type.INT) {
                ReadableInstant asInstant = (ReadableInstant) value;
                return Days.daysBetween(Instant.EPOCH, asInstant).getDays();
            }
            if (avroKind == Type.LONG) {
                ReadableInstant asInstant = (ReadableInstant) value;
                return asInstant.getMillis();
            }
            throw new IllegalArgumentException("Can't represent " + fieldType + " as " + avroKind);
        }
        case BYTES:
            return ByteBuffer.wrap((byte[]) value);
        case LOGICAL_TYPE:
            switch (fieldType.getLogicalType().getIdentifier()) {
                case FixedBytes.IDENTIFIER: {
                    FixedBytesField fixedField = checkNotNull(FixedBytesField.fromBeamFieldType(fieldType));
                    byte[] fixedBytes = (byte[]) value;
                    if (fixedBytes.length != fixedField.getSize()) {
                        throw new IllegalArgumentException("Incorrectly sized byte array.");
                    }
                    return GenericData.get().createFixed(null, fixedBytes, avroInfo.type);
                }
                case EnumerationType.IDENTIFIER: {
                    EnumerationType beamEnum = fieldType.getLogicalType(EnumerationType.class);
                    return GenericData.get().createEnum(beamEnum.toString((EnumerationType.Value) value), avroInfo.type);
                }
                case OneOfType.IDENTIFIER: {
                    OneOfType beamOneOf = fieldType.getLogicalType(OneOfType.class);
                    OneOfType.Value selected = (OneOfType.Value) value;
                    FieldType selectedType = beamOneOf.getFieldType(selected);
                    if (avroInfo.nullable && selected.getValue() == null) {
                        return null;
                    }
                    // The case's enum value is used as the index into the Avro union's member list.
                    return genericFromBeamField(
                        selectedType.withNullable(false),
                        avroInfo.type.getTypes().get(selected.getCaseType().getValue()),
                        selected.getValue());
                }
                case "NVARCHAR":
                case "VARCHAR":
                case "LONGNVARCHAR":
                case "LONGVARCHAR":
                    return new Utf8((String) value);
                case "DATE":
                    return Days.daysBetween(Instant.EPOCH, (Instant) value).getDays();
                case "TIME":
                    return (int) ((Instant) value).getMillis();
                default:
                    throw new RuntimeException("Unhandled logical type " + fieldType.getLogicalType().getIdentifier());
            }
        case ARRAY:
        case ITERABLE: {
            Iterable<?> elements = (Iterable<?>) value;
            List<Object> converted = Lists.newArrayListWithExpectedSize(Iterables.size(elements));
            for (Object element : elements) {
                // Element types are looked up per element, so an empty collection never touches them.
                converted.add(genericFromBeamField(fieldType.getCollectionElementType(), avroInfo.type.getElementType(), element));
            }
            return converted;
        }
        case MAP: {
            Map<Utf8, Object> converted = Maps.newHashMap();
            Map<?, ?> source = (Map<?, ?>) value;
            for (Map.Entry<?, ?> entry : source.entrySet()) {
                // Keys are cast to String and wrapped as Utf8 for Avro.
                Utf8 avroKey = new Utf8((String) entry.getKey());
                converted.put(avroKey, genericFromBeamField(fieldType.getMapValueType(), avroInfo.type.getValueType(), entry.getValue()));
            }
            return converted;
        }
        case ROW:
            return toGenericRecord((Row) value, avroInfo.type);
        default:
            throw new IllegalArgumentException("Unsupported type " + fieldType);
    }
}
Also used : ReadableInstant(org.joda.time.ReadableInstant) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) EnumerationType(org.apache.beam.sdk.schemas.logicaltypes.EnumerationType) LogicalType(org.apache.avro.LogicalType) BigDecimal(java.math.BigDecimal) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) AvroRuntimeException(org.apache.avro.AvroRuntimeException) Conversions(org.apache.avro.Conversions) Utf8(org.apache.avro.util.Utf8) Map(java.util.Map) HashMap(java.util.HashMap) OneOfType(org.apache.beam.sdk.schemas.logicaltypes.OneOfType) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 48 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class SelectHelpers method getOutputSchemaHelper.

/**
 * Computes the output schema produced by selecting through a chain of container qualifiers.
 *
 * <p>Each call consumes the qualifier at {@code qualifierPosition}. Once all qualifiers are
 * consumed, the remaining type must be composite and its row schema is resolved through
 * {@code getOutputSchemaTrackingNullable}. For a LIST or MAP qualifier, the schema of the
 * element/value type is computed recursively and each resulting field is re-wrapped in the
 * matching container type (array, iterable, or map with the original key type).
 *
 * @param inputFieldType type being selected from at this level
 * @param fieldAccessDescriptor describes the fields selected inside the innermost row
 * @param qualifiers container qualifiers to walk through
 * @param qualifierPosition index of the qualifier handled by this call
 * @param isNullable nullability applied to every field emitted at this level
 * @return the schema of the selected fields
 * @throws RuntimeException if the qualifier kind is neither LIST nor MAP
 */
private static Schema getOutputSchemaHelper(FieldType inputFieldType, FieldAccessDescriptor fieldAccessDescriptor, List<Qualifier> qualifiers, int qualifierPosition, boolean isNullable) {
    boolean qualifiersExhausted = qualifierPosition >= qualifiers.size();
    if (qualifiersExhausted) {
        // Every container has been unwrapped; what is left must be a row. Resolve its
        // sub-schema while carrying the nullable flag along.
        checkArgument(inputFieldType.getTypeName().isCompositeType());
        return getOutputSchemaTrackingNullable(inputFieldType.getRowSchema(), fieldAccessDescriptor, isNullable);
    }
    Qualifier current = qualifiers.get(qualifierPosition);
    Schema.Builder result = Schema.builder();
    switch (current.getKind()) {
        case LIST: {
            checkArgument(current.getList().equals(ListQualifier.ALL));
            FieldType elementType = checkNotNull(inputFieldType.getCollectionElementType());
            Schema elementSchema = getOutputSchemaHelper(elementType, fieldAccessDescriptor, qualifiers, qualifierPosition + 1, false);
            for (Field selected : elementSchema.getFields()) {
                Field wrapped;
                if (TypeName.ARRAY.equals(inputFieldType.getTypeName())) {
                    wrapped = Field.of(selected.getName(), FieldType.array(selected.getType()));
                } else {
                    // Anything that is not an array must be an iterable.
                    checkArgument(TypeName.ITERABLE.equals(inputFieldType.getTypeName()));
                    wrapped = Field.of(selected.getName(), FieldType.iterable(selected.getType()));
                }
                result.addField(wrapped.withNullable(isNullable));
            }
            return result.build();
        }
        case MAP: {
            checkArgument(current.getMap().equals(MapQualifier.ALL));
            FieldType mapKeyType = checkNotNull(inputFieldType.getMapKeyType());
            FieldType mapValueType = checkNotNull(inputFieldType.getMapValueType());
            Schema valueSchema = getOutputSchemaHelper(mapValueType, fieldAccessDescriptor, qualifiers, qualifierPosition + 1, false);
            for (Field selected : valueSchema.getFields()) {
                Field wrapped = Field.of(selected.getName(), FieldType.map(mapKeyType, selected.getType()));
                result.addField(wrapped.withNullable(isNullable));
            }
            return result.build();
        }
        default:
            throw new RuntimeException("unexpected");
    }
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) Schema(org.apache.beam.sdk.schemas.Schema) MapQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.MapQualifier) ListQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.ListQualifier) Qualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 49 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class SelectHelpers method selectIntoRow.

/**
 * Select out of a given {@link Row} object.
 *
 * <p>A {@code null} input row is treated as a row whose fields are all {@code null}: directly
 * selected fields are emitted as {@code null}, and nested selections receive a {@code null}
 * value to select from, rather than failing with a {@link NullPointerException}.
 *
 * @param inputSchema schema of {@code input}; used for the field count and nested field types
 * @param input the row to select from; may be {@code null}
 * @param output builder that receives the selected values in order
 * @param fieldAccessDescriptor describes which top-level and nested fields are selected
 */
private static void selectIntoRow(Schema inputSchema, Row input, Row.Builder output, FieldAccessDescriptor fieldAccessDescriptor) {
    if (fieldAccessDescriptor.getAllFields()) {
        // Selecting everything: copy all values, or emit one null per field for a null row.
        List<Object> values = (input != null) ? input.getValues() : Collections.nCopies(inputSchema.getFieldCount(), null);
        output.addValues(values);
        return;
    }
    for (int fieldId : fieldAccessDescriptor.fieldIdsAccessed()) {
        // TODO: Once we support specific qualifiers (like array slices), extract them here.
        output.addValue((input != null) ? input.getValue(fieldId) : null);
    }
    Schema outputSchema = output.getSchema();
    for (Map.Entry<FieldDescriptor, FieldAccessDescriptor> nested : fieldAccessDescriptor.getNestedFieldsAccessed().entrySet()) {
        FieldDescriptor field = nested.getKey();
        FieldAccessDescriptor nestedAccess = nested.getValue();
        FieldType nestedInputType = inputSchema.getField(field.getFieldId()).getType();
        FieldType nestedOutputType = outputSchema.getField(output.nextFieldId()).getType();
        // Guard the nested lookup the same way as the direct lookups above so a null row does
        // not throw here. NOTE(review): assumes selectIntoRowWithQualifiers accepts a null
        // value — confirm against its implementation.
        selectIntoRowWithQualifiers(
            field.getQualifiers(),
            0,
            (input != null) ? input.getValue(field.getFieldId()) : null,
            output,
            nestedAccess,
            nestedInputType,
            nestedOutputType);
    }
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) Map(java.util.Map) FieldDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 50 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class RenameFields method renameSchema.

// Builds the renamed counterpart of inputSchema and stores it in renamedSchemasMap under the
// input schema's UUID. Renames that target a field of this schema are applied directly; renames
// that target a nested field are grouped by the enclosing field id, recorded in
// nestedFieldRenamedMap, and passed on to renameFieldType for that field.
@VisibleForTesting
static void renameSchema(Schema inputSchema, Collection<RenamePair> renames, Map<UUID, Schema> renamedSchemasMap, Map<UUID, BitSet> nestedFieldRenamedMap) {
    // field id -> new name, for renames that apply at this level.
    Map<Integer, String> renamesAtThisLevel = Maps.newHashMap();
    // field id -> renames that must be applied inside that field's nested type.
    Multimap<Integer, RenamePair> renamesByNestedField = ArrayListMultimap.create();
    for (RenamePair pair : renames) {
        FieldAccessDescriptor descriptor = pair.getFieldAccessDescriptor();
        if (descriptor.fieldIdsAccessed().isEmpty()) {
            // No direct field access, so the rename targets something nested.
            Map.Entry<Integer, FieldAccessDescriptor> nestedEntry = Iterables.getOnlyElement(descriptor.nestedFieldsById().entrySet());
            BitSet renamedBits = nestedFieldRenamedMap.computeIfAbsent(inputSchema.getUUID(), unused -> new BitSet(inputSchema.getFieldCount()));
            renamedBits.set(nestedEntry.getKey());
            renamesByNestedField.put(nestedEntry.getKey(), RenamePair.of(nestedEntry.getValue(), pair.getNewName()));
        } else {
            // The rename targets a field that lives directly in this schema.
            Integer targetFieldId = Iterables.getOnlyElement(descriptor.fieldIdsAccessed());
            renamesAtThisLevel.put(targetFieldId, pair.getNewName());
        }
    }
    Map<Integer, Collection<RenamePair>> nestedByField = renamesByNestedField.asMap();
    Schema.Builder renamed = Schema.builder();
    for (int index = 0; index < inputSchema.getFieldCount(); index++) {
        Field original = inputSchema.getField(index);
        FieldType originalType = original.getType();
        // Fields without a rename keep their current name.
        String effectiveName = renamesAtThisLevel.getOrDefault(index, original.getName());
        Collection<RenamePair> applicable = nestedByField.getOrDefault(index, Collections.emptyList());
        renamed.addField(effectiveName, renameFieldType(originalType, applicable, renamedSchemasMap, nestedFieldRenamedMap));
    }
    renamedSchemasMap.put(inputSchema.getUUID(), renamed.build());
}
Also used : Experimental(org.apache.beam.sdk.annotations.Experimental) PTransform(org.apache.beam.sdk.transforms.PTransform) Multimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap) Kind(org.apache.beam.sdk.annotations.Experimental.Kind) Map(java.util.Map) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) ArrayListMultimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ArrayListMultimap) Row(org.apache.beam.sdk.values.Row) Nullable(javax.annotation.Nullable) Field(org.apache.beam.sdk.schemas.Schema.Field) DoFn(org.apache.beam.sdk.transforms.DoFn) Collection(java.util.Collection) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) UUID(java.util.UUID) PCollection(org.apache.beam.sdk.values.PCollection) Collectors(java.util.stream.Collectors) Schema(org.apache.beam.sdk.schemas.Schema) Serializable(java.io.Serializable) List(java.util.List) ParDo(org.apache.beam.sdk.transforms.ParDo) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) AutoValue(com.google.auto.value.AutoValue) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) BitSet(java.util.BitSet) Collections(java.util.Collections) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) BitSet(java.util.BitSet) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Field(org.apache.beam.sdk.schemas.Schema.Field) Map(java.util.Map) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)58 Schema (org.apache.beam.sdk.schemas.Schema)24 Field (org.apache.beam.sdk.schemas.Schema.Field)20 Row (org.apache.beam.sdk.values.Row)15 Test (org.junit.Test)15 Map (java.util.Map)10 List (java.util.List)9 ArrayList (java.util.ArrayList)7 Nullable (org.checkerframework.checker.nullness.qual.Nullable)7 FieldDescriptor (com.google.protobuf.Descriptors.FieldDescriptor)6 BigDecimal (java.math.BigDecimal)6 Schema.toSchema (org.apache.beam.sdk.schemas.Schema.toSchema)6 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)6 Collectors (java.util.stream.Collectors)5 EnumerationType (org.apache.beam.sdk.schemas.logicaltypes.EnumerationType)5 LocalDateTime (java.time.LocalDateTime)4 LocalTime (java.time.LocalTime)4 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)3 TableSchema (com.google.api.services.bigquery.model.TableSchema)3 AutoValue (com.google.auto.value.AutoValue)3