Search in sources :

Example 26 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class SelectHelpers method selectIntoRowWithQualifiers.

/**
 * Recursively applies the list/map qualifiers starting at {@code qualifierPosition} to
 * {@code value} and appends the selected values to {@code output}.
 *
 * <p>Once every qualifier has been consumed, {@code value} is cast to a {@link Row} and the
 * selection is delegated to {@code selectIntoRow} using {@code inputType.getRowSchema()}.
 * Otherwise the current qualifier decides how the container is unpacked:
 *
 * <ul>
 *   <li>{@code LIST}: each element is selected recursively into a temporary row, and the i-th
 *       value of every temporary row is collected into the i-th output list.
 *   <li>{@code MAP}: each map value is selected recursively into a temporary row, and the i-th
 *       value of every temporary row is stored under the entry's key in the i-th output map.
 * </ul>
 *
 * <p>One list (or map) per field of the nested output schema is then added to {@code output},
 * in field order. If the container itself is {@code null}, a {@code null} is added for each of
 * those fields instead.
 *
 * @param qualifiers the full list of qualifiers being applied
 * @param qualifierPosition index of the qualifier to apply at this recursion level
 * @param value the value at the current nesting level; cast to {@code Iterable}, {@code Map} or
 *     {@code Row} depending on the branch taken
 * @param output builder that receives the selected values
 * @param fieldAccessDescriptor describes which nested fields are selected
 * @param inputType type of {@code value}; its collection element type / map value type must be
 *     non-null in the matching branch (enforced by {@code checkNotNull})
 * @param outputType type of the output at this level; only used to derive the nested output
 *     type passed down the recursion
 * @throws RuntimeException if the qualifier kind is neither {@code LIST} nor {@code MAP}
 */
private static void selectIntoRowWithQualifiers(List<Qualifier> qualifiers, int qualifierPosition, Object value, Row.Builder output, FieldAccessDescriptor fieldAccessDescriptor, FieldType inputType, FieldType outputType) {
    if (qualifierPosition >= qualifiers.size()) {
        // We have already constructed all arrays and maps. What remains must be a Row.
        Row row = (Row) value;
        selectIntoRow(inputType.getRowSchema(), row, output, fieldAccessDescriptor);
        return;
    }
    Qualifier qualifier = qualifiers.get(qualifierPosition);
    switch(qualifier.getKind()) {
        case LIST:
            {
                FieldType nestedInputType = checkNotNull(inputType.getCollectionElementType());
                FieldType nestedOutputType = checkNotNull(outputType.getCollectionElementType());
                // Raw cast: value may be null here, which is handled explicitly below.
                Iterable<Object> iterable = (Iterable) value;
                // When selecting multiple subelements under a list, we distribute the select
                // resulting in multiple lists. For example, if there is a field "list" with type
                // {a: string, b: int}[], selecting list.a, list.b results in a schema of type
                // {a: string[], b: int[]}. This preserves the invariant that the name selected always
                // appears in the top-level schema.
                // Wrap the element type in a throwaway single-field schema (field "a") so that
                // getOutputSchema can compute the schema produced by selecting from one element.
                Schema tempSchema = Schema.builder().addField("a", nestedInputType).build();
                FieldAccessDescriptor tempAccessDescriptor = FieldAccessDescriptor.create().withNestedField("a", fieldAccessDescriptor).resolve(tempSchema);
                Schema nestedSchema = getOutputSchema(tempSchema, tempAccessDescriptor);
                // One output list per selected field; a null input list yields null output lists.
                List<List<Object>> selectedLists = Lists.newArrayListWithCapacity(nestedSchema.getFieldCount());
                for (int i = 0; i < nestedSchema.getFieldCount(); i++) {
                    if (iterable == null) {
                        selectedLists.add(null);
                    } else {
                        // Pre-size each output list to the number of input elements.
                        selectedLists.add(Lists.newArrayListWithCapacity(Iterables.size(iterable)));
                    }
                }
                if (iterable != null) {
                    for (Object o : iterable) {
                        // Select from a single element into a temporary row, then spread that row's
                        // values across the per-field output lists.
                        Row.Builder selectElementBuilder = Row.withSchema(nestedSchema);
                        selectIntoRowWithQualifiers(qualifiers, qualifierPosition + 1, o, selectElementBuilder, fieldAccessDescriptor, nestedInputType, nestedOutputType);
                        Row elementBeforeDistribution = selectElementBuilder.build();
                        for (int i = 0; i < nestedSchema.getFieldCount(); ++i) {
                            selectedLists.get(i).add(elementBeforeDistribution.getValue(i));
                        }
                    }
                }
                // Emit the distributed lists in nested-schema field order.
                for (List aList : selectedLists) {
                    output.addValue(aList);
                }
                break;
            }
        case MAP:
            {
                FieldType nestedInputType = checkNotNull(inputType.getMapValueType());
                FieldType nestedOutputType = checkNotNull(outputType.getMapValueType());
                // When selecting multiple subelements under a map, we distribute the select
                // resulting in multiple maps. The semantics are the same as for lists above (except we
                // only support subelement select for map values, not for map keys).
                // Same throwaway single-field schema trick as in the LIST branch, applied to the
                // map value type.
                Schema tempSchema = Schema.builder().addField("a", nestedInputType).build();
                FieldAccessDescriptor tempAccessDescriptor = FieldAccessDescriptor.create().withNestedField("a", fieldAccessDescriptor).resolve(tempSchema);
                Schema nestedSchema = getOutputSchema(tempSchema, tempAccessDescriptor);
                // One output map per selected field; a null input map yields null output maps.
                List<Map> selectedMaps = Lists.newArrayListWithExpectedSize(nestedSchema.getFieldCount());
                for (int i = 0; i < nestedSchema.getFieldCount(); ++i) {
                    if (value == null) {
                        selectedMaps.add(null);
                    } else {
                        selectedMaps.add(Maps.newHashMap());
                    }
                }
                if (value != null) {
                    Map<Object, Object> map = (Map) value;
                    for (Map.Entry<Object, Object> entry : map.entrySet()) {
                        // Select from a single map value into a temporary row, then store each of
                        // that row's values under the original key in the matching output map.
                        Row.Builder selectValueBuilder = Row.withSchema(nestedSchema);
                        selectIntoRowWithQualifiers(qualifiers, qualifierPosition + 1, entry.getValue(), selectValueBuilder, fieldAccessDescriptor, nestedInputType, nestedOutputType);
                        Row valueBeforeDistribution = selectValueBuilder.build();
                        for (int i = 0; i < nestedSchema.getFieldCount(); ++i) {
                            selectedMaps.get(i).put(entry.getKey(), valueBeforeDistribution.getValue(i));
                        }
                    }
                }
                // Emit the distributed maps in nested-schema field order.
                for (Map aMap : selectedMaps) {
                    output.addValue(aMap);
                }
                break;
            }
        default:
            throw new RuntimeException("Unexpected type " + qualifier.getKind());
    }
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) MapQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.MapQualifier) ListQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.ListQualifier) Qualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier) List(java.util.List) Row(org.apache.beam.sdk.values.Row) Map(java.util.Map) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 27 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class SelectHelpers method allLeafFields.

/**
 * Walks {@code schema} depth-first and records every leaf field in {@code fieldsSelected}.
 *
 * <p>A field whose type is composite, or whose collection element type is composite, is
 * descended into through its row schema. Any other field is treated as a leaf: its dotted path
 * (the current {@code nameComponents} joined with {@code "."}) is mapped to the name produced by
 * {@code nameFn}.
 *
 * @param schema the schema whose fields are visited
 * @param nameComponents path of field names leading to {@code schema}; pushed and popped in
 *     place while iterating, so it holds the same elements on return as on entry
 * @param nameFn computes the output name for a leaf from its path components
 * @param fieldsSelected receives one entry per leaf: dotted path to generated name
 */
private static void allLeafFields(Schema schema, List<String> nameComponents, SerializableFunction<List<String>, String> nameFn, Map<String, String> fieldsSelected) {
    for (Field current : schema.getFields()) {
        // Push this field's name for the duration of its visit.
        nameComponents.add(current.getName());

        FieldType type = current.getType();
        FieldType elementType = type.getCollectionElementType();
        boolean isRow = type.getTypeName().isCompositeType();
        boolean isCollectionOfRows = !isRow && elementType != null && elementType.getTypeName().isCompositeType();

        if (isRow) {
            allLeafFields(type.getRowSchema(), nameComponents, nameFn, fieldsSelected);
        } else if (isCollectionOfRows) {
            allLeafFields(elementType.getRowSchema(), nameComponents, nameFn, fieldsSelected);
        } else {
            // Leaf: map the dotted path to the generated name.
            String generatedName = nameFn.apply(nameComponents);
            String dottedPath = String.join(".", nameComponents);
            fieldsSelected.put(dottedPath, generatedName);
        }

        // Pop this field's name before moving on to its sibling.
        int lastIndex = nameComponents.size() - 1;
        nameComponents.remove(lastIndex);
    }
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 28 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class FromRowUsingCreator method fromRow.

/**
 * Converts {@code row} into an instance of {@code clazz}.
 *
 * <p>If {@code row} is a {@link RowWithGetters} whose getter target is exactly of class
 * {@code clazz}, that target is returned as-is. Otherwise each field value is converted with
 * {@code fromValue} and the results are handed to a creator obtained from
 * {@code schemaTypeCreatorFactory}.
 *
 * @param row the row to convert
 * @param clazz the class of the object to produce
 * @param typeFactory supplies the per-field type information for {@code clazz} and the row's
 *     schema
 * @return the getter target of {@code row} when its class equals {@code clazz}, otherwise a
 *     newly created object
 * @throws IllegalStateException presumably, via {@code checkState}, when the number of type
 *     informations differs from the row's field count — confirm which {@code checkState} is
 *     imported
 */
@SuppressWarnings("unchecked")
public <ValueT> ValueT fromRow(Row row, Class<ValueT> clazz, Factory<List<FieldValueTypeInformation>> typeFactory) {
    // Shortcut: a getter-backed row already wraps an object of the requested class.
    if (row instanceof RowWithGetters) {
        Object wrapped = ((RowWithGetters) row).getGetterTarget();
        if (wrapped.getClass().equals(clazz)) {
            return (ValueT) wrapped;
        }
    }

    Object[] constructorArgs = new Object[row.getFieldCount()];
    Schema rowSchema = row.getSchema();
    List<FieldValueTypeInformation> fieldInfos = typeFactory.create(clazz, rowSchema);
    checkState(fieldInfos.size() == row.getFieldCount(), "Did not have a matching number of type informations and fields.");

    // Convert every field value to the Java representation described by its type information.
    for (int index = 0; index < row.getFieldCount(); index++) {
        FieldType fieldType = rowSchema.getField(index).getType();
        FieldValueTypeInformation fieldInfo = checkNotNull(fieldInfos.get(index));
        constructorArgs[index] = fromValue(fieldType, row.getValue(index), fieldInfo.getRawType(), fieldInfo, typeFactory);
    }

    SchemaUserTypeCreator typeCreator = schemaTypeCreatorFactory.create(clazz, rowSchema);
    return (ValueT) typeCreator.create(constructorArgs);
}
Also used : RowWithGetters(org.apache.beam.sdk.values.RowWithGetters) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 29 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class SchemaTranslation method fieldTypeFromProtoWithoutNullable.

/**
 * Translates a {@code SchemaApi.FieldType} proto into the corresponding Java {@link FieldType},
 * dispatching on the proto's type-info case.
 *
 * <p>Atomic, row, array, iterable and map types map directly onto the matching
 * {@code FieldType} factory. Logical types are resolved in this order: a class registered in
 * {@code STANDARD_LOGICAL_TYPES} (instantiated through its zero-argument constructor), the
 * DATETIME and DECIMAL URNs, the Java-SDK URN (payload deserialized into a {@code LogicalType}),
 * and finally an {@code UnknownLogicalType} that carries the raw URN, payload, argument and
 * representation.
 *
 * <p>This method does not read any nullability information from the proto; presumably the
 * caller applies it — confirm against {@code fieldTypeFromProto}.
 *
 * @param protoFieldType the proto field type to translate
 * @return the equivalent Java field type
 * @throws IllegalArgumentException if the atomic type is UNSPECIFIED or unknown, or the
 *     type-info case is not one of the handled cases
 * @throws RuntimeException if a standard logical type cannot be instantiated reflectively
 */
private static FieldType fieldTypeFromProtoWithoutNullable(SchemaApi.FieldType protoFieldType) {
    switch(protoFieldType.getTypeInfoCase()) {
        case ATOMIC_TYPE:
            switch(protoFieldType.getAtomicType()) {
                case BYTE:
                    return FieldType.of(TypeName.BYTE);
                case INT16:
                    return FieldType.of(TypeName.INT16);
                case INT32:
                    return FieldType.of(TypeName.INT32);
                case INT64:
                    return FieldType.of(TypeName.INT64);
                case FLOAT:
                    return FieldType.of(TypeName.FLOAT);
                case DOUBLE:
                    return FieldType.of(TypeName.DOUBLE);
                case STRING:
                    return FieldType.of(TypeName.STRING);
                case BOOLEAN:
                    return FieldType.of(TypeName.BOOLEAN);
                case BYTES:
                    return FieldType.of(TypeName.BYTES);
                case UNSPECIFIED:
                    throw new IllegalArgumentException("Encountered UNSPECIFIED AtomicType");
                default:
                    throw new IllegalArgumentException("Encountered unknown AtomicType: " + protoFieldType.getAtomicType());
            }
        case ROW_TYPE:
            return FieldType.row(schemaFromProto(protoFieldType.getRowType().getSchema()));
        case ARRAY_TYPE:
            return FieldType.array(fieldTypeFromProto(protoFieldType.getArrayType().getElementType()));
        case ITERABLE_TYPE:
            return FieldType.iterable(fieldTypeFromProto(protoFieldType.getIterableType().getElementType()));
        case MAP_TYPE:
            return FieldType.map(fieldTypeFromProto(protoFieldType.getMapType().getKeyType()), fieldTypeFromProto(protoFieldType.getMapType().getValueType()));
        case LOGICAL_TYPE:
            {
                String logicalTypeUrn = protoFieldType.getLogicalType().getUrn();

                // 1. Standard logical types are looked up by URN and built reflectively.
                Class<? extends LogicalType<?, ?>> standardTypeClass = STANDARD_LOGICAL_TYPES.get(logicalTypeUrn);
                if (standardTypeClass != null) {
                    try {
                        return FieldType.logicalType(standardTypeClass.getConstructor().newInstance());
                    } catch (NoSuchMethodException e) {
                        throw new RuntimeException(String.format("Standard logical type '%s' does not have a zero-argument constructor.", logicalTypeUrn), e);
                    } catch (IllegalAccessException e) {
                        throw new RuntimeException(String.format("Standard logical type '%s' has a zero-argument constructor, but it is not accessible.", logicalTypeUrn), e);
                    } catch (ReflectiveOperationException e) {
                        throw new RuntimeException(String.format("Error instantiating logical type '%s' with zero-argument constructor.", logicalTypeUrn), e);
                    }
                }

                // 2. The DATETIME and DECIMAL URNs map onto the built-in Java field types rather
                // than onto logical types (the original comment here references BEAM-7554).
                if (logicalTypeUrn.equals(URN_BEAM_LOGICAL_DATETIME)) {
                    return FieldType.DATETIME;
                }
                if (logicalTypeUrn.equals(URN_BEAM_LOGICAL_DECIMAL)) {
                    return FieldType.DECIMAL;
                }

                // 3. Java-SDK logical types travel as a serialized LogicalType in the payload.
                // NOTE(review): this deserializes an object from the proto payload — confirm the
                // payload always comes from a trusted source.
                if (logicalTypeUrn.equals(URN_BEAM_LOGICAL_JAVASDK)) {
                    return FieldType.logicalType((LogicalType) SerializableUtils.deserializeFromByteArray(protoFieldType.getLogicalType().getPayload().toByteArray(), "logicalType"));
                }

                // 4. Anything else is preserved as an UnknownLogicalType. The argument type and
                // value are only populated when the proto declares an argument type.
                @Nullable FieldType argumentType = null;
                @Nullable Object argumentValue = null;
                if (protoFieldType.getLogicalType().hasArgumentType()) {
                    argumentType = fieldTypeFromProto(protoFieldType.getLogicalType().getArgumentType());
                    argumentValue = fieldValueFromProto(argumentType, protoFieldType.getLogicalType().getArgument());
                }
                byte[] payloadBytes = protoFieldType.getLogicalType().getPayload().toByteArray();
                FieldType representationType = fieldTypeFromProto(protoFieldType.getLogicalType().getRepresentation());
                return FieldType.logicalType(new UnknownLogicalType(logicalTypeUrn, payloadBytes, argumentType, argumentValue, representationType));
            }
        default:
            throw new IllegalArgumentException("Unexpected type_info: " + protoFieldType.getTypeInfoCase());
    }
}
Also used : UnknownLogicalType(org.apache.beam.sdk.schemas.logicaltypes.UnknownLogicalType) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Nullable(org.checkerframework.checker.nullness.qual.Nullable) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 30 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class SchemaTranslation method optionsFromProto.

/**
 * Builds a {@code Schema.Options} from a list of proto options.
 *
 * <p>For each proto option, the type is translated with {@code fieldTypeFromProto}, the value is
 * decoded against that type with {@code fieldValueFromProto}, and the result is registered on
 * the builder under the option's name.
 *
 * @param protoOptions the proto options to translate
 * @return the options built from every entry in {@code protoOptions}
 */
private static Schema.Options optionsFromProto(List<SchemaApi.Option> protoOptions) {
    Schema.Options.Builder builder = Schema.Options.builder();
    for (SchemaApi.Option option : protoOptions) {
        // The decoded type is needed first: the value is interpreted against it.
        FieldType optionType = fieldTypeFromProto(option.getType());
        String optionName = option.getName();
        Object optionValue = fieldValueFromProto(optionType, option.getValue());
        builder.setOption(optionName, optionType, optionValue);
    }
    return builder.build();
}
Also used : SchemaApi(org.apache.beam.model.pipeline.v1.SchemaApi) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Aggregations

FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)58 Schema (org.apache.beam.sdk.schemas.Schema)24 Field (org.apache.beam.sdk.schemas.Schema.Field)20 Row (org.apache.beam.sdk.values.Row)15 Test (org.junit.Test)15 Map (java.util.Map)10 List (java.util.List)9 ArrayList (java.util.ArrayList)7 Nullable (org.checkerframework.checker.nullness.qual.Nullable)7 FieldDescriptor (com.google.protobuf.Descriptors.FieldDescriptor)6 BigDecimal (java.math.BigDecimal)6 Schema.toSchema (org.apache.beam.sdk.schemas.Schema.toSchema)6 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)6 Collectors (java.util.stream.Collectors)5 EnumerationType (org.apache.beam.sdk.schemas.logicaltypes.EnumerationType)5 LocalDateTime (java.time.LocalDateTime)4 LocalTime (java.time.LocalTime)4 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)3 TableSchema (com.google.api.services.bigquery.model.TableSchema)3 AutoValue (com.google.auto.value.AutoValue)3