Search in sources :

Example 1 with Qualifier

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier in project beam by apache.

the class FieldAccessDescriptor method fillInMissingQualifiers.

/**
 * Returns a copy of {@code fieldDescriptor} whose qualifier list has one entry for every
 * collection or map level found while descending from the referenced field's type.
 *
 * <p>Users may leave out qualifiers when they would all be wildcards: if {@code a} is a list of
 * a list of rows, {@code a.b} is accepted as shorthand for {@code a[*][*].b}. Qualifiers the
 * user did supply are kept, but each one must be the wildcard of the kind matching the
 * container at its depth; anything else is rejected by {@code checkArgument}.
 *
 * @param fieldDescriptor descriptor whose field id is looked up in {@code schema}
 * @param schema schema containing the referenced field
 * @return the descriptor rebuilt with the completed qualifier list
 */
private FieldDescriptor fillInMissingQualifiers(FieldDescriptor fieldDescriptor, Schema schema) {
    FieldType current = schema.getField(fieldDescriptor.getFieldId()).getType();
    Iterator<Qualifier> supplied = fieldDescriptor.getQualifiers().iterator();
    List<Qualifier> completed = Lists.newArrayList();
    // Peel off one container level per iteration until a non-container type is reached.
    while (current.getTypeName().isCollectionType() || current.getTypeName().isMapType()) {
        Qualifier given = null;
        if (supplied.hasNext()) {
            given = supplied.next();
        }
        if (current.getTypeName().isCollectionType()) {
            // Missing qualifier at a list level defaults to the list wildcard.
            Qualifier listQualifier = given;
            if (listQualifier == null) {
                listQualifier = Qualifier.of(ListQualifier.ALL);
            }
            checkArgument(listQualifier.getKind().equals(Qualifier.Kind.LIST));
            checkArgument(listQualifier.getList().equals(ListQualifier.ALL));
            completed.add(listQualifier);
            current = current.getCollectionElementType();
        } else {
            // The loop condition held and this is not a collection, so it must be a map.
            Qualifier mapQualifier = given;
            if (mapQualifier == null) {
                mapQualifier = Qualifier.of(MapQualifier.ALL);
            }
            checkArgument(mapQualifier.getKind().equals(Qualifier.Kind.MAP));
            checkArgument(mapQualifier.getMap().equals(MapQualifier.ALL));
            completed.add(mapQualifier);
            current = current.getMapValueType();
        }
    }
    return fieldDescriptor.toBuilder().setQualifiers(completed).build();
}
Also used : MapQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.MapQualifier) ListQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.ListQualifier) Qualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 2 with Qualifier

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier in project beam by apache.

the class SelectHelpers method selectIntoRowWithQualifiers.

/**
 * Selects the fields described by {@code fieldAccessDescriptor} out of {@code value}, descending
 * through one list or map level per qualifier starting at {@code qualifierPosition}, and
 * appends the results to {@code output}.
 *
 * <p>Once every qualifier has been consumed, {@code value} is cast to a {@link Row} and handed
 * to {@code selectIntoRow}. At a list or map level the select is distributed: one list (or map)
 * is appended to {@code output} per selected field instead of a single container of rows. When
 * the container itself is null, a null is appended for each selected field.
 *
 * @param qualifiers container qualifiers to walk through
 * @param qualifierPosition index of the qualifier to apply at this level
 * @param value the value at this level: an {@code Iterable}, a {@code Map}, or a {@code Row}
 * @param output builder receiving the selected values
 * @param fieldAccessDescriptor the fields to select from the innermost row
 * @param inputType type of {@code value}
 * @param outputType type corresponding to {@code value} in the output
 * @throws RuntimeException if the qualifier kind is neither LIST nor MAP
 */
private static void selectIntoRowWithQualifiers(List<Qualifier> qualifiers, int qualifierPosition, Object value, Row.Builder output, FieldAccessDescriptor fieldAccessDescriptor, FieldType inputType, FieldType outputType) {
    if (qualifierPosition >= qualifiers.size()) {
        // We have already constructed all arrays and maps. What remains must be a Row.
        Row row = (Row) value;
        selectIntoRow(inputType.getRowSchema(), row, output, fieldAccessDescriptor);
        return;
    }
    Qualifier qualifier = qualifiers.get(qualifierPosition);
    switch (qualifier.getKind()) {
        case LIST:
            {
                FieldType nestedInputType = checkNotNull(inputType.getCollectionElementType());
                FieldType nestedOutputType = checkNotNull(outputType.getCollectionElementType());
                @SuppressWarnings("unchecked")
                Iterable<Object> iterable = (Iterable<Object>) value;
                // When selecting multiple subelements under a list, we distribute the select
                // resulting in multiple lists. For example, if there is a field "list" with type
                // {a: string, b: int}[], selecting list.a, list.b results in a schema of type
                // {a: string[], b: int[]}. This preserves the invariant that the name selected always
                // appears in the top-level schema.
                Schema tempSchema = Schema.builder().addField("a", nestedInputType).build();
                FieldAccessDescriptor tempAccessDescriptor = FieldAccessDescriptor.create().withNestedField("a", fieldAccessDescriptor).resolve(tempSchema);
                Schema nestedSchema = getOutputSchema(tempSchema, tempAccessDescriptor);
                int fieldCount = nestedSchema.getFieldCount();
                List<List<Object>> selectedLists = Lists.newArrayListWithCapacity(fieldCount);
                if (iterable == null) {
                    // A null list selects to a null list for every field.
                    for (int i = 0; i < fieldCount; i++) {
                        selectedLists.add(null);
                    }
                } else {
                    // Measure the input once instead of once per output field; for an iterable
                    // that is not a Collection each measurement is a full traversal.
                    int elementCount = fieldCount > 0 ? Iterables.size(iterable) : 0;
                    for (int i = 0; i < fieldCount; i++) {
                        selectedLists.add(Lists.newArrayListWithCapacity(elementCount));
                    }
                    for (Object o : iterable) {
                        Row.Builder selectElementBuilder = Row.withSchema(nestedSchema);
                        selectIntoRowWithQualifiers(qualifiers, qualifierPosition + 1, o, selectElementBuilder, fieldAccessDescriptor, nestedInputType, nestedOutputType);
                        Row elementBeforeDistribution = selectElementBuilder.build();
                        // Scatter the selected row's columns into the per-field lists.
                        for (int i = 0; i < fieldCount; ++i) {
                            selectedLists.get(i).add(elementBeforeDistribution.getValue(i));
                        }
                    }
                }
                for (List<Object> aList : selectedLists) {
                    output.addValue(aList);
                }
                break;
            }
        case MAP:
            {
                FieldType nestedInputType = checkNotNull(inputType.getMapValueType());
                FieldType nestedOutputType = checkNotNull(outputType.getMapValueType());
                // When selecting multiple subelements under a map, we distribute the select
                // resulting in multiple maps. The semantics are the same as for lists above (except we
                // only support subelement select for map values, not for map keys).
                Schema tempSchema = Schema.builder().addField("a", nestedInputType).build();
                FieldAccessDescriptor tempAccessDescriptor = FieldAccessDescriptor.create().withNestedField("a", fieldAccessDescriptor).resolve(tempSchema);
                Schema nestedSchema = getOutputSchema(tempSchema, tempAccessDescriptor);
                int fieldCount = nestedSchema.getFieldCount();
                List<Map<Object, Object>> selectedMaps = Lists.newArrayListWithExpectedSize(fieldCount);
                if (value == null) {
                    // A null map selects to a null map for every field.
                    for (int i = 0; i < fieldCount; ++i) {
                        selectedMaps.add(null);
                    }
                } else {
                    for (int i = 0; i < fieldCount; ++i) {
                        selectedMaps.add(Maps.newHashMap());
                    }
                    @SuppressWarnings("unchecked")
                    Map<Object, Object> map = (Map<Object, Object>) value;
                    for (Map.Entry<Object, Object> entry : map.entrySet()) {
                        Row.Builder selectValueBuilder = Row.withSchema(nestedSchema);
                        selectIntoRowWithQualifiers(qualifiers, qualifierPosition + 1, entry.getValue(), selectValueBuilder, fieldAccessDescriptor, nestedInputType, nestedOutputType);
                        Row valueBeforeDistribution = selectValueBuilder.build();
                        // Each per-field map keeps the original key and receives one column.
                        for (int i = 0; i < fieldCount; ++i) {
                            selectedMaps.get(i).put(entry.getKey(), valueBeforeDistribution.getValue(i));
                        }
                    }
                }
                for (Map<Object, Object> aMap : selectedMaps) {
                    output.addValue(aMap);
                }
                break;
            }
        default:
            throw new RuntimeException("Unexpected type " + qualifier.getKind());
    }
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) MapQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.MapQualifier) ListQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.ListQualifier) Qualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier) List(java.util.List) Row(org.apache.beam.sdk.values.Row) Map(java.util.Map) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 3 with Qualifier

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier in project beam by apache.

the class FieldAccessDescriptor method validateFieldDescriptor.

/**
 * Checks that {@code fieldDescriptor} is consistent with {@code schema}.
 *
 * <p>If the descriptor carries a field id, the id must lie within the schema's field range.
 * Each qualifier must then be a wildcard whose kind matches the container type at its depth:
 * for a selector {@code a[*][*]}, {@code a} has to be a list of a list.
 *
 * @param schema schema the descriptor refers to
 * @param fieldDescriptor descriptor to validate, identified by id or, failing that, by name
 * @throws IllegalArgumentException if the field id is out of range for {@code schema}
 * @throws IllegalStateException if a qualifier has a kind other than LIST or MAP
 */
private static void validateFieldDescriptor(Schema schema, FieldDescriptor fieldDescriptor) {
    final Integer id = fieldDescriptor.getFieldId();
    final Field target;
    if (id == null) {
        // No id available, so fall back to looking the field up by name.
        target = schema.getField(fieldDescriptor.getFieldName());
    } else {
        boolean outOfRange = id < 0 || id >= schema.getFieldCount();
        if (outOfRange) {
            throw new IllegalArgumentException("Invalid field id " + id + " for schema " + schema);
        }
        target = schema.getField(id);
    }
    // Walk one container level per qualifier, verifying the kind matches the type at that depth.
    FieldType current = target.getType();
    for (Qualifier q : fieldDescriptor.getQualifiers()) {
        switch (q.getKind()) {
            case LIST: {
                checkArgument(q.getList().equals(ListQualifier.ALL));
                checkArgument(current.getTypeName().isCollectionType());
                current = current.getCollectionElementType();
                break;
            }
            case MAP: {
                checkArgument(q.getMap().equals(MapQualifier.ALL));
                checkArgument(current.getTypeName().equals(TypeName.MAP));
                current = current.getMapValueType();
                break;
            }
            default:
                throw new IllegalStateException("Unexpected qualifier type " + q.getKind());
        }
    }
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) MapQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.MapQualifier) ListQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.ListQualifier) Qualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 4 with Qualifier

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier in project beam by apache.

the class SelectHelpers method getOutputSchemaHelper.

/**
 * Computes the output schema produced by selecting {@code fieldAccessDescriptor} beneath the
 * container levels described by {@code qualifiers}, starting at {@code qualifierPosition}.
 *
 * <p>When all qualifiers have been consumed, {@code inputFieldType} must be a composite type and
 * the result is delegated to {@code getOutputSchemaTrackingNullable}. At a list or map level the
 * schema of the nested selection is computed first, and each of its fields is then re-wrapped
 * in the same kind of container (array, iterable, or map with the original key type), with
 * nullability set from {@code isNullable}.
 *
 * @param inputFieldType type of the value at this level
 * @param fieldAccessDescriptor the fields to select from the innermost row
 * @param qualifiers container qualifiers to walk through
 * @param qualifierPosition index of the qualifier to apply at this level
 * @param isNullable nullability applied to the fields produced at this level
 * @return the schema of the selected fields
 * @throws IllegalStateException if the qualifier kind is neither LIST nor MAP
 */
private static Schema getOutputSchemaHelper(FieldType inputFieldType, FieldAccessDescriptor fieldAccessDescriptor, List<Qualifier> qualifiers, int qualifierPosition, boolean isNullable) {
    if (qualifierPosition >= qualifiers.size()) {
        // We have walked through any containers, and are at a row type. Extract the subschema
        // for the row, preserving nullable attributes.
        checkArgument(inputFieldType.getTypeName().isCompositeType());
        return getOutputSchemaTrackingNullable(inputFieldType.getRowSchema(), fieldAccessDescriptor, isNullable);
    }
    Qualifier qualifier = qualifiers.get(qualifierPosition);
    Schema.Builder builder = Schema.builder();
    switch (qualifier.getKind()) {
        case LIST:
            checkArgument(qualifier.getList().equals(ListQualifier.ALL));
            FieldType componentType = checkNotNull(inputFieldType.getCollectionElementType());
            Schema outputComponent = getOutputSchemaHelper(componentType, fieldAccessDescriptor, qualifiers, qualifierPosition + 1, false);
            // The container kind does not change per field, so decide it once up front.
            boolean inputIsArray = TypeName.ARRAY.equals(inputFieldType.getTypeName());
            for (Field field : outputComponent.getFields()) {
                Field newField;
                if (inputIsArray) {
                    newField = Field.of(field.getName(), FieldType.array(field.getType()));
                } else {
                    checkArgument(TypeName.ITERABLE.equals(inputFieldType.getTypeName()));
                    newField = Field.of(field.getName(), FieldType.iterable(field.getType()));
                }
                builder.addField(newField.withNullable(isNullable));
            }
            return builder.build();
        case MAP:
            checkArgument(qualifier.getMap().equals(MapQualifier.ALL));
            FieldType keyType = checkNotNull(inputFieldType.getMapKeyType());
            FieldType valueType = checkNotNull(inputFieldType.getMapValueType());
            Schema outputValueSchema = getOutputSchemaHelper(valueType, fieldAccessDescriptor, qualifiers, qualifierPosition + 1, false);
            for (Field field : outputValueSchema.getFields()) {
                // Only map values are selected into; the key type is carried over unchanged.
                Field newField = Field.of(field.getName(), FieldType.map(keyType, field.getType()));
                builder.addField(newField.withNullable(isNullable));
            }
            return builder.build();
        default:
            // Name the offending kind so the failure can be diagnosed from the message alone.
            throw new IllegalStateException("Unexpected qualifier type " + qualifier.getKind());
    }
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) Schema(org.apache.beam.sdk.schemas.Schema) MapQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.MapQualifier) ListQualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.ListQualifier) Qualifier(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Aggregations

ListQualifier (org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.ListQualifier): 4; MapQualifier (org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.MapQualifier): 4; Qualifier (org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier): 4; FieldType (org.apache.beam.sdk.schemas.Schema.FieldType): 4; Schema (org.apache.beam.sdk.schemas.Schema): 2; Field (org.apache.beam.sdk.schemas.Schema.Field): 2; List (java.util.List): 1; Map (java.util.Map): 1; FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor): 1; Row (org.apache.beam.sdk.values.Row): 1