Use of org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier in the Apache Beam project.
Class FieldAccessDescriptor, method fillInMissingQualifiers.
/**
 * Returns a copy of {@code fieldDescriptor} whose qualifier list has one entry for every
 * collection or map level of the referenced field's type.
 *
 * <p>Users may leave out qualifiers when they are all wildcards: if {@code a} is a list of a list
 * of rows, {@code a.b} is accepted as shorthand for {@code a[*][*].b}. This method walks down the
 * nested container types and supplies a wildcard qualifier wherever one was not given.
 *
 * @param fieldDescriptor descriptor whose field id is looked up in {@code schema}
 * @param schema schema containing the referenced field
 * @return the descriptor rebuilt with the completed qualifier list
 * @throws IllegalArgumentException (via {@code checkArgument}) if a supplied qualifier is not of
 *     the kind matching the container at that level, or is not the wildcard qualifier
 */
private FieldDescriptor fillInMissingQualifiers(FieldDescriptor fieldDescriptor, Schema schema) {
  FieldType currentType = schema.getField(fieldDescriptor.getFieldId()).getType();
  Iterator<Qualifier> supplied = fieldDescriptor.getQualifiers().iterator();
  List<Qualifier> completed = Lists.newArrayList();

  while (true) {
    boolean atCollection = currentType.getTypeName().isCollectionType();
    if (!atCollection && !currentType.getTypeName().isMapType()) {
      // Reached a type that is neither a collection nor a map; nothing left to qualify.
      break;
    }

    // Take the next user-supplied qualifier, if any remain.
    Qualifier next = null;
    if (supplied.hasNext()) {
      next = supplied.next();
    }

    if (atCollection) {
      if (next == null) {
        next = Qualifier.of(ListQualifier.ALL);
      }
      checkArgument(next.getKind().equals(Qualifier.Kind.LIST));
      checkArgument(next.getList().equals(ListQualifier.ALL));
      completed.add(next);
      currentType = currentType.getCollectionElementType();
    } else {
      if (next == null) {
        next = Qualifier.of(MapQualifier.ALL);
      }
      checkArgument(next.getKind().equals(Qualifier.Kind.MAP));
      checkArgument(next.getMap().equals(MapQualifier.ALL));
      completed.add(next);
      // Only map values are descended into; the key type is not inspected here.
      currentType = currentType.getMapValueType();
    }
  }

  return fieldDescriptor.toBuilder().setQualifiers(completed).build();
}
Use of org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier in the Apache Beam project.
Class SelectHelpers, method selectIntoRowWithQualifiers.
/**
 * Selects fields out of a value nested inside lists and/or maps and appends the selected values
 * to {@code output}.
 *
 * <p>The qualifier list is walked starting at {@code qualifierPosition}; each qualifier strips one
 * container level off {@code value} and recurses on the contained elements. Once every qualifier
 * has been consumed, the remaining value is cast to a {@link Row} and passed to
 * {@code selectIntoRow}.
 *
 * <p>Selecting several subfields under a container distributes the select, so one container is
 * added to {@code output} per selected field. For example, if a field {@code list} has type
 * {@code {a: string, b: int}[]}, selecting {@code list.a, list.b} yields
 * {@code {a: string[], b: int[]}}. A {@code null} container produces a {@code null} output value
 * for every selected field.
 *
 * @param qualifiers container qualifiers to walk through
 * @param qualifierPosition index into {@code qualifiers} of the level currently being processed
 * @param value the list, map, or (once all qualifiers are consumed) row to select from
 * @param output builder that receives one value per selected field
 * @param fieldAccessDescriptor describes which fields of the innermost row are selected
 * @param inputType type of {@code value}
 * @param outputType output type at this nesting level; only used to derive the nested output type
 * @throws RuntimeException if a qualifier is neither of LIST nor MAP kind
 */
private static void selectIntoRowWithQualifiers(
    List<Qualifier> qualifiers,
    int qualifierPosition,
    Object value,
    Row.Builder output,
    FieldAccessDescriptor fieldAccessDescriptor,
    FieldType inputType,
    FieldType outputType) {
  if (qualifierPosition >= qualifiers.size()) {
    // We have already constructed all arrays and maps. What remains must be a Row.
    Row row = (Row) value;
    selectIntoRow(inputType.getRowSchema(), row, output, fieldAccessDescriptor);
    return;
  }

  Qualifier qualifier = qualifiers.get(qualifierPosition);
  switch (qualifier.getKind()) {
    case LIST:
      {
        FieldType nestedInputType = checkNotNull(inputType.getCollectionElementType());
        FieldType nestedOutputType = checkNotNull(outputType.getCollectionElementType());
        // The static element type is unknown here, so the cast cannot be checked.
        @SuppressWarnings("unchecked")
        Iterable<Object> iterable = (Iterable<Object>) value;

        Schema nestedSchema = nestedSelectSchema(nestedInputType, fieldAccessDescriptor);
        int fieldCount = nestedSchema.getFieldCount();
        List<List<Object>> selectedLists = Lists.newArrayListWithCapacity(fieldCount);

        if (iterable == null) {
          for (int i = 0; i < fieldCount; i++) {
            selectedLists.add(null);
          }
        } else {
          // Size the per-field lists once instead of re-measuring the iterable for every field.
          // The size is only measured when at least one list will actually be created.
          int elementCount = (fieldCount > 0) ? Iterables.size(iterable) : 0;
          for (int i = 0; i < fieldCount; i++) {
            selectedLists.add(Lists.newArrayListWithCapacity(elementCount));
          }
          for (Object element : iterable) {
            Row.Builder selectElementBuilder = Row.withSchema(nestedSchema);
            selectIntoRowWithQualifiers(
                qualifiers,
                qualifierPosition + 1,
                element,
                selectElementBuilder,
                fieldAccessDescriptor,
                nestedInputType,
                nestedOutputType);
            Row elementBeforeDistribution = selectElementBuilder.build();
            // Distribute the selected row: field i of each element goes into list i.
            for (int i = 0; i < fieldCount; i++) {
              selectedLists.get(i).add(elementBeforeDistribution.getValue(i));
            }
          }
        }

        for (List<Object> selectedList : selectedLists) {
          output.addValue(selectedList);
        }
        break;
      }
    case MAP:
      {
        FieldType nestedInputType = checkNotNull(inputType.getMapValueType());
        FieldType nestedOutputType = checkNotNull(outputType.getMapValueType());

        // Same distribution semantics as for lists, except that subelement select is only
        // applied to map values; keys are carried over unchanged.
        Schema nestedSchema = nestedSelectSchema(nestedInputType, fieldAccessDescriptor);
        int fieldCount = nestedSchema.getFieldCount();
        List<Map<Object, Object>> selectedMaps = Lists.newArrayListWithExpectedSize(fieldCount);

        if (value == null) {
          for (int i = 0; i < fieldCount; i++) {
            selectedMaps.add(null);
          }
        } else {
          for (int i = 0; i < fieldCount; i++) {
            selectedMaps.add(Maps.<Object, Object>newHashMap());
          }
          // The static key/value types are unknown here, so the cast cannot be checked.
          @SuppressWarnings("unchecked")
          Map<Object, Object> map = (Map<Object, Object>) value;
          for (Map.Entry<Object, Object> entry : map.entrySet()) {
            Row.Builder selectValueBuilder = Row.withSchema(nestedSchema);
            selectIntoRowWithQualifiers(
                qualifiers,
                qualifierPosition + 1,
                entry.getValue(),
                selectValueBuilder,
                fieldAccessDescriptor,
                nestedInputType,
                nestedOutputType);
            Row valueBeforeDistribution = selectValueBuilder.build();
            // Distribute the selected row: field i of each value goes into map i under its key.
            for (int i = 0; i < fieldCount; i++) {
              selectedMaps.get(i).put(entry.getKey(), valueBeforeDistribution.getValue(i));
            }
          }
        }

        for (Map<Object, Object> selectedMap : selectedMaps) {
          output.addValue(selectedMap);
        }
        break;
      }
    default:
      throw new RuntimeException("Unexpected type " + qualifier.getKind());
  }
}

/**
 * Computes the schema obtained by applying {@code fieldAccessDescriptor} to a single container
 * element, by wrapping the element type in a temporary one-field schema and resolving the
 * descriptor against it.
 */
private static Schema nestedSelectSchema(
    FieldType nestedInputType, FieldAccessDescriptor fieldAccessDescriptor) {
  Schema tempSchema = Schema.builder().addField("a", nestedInputType).build();
  FieldAccessDescriptor tempAccessDescriptor =
      FieldAccessDescriptor.create()
          .withNestedField("a", fieldAccessDescriptor)
          .resolve(tempSchema);
  return getOutputSchema(tempSchema, tempAccessDescriptor);
}
Use of org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier in the Apache Beam project.
Class FieldAccessDescriptor, method validateFieldDescriptor.
/**
 * Validates that {@code fieldDescriptor} refers to a field of {@code schema} and that any
 * qualifiers it carries match the field's nested container types.
 *
 * <p>For example, a selector {@code a[*][*]} requires {@code a} to be a list of a list.
 *
 * @param schema schema the descriptor is validated against
 * @param fieldDescriptor descriptor identifying the field by id or, if the id is absent, by name
 * @throws IllegalArgumentException if the field id is out of range, a qualifier is not the
 *     wildcard qualifier, or a qualifier's kind does not match the type at that nesting level
 * @throws IllegalStateException if a qualifier has a kind other than LIST or MAP
 */
private static void validateFieldDescriptor(Schema schema, FieldDescriptor fieldDescriptor) {
  Integer fieldId = fieldDescriptor.getFieldId();
  if (fieldId != null && (fieldId < 0 || fieldId >= schema.getFieldCount())) {
    throw new IllegalArgumentException("Invalid field id " + fieldId + " for schema " + schema);
  }

  // Look the field up by id when one is present, otherwise by name.
  Field field =
      (fieldId != null)
          ? schema.getField(fieldId)
          : schema.getField(fieldDescriptor.getFieldName());

  // Walk down one container level per qualifier, checking that each level has the right kind.
  FieldType fieldType = field.getType();
  for (Qualifier qualifier : fieldDescriptor.getQualifiers()) {
    switch (qualifier.getKind()) {
      case LIST:
        checkArgument(
            qualifier.getList().equals(ListQualifier.ALL),
            "Unsupported list qualifier %s on field %s; expected %s",
            qualifier.getList(),
            field.getName(),
            ListQualifier.ALL);
        checkArgument(
            fieldType.getTypeName().isCollectionType(),
            "List qualifier on field %s applied to non-collection type %s",
            field.getName(),
            fieldType.getTypeName());
        fieldType = fieldType.getCollectionElementType();
        break;
      case MAP:
        checkArgument(
            qualifier.getMap().equals(MapQualifier.ALL),
            "Unsupported map qualifier %s on field %s; expected %s",
            qualifier.getMap(),
            field.getName(),
            MapQualifier.ALL);
        checkArgument(
            fieldType.getTypeName().isMapType(),
            "Map qualifier on field %s applied to non-map type %s",
            field.getName(),
            fieldType.getTypeName());
        fieldType = fieldType.getMapValueType();
        break;
      default:
        throw new IllegalStateException("Unexpected qualifier type " + qualifier.getKind());
    }
  }
}
Use of org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor.Qualifier in the Apache Beam project.
Class SelectHelpers, method getOutputSchemaHelper.
/**
 * Builds the output schema for a select that passes through nested lists and/or maps.
 *
 * <p>Qualifiers are consumed from {@code qualifierPosition} onward. Once none remain, the input
 * type must be a composite type and its row schema is handed to
 * {@code getOutputSchemaTrackingNullable}. At each container level, every field of the nested
 * output schema is re-wrapped in the same kind of container as the input (array, iterable, or map
 * with the input's key type), and marked nullable according to {@code isNullable}.
 *
 * @param inputFieldType type at the current nesting level
 * @param fieldAccessDescriptor describes which fields of the innermost row are selected
 * @param qualifiers container qualifiers to walk through
 * @param qualifierPosition index into {@code qualifiers} of the level currently being processed
 * @param isNullable nullability applied to the fields produced at this level; nested levels are
 *     always computed with {@code false}
 * @return schema with one field per selected field, wrapped in the enclosing container types
 * @throws IllegalArgumentException if a qualifier is not the wildcard qualifier, or the type at a
 *     level does not match what the qualifier requires
 * @throws IllegalStateException if a qualifier has a kind other than LIST or MAP
 */
private static Schema getOutputSchemaHelper(
    FieldType inputFieldType,
    FieldAccessDescriptor fieldAccessDescriptor,
    List<Qualifier> qualifiers,
    int qualifierPosition,
    boolean isNullable) {
  if (qualifierPosition >= qualifiers.size()) {
    // We have walked through any containers, and are at a row type. Extract the subschema
    // for the row, preserving nullable attributes.
    checkArgument(
        inputFieldType.getTypeName().isCompositeType(),
        "Expected a composite type after applying all qualifiers, but found %s",
        inputFieldType.getTypeName());
    return getOutputSchemaTrackingNullable(
        inputFieldType.getRowSchema(), fieldAccessDescriptor, isNullable);
  }

  Qualifier qualifier = qualifiers.get(qualifierPosition);
  Schema.Builder builder = Schema.builder();
  switch (qualifier.getKind()) {
    case LIST:
      {
        checkArgument(
            qualifier.getList().equals(ListQualifier.ALL),
            "Unsupported list qualifier %s; expected %s",
            qualifier.getList(),
            ListQualifier.ALL);
        FieldType componentType = checkNotNull(inputFieldType.getCollectionElementType());
        Schema outputComponent =
            getOutputSchemaHelper(
                componentType, fieldAccessDescriptor, qualifiers, qualifierPosition + 1, false);
        // The container kind is the same for every field, so determine it once.
        boolean inputIsArray = TypeName.ARRAY.equals(inputFieldType.getTypeName());
        for (Field field : outputComponent.getFields()) {
          FieldType wrappedType;
          if (inputIsArray) {
            wrappedType = FieldType.array(field.getType());
          } else {
            checkArgument(
                TypeName.ITERABLE.equals(inputFieldType.getTypeName()),
                "List qualifier applied to type %s, which is neither an array nor an iterable",
                inputFieldType.getTypeName());
            wrappedType = FieldType.iterable(field.getType());
          }
          builder.addField(Field.of(field.getName(), wrappedType).withNullable(isNullable));
        }
        return builder.build();
      }
    case MAP:
      {
        checkArgument(
            qualifier.getMap().equals(MapQualifier.ALL),
            "Unsupported map qualifier %s; expected %s",
            qualifier.getMap(),
            MapQualifier.ALL);
        FieldType keyType = checkNotNull(inputFieldType.getMapKeyType());
        FieldType valueType = checkNotNull(inputFieldType.getMapValueType());
        Schema outputValueSchema =
            getOutputSchemaHelper(
                valueType, fieldAccessDescriptor, qualifiers, qualifierPosition + 1, false);
        // Each selected field becomes a map from the original key type to that field's type.
        for (Field field : outputValueSchema.getFields()) {
          Field newField = Field.of(field.getName(), FieldType.map(keyType, field.getType()));
          builder.addField(newField.withNullable(isNullable));
        }
        return builder.build();
      }
    default:
      throw new IllegalStateException("Unexpected qualifier type " + qualifier.getKind());
  }
}
Aggregations