Use of org.apache.iceberg.types.Types in the Apache Hive project.
Class ParquetSchemaFieldNameVisitor, method struct.
/**
 * Rebuilds the Parquet type for a struct by walking the expected Iceberg fields in order.
 *
 * <p>Fields whose ID is one of the metadata column IDs are skipped. Every other expected field is looked
 * up by its field ID in {@code typesById}: a hit contributes its type to the result and its file-side
 * name to the column name list; a miss contributes only a name (the expected field name, or
 * {@code DUMMY_COL_NAME} when the original file schema already contains a field with that name).
 *
 * @param expected the expected Iceberg struct; {@code null} is treated as a struct without fields
 * @param struct the Parquet group being visited; whether it is a {@link MessageType} decides both the
 *     flag passed to {@code appendToColNamesList} and the kind of type returned
 * @param fields not read by this method
 * @return a {@link MessageType} named {@code "table"} when {@code struct} is a message type; otherwise a
 *     repeated {@link GroupType} named after {@code fieldNames.peek()}, which is also stored in
 *     {@code typesById} under the ID of {@code struct}
 */
@Override
public Type struct(Types.StructType expected, GroupType struct, List<Type> fields) {
  final boolean rootSchema = struct instanceof MessageType;

  final List<Types.NestedField> wanted;
  if (expected == null) {
    wanted = ImmutableList.of();
  } else {
    wanted = expected.fields();
  }
  final List<Type> resolved = Lists.newArrayListWithExpectedSize(wanted.size());

  for (Types.NestedField wantedField : wanted) {
    final int fieldId = wantedField.fieldId();
    if (MetadataColumns.metadataFieldIds().contains(fieldId)) {
      continue;
    }

    final Type fromFile = typesById.get(fieldId);
    if (fromFile != null) {
      // The column exists in this parquet file: keep its type and the name it carries in the file.
      resolved.add(fromFile);
      appendToColNamesList(rootSchema, fromFile.getName());
      continue;
    }

    if (originalFileSchema.containsField(wantedField.name())) {
      // Same name but a different ID in the file, so the column must have been recreated since.
      // A dummy column name forces the Hive Parquet reader to return null for this column.
      appendToColNamesList(rootSchema, DUMMY_COL_NAME);
    } else {
      // A new field that is not in this parquet file yet: record the new field name instead of null.
      appendToColNamesList(rootSchema, wantedField.name());
    }
  }

  if (rootSchema) {
    return new MessageType("table", resolved);
  }

  final GroupType rebuilt = new GroupType(Type.Repetition.REPEATED, fieldNames.peek(), resolved);
  typesById.put(struct.getId().intValue(), rebuilt);
  return rebuilt;
}
Aggregations