Search in sources :

Example 6 with Field

use of io.trino.parquet.Field in project trino by trinodb.

In the class IcebergParquetColumnIOConverter, method constructField:

/**
 * Builds the Parquet reader {@link Field} tree for one column by walking the requested
 * type (taken from {@code context}) and the file's {@link ColumnIO} side by side.
 *
 * <ul>
 *   <li>Row types: each child is resolved through {@code lookupColumnById} using the child
 *       identity's ID; the result is empty when none of the children could be resolved.</li>
 *   <li>Map types: key and value are taken positionally from the key-value group; the result
 *       is empty when that group does not have exactly two children.</li>
 *   <li>Array types: the single element is taken from the group's only child; the result is
 *       empty when the group does not have exactly one child.</li>
 *   <li>Anything else is treated as a primitive column.</li>
 * </ul>
 *
 * @param context requested type together with its column identity; must not be null
 * @param columnIO the column as found in the Parquet file, or {@code null} if it is absent
 * @return the constructed field, or {@link Optional#empty()} when {@code columnIO} is
 *         {@code null} or the file's structure does not provide the requested column
 * @throws NullPointerException if {@code context} is null
 * @throws IllegalArgumentException if the column identity's child count does not match the
 *         requested row, map or array type (raised via {@code checkArgument})
 */
public static Optional<Field> constructField(FieldContext context, ColumnIO columnIO) {
    requireNonNull(context, "context is null");
    if (columnIO == null) {
        // Column is not present in the file.
        return Optional.empty();
    }
    // Any repetition other than OPTIONAL is treated as required.
    boolean required = columnIO.getType().getRepetition() != OPTIONAL;
    int repetitionLevel = columnRepetitionLevel(columnIO);
    int definitionLevel = columnDefinitionLevel(columnIO);
    Type type = context.getType();
    if (type instanceof RowType) {
        RowType rowType = (RowType) type;
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        List<RowType.Field> fields = rowType.getFields();
        // Row fields and child identities are paired by index below, so their counts must agree.
        // Fail with a descriptive message (as the map and array branches do) instead of an
        // IndexOutOfBoundsException or silently ignoring surplus identities.
        checkArgument(fields.size() == subColumns.size(), "Not a struct: %s", context);
        boolean structHasParameters = false;
        for (int i = 0; i < fields.size(); i++) {
            RowType.Field rowField = fields.get(i);
            ColumnIdentity fieldIdentity = subColumns.get(i);
            // Children are matched by column ID rather than by position in the file.
            Optional<Field> field = constructField(new FieldContext(rowField.getType(), fieldIdentity), lookupColumnById(groupColumnIO, fieldIdentity.getId()));
            structHasParameters |= field.isPresent();
            fieldsBuilder.add(field);
        }
        if (structHasParameters) {
            return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, fieldsBuilder.build()));
        }
        // None of the requested children could be resolved: report the whole struct as absent.
        return Optional.empty();
    }
    if (type instanceof MapType) {
        MapType mapType = (MapType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        GroupColumnIO keyValueColumnIO = getMapKeyValueColumn(groupColumnIO);
        if (keyValueColumnIO.getChildrenCount() != 2) {
            return Optional.empty();
        }
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        checkArgument(subColumns.size() == 2, "Not a map: %s", context);
        ColumnIdentity keyIdentity = subColumns.get(0);
        ColumnIdentity valueIdentity = subColumns.get(1);
        // Key is child 0 and value is child 1 of the key-value group (positional, not by ID).
        // TODO validate column ID
        Optional<Field> keyField = constructField(new FieldContext(mapType.getKeyType(), keyIdentity), keyValueColumnIO.getChild(0));
        // TODO validate column ID
        Optional<Field> valueField = constructField(new FieldContext(mapType.getValueType(), valueIdentity), keyValueColumnIO.getChild(1));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(keyField, valueField)));
    }
    if (type instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        if (groupColumnIO.getChildrenCount() != 1) {
            return Optional.empty();
        }
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        checkArgument(subColumns.size() == 1, "Not an array: %s", context);
        ColumnIdentity elementIdentity = getOnlyElement(subColumns);
        // TODO validate column ID
        Optional<Field> field = constructField(new FieldContext(arrayType.getElementType(), elementIdentity), getArrayElementColumn(groupColumnIO.getChild(0)));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(field)));
    }
    // Not a row, map or array: the column is expected to be a primitive.
    PrimitiveColumnIO primitiveColumnIO = (PrimitiveColumnIO) columnIO;
    RichColumnDescriptor column = new RichColumnDescriptor(primitiveColumnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
    return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, column, primitiveColumnIO.getId()));
}
Also used: Optional (java.util.Optional), ImmutableList (com.google.common.collect.ImmutableList), GroupField (io.trino.parquet.GroupField), RichColumnDescriptor (io.trino.parquet.RichColumnDescriptor), RowType (io.trino.spi.type.RowType), MapType (io.trino.spi.type.MapType), PrimitiveColumnIO (org.apache.parquet.io.PrimitiveColumnIO), ArrayType (io.trino.spi.type.ArrayType), PrimitiveField (io.trino.parquet.PrimitiveField), Field (io.trino.parquet.Field), Type (io.trino.spi.type.Type), GroupColumnIO (org.apache.parquet.io.GroupColumnIO)

Aggregations

Field (io.trino.parquet.Field): 6, Optional (java.util.Optional): 5, ImmutableList (com.google.common.collect.ImmutableList): 4, GroupField (io.trino.parquet.GroupField): 4, PrimitiveField (io.trino.parquet.PrimitiveField): 4, RichColumnDescriptor (io.trino.parquet.RichColumnDescriptor): 4, ArrayType (io.trino.spi.type.ArrayType): 4, MapType (io.trino.spi.type.MapType): 4, RowType (io.trino.spi.type.RowType): 4, Type (io.trino.spi.type.Type): 4, ImmutableMap (com.google.common.collect.ImmutableMap): 2, ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet): 2, AggregatedMemoryContext.newSimpleAggregatedMemoryContext (io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext): 2, ParquetCorruptionException (io.trino.parquet.ParquetCorruptionException): 2, ParquetDataSource (io.trino.parquet.ParquetDataSource): 2, ParquetDataSourceId (io.trino.parquet.ParquetDataSourceId): 2, ParquetReaderOptions (io.trino.parquet.ParquetReaderOptions): 2, ParquetTypeUtils.getColumnIO (io.trino.parquet.ParquetTypeUtils.getColumnIO): 2, ParquetTypeUtils.getDescriptors (io.trino.parquet.ParquetTypeUtils.getDescriptors): 2, Predicate (io.trino.parquet.predicate.Predicate): 2