Search in sources :

Example 16 with RichColumnDescriptor

use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.

the class IcebergParquetColumnIOConverter method constructField.

/**
 * Builds the Parquet reader {@link Field} for the given Trino type / column identity pair
 * from the matching Parquet {@link ColumnIO} node, recursing into nested types.
 *
 * <ul>
 * <li>{@link RowType}: each sub-field is resolved in the group by the ID of its column identity;
 * the result is empty when none of the sub-fields could be resolved.</li>
 * <li>{@link MapType}: the key and value are taken positionally from the key-value group;
 * the result is empty when that group does not have exactly two children.</li>
 * <li>{@link ArrayType}: the element is taken from the single child of the group;
 * the result is empty when the group does not have exactly one child.</li>
 * <li>Any other type is treated as a primitive column.</li>
 * </ul>
 *
 * @param context the Trino type together with its column identity
 * @param columnIO the Parquet column node for this field, or {@code null} if it is absent
 * @return the constructed field, or {@link Optional#empty()} when {@code columnIO} is {@code null}
 * or the Parquet structure does not provide the expected children
 * @throws NullPointerException if {@code context} is null
 * @throws IllegalArgumentException if the number of children of the column identity does not match
 * the shape of the type (row field count, two for maps, one for arrays)
 */
public static Optional<Field> constructField(FieldContext context, ColumnIO columnIO) {
    requireNonNull(context, "context is null");
    if (columnIO == null) {
        return Optional.empty();
    }
    // Everything that is not OPTIONAL is reported as required
    boolean required = columnIO.getType().getRepetition() != OPTIONAL;
    int repetitionLevel = columnRepetitionLevel(columnIO);
    int definitionLevel = columnDefinitionLevel(columnIO);
    Type type = context.getType();
    if (type instanceof RowType) {
        RowType rowType = (RowType) type;
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        List<RowType.Field> fields = rowType.getFields();
        // Fail with a descriptive message (as the map and array branches do) instead of an
        // IndexOutOfBoundsException below, or silently ignoring surplus identities
        checkArgument(fields.size() == subColumns.size(), "Not a struct: %s", context);
        boolean structHasParameters = false;
        for (int i = 0; i < fields.size(); i++) {
            RowType.Field rowField = fields.get(i);
            ColumnIdentity fieldIdentity = subColumns.get(i);
            // Sub-fields are matched by ID, not by position; an unmatched ID yields an empty field
            Optional<Field> field = constructField(new FieldContext(rowField.getType(), fieldIdentity), lookupColumnById(groupColumnIO, fieldIdentity.getId()));
            structHasParameters |= field.isPresent();
            fieldsBuilder.add(field);
        }
        // A struct with no resolvable sub-field at all is reported as absent
        if (structHasParameters) {
            return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, fieldsBuilder.build()));
        }
        return Optional.empty();
    }
    if (type instanceof MapType) {
        MapType mapType = (MapType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        GroupColumnIO keyValueColumnIO = getMapKeyValueColumn(groupColumnIO);
        if (keyValueColumnIO.getChildrenCount() != 2) {
            return Optional.empty();
        }
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        checkArgument(subColumns.size() == 2, "Not a map: %s", context);
        ColumnIdentity keyIdentity = subColumns.get(0);
        ColumnIdentity valueIdentity = subColumns.get(1);
        // Key and value are taken by position (child 0 and child 1) of the key-value group
        // TODO validate column ID
        Optional<Field> keyField = constructField(new FieldContext(mapType.getKeyType(), keyIdentity), keyValueColumnIO.getChild(0));
        // TODO validate column ID
        Optional<Field> valueField = constructField(new FieldContext(mapType.getValueType(), valueIdentity), keyValueColumnIO.getChild(1));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(keyField, valueField)));
    }
    if (type instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        if (groupColumnIO.getChildrenCount() != 1) {
            return Optional.empty();
        }
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        checkArgument(subColumns.size() == 1, "Not an array: %s", context);
        ColumnIdentity elementIdentity = getOnlyElement(subColumns);
        // TODO validate column ID
        Optional<Field> field = constructField(new FieldContext(arrayType.getElementType(), elementIdentity), getArrayElementColumn(groupColumnIO.getChild(0)));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(field)));
    }
    // Remaining types are read as primitive columns; columnIO is cast to PrimitiveColumnIO
    PrimitiveColumnIO primitiveColumnIO = (PrimitiveColumnIO) columnIO;
    RichColumnDescriptor column = new RichColumnDescriptor(primitiveColumnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
    return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, column, primitiveColumnIO.getId()));
}
Also used : Optional(java.util.Optional) ImmutableList(com.google.common.collect.ImmutableList) GroupField(io.trino.parquet.GroupField) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO) ArrayType(io.trino.spi.type.ArrayType) GroupField(io.trino.parquet.GroupField) PrimitiveField(io.trino.parquet.PrimitiveField) Field(io.trino.parquet.Field) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType) Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) GroupColumnIO(org.apache.parquet.io.GroupColumnIO) PrimitiveField(io.trino.parquet.PrimitiveField)

Aggregations

RichColumnDescriptor (io.trino.parquet.RichColumnDescriptor)16 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)12 ImmutableList (com.google.common.collect.ImmutableList)10 Domain (io.trino.spi.predicate.Domain)9 TupleDomain (io.trino.spi.predicate.TupleDomain)9 HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle)8 List (java.util.List)8 MessageType (org.apache.parquet.schema.MessageType)8 GroupType (org.apache.parquet.schema.GroupType)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 Field (io.trino.parquet.Field)5 RowType (io.trino.spi.type.RowType)5 Optional (java.util.Optional)5 PrimitiveType (org.apache.parquet.schema.PrimitiveType)5 Test (org.testng.annotations.Test)5 ArrayType (io.trino.spi.type.ArrayType)4 MapType (io.trino.spi.type.MapType)4 Type (io.trino.spi.type.Type)4 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)3 AggregatedMemoryContext.newSimpleAggregatedMemoryContext (io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext)3