Search in sources :

Example 1 with PrimitiveField

use of io.trino.parquet.PrimitiveField in project trino by trinodb.

the class HiveParquetColumnIOConverter method constructField.

public static Optional<Field> constructField(Type type, ColumnIO columnIO) {
    if (columnIO == null) {
        return Optional.empty();
    }
    boolean required = columnIO.getType().getRepetition() != OPTIONAL;
    int repetitionLevel = columnRepetitionLevel(columnIO);
    int definitionLevel = columnDefinitionLevel(columnIO);
    if (type instanceof RowType) {
        RowType rowType = (RowType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        List<RowType.Field> fields = rowType.getFields();
        boolean structHasParameters = false;
        for (int i = 0; i < fields.size(); i++) {
            RowType.Field rowField = fields.get(i);
            String name = rowField.getName().orElseThrow().toLowerCase(Locale.ENGLISH);
            Optional<Field> field = constructField(rowField.getType(), lookupColumnByName(groupColumnIO, name));
            structHasParameters |= field.isPresent();
            fieldsBuilder.add(field);
        }
        if (structHasParameters) {
            return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, fieldsBuilder.build()));
        }
        return Optional.empty();
    }
    if (type instanceof MapType) {
        MapType mapType = (MapType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        GroupColumnIO keyValueColumnIO = getMapKeyValueColumn(groupColumnIO);
        if (keyValueColumnIO.getChildrenCount() != 2) {
            return Optional.empty();
        }
        Optional<Field> keyField = constructField(mapType.getKeyType(), keyValueColumnIO.getChild(0));
        Optional<Field> valueField = constructField(mapType.getValueType(), keyValueColumnIO.getChild(1));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(keyField, valueField)));
    }
    if (type instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        if (groupColumnIO.getChildrenCount() != 1) {
            return Optional.empty();
        }
        Optional<Field> field = constructField(arrayType.getElementType(), getArrayElementColumn(groupColumnIO.getChild(0)));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(field)));
    }
    PrimitiveColumnIO primitiveColumnIO = (PrimitiveColumnIO) columnIO;
    RichColumnDescriptor column = new RichColumnDescriptor(primitiveColumnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
    return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, column, primitiveColumnIO.getId()));
}
Also used : Optional(java.util.Optional) ImmutableList(com.google.common.collect.ImmutableList) GroupField(io.trino.parquet.GroupField) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO) ArrayType(io.trino.spi.type.ArrayType) GroupField(io.trino.parquet.GroupField) PrimitiveField(io.trino.parquet.PrimitiveField) Field(io.trino.parquet.Field) GroupColumnIO(org.apache.parquet.io.GroupColumnIO) PrimitiveField(io.trino.parquet.PrimitiveField)

Example 2 with PrimitiveField

use of io.trino.parquet.PrimitiveField in project trino by trinodb.

the class IcebergParquetColumnIOConverter method constructField.

public static Optional<Field> constructField(FieldContext context, ColumnIO columnIO) {
    requireNonNull(context, "context is null");
    if (columnIO == null) {
        return Optional.empty();
    }
    boolean required = columnIO.getType().getRepetition() != OPTIONAL;
    int repetitionLevel = columnRepetitionLevel(columnIO);
    int definitionLevel = columnDefinitionLevel(columnIO);
    Type type = context.getType();
    if (type instanceof RowType) {
        RowType rowType = (RowType) type;
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        List<RowType.Field> fields = rowType.getFields();
        boolean structHasParameters = false;
        for (int i = 0; i < fields.size(); i++) {
            RowType.Field rowField = fields.get(i);
            ColumnIdentity fieldIdentity = subColumns.get(i);
            Optional<Field> field = constructField(new FieldContext(rowField.getType(), fieldIdentity), lookupColumnById(groupColumnIO, fieldIdentity.getId()));
            structHasParameters |= field.isPresent();
            fieldsBuilder.add(field);
        }
        if (structHasParameters) {
            return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, fieldsBuilder.build()));
        }
        return Optional.empty();
    }
    if (type instanceof MapType) {
        MapType mapType = (MapType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        GroupColumnIO keyValueColumnIO = getMapKeyValueColumn(groupColumnIO);
        if (keyValueColumnIO.getChildrenCount() != 2) {
            return Optional.empty();
        }
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        checkArgument(subColumns.size() == 2, "Not a map: %s", context);
        ColumnIdentity keyIdentity = subColumns.get(0);
        ColumnIdentity valueIdentity = subColumns.get(1);
        // TODO validate column ID
        Optional<Field> keyField = constructField(new FieldContext(mapType.getKeyType(), keyIdentity), keyValueColumnIO.getChild(0));
        // TODO validate column ID
        Optional<Field> valueField = constructField(new FieldContext(mapType.getValueType(), valueIdentity), keyValueColumnIO.getChild(1));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(keyField, valueField)));
    }
    if (type instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) type;
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        if (groupColumnIO.getChildrenCount() != 1) {
            return Optional.empty();
        }
        List<ColumnIdentity> subColumns = context.getColumnIdentity().getChildren();
        checkArgument(subColumns.size() == 1, "Not an array: %s", context);
        ColumnIdentity elementIdentity = getOnlyElement(subColumns);
        // TODO validate column ID
        Optional<Field> field = constructField(new FieldContext(arrayType.getElementType(), elementIdentity), getArrayElementColumn(groupColumnIO.getChild(0)));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(field)));
    }
    PrimitiveColumnIO primitiveColumnIO = (PrimitiveColumnIO) columnIO;
    RichColumnDescriptor column = new RichColumnDescriptor(primitiveColumnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
    return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, column, primitiveColumnIO.getId()));
}
Also used : Optional(java.util.Optional) ImmutableList(com.google.common.collect.ImmutableList) GroupField(io.trino.parquet.GroupField) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO) ArrayType(io.trino.spi.type.ArrayType) GroupField(io.trino.parquet.GroupField) PrimitiveField(io.trino.parquet.PrimitiveField) Field(io.trino.parquet.Field) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType) Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) GroupColumnIO(org.apache.parquet.io.GroupColumnIO) PrimitiveField(io.trino.parquet.PrimitiveField)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)2 Field (io.trino.parquet.Field)2 GroupField (io.trino.parquet.GroupField)2 PrimitiveField (io.trino.parquet.PrimitiveField)2 RichColumnDescriptor (io.trino.parquet.RichColumnDescriptor)2 ArrayType (io.trino.spi.type.ArrayType)2 MapType (io.trino.spi.type.MapType)2 RowType (io.trino.spi.type.RowType)2 Optional (java.util.Optional)2 GroupColumnIO (org.apache.parquet.io.GroupColumnIO)2 PrimitiveColumnIO (org.apache.parquet.io.PrimitiveColumnIO)2 Type (io.trino.spi.type.Type)1