Search in sources:

Example 1 with GroupField

use of io.trino.parquet.GroupField in project trino by trinodb.

The method constructField of the class HiveParquetColumnIOConverter.

/**
 * Builds the Parquet reader {@link Field} describing how {@code type} is stored in {@code columnIO}.
 *
 * <p>Row, map and array types produce a {@link GroupField} whose children are built recursively;
 * any other type produces a {@link PrimitiveField}.
 *
 * @param type the Trino type being read
 * @param columnIO the Parquet column for that type, or {@code null} when there is none
 * @return the constructed field, or empty when {@code columnIO} is {@code null}, when no member of a
 *         row type could be resolved, when the map key/value group does not have exactly two
 *         children, or when the array group does not have exactly one child
 */
public static Optional<Field> constructField(Type type, ColumnIO columnIO) {
    if (columnIO == null) {
        return Optional.empty();
    }

    boolean required = columnIO.getType().getRepetition() != OPTIONAL;
    int repetitionLevel = columnRepetitionLevel(columnIO);
    int definitionLevel = columnDefinitionLevel(columnIO);

    if (type instanceof RowType) {
        GroupColumnIO structColumn = (GroupColumnIO) columnIO;
        ImmutableList.Builder<Optional<Field>> children = ImmutableList.builder();
        boolean anyChildPresent = false;
        for (RowType.Field rowField : ((RowType) type).getFields()) {
            // Member columns are looked up by their lower-cased (English locale) name.
            String lowerCaseName = rowField.getName().orElseThrow().toLowerCase(Locale.ENGLISH);
            Optional<Field> child = constructField(rowField.getType(), lookupColumnByName(structColumn, lowerCaseName));
            anyChildPresent = anyChildPresent || child.isPresent();
            // Absent members are still recorded so that child positions line up with the row type.
            children.add(child);
        }
        if (!anyChildPresent) {
            return Optional.empty();
        }
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, children.build()));
    }

    if (type instanceof MapType) {
        MapType mapType = (MapType) type;
        GroupColumnIO entryColumn = getMapKeyValueColumn((GroupColumnIO) columnIO);
        if (entryColumn.getChildrenCount() != 2) {
            return Optional.empty();
        }
        // Child 0 is read as the key and child 1 as the value.
        Optional<Field> key = constructField(mapType.getKeyType(), entryColumn.getChild(0));
        Optional<Field> value = constructField(mapType.getValueType(), entryColumn.getChild(1));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(key, value)));
    }

    if (type instanceof ArrayType) {
        GroupColumnIO listColumn = (GroupColumnIO) columnIO;
        if (listColumn.getChildrenCount() != 1) {
            return Optional.empty();
        }
        ColumnIO elementColumn = getArrayElementColumn(listColumn.getChild(0));
        Optional<Field> element = constructField(((ArrayType) type).getElementType(), elementColumn);
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(element)));
    }

    // Every remaining type is treated as a primitive column.
    PrimitiveColumnIO leafColumn = (PrimitiveColumnIO) columnIO;
    RichColumnDescriptor descriptor = new RichColumnDescriptor(leafColumn.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
    return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, descriptor, leafColumn.getId()));
}
Also used : Optional(java.util.Optional) ImmutableList(com.google.common.collect.ImmutableList) GroupField(io.trino.parquet.GroupField) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO) ArrayType(io.trino.spi.type.ArrayType) GroupField(io.trino.parquet.GroupField) PrimitiveField(io.trino.parquet.PrimitiveField) Field(io.trino.parquet.Field) GroupColumnIO(org.apache.parquet.io.GroupColumnIO) PrimitiveField(io.trino.parquet.PrimitiveField)

Example 2 with GroupField

use of io.trino.parquet.GroupField in project trino by trinodb.

The method readStruct of the class ParquetReader.

/**
 * Reads a struct (row) column by reading each present child field and assembling the results
 * into a {@link RowBlock}.
 *
 * <p>Children that are absent from {@code field} are filled with an all-null run-length-encoded
 * block. The definition and repetition levels of the last child that was read are used both to
 * compute the struct-level null flags and as the levels of the returned chunk.
 *
 * @param field the struct field to read; at least one of its children must be present
 * @return a chunk holding the row block together with its definition and repetition levels
 * @throws IOException if reading a child column chunk fails
 * @throws IllegalArgumentException if none of the struct's children is present, since there is
 *         then no child chunk to derive positions and levels from
 */
private ColumnChunk readStruct(GroupField field) throws IOException {
    List<TypeSignatureParameter> fields = field.getType().getTypeSignature().getParameters();
    Block[] blocks = new Block[fields.size()];
    ColumnChunk columnChunk = null;
    List<Optional<Field>> parameters = field.getChildren();
    for (int i = 0; i < fields.size(); i++) {
        Optional<Field> parameter = parameters.get(i);
        if (parameter.isPresent()) {
            columnChunk = readColumnChunk(parameter.get());
            blocks[i] = columnChunk.getBlock();
        }
    }
    // Fail with a descriptive error instead of an opaque NullPointerException further down.
    if (columnChunk == null) {
        throw new IllegalArgumentException("Struct field has no readable child columns: " + field.getType());
    }
    // Loop-invariant lookups for the null-filling pass below.
    List<Type> fieldTypes = field.getType().getTypeParameters();
    int positionCount = columnChunk.getBlock().getPositionCount();
    for (int i = 0; i < fields.size(); i++) {
        if (blocks[i] == null) {
            // Child is missing: substitute nulls for every position.
            blocks[i] = RunLengthEncodedBlock.create(fieldTypes.get(i), null, positionCount);
        }
    }
    BooleanList structIsNull = StructColumnReader.calculateStructOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
    boolean[] structIsNullVector = structIsNull.toBooleanArray();
    Block rowBlock = RowBlock.fromFieldBlocks(structIsNullVector.length, Optional.of(structIsNullVector), blocks);
    return new ColumnChunk(rowBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
Also used : BooleanList(it.unimi.dsi.fastutil.booleans.BooleanList) GroupField(io.trino.parquet.GroupField) PrimitiveField(io.trino.parquet.PrimitiveField) Field(io.trino.parquet.Field) Optional(java.util.Optional) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) ArrayBlock(io.trino.spi.block.ArrayBlock) RowBlock(io.trino.spi.block.RowBlock)

Example 3 with GroupField

use of io.trino.parquet.GroupField in project trino by trinodb.

The method readArray of the class ParquetReader.

/**
 * Reads an array column: reads the element column chunk, derives the per-array offsets and null
 * flags from its definition and repetition levels, and wraps the element block in an
 * {@link ArrayBlock}.
 *
 * @param field the array field; its type must have exactly one type parameter and its first
 *        child (the element field) must be present
 * @return a chunk holding the array block together with the element chunk's levels
 * @throws IOException if reading the element column chunk fails
 */
private ColumnChunk readArray(GroupField field) throws IOException {
    List<Type> typeParameters = field.getType().getTypeParameters();
    checkArgument(typeParameters.size() == 1, "Arrays must have a single type parameter, found %s", typeParameters.size());

    Optional<Field> element = field.getChildren().get(0);
    ColumnChunk elementChunk = readColumnChunk(element.orElseThrow());

    // Both lists are populated by calculateCollectionOffsets.
    BooleanList arrayIsNull = new BooleanArrayList();
    IntList arrayOffsets = new IntArrayList();
    calculateCollectionOffsets(field, arrayOffsets, arrayIsNull, elementChunk.getDefinitionLevels(), elementChunk.getRepetitionLevels());

    int arrayCount = arrayIsNull.size();
    boolean[] nullFlags = arrayIsNull.toBooleanArray();
    int[] offsetValues = arrayOffsets.toIntArray();
    Block arrayBlock = ArrayBlock.fromElementBlock(arrayCount, Optional.of(nullFlags), offsetValues, elementChunk.getBlock());
    return new ColumnChunk(arrayBlock, elementChunk.getDefinitionLevels(), elementChunk.getRepetitionLevels());
}
Also used : BooleanList(it.unimi.dsi.fastutil.booleans.BooleanList) GroupField(io.trino.parquet.GroupField) PrimitiveField(io.trino.parquet.PrimitiveField) Field(io.trino.parquet.Field) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) Type(io.trino.spi.type.Type) MapType(io.trino.spi.type.MapType) BooleanArrayList(it.unimi.dsi.fastutil.booleans.BooleanArrayList) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) ArrayBlock(io.trino.spi.block.ArrayBlock) RowBlock(io.trino.spi.block.RowBlock) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IntList(it.unimi.dsi.fastutil.ints.IntList)

Example 4 with GroupField

use of io.trino.parquet.GroupField in project trino by trinodb.

The method constructField of the class IcebergParquetColumnIOConverter.

/**
 * Builds the Parquet reader {@link Field} describing how the type in {@code context} is stored
 * in {@code columnIO}.
 *
 * <p>Row, map and array types produce a {@link GroupField} whose children are built recursively
 * from the child column identities of {@code context}; any other type produces a
 * {@link PrimitiveField}. Row members are resolved in the Parquet group by column id.
 *
 * @param context the type and column identity being read; must not be {@code null}
 * @param columnIO the Parquet column for that type, or {@code null} when there is none
 * @return the constructed field, or empty when {@code columnIO} is {@code null}, when no member of a
 *         row type could be resolved, when the map key/value group does not have exactly two
 *         children, or when the array group does not have exactly one child
 * @throws IllegalArgumentException if a map identity does not have exactly two children or an
 *         array identity does not have exactly one child
 */
public static Optional<Field> constructField(FieldContext context, ColumnIO columnIO) {
    requireNonNull(context, "context is null");
    if (columnIO == null) {
        return Optional.empty();
    }

    boolean required = columnIO.getType().getRepetition() != OPTIONAL;
    int repetitionLevel = columnRepetitionLevel(columnIO);
    int definitionLevel = columnDefinitionLevel(columnIO);
    Type type = context.getType();

    if (type instanceof RowType) {
        List<ColumnIdentity> memberIdentities = context.getColumnIdentity().getChildren();
        GroupColumnIO structColumn = (GroupColumnIO) columnIO;
        ImmutableList.Builder<Optional<Field>> children = ImmutableList.builder();
        List<RowType.Field> members = ((RowType) type).getFields();
        boolean anyChildPresent = false;
        // Row members and child identities are paired by position.
        for (int index = 0; index < members.size(); index++) {
            ColumnIdentity memberIdentity = memberIdentities.get(index);
            FieldContext memberContext = new FieldContext(members.get(index).getType(), memberIdentity);
            Optional<Field> child = constructField(memberContext, lookupColumnById(structColumn, memberIdentity.getId()));
            anyChildPresent = anyChildPresent || child.isPresent();
            // Absent members are still recorded so that child positions line up with the row type.
            children.add(child);
        }
        if (!anyChildPresent) {
            return Optional.empty();
        }
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, children.build()));
    }

    if (type instanceof MapType) {
        MapType mapType = (MapType) type;
        GroupColumnIO entryColumn = getMapKeyValueColumn((GroupColumnIO) columnIO);
        if (entryColumn.getChildrenCount() != 2) {
            return Optional.empty();
        }
        List<ColumnIdentity> entryIdentities = context.getColumnIdentity().getChildren();
        checkArgument(entryIdentities.size() == 2, "Not a map: %s", context);
        FieldContext keyContext = new FieldContext(mapType.getKeyType(), entryIdentities.get(0));
        FieldContext valueContext = new FieldContext(mapType.getValueType(), entryIdentities.get(1));
        // TODO validate column ID
        Optional<Field> key = constructField(keyContext, entryColumn.getChild(0));
        // TODO validate column ID
        Optional<Field> value = constructField(valueContext, entryColumn.getChild(1));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(key, value)));
    }

    if (type instanceof ArrayType) {
        GroupColumnIO listColumn = (GroupColumnIO) columnIO;
        if (listColumn.getChildrenCount() != 1) {
            return Optional.empty();
        }
        List<ColumnIdentity> elementIdentities = context.getColumnIdentity().getChildren();
        checkArgument(elementIdentities.size() == 1, "Not an array: %s", context);
        // Exactly one identity remains after the check above.
        FieldContext elementContext = new FieldContext(((ArrayType) type).getElementType(), elementIdentities.get(0));
        // TODO validate column ID
        Optional<Field> element = constructField(elementContext, getArrayElementColumn(listColumn.getChild(0)));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(element)));
    }

    // Every remaining type is treated as a primitive column.
    PrimitiveColumnIO leafColumn = (PrimitiveColumnIO) columnIO;
    RichColumnDescriptor descriptor = new RichColumnDescriptor(leafColumn.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
    return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, descriptor, leafColumn.getId()));
}
Also used : Optional(java.util.Optional) ImmutableList(com.google.common.collect.ImmutableList) GroupField(io.trino.parquet.GroupField) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO) ArrayType(io.trino.spi.type.ArrayType) GroupField(io.trino.parquet.GroupField) PrimitiveField(io.trino.parquet.PrimitiveField) Field(io.trino.parquet.Field) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType) Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) GroupColumnIO(org.apache.parquet.io.GroupColumnIO) PrimitiveField(io.trino.parquet.PrimitiveField)

Aggregations

Field (io.trino.parquet.Field)4 GroupField (io.trino.parquet.GroupField)4 PrimitiveField (io.trino.parquet.PrimitiveField)4 ArrayType (io.trino.spi.type.ArrayType)3 MapType (io.trino.spi.type.MapType)3 RowType (io.trino.spi.type.RowType)3 Optional (java.util.Optional)3 ImmutableList (com.google.common.collect.ImmutableList)2 RichColumnDescriptor (io.trino.parquet.RichColumnDescriptor)2 ArrayBlock (io.trino.spi.block.ArrayBlock)2 Block (io.trino.spi.block.Block)2 RowBlock (io.trino.spi.block.RowBlock)2 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)2 Type (io.trino.spi.type.Type)2 BooleanList (it.unimi.dsi.fastutil.booleans.BooleanList)2 GroupColumnIO (org.apache.parquet.io.GroupColumnIO)2 PrimitiveColumnIO (org.apache.parquet.io.PrimitiveColumnIO)2 TypeSignatureParameter (io.trino.spi.type.TypeSignatureParameter)1 BooleanArrayList (it.unimi.dsi.fastutil.booleans.BooleanArrayList)1 IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList)1