Search in sources :

Example 1 with GroupField

use of io.prestosql.parquet.GroupField in project hetu-core by openlookeng.

the class ParquetReader method readStruct.

private ColumnChunk readStruct(GroupField field) throws IOException {
    List<TypeSignatureParameter> fields = field.getType().getTypeSignature().getParameters();
    Block[] localBlocks = new Block[fields.size()];
    ColumnChunk columnChunk = null;
    List<Optional<Field>> parameters = field.getChildren();
    for (int i = 0; i < fields.size(); i++) {
        Optional<Field> parameter = parameters.get(i);
        if (parameter.isPresent()) {
            columnChunk = readColumnChunk(parameter.get());
            localBlocks[i] = columnChunk.getBlock();
        }
    }
    for (int i = 0; i < fields.size(); i++) {
        if (localBlocks[i] == null) {
            localBlocks[i] = RunLengthEncodedBlock.create(field.getType(), null, columnChunk.getBlock().getPositionCount());
        }
    }
    BooleanList structIsNull = StructColumnReader.calculateStructOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
    boolean[] structIsNullVector = structIsNull.toBooleanArray();
    Block rowBlock = RowBlock.fromFieldBlocks(structIsNullVector.length, Optional.of(structIsNullVector), localBlocks);
    return new ColumnChunk(rowBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
Also used : BooleanList(it.unimi.dsi.fastutil.booleans.BooleanList) GroupField(io.prestosql.parquet.GroupField) PrimitiveField(io.prestosql.parquet.PrimitiveField) Field(io.prestosql.parquet.Field) Optional(java.util.Optional) TypeSignatureParameter(io.prestosql.spi.type.TypeSignatureParameter) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) ArrayBlock(io.prestosql.spi.block.ArrayBlock) RowBlock(io.prestosql.spi.block.RowBlock)

Example 2 with GroupField

use of io.prestosql.parquet.GroupField in project hetu-core by openlookeng.

the class ParquetReader method readArray.

private ColumnChunk readArray(GroupField field) throws IOException {
    List<Type> parameters = field.getType().getTypeParameters();
    checkArgument(parameters.size() == 1, "Arrays must have a single type parameter, found %s", parameters.size());
    Field elementField = field.getChildren().get(0).get();
    ColumnChunk columnChunk = readColumnChunk(elementField);
    IntList offsets = new IntArrayList();
    BooleanList valueIsNull = new BooleanArrayList();
    calculateCollectionOffsets(field, offsets, valueIsNull, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
    Block arrayBlock = ArrayBlock.fromElementBlock(valueIsNull.size(), Optional.of(valueIsNull.toBooleanArray()), offsets.toIntArray(), columnChunk.getBlock());
    return new ColumnChunk(arrayBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
Also used : BooleanList(it.unimi.dsi.fastutil.booleans.BooleanList) GroupField(io.prestosql.parquet.GroupField) PrimitiveField(io.prestosql.parquet.PrimitiveField) Field(io.prestosql.parquet.Field) MapType(io.prestosql.spi.type.MapType) Type(io.prestosql.spi.type.Type) BooleanArrayList(it.unimi.dsi.fastutil.booleans.BooleanArrayList) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) ArrayBlock(io.prestosql.spi.block.ArrayBlock) RowBlock(io.prestosql.spi.block.RowBlock) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IntList(it.unimi.dsi.fastutil.ints.IntList)

Example 3 with GroupField

use of io.prestosql.parquet.GroupField in project hetu-core by openlookeng.

the class ParquetColumnIOConverter method constructField.

public static Optional<Field> constructField(Type type, ColumnIO columnIO) {
    if (columnIO == null) {
        return Optional.empty();
    }
    boolean required = columnIO.getType().getRepetition() != OPTIONAL;
    int repetitionLevel = columnRepetitionLevel(columnIO);
    int definitionLevel = columnDefinitionLevel(columnIO);
    if (ROW.equals(type.getTypeSignature().getBase())) {
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        List<Type> parameters = type.getTypeParameters();
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        List<TypeSignatureParameter> fields = type.getTypeSignature().getParameters();
        boolean structHasParameters = false;
        for (int i = 0; i < fields.size(); i++) {
            NamedTypeSignature namedTypeSignature = fields.get(i).getNamedTypeSignature();
            String name = namedTypeSignature.getName().get().toLowerCase(Locale.ENGLISH);
            Optional<Field> field = constructField(parameters.get(i), lookupColumnByName(groupColumnIO, name));
            structHasParameters |= field.isPresent();
            fieldsBuilder.add(field);
        }
        if (structHasParameters) {
            return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, fieldsBuilder.build()));
        }
        return Optional.empty();
    }
    if (MAP.equals(type.getTypeSignature().getBase())) {
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        MapType mapType = (MapType) type;
        GroupColumnIO keyValueColumnIO = getMapKeyValueColumn(groupColumnIO);
        if (keyValueColumnIO.getChildrenCount() != 2) {
            return Optional.empty();
        }
        Optional<Field> keyField = constructField(mapType.getKeyType(), keyValueColumnIO.getChild(0));
        Optional<Field> valueField = constructField(mapType.getValueType(), keyValueColumnIO.getChild(1));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(keyField, valueField)));
    }
    if (ARRAY.equals(type.getTypeSignature().getBase())) {
        GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
        List<Type> types = type.getTypeParameters();
        if (groupColumnIO.getChildrenCount() != 1) {
            return Optional.empty();
        }
        Optional<Field> field = constructField(types.get(0), getArrayElementColumn(groupColumnIO.getChild(0)));
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(field)));
    }
    PrimitiveColumnIO primitiveColumnIO = (PrimitiveColumnIO) columnIO;
    RichColumnDescriptor column = new RichColumnDescriptor(primitiveColumnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
    return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, column, primitiveColumnIO.getId()));
}
Also used : Optional(java.util.Optional) ImmutableList(com.google.common.collect.ImmutableList) GroupField(io.prestosql.parquet.GroupField) RichColumnDescriptor(io.prestosql.parquet.RichColumnDescriptor) NamedTypeSignature(io.prestosql.spi.type.NamedTypeSignature) MapType(io.prestosql.spi.type.MapType) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO) GroupField(io.prestosql.parquet.GroupField) PrimitiveField(io.prestosql.parquet.PrimitiveField) Field(io.prestosql.parquet.Field) MapType(io.prestosql.spi.type.MapType) Type(io.prestosql.spi.type.Type) GroupColumnIO(org.apache.parquet.io.GroupColumnIO) TypeSignatureParameter(io.prestosql.spi.type.TypeSignatureParameter) PrimitiveField(io.prestosql.parquet.PrimitiveField)

Aggregations

Field (io.prestosql.parquet.Field)3 GroupField (io.prestosql.parquet.GroupField)3 PrimitiveField (io.prestosql.parquet.PrimitiveField)3 ArrayBlock (io.prestosql.spi.block.ArrayBlock)2 Block (io.prestosql.spi.block.Block)2 RowBlock (io.prestosql.spi.block.RowBlock)2 RunLengthEncodedBlock (io.prestosql.spi.block.RunLengthEncodedBlock)2 MapType (io.prestosql.spi.type.MapType)2 Type (io.prestosql.spi.type.Type)2 TypeSignatureParameter (io.prestosql.spi.type.TypeSignatureParameter)2 BooleanList (it.unimi.dsi.fastutil.booleans.BooleanList)2 Optional (java.util.Optional)2 ImmutableList (com.google.common.collect.ImmutableList)1 RichColumnDescriptor (io.prestosql.parquet.RichColumnDescriptor)1 NamedTypeSignature (io.prestosql.spi.type.NamedTypeSignature)1 BooleanArrayList (it.unimi.dsi.fastutil.booleans.BooleanArrayList)1 IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList)1 IntList (it.unimi.dsi.fastutil.ints.IntList)1 GroupColumnIO (org.apache.parquet.io.GroupColumnIO)1 PrimitiveColumnIO (org.apache.parquet.io.PrimitiveColumnIO)1