Search in sources :

Example 1 with Field

use of io.prestosql.parquet.Field in project hetu-core by openlookeng.

In the class ParquetReader, method readStruct.

/**
 * Reads a struct (ROW) field by reading every child field that is present and assembling the
 * resulting child blocks into a single row block.
 *
 * <p>Children whose {@code Optional} is empty are not read; their slot is filled with a
 * run-length-encoded null block that has the same position count as the last child read.
 * The definition and repetition levels of the last child read are used both to compute the
 * struct null vector and as the levels of the returned chunk.
 *
 * @param field the struct field whose children are read
 * @return a column chunk wrapping the assembled row block
 * @throws IOException if reading a child column fails, or if none of the struct's children
 *         is present (there would be no levels to derive the struct's nulls from)
 */
private ColumnChunk readStruct(GroupField field) throws IOException {
    List<TypeSignatureParameter> fields = field.getType().getTypeSignature().getParameters();
    Block[] localBlocks = new Block[fields.size()];
    ColumnChunk columnChunk = null;
    List<Optional<Field>> parameters = field.getChildren();
    for (int i = 0; i < fields.size(); i++) {
        Optional<Field> parameter = parameters.get(i);
        if (parameter.isPresent()) {
            columnChunk = readColumnChunk(parameter.get());
            localBlocks[i] = columnChunk.getBlock();
        }
    }
    // Without at least one child there is no chunk to take position count or levels from;
    // fail with a descriptive error instead of a bare NullPointerException below.
    if (columnChunk == null) {
        throw new IOException("Struct field does not have any children: " + field);
    }
    int positionCount = columnChunk.getBlock().getPositionCount();
    for (int i = 0; i < fields.size(); i++) {
        if (localBlocks[i] == null) {
            // NOTE(review): the null filler is created with the struct's own type rather than
            // the type of child i -- confirm this is intended.
            localBlocks[i] = RunLengthEncodedBlock.create(field.getType(), null, positionCount);
        }
    }
    BooleanList structIsNull = StructColumnReader.calculateStructOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
    boolean[] structIsNullVector = structIsNull.toBooleanArray();
    Block rowBlock = RowBlock.fromFieldBlocks(structIsNullVector.length, Optional.of(structIsNullVector), localBlocks);
    return new ColumnChunk(rowBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
Also used : BooleanList(it.unimi.dsi.fastutil.booleans.BooleanList) GroupField(io.prestosql.parquet.GroupField) PrimitiveField(io.prestosql.parquet.PrimitiveField) Field(io.prestosql.parquet.Field) Optional(java.util.Optional) TypeSignatureParameter(io.prestosql.spi.type.TypeSignatureParameter) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) ArrayBlock(io.prestosql.spi.block.ArrayBlock) RowBlock(io.prestosql.spi.block.RowBlock)

Example 2 with Field

use of io.prestosql.parquet.Field in project hetu-core by openlookeng.

In the class ParquetPageSource, method getNextPage.

/**
 * Produces the next page of data from the underlying Parquet reader.
 *
 * <p>Each output column is one of: a region of a pre-built constant block, a lazily loaded
 * block backed by the column's Parquet field, or a run-length-encoded null block when the
 * column has no resolvable field index or no field.
 *
 * @return the next page, or {@code null} (after closing this source) when the source is
 *         already closed or the reader reports a batch size of zero or less
 * @throws PrestoException rethrown as-is, or wrapping any other {@code RuntimeException}
 *         with {@code HIVE_CURSOR_ERROR}; the source is closed in both cases
 */
@Override
public Page getNextPage() {
    try {
        batchId++;
        int rowCount = parquetReader.nextBatch();
        if (closed || rowCount <= 0) {
            close();
            return null;
        }
        int columnCount = hiveColumnIndexes.length;
        Block[] pageBlocks = new Block[columnCount];
        for (int column = 0; column < columnCount; column++) {
            // Constant columns are served straight from their pre-built block.
            if (constantBlocks[column] != null) {
                pageBlocks[column] = constantBlocks[column].getRegion(0, rowCount);
                continue;
            }
            Type columnType = types.get(column);
            Optional<Field> columnField = fields.get(column);
            // Resolve the column either by name against the file schema or by its hive index.
            int fieldIndex = useParquetColumnNames
                    ? getFieldIndex(fileSchema, columnNames.get(column))
                    : hiveColumnIndexes[column];
            if (fieldIndex == -1 || !columnField.isPresent()) {
                // Nothing to read for this column: emit nulls for the whole batch.
                pageBlocks[column] = RunLengthEncodedBlock.create(columnType, null, rowCount);
            } else {
                pageBlocks[column] = new LazyBlock(rowCount, new ParquetBlockLoader(columnField.get()));
            }
        }
        return new Page(rowCount, pageBlocks);
    } catch (PrestoException e) {
        closeWithSuppression(e);
        throw e;
    } catch (RuntimeException e) {
        closeWithSuppression(e);
        throw new PrestoException(HIVE_CURSOR_ERROR, e);
    }
}
Also used : Field(io.prestosql.parquet.Field) Type(io.prestosql.spi.type.Type) MessageType(org.apache.parquet.schema.MessageType) LazyBlock(io.prestosql.spi.block.LazyBlock) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) Page(io.prestosql.spi.Page) PrestoException(io.prestosql.spi.PrestoException)

Example 3 with Field

use of io.prestosql.parquet.Field in project hetu-core by openlookeng.

In the class ParquetReader, method readArray.

/**
 * Reads an array field by reading its single element field and wrapping the element block
 * in an array block, using offsets and a null vector computed from the element chunk's
 * definition and repetition levels.
 *
 * @param field the array field; its type must have exactly one type parameter
 * @return a column chunk wrapping the array block, carrying the element chunk's levels
 * @throws IOException if reading the element column fails
 * @throws IllegalArgumentException if the array type does not have exactly one type parameter
 */
private ColumnChunk readArray(GroupField field) throws IOException {
    List<Type> elementTypes = field.getType().getTypeParameters();
    checkArgument(elementTypes.size() == 1, "Arrays must have a single type parameter, found %s", elementTypes.size());

    // The first (and only) child is the element field; it is unwrapped without a presence check.
    Optional<Field> onlyChild = field.getChildren().get(0);
    ColumnChunk elementChunk = readColumnChunk(onlyChild.get());

    IntList arrayOffsets = new IntArrayList();
    BooleanList arrayIsNull = new BooleanArrayList();
    calculateCollectionOffsets(field, arrayOffsets, arrayIsNull, elementChunk.getDefinitionLevels(), elementChunk.getRepetitionLevels());

    int arrayCount = arrayIsNull.size();
    boolean[] nullFlags = arrayIsNull.toBooleanArray();
    int[] offsetValues = arrayOffsets.toIntArray();
    Block arrayBlock = ArrayBlock.fromElementBlock(arrayCount, Optional.of(nullFlags), offsetValues, elementChunk.getBlock());
    return new ColumnChunk(arrayBlock, elementChunk.getDefinitionLevels(), elementChunk.getRepetitionLevels());
}
Also used : BooleanList(it.unimi.dsi.fastutil.booleans.BooleanList) GroupField(io.prestosql.parquet.GroupField) PrimitiveField(io.prestosql.parquet.PrimitiveField) Field(io.prestosql.parquet.Field) MapType(io.prestosql.spi.type.MapType) Type(io.prestosql.spi.type.Type) BooleanArrayList(it.unimi.dsi.fastutil.booleans.BooleanArrayList) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) ArrayBlock(io.prestosql.spi.block.ArrayBlock) RowBlock(io.prestosql.spi.block.RowBlock) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IntList(it.unimi.dsi.fastutil.ints.IntList)

Example 4 with Field

use of io.prestosql.parquet.Field in project hetu-core by openlookeng.

In the class ParquetColumnIOConverter, method constructField.

/**
 * Builds the {@link Field} tree that describes how to read {@code type} from {@code columnIO}.
 *
 * <p>Dispatches on the base of the type signature:
 * <ul>
 *   <li>ROW: recurses into each named member, looked up by its lower-cased name; yields a
 *       group field only if at least one member could be constructed.</li>
 *   <li>MAP: requires the key/value group to have exactly two children and recurses into
 *       the key and value types.</li>
 *   <li>ARRAY: requires exactly one child and recurses into the element type.</li>
 *   <li>anything else: treats {@code columnIO} as a primitive column.</li>
 * </ul>
 *
 * @param type the engine type to read
 * @param columnIO the Parquet column for that type; may be {@code null}
 * @return the constructed field, or empty when {@code columnIO} is {@code null}, when no
 *         struct member is present, or when a map/array group has an unexpected child count
 */
public static Optional<Field> constructField(Type type, ColumnIO columnIO) {
    if (columnIO == null) {
        return Optional.empty();
    }

    // Anything that is not OPTIONAL is treated as required.
    boolean required = columnIO.getType().getRepetition() != OPTIONAL;
    int repetitionLevel = columnRepetitionLevel(columnIO);
    int definitionLevel = columnDefinitionLevel(columnIO);

    if (ROW.equals(type.getTypeSignature().getBase())) {
        GroupColumnIO rowColumnIO = (GroupColumnIO) columnIO;
        List<Type> memberTypes = type.getTypeParameters();
        List<TypeSignatureParameter> memberSignatures = type.getTypeSignature().getParameters();
        ImmutableList.Builder<Optional<Field>> children = ImmutableList.builder();
        boolean anyChildPresent = false;
        for (int index = 0; index < memberSignatures.size(); index++) {
            String memberName = memberSignatures.get(index)
                    .getNamedTypeSignature()
                    .getName()
                    .get()
                    .toLowerCase(Locale.ENGLISH);
            Optional<Field> child = constructField(memberTypes.get(index), lookupColumnByName(rowColumnIO, memberName));
            if (child.isPresent()) {
                anyChildPresent = true;
            }
            children.add(child);
        }
        // A struct with no resolvable member is reported as absent altogether.
        if (!anyChildPresent) {
            return Optional.empty();
        }
        return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, children.build()));
    }

    if (MAP.equals(type.getTypeSignature().getBase())) {
        GroupColumnIO mapColumnIO = (GroupColumnIO) columnIO;
        MapType mapType = (MapType) type;
        GroupColumnIO entryColumnIO = getMapKeyValueColumn(mapColumnIO);
        if (entryColumnIO.getChildrenCount() != 2) {
            return Optional.empty();
        }
        Optional<Field> keyField = constructField(mapType.getKeyType(), entryColumnIO.getChild(0));
        Optional<Field> valueField = constructField(mapType.getValueType(), entryColumnIO.getChild(1));
        GroupField mapField = new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(keyField, valueField));
        return Optional.of(mapField);
    }

    if (ARRAY.equals(type.getTypeSignature().getBase())) {
        GroupColumnIO arrayColumnIO = (GroupColumnIO) columnIO;
        List<Type> elementTypes = type.getTypeParameters();
        if (arrayColumnIO.getChildrenCount() != 1) {
            return Optional.empty();
        }
        Optional<Field> elementField = constructField(elementTypes.get(0), getArrayElementColumn(arrayColumnIO.getChild(0)));
        GroupField arrayField = new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(elementField));
        return Optional.of(arrayField);
    }

    // Not a nested type: describe the primitive column directly.
    PrimitiveColumnIO leafColumnIO = (PrimitiveColumnIO) columnIO;
    RichColumnDescriptor descriptor = new RichColumnDescriptor(leafColumnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
    PrimitiveField leafField = new PrimitiveField(type, repetitionLevel, definitionLevel, required, descriptor, leafColumnIO.getId());
    return Optional.of(leafField);
}
Also used : Optional(java.util.Optional) ImmutableList(com.google.common.collect.ImmutableList) GroupField(io.prestosql.parquet.GroupField) RichColumnDescriptor(io.prestosql.parquet.RichColumnDescriptor) NamedTypeSignature(io.prestosql.spi.type.NamedTypeSignature) MapType(io.prestosql.spi.type.MapType) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO) PrimitiveField(io.prestosql.parquet.PrimitiveField) Field(io.prestosql.parquet.Field) Type(io.prestosql.spi.type.Type) GroupColumnIO(org.apache.parquet.io.GroupColumnIO) TypeSignatureParameter(io.prestosql.spi.type.TypeSignatureParameter)

Aggregations

Field (io.prestosql.parquet.Field)4 GroupField (io.prestosql.parquet.GroupField)3 PrimitiveField (io.prestosql.parquet.PrimitiveField)3 Block (io.prestosql.spi.block.Block)3 RunLengthEncodedBlock (io.prestosql.spi.block.RunLengthEncodedBlock)3 Type (io.prestosql.spi.type.Type)3 ArrayBlock (io.prestosql.spi.block.ArrayBlock)2 RowBlock (io.prestosql.spi.block.RowBlock)2 MapType (io.prestosql.spi.type.MapType)2 TypeSignatureParameter (io.prestosql.spi.type.TypeSignatureParameter)2 BooleanList (it.unimi.dsi.fastutil.booleans.BooleanList)2 Optional (java.util.Optional)2 ImmutableList (com.google.common.collect.ImmutableList)1 RichColumnDescriptor (io.prestosql.parquet.RichColumnDescriptor)1 Page (io.prestosql.spi.Page)1 PrestoException (io.prestosql.spi.PrestoException)1 LazyBlock (io.prestosql.spi.block.LazyBlock)1 NamedTypeSignature (io.prestosql.spi.type.NamedTypeSignature)1 BooleanArrayList (it.unimi.dsi.fastutil.booleans.BooleanArrayList)1 IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList)1