Search in sources :

Example 31 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class ListColumnReader method readBlock.

/**
 * Reads the next batch of list values and returns them as an array block.
 *
 * <p>Opens the row group if it is not open yet, applies any pending skip
 * ({@code readOffset}) to the present, length and element readers, then reads
 * {@code nextBatchSize} list lengths, turns them into offsets and loads the
 * elements through the block factory. {@code readOffset} and
 * {@code nextBatchSize} are reset to zero once the block has been built.
 *
 * @return the array block for the current batch
 * @throws OrcCorruptionException if a non-null value is expected but the length stream is absent
 * @throws IOException if reading from an underlying stream fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Apply any pending skip before reading the requested batch.
    if (readOffset > 0) {
        if (presentStream != null) {
            // Advance the present bit reader; only the set bits among the skipped
            // positions correspond to entries that must be skipped in the length stream.
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            // Skipping whole lists means skipping the sum of their lengths in the element reader.
            int skippedElements = toIntExact(lengthStream.sum(readOffset));
            elementColumnReader.prepareNextRead(skippedElements);
        }
    }

    // One extra slot: the lengths are read into this array and are then
    // converted in place into an offset vector.
    int[] offsets = new int[nextBatchSize + 1];
    boolean[] isNull = null;
    if (presentStream != null) {
        isNull = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNull);
        // Lengths only need to be read when at least one position is non-null.
        if (nullCount != nextBatchSize) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            int nonNullCount = nextBatchSize - nullCount;
            lengthStream.next(offsets, nonNullCount);
            unpackLengthNulls(offsets, isNull, nonNullCount);
        }
    } else if (lengthStream == null) {
        throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
    } else {
        // No present stream: every position has a length.
        lengthStream.next(offsets, nextBatchSize);
    }
    convertLengthVectorToOffsetVector(offsets);

    // The last offset is the total number of elements across the batch.
    int totalElements = offsets[offsets.length - 1];
    Block elementBlock;
    if (totalElements <= 0) {
        elementBlock = elementType.createBlockBuilder(null, 0).build();
    } else {
        elementColumnReader.prepareNextRead(totalElements);
        elementBlock = blockFactory.createBlock(totalElements, elementColumnReader::readBlock, true);
    }

    Block result = ArrayBlock.fromElementBlock(nextBatchSize, Optional.ofNullable(isNull), offsets, elementBlock);
    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : Block(io.trino.spi.block.Block) ArrayBlock(io.trino.spi.block.ArrayBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 32 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class TimestampColumnReader method readBlock.

/**
 * Reads the next batch of timestamp values as a block.
 *
 * <p>Opens the row group if it is not open yet and applies any pending skip
 * ({@code readOffset}) to the present, seconds and nanos streams. It then
 * produces a run-length encoded null block, a block read via
 * {@code readNonNullBlock()}, or a block read via {@code readNullBlock(isNull)},
 * depending on which streams exist and how many positions are null.
 * {@code readOffset} and {@code nextBatchSize} are reset to zero afterwards.
 *
 * @return the block for the current batch
 * @throws OrcCorruptionException if both data streams and the present stream are missing
 * @throws IOException if reading from an underlying stream fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Apply any pending skip before reading the requested batch.
    if (readOffset > 0) {
        if (presentStream != null) {
            // Advance the present bit reader; the number of set bits among the
            // skipped positions is how many entries the data streams must skip.
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            verifyStreamsPresent();
            secondsStream.skip(readOffset);
            nanosStream.skip(readOffset);
        }
    }

    Block result;
    if (secondsStream != null || nanosStream != null) {
        if (presentStream == null) {
            // No present stream: every position carries a value.
            result = readNonNullBlock();
        } else {
            boolean[] nullFlags = new boolean[nextBatchSize];
            int nullPositions = presentStream.getUnsetBits(nextBatchSize, nullFlags);
            if (nullPositions == 0) {
                result = readNonNullBlock();
            } else if (nullPositions == nextBatchSize) {
                // Every position is null.
                result = RunLengthEncodedBlock.create(type, null, nextBatchSize);
            } else {
                result = readNullBlock(nullFlags);
            }
        }
    } else {
        // Neither data stream exists, so the whole batch is null; the present
        // stream is still advanced past this batch.
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(type, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) Int96ArrayBlock(io.trino.spi.block.Int96ArrayBlock) LongArrayBlock(io.trino.spi.block.LongArrayBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 33 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class UnionColumnReader method getBlocks.

/**
 * Reads {@code positionCount} union values and returns one block for the tags
 * followed by one block per union field.
 *
 * <p>{@code blocks[0]} holds the raw tag bytes. For each field {@code i},
 * {@code blocks[i + 1]} is either a {@link LazyBlock} over the values read from
 * that field's reader (when at least one position carries tag {@code i}), or a
 * run-length encoded null block of {@code positionCount} positions (when none does).
 *
 * <p>Tag bytes come from the file, so each one is validated before it is used
 * as an array index; an out-of-range tag is reported as file corruption rather
 * than surfacing as an {@code ArrayIndexOutOfBoundsException}.
 *
 * @param positionCount number of union values to read
 * @return an array of {@code fieldReaders.size() + 1} blocks: tags first, then one per field
 * @throws OrcCorruptionException if the data stream is missing or a tag does not name a field
 * @throws IOException if reading from an underlying stream fails
 */
private Block[] getBlocks(int positionCount) throws IOException {
    if (dataStream == null) {
        throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
    }
    int fieldCount = fieldReaders.size();
    Block[] blocks = new Block[fieldCount + 1];
    byte[] tags = dataStream.next(positionCount);
    blocks[0] = new ByteArrayBlock(positionCount, Optional.empty(), tags);

    // Per field: which positions select it, and how many values its reader must produce.
    boolean[][] valueIsNonNull = new boolean[fieldCount][positionCount];
    int[] nonNullValueCount = new int[fieldCount];
    for (int i = 0; i < positionCount; i++) {
        int tag = tags[i];
        // A byte is signed, so a corrupt tag may be negative as well as too large.
        if (tag < 0 || tag >= fieldCount) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Invalid union tag " + tag + " for union with " + fieldCount + " fields");
        }
        valueIsNonNull[tag][i] = true;
        nonNullValueCount[tag]++;
    }

    for (int i = 0; i < fieldCount; i++) {
        // Type parameter 0 is skipped; field i uses parameter i + 1
        // (presumably parameter 0 is the tag type — confirm against the type definition).
        Type fieldType = type.getTypeParameters().get(i + 1);
        if (nonNullValueCount[i] > 0) {
            ColumnReader reader = fieldReaders.get(i);
            reader.prepareNextRead(nonNullValueCount[i]);
            Block rawBlock = blockFactory.createBlock(nonNullValueCount[i], reader::readBlock, true);
            // NOTE(review): UnpackLazyBlockLoader presumably spreads the dense values back
            // over positionCount positions using valueIsNonNull[i] — confirm.
            blocks[i + 1] = new LazyBlock(positionCount, new UnpackLazyBlockLoader(rawBlock, fieldType, valueIsNonNull[i]));
        } else {
            // No position selects this field: a single null repeated positionCount times.
            blocks[i + 1] = new RunLengthEncodedBlock(fieldType.createBlockBuilder(null, 1).appendNull().build(), positionCount);
        }
    }
    return blocks;
}
Also used : Type(io.trino.spi.type.Type) RowType(io.trino.spi.type.RowType) ReaderUtils.verifyStreamType(io.trino.orc.reader.ReaderUtils.verifyStreamType) LazyBlock(io.trino.spi.block.LazyBlock) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) RowBlock(io.trino.spi.block.RowBlock) ByteArrayBlock(io.trino.spi.block.ByteArrayBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException) ColumnReaders.createColumnReader(io.trino.orc.reader.ColumnReaders.createColumnReader)

Aggregations

OrcCorruptionException (io.trino.orc.OrcCorruptionException)33 Block (io.trino.spi.block.Block)13 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)11 LongStreamCheckpoint (io.trino.orc.checkpoint.LongStreamCheckpoint)7 DecimalStreamCheckpoint (io.trino.orc.checkpoint.DecimalStreamCheckpoint)4 InputStreamCheckpoint.createInputStreamCheckpoint (io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)4 LongStreamV2Checkpoint (io.trino.orc.checkpoint.LongStreamV2Checkpoint)4 ByteArrayBlock (io.trino.spi.block.ByteArrayBlock)4 LongArrayBlock (io.trino.spi.block.LongArrayBlock)4 Slice (io.airlift.slice.Slice)3 LongStreamV1Checkpoint (io.trino.orc.checkpoint.LongStreamV1Checkpoint)3 ByteStreamCheckpoint (io.trino.orc.checkpoint.ByteStreamCheckpoint)2 ReaderUtils.verifyStreamType (io.trino.orc.reader.ReaderUtils.verifyStreamType)2 IntArrayBlock (io.trino.spi.block.IntArrayBlock)2 LazyBlock (io.trino.spi.block.LazyBlock)2 RowBlock (io.trino.spi.block.RowBlock)2 VariableWidthBlock (io.trino.spi.block.VariableWidthBlock)2 RowType (io.trino.spi.type.RowType)2 Type (io.trino.spi.type.Type)2 ColumnReaders.createColumnReader (io.trino.orc.reader.ColumnReaders.createColumnReader)1