Search in sources :

Example 26 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

Method ensureCompressedBytesAvailable of the class CompressedOrcChunkLoader.

/**
 * Makes sure at least {@code size} bytes can be read from {@code compressedBufferStream}.
 * <p>
 * When the current buffer already holds enough bytes nothing happens. Otherwise the data
 * reader is re-seeked to the current absolute position and {@code compressedBufferStream}
 * is replaced with an input over the newly returned buffer.
 *
 * @param size number of compressed bytes the caller is about to consume
 * @throws OrcCorruptionException if {@code size} exceeds the reader's max buffer size, if the
 *         read would extend past the end of the stream, or if the buffer returned by the
 *         reader is shorter than {@code size}
 * @throws IOException declared by the signature; presumably raised by the data reader on an
 *         I/O failure (confirm against {@code seekBuffer})
 */
private void ensureCompressedBytesAvailable(int size) throws IOException {
    // is this within the current buffer?
    if (size <= compressedBufferStream.remaining()) {
        return;
    }
    // is this a read larger than the buffer
    if (size > dataReader.getMaxBufferSize()) {
        throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Requested read size (%s bytes) is greater than max buffer size (%s bytes)", size, dataReader.getMaxBufferSize());
    }
    // is this a read past the end of the stream
    if (compressedBufferStart + compressedBufferStream.position() + size > dataReader.getSize()) {
        throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Read past end of stream");
    }
    // move the buffer start to the current read position and reload from there
    compressedBufferStart = compressedBufferStart + toIntExact(compressedBufferStream.position());
    Slice compressedBuffer = dataReader.seekBuffer(compressedBufferStart);
    // report the reader's retained size after the seek
    dataReaderMemoryUsage.setBytes(dataReader.getRetainedSize());
    if (compressedBuffer.length() < size) {
        throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Requested read of %s bytes but only %s bytes were available", size, compressedBuffer.length());
    }
    compressedBufferStream = compressedBuffer.getInput();
}
Also used : Slice(io.airlift.slice.Slice) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 27 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

Method nextLongDecimal of the class DecimalInputStream.

/**
 * Decodes {@code batchSize} variable-length encoded decimals from the current block(s) and
 * hands each one to {@code emitLongDecimal} as three parts: {@code low}, {@code middle} and
 * {@code high}, plus a sign flag.
 * <p>
 * The inner loop is a fast path that reads whole longs/ints from the block; it only runs while
 * at least 20 bytes remain. Whatever is left in the block is delegated to
 * {@code decodeLongDecimalTail}.
 *
 * @param result output array, passed through to {@code emitLongDecimal} and
 *        {@code decodeLongDecimalTail}
 * @param batchSize number of decimals to decode
 * @throws OrcCorruptionException if an encoded value needs more bits than the high part allows
 * @throws IOException declared by the signature; presumably raised while advancing to the next
 *         block (confirm against {@code advance})
 */
// result must have at least batchSize * 2 capacity
@SuppressWarnings("PointlessBitwiseExpression")
public void nextLongDecimal(long[] result, int batchSize) throws IOException {
    verify(result.length >= batchSize * 2);
    int count = 0;
    while (count < batchSize) {
        // current block fully consumed: move on (advance() presumably loads the next block)
        if (blockOffset == block.length()) {
            advance();
        }
        // fast path: 20 bytes of headroom covers the worst case of 8 + 8 + 4 bytes read below
        while (blockOffset <= block.length() - 20) {
            // we'll read 2 longs + 1 int
            long low;
            long middle = 0;
            int high = 0;
            // low bits
            long current = block.getLong(blockOffset);
            // zeros = index of the lowest bit set in (~current & LONG_MASK), or 64 if none is set.
            // NOTE(review): presumably LONG_MASK selects the top (continuation) bit of every
            // byte, making this the position of the first terminating byte - confirm.
            int zeros = Long.numberOfTrailingZeros(~current & LONG_MASK);
            // number of bytes of this long that belong to the value (8 when zeros == 64)
            int end = (zeros + 1) / 8;
            blockOffset += end;
            // the lowest bit of the first byte is taken as the sign flag
            boolean negative = (current & 1) == 1;
            // squeeze the 7 low bits of each of the 8 bytes into contiguous 7-bit groups
            low = (current & 0x7F_00_00_00_00_00_00_00L) >>> 7;
            low |= (current & 0x7F_00_00_00_00_00_00L) >>> 6;
            low |= (current & 0x7F_00_00_00_00_00L) >>> 5;
            low |= (current & 0x7F_00_00_00_00L) >>> 4;
            low |= (current & 0x7F_00_00_00) >>> 3;
            low |= (current & 0x7F_00_00) >>> 2;
            low |= (current & 0x7F_00) >>> 1;
            low |= (current & 0x7F) >>> 0;
            // drop the groups that came from bytes beyond the consumed ones
            low = low & ((1L << (end * 7)) - 1);
            // middle bits
            // zeros == 64: no masked bit was set in the 8 bytes, so the value continues
            if (zeros == 64) {
                current = block.getLong(blockOffset);
                zeros = Long.numberOfTrailingZeros(~current & LONG_MASK);
                end = (zeros + 1) / 8;
                blockOffset += end;
                // same 7-bit gathering as for the low part
                middle = (current & 0x7F_00_00_00_00_00_00_00L) >>> 7;
                middle |= (current & 0x7F_00_00_00_00_00_00L) >>> 6;
                middle |= (current & 0x7F_00_00_00_00_00L) >>> 5;
                middle |= (current & 0x7F_00_00_00_00L) >>> 4;
                middle |= (current & 0x7F_00_00_00) >>> 3;
                middle |= (current & 0x7F_00_00) >>> 2;
                middle |= (current & 0x7F_00) >>> 1;
                middle |= (current & 0x7F) >>> 0;
                middle = middle & ((1L << (end * 7)) - 1);
                // high bits
                if (zeros == 64) {
                    int last = block.getInt(blockOffset);
                    // int-sized variant of the scan above (INT_MASK presumably mirrors LONG_MASK)
                    zeros = Integer.numberOfTrailingZeros(~last & INT_MASK);
                    end = (zeros + 1) / 8;
                    blockOffset += end;
                    // only the three lowest bytes contribute payload bits to the high part
                    high = (last & 0x7F_00_00) >>> 2;
                    high |= (last & 0x7F_00) >>> 1;
                    high |= (last & 0x7F) >>> 0;
                    high = high & ((1 << (end * 7)) - 1);
                    // end == 4 means none of the four bytes terminated the value
                    if (end == 4 || high > 0xFF_FF) {
                        // only 127 - (55 + 56) = 16 bits allowed in high
                        throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Decimal exceeds 128 bits");
                    }
                }
            }
            emitLongDecimal(result, count, low, middle, high, negative);
            count++;
            if (count == batchSize) {
                return;
            }
        }
        // handle the tail of the current block
        // the helper returns the updated count of decoded values
        count = decodeLongDecimalTail(result, count, batchSize);
    }
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) DecimalStreamCheckpoint(io.trino.orc.checkpoint.DecimalStreamCheckpoint)

Example 28 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

Method decodeShortDecimalTail of the class DecimalInputStream.

/**
 * Byte-at-a-time decoder for short decimals, used where the block does not leave enough room
 * for whole-word reads. Each byte contributes its low 7 bits; a byte whose 0x80 bit is clear
 * ends the current value.
 * <p>
 * Decoding stops after a value is completed when the block is exhausted, when a block boundary
 * was crossed while decoding, or when {@code batchSize} values have been produced.
 *
 * @param result output array handed to {@code emitShortDecimal}
 * @param count number of values already decoded into {@code result}
 * @param batchSize total number of values wanted
 * @return the updated number of decoded values
 * @throws OrcCorruptionException if a value uses more than 11 bytes or its high part exceeds 8 bits
 * @throws IOException declared by the signature; presumably raised by {@code advance}
 */
private int decodeShortDecimalTail(long[] result, int count, int batchSize) throws IOException {
    long lowBits = 0;
    long highBits = 0;
    boolean crossedBlock = false;
    int byteIndex = 0;

    if (blockOffset == block.length()) {
        advance();
    }

    for (; ; ) {
        long current = block.getByte(blockOffset);
        blockOffset++;

        if (byteIndex >= 11) {
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Decimal does not fit long (invalid table schema?)");
        }
        long payload = current & 0x7F;
        if (byteIndex < 8) {
            // first eight bytes fill the low part, 7 bits each
            lowBits |= payload << (byteIndex * 7);
        }
        else {
            // bytes 8..10 fill the high part
            highBits |= payload << ((byteIndex - 8) * 7);
        }
        byteIndex++;

        boolean moreBytesFollow = (current & 0x80) != 0;
        if (moreBytesFollow) {
            // value continues; if the block ran out mid-value, pull in the next one
            if (blockOffset == block.length()) {
                crossedBlock = true;
                advance();
            }
            continue;
        }

        // value complete
        if (highBits > 0xFF) {
            // only 63 - (55) = 8 bits allowed in high
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Decimal does not fit long (invalid table schema?)");
        }
        emitShortDecimal(result, count, lowBits, highBits);
        count++;

        // block exhausted: return so the caller can loop around to optimized decoding;
        // otherwise stop once a block boundary was crossed or the batch is full
        if (blockOffset == block.length() || crossedBlock || count == batchSize) {
            return count;
        }

        lowBits = 0;
        highBits = 0;
        byteIndex = 0;
    }
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) DecimalStreamCheckpoint(io.trino.orc.checkpoint.DecimalStreamCheckpoint)

Example 29 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

Method readBlock of the class BooleanColumnReader.

/**
 * Produces the block for the next {@code nextBatchSize} positions, first applying any pending
 * {@code readOffset} skip, then resets {@code readOffset} and {@code nextBatchSize} to zero.
 *
 * @return the block read for the current batch
 * @throws OrcCorruptionException if values must be skipped or read but the data stream is
 *         missing, or if both the data and present streams are missing
 * @throws IOException declared by the signature; presumably raised by the underlying streams
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // With a present stream, skipping it yields the number of set bits, which becomes the
    // number of entries to skip in the data stream.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    Block result;
    if (dataStream != null && presentStream == null) {
        // no present stream: read every position from the data stream
        result = readNonNullBlock();
    }
    else if (dataStream != null) {
        boolean[] nullFlags = new boolean[nextBatchSize];
        int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
        if (nulls == 0) {
            result = readNonNullBlock();
        }
        else if (nulls == nextBatchSize) {
            result = RunLengthEncodedBlock.create(BOOLEAN, null, nextBatchSize);
        }
        else {
            result = readNullBlock(nullFlags, nextBatchSize - nulls);
        }
    }
    else {
        // no data stream: the whole batch is emitted as a run of nulls
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(BOOLEAN, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) ByteArrayBlock(io.trino.spi.block.ByteArrayBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 30 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

Method readBlock of the class FloatColumnReader.

/**
 * Reads the batch of {@code nextBatchSize} positions as a block. Any pending {@code readOffset}
 * is consumed first; both {@code readOffset} and {@code nextBatchSize} are zeroed before
 * returning.
 *
 * @return the block for the current batch
 * @throws OrcCorruptionException if the data stream is missing while values need to be skipped,
 *         or if neither a data stream nor a present stream exists
 * @throws IOException declared by the signature; presumably raised by the underlying streams
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    boolean hasPresent = presentStream != null;

    // Translate skipped positions into skipped data values: with a present stream only the
    // set bits correspond to entries in the data stream.
    if (readOffset > 0 && hasPresent) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    Block batch;
    if (dataStream == null) {
        // without a data stream the batch is emitted as a run of nulls
        if (!hasPresent) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        batch = RunLengthEncodedBlock.create(REAL, null, nextBatchSize);
    }
    else if (!hasPresent) {
        batch = readNonNullBlock();
    }
    else {
        boolean[] nullMask = new boolean[nextBatchSize];
        int nullPositions = presentStream.getUnsetBits(nextBatchSize, nullMask);
        if (nullPositions == 0) {
            batch = readNonNullBlock();
        }
        else if (nullPositions == nextBatchSize) {
            batch = RunLengthEncodedBlock.create(REAL, null, nextBatchSize);
        }
        else {
            int nonNullPositions = nextBatchSize - nullPositions;
            batch = readNullBlock(nullMask, nonNullPositions);
        }
    }

    readOffset = 0;
    nextBatchSize = 0;
    return batch;
}
Also used : IntArrayBlock(io.trino.spi.block.IntArrayBlock) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Aggregations

OrcCorruptionException (io.trino.orc.OrcCorruptionException): 33; Block (io.trino.spi.block.Block): 13; RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock): 11; LongStreamCheckpoint (io.trino.orc.checkpoint.LongStreamCheckpoint): 7; DecimalStreamCheckpoint (io.trino.orc.checkpoint.DecimalStreamCheckpoint): 4; InputStreamCheckpoint.createInputStreamCheckpoint (io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint): 4; LongStreamV2Checkpoint (io.trino.orc.checkpoint.LongStreamV2Checkpoint): 4; ByteArrayBlock (io.trino.spi.block.ByteArrayBlock): 4; LongArrayBlock (io.trino.spi.block.LongArrayBlock): 4; Slice (io.airlift.slice.Slice): 3; LongStreamV1Checkpoint (io.trino.orc.checkpoint.LongStreamV1Checkpoint): 3; ByteStreamCheckpoint (io.trino.orc.checkpoint.ByteStreamCheckpoint): 2; ReaderUtils.verifyStreamType (io.trino.orc.reader.ReaderUtils.verifyStreamType): 2; IntArrayBlock (io.trino.spi.block.IntArrayBlock): 2; LazyBlock (io.trino.spi.block.LazyBlock): 2; RowBlock (io.trino.spi.block.RowBlock): 2; VariableWidthBlock (io.trino.spi.block.VariableWidthBlock): 2; RowType (io.trino.spi.type.RowType): 2; Type (io.trino.spi.type.Type): 2; ColumnReaders.createColumnReader (io.trino.orc.reader.ColumnReaders.createColumnReader): 1