use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class CompressedOrcChunkLoader method ensureCompressedBytesAvailable.
/**
 * Makes sure at least {@code size} bytes can be read from {@code compressedBufferStream}.
 * <p>
 * If the current buffer already holds enough bytes this is a no-op. Otherwise the
 * absolute stream position is advanced past the bytes already consumed, the data
 * reader is asked for a buffer starting at that position, and the buffer stream is
 * replaced with an input over the new buffer.
 *
 * @param size number of bytes the caller is about to read
 * @throws OrcCorruptionException if {@code size} exceeds the reader's maximum buffer
 *         size, if the read would extend past the end of the stream, or if the
 *         buffer returned by the reader is shorter than {@code size}
 * @throws IOException declared on the signature; presumably propagated from the data reader's seek
 */
private void ensureCompressedBytesAvailable(int size) throws IOException {
    // Fast path: the request is satisfied by what is left in the current buffer.
    if (size <= compressedBufferStream.remaining()) {
        return;
    }

    // A single read can never be larger than the buffer the reader is able to hand out.
    if (size > dataReader.getMaxBufferSize()) {
        throw new OrcCorruptionException(
                dataReader.getOrcDataSourceId(),
                "Requested read size (%s bytes) is greater than max buffer size (%s bytes)",
                size,
                dataReader.getMaxBufferSize());
    }

    // The read must end at or before the end of the stream.
    if (compressedBufferStart + compressedBufferStream.position() + size > dataReader.getSize()) {
        throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Read past end of stream");
    }

    // Re-anchor the buffer at the first unconsumed byte and fetch a fresh buffer from there.
    compressedBufferStart = compressedBufferStart + toIntExact(compressedBufferStream.position());
    Slice compressedBuffer = dataReader.seekBuffer(compressedBufferStart);
    // Seeking may change how much memory the reader retains, so refresh the accounting.
    dataReaderMemoryUsage.setBytes(dataReader.getRetainedSize());

    if (compressedBuffer.length() < size) {
        throw new OrcCorruptionException(
                dataReader.getOrcDataSourceId(),
                "Requested read of %s bytes but only %s bytes were available",
                size,
                compressedBuffer.length());
    }
    compressedBufferStream = compressedBuffer.getInput();
}
use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class DecimalInputStream method nextLongDecimal.
/**
 * Decodes {@code batchSize} long decimals from the stream into {@code result}.
 * <p>
 * Each value is read as up to two 8-byte words plus one 4-byte word, the payload
 * bits are compacted into {@code low} / {@code middle} / {@code high}, and the
 * parts are handed to {@code emitLongDecimal} together with a sign flag. While at
 * least 20 bytes remain in the current block the values are decoded with wide
 * reads; the remainder of the block is delegated to {@code decodeLongDecimalTail}.
 *
 * @param result output array; must have at least {@code batchSize * 2} entries (verified below)
 * @param batchSize number of decimals to decode
 * @throws OrcCorruptionException if a value needs more than 16 bits in its high part
 * @throws IOException declared on the signature; presumably raised when advancing to the next block
 */
// result must have at least batchSize * 2 capacity
// NOTE(review): the suppression presumably exists for the `>>> 0` shifts kept for symmetry below
@SuppressWarnings("PointlessBitwiseExpression")
public void nextLongDecimal(long[] result, int batchSize) throws IOException {
verify(result.length >= batchSize * 2);
int count = 0;
while (count < batchSize) {
// current block fully consumed: move on (advance() is not shown here; presumably loads the next block)
if (blockOffset == block.length()) {
advance();
}
// fast path: needs 8 + 8 + 4 = 20 readable bytes so none of the wide reads below can run past the block
while (blockOffset <= block.length() - 20) {
// we'll read 2 longs + 1 int
long low;
long middle = 0;
int high = 0;
// low bits
long current = block.getLong(blockOffset);
// NOTE(review): assumes LONG_MASK selects the top (continuation) bit of every byte - confirm at its declaration.
// Under that assumption, zeros is the bit index of the first byte whose continuation bit is clear,
// or 64 when all eight bytes have it set (numberOfTrailingZeros(0) == 64).
int zeros = Long.numberOfTrailingZeros(~current & LONG_MASK);
// number of bytes of this word that belong to the value: 7 -> 1, 15 -> 2, ..., 63 -> 8, 64 -> 8
int end = (zeros + 1) / 8;
blockOffset += end;
// lowest bit of the first byte is passed on as the sign flag; how it is applied is up to emitLongDecimal
boolean negative = (current & 1) == 1;
// squeeze out the top bit of each byte: byte k (0-based) contributes its 7 low bits, shifted right by k,
// yielding up to 56 contiguous payload bits
low = (current & 0x7F_00_00_00_00_00_00_00L) >>> 7;
low |= (current & 0x7F_00_00_00_00_00_00L) >>> 6;
low |= (current & 0x7F_00_00_00_00_00L) >>> 5;
low |= (current & 0x7F_00_00_00_00L) >>> 4;
low |= (current & 0x7F_00_00_00) >>> 3;
low |= (current & 0x7F_00_00) >>> 2;
low |= (current & 0x7F_00) >>> 1;
low |= (current & 0x7F) >>> 0;
// keep only the 7 * end bits that came from bytes belonging to this value
low = low & ((1L << (end * 7)) - 1);
// middle bits
// zeros == 64: no terminating byte in the first word, so the value continues into the next 8 bytes
if (zeros == 64) {
current = block.getLong(blockOffset);
zeros = Long.numberOfTrailingZeros(~current & LONG_MASK);
end = (zeros + 1) / 8;
blockOffset += end;
// same compaction as for the low word
middle = (current & 0x7F_00_00_00_00_00_00_00L) >>> 7;
middle |= (current & 0x7F_00_00_00_00_00_00L) >>> 6;
middle |= (current & 0x7F_00_00_00_00_00L) >>> 5;
middle |= (current & 0x7F_00_00_00_00L) >>> 4;
middle |= (current & 0x7F_00_00_00) >>> 3;
middle |= (current & 0x7F_00_00) >>> 2;
middle |= (current & 0x7F_00) >>> 1;
middle |= (current & 0x7F) >>> 0;
middle = middle & ((1L << (end * 7)) - 1);
// high bits
// still no terminating byte after 16 bytes: read the last (up to) 4 bytes
if (zeros == 64) {
int last = block.getInt(blockOffset);
// NOTE(review): assumes INT_MASK is the 32-bit counterpart of LONG_MASK - confirm at its declaration
zeros = Integer.numberOfTrailingZeros(~last & INT_MASK);
end = (zeros + 1) / 8;
blockOffset += end;
// only the first three bytes are compacted (21 payload bits at most)
high = (last & 0x7F_00_00) >>> 2;
high |= (last & 0x7F_00) >>> 1;
high |= (last & 0x7F) >>> 0;
high = high & ((1 << (end * 7)) - 1);
// end == 4 means the value reaches into a fourth byte of this read (or beyond it), which is always too wide
if (end == 4 || high > 0xFF_FF) {
// only 127 - (55 + 56) = 16 bits allowed in high
throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Decimal exceeds 128 bits");
}
}
}
emitLongDecimal(result, count, low, middle, high, negative);
count++;
if (count == batchSize) {
return;
}
}
// handle the tail of the current block
// fewer than 20 bytes are left in the block; the helper returns the updated count
count = decodeLongDecimalTail(result, count, batchSize);
}
}
use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class DecimalInputStream method decodeShortDecimalTail.
/**
 * Decodes short decimals one byte at a time, for the part of a block where the
 * wide-read path cannot be used.
 * <p>
 * Every byte contributes its 7 low bits; a byte with the top bit clear ends the
 * value. The first eight bytes of a value go into the low word, the next three
 * into the high word, and a twelfth byte is rejected. Decoding stops after a
 * completed value when the current block is exhausted, when a block boundary was
 * crossed while decoding, or when {@code batchSize} values have been produced.
 *
 * @param result output array passed through to {@code emitShortDecimal}
 * @param count number of values already decoded into {@code result}
 * @param batchSize total number of values requested
 * @return the updated number of decoded values
 * @throws OrcCorruptionException if a value is too wide for a short decimal
 * @throws IOException declared on the signature; presumably raised when advancing to the next block
 */
private int decodeShortDecimalTail(long[] result, int count, int batchSize) throws IOException {
    if (blockOffset == block.length()) {
        advance();
    }

    long lowBits = 0;
    long highBits = 0;
    int septetIndex = 0;
    boolean crossedBlock = false;

    for (;;) {
        long raw = block.getByte(blockOffset);
        blockOffset++;

        long payload = raw & 0x7F;
        if (septetIndex < 8) {
            // septet 0 is shifted by zero bits, i.e. stored as-is
            lowBits |= payload << (septetIndex * 7);
        } else if (septetIndex < 11) {
            highBits |= payload << ((septetIndex - 8) * 7);
        } else {
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Decimal does not fit long (invalid table schema?)");
        }
        septetIndex++;

        boolean continues = (raw & 0x80) != 0;
        if (continues) {
            // value spills over the end of the block: fetch the next one and remember that we did
            if (blockOffset == block.length()) {
                crossedBlock = true;
                advance();
            }
            continue;
        }

        if (highBits > 0xFF) {
            // only 63 - (55) = 8 bits allowed in high
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Decimal does not fit long (invalid table schema?)");
        }
        emitShortDecimal(result, count, lowBits, highBits);
        count++;

        // block exhausted (caller loops around to optimized decoding), boundary crossed, or batch complete
        if (blockOffset == block.length() || crossedBlock || count == batchSize) {
            return count;
        }

        lowBits = 0;
        highBits = 0;
        septetIndex = 0;
    }
}
use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class BooleanColumnReader method readBlock.
/**
 * Reads the next batch of {@code nextBatchSize} boolean values as a block.
 * <p>
 * Opens the row group if needed, applies any pending skip ({@code readOffset}) to
 * the present and data streams, builds the block from whichever streams exist,
 * and finally resets {@code readOffset} and {@code nextBatchSize} to zero.
 *
 * @return the block for the current batch; a run-length encoded null block when every position is null
 * @throws OrcCorruptionException if the stream needed for skipping or reading is missing
 * @throws IOException declared on the signature; presumably propagated from the underlying streams
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // With a present stream, only the set (non-null) bits translate into data values to skip.
    if (readOffset > 0 && presentStream != null) {
        // skip ahead the present bit reader, but count the set bits
        // and use this as the skip size for the data reader
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    Block batch;
    if (dataStream != null) {
        if (presentStream != null) {
            boolean[] nullFlags = new boolean[nextBatchSize];
            int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
            if (nulls == 0) {
                batch = readNonNullBlock();
            } else if (nulls == nextBatchSize) {
                // every position is null
                batch = RunLengthEncodedBlock.create(BOOLEAN, null, nextBatchSize);
            } else {
                batch = readNullBlock(nullFlags, nextBatchSize - nulls);
            }
        } else {
            // no present stream: nothing can be null
            batch = readNonNullBlock();
        }
    } else if (presentStream != null) {
        // no data stream: the whole batch is null, just consume the present bits
        presentStream.skip(nextBatchSize);
        batch = RunLengthEncodedBlock.create(BOOLEAN, null, nextBatchSize);
    } else {
        throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
    }

    readOffset = 0;
    nextBatchSize = 0;
    return batch;
}
use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class FloatColumnReader method readBlock.
/**
 * Reads the next batch of {@code nextBatchSize} REAL values as a block.
 * <p>
 * Opens the row group if needed, applies any pending skip ({@code readOffset}) to
 * the present and data streams, builds the block from whichever streams exist,
 * and finally resets {@code readOffset} and {@code nextBatchSize} to zero.
 *
 * @return the block for the current batch; a run-length encoded null block when every position is null
 * @throws OrcCorruptionException if the stream needed for skipping or reading is missing
 * @throws IOException declared on the signature; presumably propagated from the underlying streams
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // With a present stream, only the set (non-null) bits translate into data values to skip.
    if (readOffset > 0 && presentStream != null) {
        // skip ahead the present bit reader, but count the set bits
        // and use this as the skip size for the data reader
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    Block batch;
    if (dataStream != null) {
        if (presentStream != null) {
            boolean[] nullFlags = new boolean[nextBatchSize];
            int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
            if (nulls == 0) {
                batch = readNonNullBlock();
            } else if (nulls == nextBatchSize) {
                // every position is null
                batch = RunLengthEncodedBlock.create(REAL, null, nextBatchSize);
            } else {
                batch = readNullBlock(nullFlags, nextBatchSize - nulls);
            }
        } else {
            // no present stream: nothing can be null
            batch = readNonNullBlock();
        }
    } else if (presentStream != null) {
        // no data stream: the whole batch is null, just consume the present bits
        presentStream.skip(nextBatchSize);
        batch = RunLengthEncodedBlock.create(REAL, null, nextBatchSize);
    } else {
        throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
    }

    readOffset = 0;
    nextBatchSize = 0;
    return batch;
}
Aggregations