use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class OrcInputStream method readFully.
/**
 * Reads exactly {@code length} bytes into {@code buffer}, starting at index {@code offset}.
 * <p>
 * {@link #read(byte[], int, int)} is called repeatedly until the whole range
 * {@code [offset, offset + length)} has been filled, since a single call may
 * deliver fewer bytes than requested.
 *
 * @param buffer destination array
 * @param offset index in {@code buffer} at which the first byte is stored
 * @param length number of bytes to read
 * @throws OrcCorruptionException if {@code read} reports a negative count before the range is filled
 * @throws IOException if the underlying read fails
 */
public void readFully(byte[] buffer, int offset, int length) throws IOException {
    // length is a byte count, not an end index: compute the exclusive end once so that
    // calls with a non-zero offset still read the full requested number of bytes.
    int end = offset + length;
    while (offset < end) {
        int result = read(buffer, offset, end - offset);
        if (result < 0) {
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
        }
        offset += result;
    }
}
use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class LongInputStreamV2 method readPatchedBaseValues.
// This comes from the Apache Hive ORC code
/**
 * Decodes one "patched base" run from {@code input} and appends the decoded
 * values to {@code literals}, advancing {@code numLiterals} by the run length.
 * <p>
 * Header layout as consumed below:
 * <ul>
 * <li>first byte (already read by the caller): bits 1-5 hold the encoded bit width of each
 * packed value, bit 0 is the 9th (highest) bit of the run length</li>
 * <li>second byte: low 8 bits of the run length (stored as length - 1)</li>
 * <li>third byte: top 3 bits are the base width in bytes minus one, low 5 bits the encoded patch width</li>
 * <li>fourth byte: top 3 bits are the patch gap width minus one, low 5 bits the patch list length</li>
 * </ul>
 * The header is followed by the base value, the packed data blob and the packed patch list.
 *
 * @param firstByte the first header byte of the run
 * @throws OrcCorruptionException if patch width plus patch gap width exceeds 64 bits and
 * {@code skipCorrupt} is not set
 * @throws IOException if reading from {@code input} fails
 */
private void readPatchedBaseValues(int firstByte) throws IOException {
// extract the number of fixed bits
int fb = LongDecode.decodeBitWidth((firstByte >>> 1) & 0b1_1111);
// extract the run length of data blob
// NOTE(review): the results of input.read() below are used without checking for a
// negative (end-of-stream) value -- confirm the stream cannot end inside the header
int length = (firstByte & 0b1) << 8;
length |= input.read();
// runs are always one off
length += 1;
// extract the number of bytes occupied by base
int thirdByte = input.read();
int baseWidth = (thirdByte >>> 5) & 0b0111;
// base width is one off
baseWidth += 1;
// extract patch width
int patchWidth = LongDecode.decodeBitWidth(thirdByte & 0b1_1111);
// read fourth byte and extract patch gap width
int fourthByte = input.read();
int patchGapWidth = (fourthByte >>> 5) & 0b0111;
// patch gap width is one off
patchGapWidth += 1;
// extract the length of the patch list
int patchListLength = fourthByte & 0b1_1111;
// read the next base width number of bytes to extract base value
long base = bytesToLongBE(input, baseWidth);
// mask selecting the most significant bit of the baseWidth-byte value, used as a sign flag
long mask = (1L << ((baseWidth * 8) - 1));
// if MSB of base value is 1 then base is negative value else positive
if ((base & mask) != 0) {
// clear the sign flag, then negate the remaining magnitude
base = base & ~mask;
base = -base;
}
// unpack the data blob
long[] unpacked = new long[length];
packer.unpack(unpacked, 0, length, fb, input);
// unpack the patch blob
long[] unpackedPatch = new long[patchListLength];
// a patch entry (gap + patch value) must fit in a 64-bit long
if ((patchWidth + patchGapWidth) > 64 && !skipCorrupt) {
throw new OrcCorruptionException(input.getOrcDataSourceId(), "Invalid RLEv2 encoded stream");
}
int bitSize = LongDecode.getClosestFixedBits(patchWidth + patchGapWidth);
packer.unpack(unpackedPatch, 0, patchListLength, bitSize, input);
// apply the patch directly when decoding the packed data
int patchIndex = 0;
long currentGap;
long currentPatch;
// each patch entry holds the gap in its high bits and the patch value in its low patchWidth bits
long patchMask = ((1L << patchWidth) - 1);
// NOTE(review): the first entry is read unconditionally; a patch list length of 0 would
// index past the end of unpackedPatch -- confirm the format forbids an empty patch list
currentGap = unpackedPatch[patchIndex] >>> patchWidth;
currentPatch = unpackedPatch[patchIndex] & patchMask;
long actualGap = 0;
// if gap is <=255 then patch value cannot be 0
// an entry with gap 255 and patch 0 only extends the gap; accumulate it and move to the next entry
// NOTE(review): patchIndex is advanced here without a bounds check against patchListLength
while (currentGap == 255 && currentPatch == 0) {
actualGap += 255;
patchIndex++;
currentGap = unpackedPatch[patchIndex] >>> patchWidth;
currentPatch = unpackedPatch[patchIndex] & patchMask;
}
// add the left over gap
actualGap += currentGap;
// unpack data blob, patch it (if required), add base to get final result
for (int i = 0; i < unpacked.length; i++) {
if (i == actualGap) {
// extract the patch value
// the patch supplies the bits above the fb low bits stored in the data blob
long patchedValue = unpacked[i] | (currentPatch << fb);
// add base to patched value
literals[numLiterals++] = base + patchedValue;
// increment the patch to point to next entry in patch list
patchIndex++;
if (patchIndex < patchListLength) {
// read the next gap and patch
currentGap = unpackedPatch[patchIndex] >>> patchWidth;
currentPatch = unpackedPatch[patchIndex] & patchMask;
actualGap = 0;
// <=255 then patch cannot be 0
// same gap-extension handling as for the first patch entry above
while (currentGap == 255 && currentPatch == 0) {
actualGap += 255;
patchIndex++;
currentGap = unpackedPatch[patchIndex] >>> patchWidth;
currentPatch = unpackedPatch[patchIndex] & patchMask;
}
// add the left over gap
actualGap += currentGap;
// next gap is relative to the current gap
actualGap += i;
}
} else {
// no patching required. add base to unpacked value to get final value
literals[numLiterals++] = base + unpacked[i];
}
}
}
use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class LongInputStreamV2 method next.
/**
 * Copies the next {@code items} decoded values into {@code values}, starting at index 0.
 * <p>
 * Values are taken from the {@code literals} buffer; whenever every buffered literal
 * has been consumed the buffer is reset and refilled through {@code readValues()}.
 *
 * @param values destination array for the decoded values
 * @param items number of values to produce
 * @throws OrcCorruptionException if a decoded literal does not fit in a 32-bit int
 * @throws IOException if refilling the literal buffer fails
 */
@Override
public void next(int[] values, int items) throws IOException {
    int written = 0;
    int remaining = items;
    while (remaining > 0) {
        // buffer exhausted: reset the cursors and decode the next run
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int batch = min(numLiterals - used, remaining);
        int sourceEnd = used + batch;
        for (int source = used, target = written; source < sourceEnd; source++, target++) {
            long decoded = literals[source];
            int narrowed = (int) decoded;
            // a narrowing cast that changes the value means the literal was outside int range
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[target] = narrowed;
        }

        used = sourceEnd;
        written += batch;
        remaining -= batch;
    }
}
use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class ByteColumnReader method readBlock.
/**
 * Builds the block for the pending batch of {@code nextBatchSize} positions.
 * <p>
 * Opens the row group if needed, applies any pending skip recorded in {@code readOffset},
 * then produces either an all-null run-length block, a block without nulls, or a block
 * with a null mask, depending on which streams exist and what the present stream reports.
 * {@code readOffset} and {@code nextBatchSize} are reset to 0 before returning.
 *
 * @return the block for the current batch
 * @throws OrcCorruptionException if a required data or present stream is missing
 * @throws IOException if reading from the underlying streams fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Advance the present stream over the skipped positions; the number of set bits it
    // reports becomes the number of entries to skip in the data stream.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    Block result;
    if (dataStream != null) {
        if (presentStream != null) {
            boolean[] nullFlags = new boolean[nextBatchSize];
            int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
            if (nulls == 0) {
                result = readNonNullBlock();
            } else if (nulls == nextBatchSize) {
                // every position in the batch is null
                result = RunLengthEncodedBlock.create(TINYINT, null, nextBatchSize);
            } else {
                result = readNullBlock(nullFlags, nextBatchSize - nulls);
            }
        } else {
            // no present stream: read the batch without a null mask
            result = readNonNullBlock();
        }
    } else {
        // no data stream: the batch is returned as a single run of nulls
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(TINYINT, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
the class DecimalColumnReader method readBlock.
/**
 * Builds the block for the pending batch of {@code nextBatchSize} positions.
 * <p>
 * Opens the row group if needed, seeks to the pending read offset, then produces either
 * an all-null run-length block, a block without nulls, or a block with a null mask.
 * Every all-null block is created with this reader's {@code type}.
 * {@code readOffset} and {@code nextBatchSize} are reset to 0 before returning.
 *
 * @return the block for the current batch
 * @throws OrcCorruptionException if both data streams and the present stream are missing
 * @throws IOException if reading from the underlying streams fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    seekToOffset();

    Block block;
    if (decimalStream == null && scaleStream == null) {
        // neither data stream exists: the batch is returned as a single run of nulls
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        block = RunLengthEncodedBlock.create(type, null, nextBatchSize);
    } else if (presentStream == null) {
        checkDataStreamsArePresent();
        block = readNonNullBlock();
    } else {
        checkDataStreamsArePresent();
        boolean[] isNull = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNull);
        if (nullCount == 0) {
            block = readNonNullBlock();
        } else if (nullCount != nextBatchSize) {
            block = readNullBlock(isNull, nextBatchSize - nullCount);
        } else {
            // all values in the batch are null: use the column's own type, matching the
            // all-null branch above, rather than DOUBLE
            block = RunLengthEncodedBlock.create(type, null, nextBatchSize);
        }
    }

    readOffset = 0;
    nextBatchSize = 0;
    return block;
}
Aggregations