use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class LongInputStreamV2 method readPatchedBaseValues.
/**
 * Decodes one RLEv2 "patched base" run and appends the decoded values to {@code literals},
 * advancing {@code numLiterals} by the run length.
 * This comes from the Apache Hive ORC code.
 *
 * <p>Header fields, as extracted below:
 * <ul>
 * <li>first byte: bits 1-5 hold the encoded bit width of each packed value, bit 0 is the
 * high bit of the 9-bit run length;</li>
 * <li>second byte: low 8 bits of the run length (stored minus one);</li>
 * <li>third byte: top 3 bits hold the base width in bytes (minus one), low 5 bits hold the
 * encoded patch width;</li>
 * <li>fourth byte: top 3 bits hold the patch gap width (minus one), low 5 bits hold the
 * number of patch list entries.</li>
 * </ul>
 * The header is followed by the base value, the packed data blob and the packed patch list.
 * Each patch list entry carries a gap (distance to the next patched position) in its high
 * bits and the patch bits in its low {@code patchWidth} bits. An entry with gap 255 and
 * patch 0 only extends the gap of the entry that follows it.
 *
 * @param firstByte the first header byte of the run; the remaining header bytes are read
 * from {@code input} by this method
 * @throws OrcCorruptionException if patch width plus patch gap width exceeds 64 bits (unless
 * {@code skipCorrupt} is set), if the patch list is empty, or if the patch list ends in the
 * middle of a gap continuation
 * @throws IOException if reading from the underlying stream fails
 */
private void readPatchedBaseValues(int firstByte)
        throws IOException
{
    // extract the number of fixed bits
    int fb = LongDecode.decodeBitWidth((firstByte >>> 1) & 0b1_1111);

    // extract the run length of data blob
    int length = (firstByte & 0b1) << 8;
    length |= input.read();
    // runs are always one off
    length += 1;

    // extract the number of bytes occupied by base
    int thirdByte = input.read();
    int baseWidth = (thirdByte >>> 5) & 0b0111;
    // base width is one off
    baseWidth += 1;

    // extract patch width
    int patchWidth = LongDecode.decodeBitWidth(thirdByte & 0b1_1111);

    // read fourth byte and extract patch gap width
    int fourthByte = input.read();
    int patchGapWidth = (fourthByte >>> 5) & 0b0111;
    // patch gap width is one off
    patchGapWidth += 1;

    // extract the length of the patch list
    int patchListLength = fourthByte & 0b1_1111;

    // read the next base width number of bytes to extract base value
    long base = bytesToLongBE(input, baseWidth);
    long mask = (1L << ((baseWidth * 8) - 1));
    // if MSB of base value is 1 then base is negative value else positive
    // (the base is stored as sign bit + magnitude, not two's complement)
    if ((base & mask) != 0) {
        base = base & ~mask;
        base = -base;
    }

    // unpack the data blob
    long[] unpacked = new long[length];
    packer.unpack(unpacked, 0, length, fb, input);

    // validate the patch list description before touching the patch blob
    if ((patchWidth + patchGapWidth) > 64 && !skipCorrupt) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Invalid RLEv2 encoded stream");
    }
    // The first patch entry is read unconditionally below, so an empty patch list can only
    // come from a corrupt stream. Report it as corruption instead of failing with an
    // ArrayIndexOutOfBoundsException.
    if (patchListLength == 0) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Invalid RLEv2 encoded stream: patched base run has no patches");
    }

    // unpack the patch blob
    long[] unpackedPatch = new long[patchListLength];
    int bitSize = LongDecode.getClosestFixedBits(patchWidth + patchGapWidth);
    packer.unpack(unpackedPatch, 0, patchListLength, bitSize, input);

    // apply the patch directly when decoding the packed data
    int patchIndex = 0;
    long patchMask = ((1L << patchWidth) - 1);
    long currentGap = unpackedPatch[patchIndex] >>> patchWidth;
    long currentPatch = unpackedPatch[patchIndex] & patchMask;
    long actualGap = 0;

    // if gap is <=255 then patch value cannot be 0, so (gap 255, patch 0) is a filler
    // entry that only extends the gap of the following entry
    while (currentGap == 255 && currentPatch == 0) {
        actualGap += 255;
        patchIndex++;
        if (patchIndex >= patchListLength) {
            throw new OrcCorruptionException(input.getOrcDataSourceId(), "Invalid RLEv2 encoded stream: patch list ends inside a gap continuation");
        }
        currentGap = unpackedPatch[patchIndex] >>> patchWidth;
        currentPatch = unpackedPatch[patchIndex] & patchMask;
    }
    // add the left over gap
    actualGap += currentGap;

    // unpack data blob, patch it (if required), add base to get final result
    for (int i = 0; i < unpacked.length; i++) {
        if (i == actualGap) {
            // extract the patch value: the patch supplies the bits above the fb packed bits
            long patchedValue = unpacked[i] | (currentPatch << fb);

            // add base to patched value
            literals[numLiterals++] = base + patchedValue;

            // increment the patch to point to next entry in patch list
            patchIndex++;

            if (patchIndex < patchListLength) {
                // read the next gap and patch
                currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                currentPatch = unpackedPatch[patchIndex] & patchMask;
                actualGap = 0;

                // <=255 then patch cannot be 0
                while (currentGap == 255 && currentPatch == 0) {
                    actualGap += 255;
                    patchIndex++;
                    if (patchIndex >= patchListLength) {
                        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Invalid RLEv2 encoded stream: patch list ends inside a gap continuation");
                    }
                    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                    currentPatch = unpackedPatch[patchIndex] & patchMask;
                }
                // add the left over gap
                actualGap += currentGap;

                // next gap is relative to the current gap
                actualGap += i;
            }
        }
        else {
            // no patching required. add base to unpacked value to get final value
            literals[numLiterals++] = base + unpacked[i];
        }
    }
}
use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class LongInputStreamV2 method next.
/**
 * Copies the next {@code items} decoded values into {@code values}, starting at index 0.
 * Whenever all buffered literals have been consumed, another run is decoded with
 * {@code readValues()} before copying continues.
 *
 * @param values destination array; must have room for at least {@code items} entries
 * @param items number of values to produce
 * @throws OrcCorruptionException if a decoded value does not fit in a 32-bit int
 * @throws IOException if decoding the underlying stream fails
 */
@Override
public void next(int[] values, int items)
        throws IOException
{
    int written = 0;
    int remaining = items;
    while (remaining > 0) {
        // refill the literal buffer once every buffered value has been handed out
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int batch = min(numLiterals - used, remaining);
        int end = used + batch;
        int target = written;
        for (int source = used; source < end; source++) {
            long wide = literals[source];
            int narrow = (int) wide;
            // the cast round-trips only when the value is within int range
            if (wide != narrow) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[target] = narrow;
            target++;
        }

        // commit the batch only after every value in it has been validated and stored
        used += batch;
        written += batch;
        remaining -= batch;
    }
}
use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class OrcInputStream method readVarint.
/**
 * Reads one variable-length integer starting at the current position.
 * Every encoded byte contributes its low 7 bits, least-significant group first; a set
 * high bit (0x80) means another byte follows.
 *
 * <p>Two decoding paths exist: a word-at-a-time path used when {@code available()} reports
 * at least 16 bytes, and a byte-at-a-time path that can cross buffer boundaries by
 * calling {@code advance()}.
 *
 * @param signed when true, the accumulated value is passed through {@code zigzagDecode}
 * before being returned; when false it is returned as accumulated
 * @return the decoded value
 * @throws OrcCorruptionException if, in the byte-at-a-time path, no bytes are available
 * even after {@code advance()} while a further byte is still expected
 * @throws IOException if {@code advance()} fails
 */
public long readVarint(boolean signed) throws IOException {
long result = 0;
int shift = 0;
int available = available();
// Fast path: needs up to two 8-byte loads (at position and position + 8), hence the
// 2 * Long.BYTES requirement.
if (available >= 2 * Long.BYTES) {
// The code below treats the low-order byte of word as the byte at position.
// NOTE(review): this relies on ByteArrays.getLong being a little-endian load; confirm.
long word = ByteArrays.getLong(buffer, position);
// number of encoded bytes consumed so far
int count = 1;
// set when the terminating byte was found inside the first 8 bytes
boolean atEnd = false;
// payload of the first byte goes to bits 0..6
result = word & 0x7f;
if ((word & 0x80) != 0) {
// control's low byte is always the byte currently being folded in, so
// (control & 0x80) is that byte's continuation bit
long control = word >>> 8;
// mask selects the 7-bit slot in result that the current byte fills (bits 7..13 first)
long mask = 0x7f << 7;
while (true) {
// each 1-bit shift squeezes out one more continuation bit, lining the next byte's
// payload up with the slot selected by mask
word = word >>> 1;
result |= word & mask;
count++;
if ((control & 0x80) == 0) {
atEnd = true;
break;
}
// the slot at bits 49..55 belongs to the eighth byte: the first word is used up
if (mask == 0x7fL << (7 * 7)) {
break;
}
mask = mask << 7;
control = control >>> 8;
}
if (!atEnd) {
// ninth byte supplies bits 56..62
word = ByteArrays.getLong(buffer, position + 8);
result |= (word & 0x7f) << 56;
if ((word & 0x80) == 0) {
count++;
} else {
// A tenth byte follows: bit 63 is set unconditionally and two bytes are consumed.
// The tenth byte's own value is not examined here.
result |= 1L << 63;
count += 2;
}
}
}
// skip over every byte that belonged to this varint
position += count;
} else {
// Slow path: one byte per iteration, refilling the buffer when it runs dry.
// NOTE(review): shift is not bounded here; an encoding longer than 10 bytes would shift
// by 64 or more, which Java reduces modulo 64 for long operands, so the result would be
// silently wrong rather than rejected. The fast path stops after 10 bytes.
do {
if (available == 0) {
advance();
available = available();
if (available == 0) {
throw new OrcCorruptionException(orcDataSourceId, "End of stream in RLE Integer");
}
}
available--;
result |= (long) (buffer[position] & 0x7f) << shift;
shift += 7;
// the condition tests the byte just folded in, then steps past it
} while ((buffer[position++] & 0x80) != 0);
}
if (signed) {
return zigzagDecode(result);
} else {
return result;
}
}
use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class OrcInputStream method advance.
// This comes from the Apache Hive ORC code
/**
 * Loads the next chunk from {@code compressedSliceInput} into the read buffer.
 *
 * <p>When there is no input left, the buffer is dropped and position, length and
 * uncompressed offset are reset to zero. Otherwise a 3-byte chunk header is decoded: the
 * lowest bit of the first byte marks the chunk as stored uncompressed, and the remaining
 * 23 bits (low bits first) give the chunk length. The chunk is then read, decrypted if a
 * decryptor is present, and decompressed unless it was flagged as uncompressed.
 * The reported memory usage is refreshed on every exit path that does not throw.
 *
 * @throws OrcCorruptionException if the decoded chunk length is negative or larger than
 * the remaining input
 * @throws IOException if reading, decrypting or decompressing the chunk fails
 */
private void advance()
        throws IOException
{
    boolean exhausted = compressedSliceInput == null || compressedSliceInput.remaining() == 0;
    if (exhausted) {
        // nothing left to load: leave the stream in an empty state
        buffer = null;
        position = 0;
        length = 0;
        uncompressedOffset = 0;
        memoryUsage.setBytes(getRetainedSizeInBytes());
        return;
    }

    // 3 byte header
    // NOTE: this must match BLOCK_HEADER_SIZE
    currentCompressedBlockOffset = toIntExact(compressedSliceInput.position());
    int header0 = compressedSliceInput.readUnsignedByte();
    int header1 = compressedSliceInput.readUnsignedByte();
    int header2 = compressedSliceInput.readUnsignedByte();

    // bit 0 of the first header byte is set when the chunk was stored as-is
    boolean compressed = (header0 & 0x01) == 0;
    int chunkLength = (header0 >>> 1) | (header1 << 7) | (header2 << 15);

    boolean lengthInRange = chunkLength >= 0 && chunkLength <= compressedSliceInput.remaining();
    if (!lengthInRange) {
        throw new OrcCorruptionException(orcDataSourceId, "The chunkLength (%s) must not be negative or greater than remaining size (%s)", chunkLength, compressedSliceInput.remaining());
    }

    if (compressed) {
        // stage the raw chunk in the shared scratch buffer, then inflate it through bufferAdapter
        sharedDecompressionBuffer.ensureCapacity(chunkLength);
        byte[] chunk = sharedDecompressionBuffer.get();
        int chunkBytes = compressedSliceInput.read(chunk, 0, chunkLength);
        if (dwrfDecryptor.isPresent()) {
            chunk = dwrfDecryptor.get().decrypt(chunk, 0, chunkLength);
            chunkBytes = chunk.length;
        }
        length = decompressor.get().decompress(chunk, 0, chunkBytes, bufferAdapter);
    }
    else {
        // stored chunk: read straight into the read buffer
        buffer = ensureCapacity(buffer, chunkLength);
        length = compressedSliceInput.read(buffer, 0, chunkLength);
        if (dwrfDecryptor.isPresent()) {
            buffer = dwrfDecryptor.get().decrypt(buffer, 0, chunkLength);
            length = buffer.length;
        }
    }

    // reading restarts at the beginning of the freshly loaded chunk
    position = 0;
    uncompressedOffset = position;
    memoryUsage.setBytes(getRetainedSizeInBytes());
}
use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class OrcInputStream method seekToCheckpoint.
/**
 * Repositions the stream at the location described by {@code checkpoint}.
 *
 * <p>The checkpoint is split into a compressed block offset and an offset within the
 * decompressed block. If the block offset differs from the block currently loaded, the
 * compressed input is repositioned and the current buffer is replaced with an empty one.
 * The in-block offset is then applied, calling {@code advance()} when the current buffer
 * does not hold enough bytes or holds none at all.
 *
 * @param checkpoint encoded checkpoint to seek to
 * @return true if the current buffer was discarded because the checkpoint refers to a
 * different compressed block, false otherwise
 * @throws OrcCorruptionException if the checkpoint refers to a different block offset but
 * the stream has neither a decompressor nor a decryptor
 * @throws IOException if loading the target block fails
 */
public boolean seekToCheckpoint(long checkpoint)
        throws IOException
{
    int targetBlockOffset = decodeCompressedBlockOffset(checkpoint);
    int targetOffsetInBlock = decodeDecompressedOffset(checkpoint);

    boolean discardedBuffer = targetBlockOffset != currentCompressedBlockOffset;
    if (discardedBuffer) {
        if (!(decompressor.isPresent() || dwrfDecryptor.isPresent())) {
            throw new OrcCorruptionException(orcDataSourceId, "Reset stream has a block offset but stream is not compressed or encrypted");
        }
        // point the compressed input at the requested block and forget the loaded data
        compressedSliceInput.setPosition(targetBlockOffset);
        buffer = new byte[0];
        memoryUsage.setBytes(getRetainedSizeInBytes());
        position = 0;
        length = 0;
        uncompressedOffset = 0;
    }

    boolean alreadyAtOffset = targetOffsetInBlock == position - uncompressedOffset;
    if (alreadyAtOffset) {
        // offset already matches; only an empty buffer still needs its block loaded
        if (length == 0) {
            advance();
            position += targetOffsetInBlock;
        }
        return discardedBuffer;
    }

    // rewind to the start of the block, then move forward to the requested offset
    position = uncompressedOffset;
    if (available() < targetOffsetInBlock) {
        // the target lies beyond the loaded bytes: consume them and load the next chunk
        targetOffsetInBlock -= available();
        advance();
    }
    position += targetOffsetInBlock;
    return discardedBuffer;
}
Aggregations