Use of com.facebook.presto.orc.OrcCorruptionException in the presto project by prestodb.
From the class DecimalStreamReader, method readBlock.
/**
 * Reads the pending batch of {@code nextBatchSize} decimal values into a new block.
 *
 * <p>The row group is opened if it is not open yet, the streams are positioned via
 * {@code seekToOffset()}, and the working vectors are prepared via {@code allocateVectors()}
 * before any value is decoded. After the batch has been produced, {@code readOffset} and
 * {@code nextBatchSize} are reset to zero.
 *
 * @param type the type of the values to read; it is cast to {@link DecimalType}
 * @return a block built from the values (and nulls) of the batch
 * @throws OrcCorruptionException if non-null values must be decoded but the decimal or the
 *         scale stream is missing
 * @throws IOException propagated from the underlying streams
 */
@Override
public Block readBlock(Type type) throws IOException {
    DecimalType decimalType = (DecimalType) type;

    if (!rowGroupOpen) {
        openRowGroup();
    }
    seekToOffset();
    allocateVectors();

    int batchSize = nextBatchSize;
    BlockBuilder blockBuilder = decimalType.createBlockBuilder(new BlockBuilderStatus(), batchSize);

    if (presentStream != null) {
        // a present stream exists, so some positions of the batch may be null
        int nullCount = presentStream.getUnsetBits(batchSize, nullVector);
        if (nullCount == batchSize) {
            // the whole batch is null: nothing has to be read from the value streams
            for (int position = 0; position < batchSize; position++) {
                blockBuilder.appendNull();
            }
        } else {
            verifyValueStreamsPresent();
            scaleStream.nextLongVector(batchSize, scaleVector, nullVector);
            if (decimalType.isShort()) {
                decimalStream.nextShortDecimalVector(batchSize, blockBuilder, decimalType, scaleVector, nullVector);
            } else {
                decimalStream.nextLongDecimalVector(batchSize, blockBuilder, decimalType, scaleVector, nullVector);
            }
        }
    } else {
        // no present stream: every position of the batch carries a value
        verifyValueStreamsPresent();
        Arrays.fill(nullVector, false);
        scaleStream.nextLongVector(batchSize, scaleVector);
        if (decimalType.isShort()) {
            decimalStream.nextShortDecimalVector(batchSize, blockBuilder, decimalType, scaleVector);
        } else {
            decimalStream.nextLongDecimalVector(batchSize, blockBuilder, decimalType, scaleVector);
        }
    }

    readOffset = 0;
    nextBatchSize = 0;
    return blockBuilder.build();
}

/**
 * Fails with a corruption error when values must be decoded but the decimal stream or the
 * scale stream is absent. The decimal stream is checked first.
 */
private void verifyValueStreamsPresent() throws OrcCorruptionException {
    if (decimalStream == null) {
        throw new OrcCorruptionException("Value is not null but decimal stream is not present");
    }
    if (scaleStream == null) {
        throw new OrcCorruptionException("Value is not null but scale stream is not present");
    }
}
Use of com.facebook.presto.orc.OrcCorruptionException in the presto project by prestodb.
From the class DoubleStreamReader, method readBlock.
/**
 * Reads the pending batch of {@code nextBatchSize} values into a new block.
 *
 * <p>Any pending {@code readOffset} is consumed first: when a present stream exists the offset
 * is converted to the number of set bits it covers, and that many entries are skipped in the
 * data stream. After the batch has been produced, {@code readOffset} and {@code nextBatchSize}
 * are reset to zero.
 *
 * @param type the type used to create the block builder and to decode the values
 * @return a block built from the values (and nulls) of the batch
 * @throws OrcCorruptionException if values must be skipped or decoded but the data stream is
 *         missing
 * @throws IOException propagated from the underlying streams
 */
@Override
public Block readBlock(Type type) throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // advance the present bit reader and use the number of set bits
    // as the amount of entries to skip in the data reader
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        requireDataStream();
        dataStream.skip(readOffset);
    }

    int batchSize = nextBatchSize;
    BlockBuilder blockBuilder = type.createBlockBuilder(new BlockBuilderStatus(), batchSize);

    if (presentStream != null) {
        // make sure the null vector can hold one flag per position of the batch
        if (nullVector.length < batchSize) {
            nullVector = new boolean[batchSize];
        }
        int nullCount = presentStream.getUnsetBits(batchSize, nullVector);
        if (nullCount == batchSize) {
            // the whole batch is null: the data stream is not touched
            for (int position = 0; position < batchSize; position++) {
                blockBuilder.appendNull();
            }
        } else {
            requireDataStream();
            dataStream.nextVector(type, batchSize, blockBuilder, nullVector);
        }
    } else {
        // no present stream: every position of the batch carries a value
        requireDataStream();
        dataStream.nextVector(type, batchSize, blockBuilder);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return blockBuilder.build();
}

/**
 * Fails with a corruption error when values are expected but the data stream is absent.
 */
private void requireDataStream() throws OrcCorruptionException {
    if (dataStream == null) {
        throw new OrcCorruptionException("Value is not null but data stream is not present");
    }
}
Use of com.facebook.presto.orc.OrcCorruptionException in the presto project by prestodb.
From the class ListStreamReader, method readBlock.
/**
 * Reads the pending batch of {@code nextBatchSize} list entries and returns them as an
 * {@code ArrayBlock}.
 *
 * <p>Any pending {@code readOffset} is consumed first: when a present stream exists the offset
 * is converted to the number of set bits it covers, the lengths of that many lists are summed,
 * and the element reader is told to skip that many elements. The list lengths of the batch are
 * then turned into cumulative offsets, and the element reader is asked for exactly as many
 * elements as the last offset indicates. After the batch has been produced, {@code readOffset}
 * and {@code nextBatchSize} are reset to zero.
 *
 * @param type the list type; its first type parameter is used as the element type
 * @return an array block holding the null flags, offsets and elements of the batch
 * @throws OrcCorruptionException if lengths must be skipped or decoded but the length stream is
 *         missing
 * @throws IOException propagated from the underlying streams
 */
@Override
public Block readBlock(Type type) throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // advance the present bit reader and use the number of set bits
    // as the amount of list lengths to skip
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        requireLengthStream();
        long skippedElements = lengthStream.sum(readOffset);
        elementStreamReader.prepareNextRead(toIntExact(skippedElements));
    }

    // Fresh arrays are allocated for every batch on purpose: entries for null
    // positions stay at their zero default, which the offset computation below
    // relies on. Clearing reused arrays would likely cost about the same.
    int batchSize = nextBatchSize;
    int[] lengths = new int[batchSize];
    boolean[] isNull = new boolean[batchSize];

    if (presentStream == null) {
        requireLengthStream();
        lengthStream.nextIntVector(batchSize, lengths);
    } else if (presentStream.getUnsetBits(batchSize, isNull) != batchSize) {
        // at least one position is not null, so lengths have to be read
        requireLengthStream();
        lengthStream.nextIntVector(batchSize, lengths, isNull);
    }

    // offsets[i] is where list i starts in the element block; offsets[batchSize] is the total
    int[] offsets = new int[batchSize + 1];
    for (int position = 0; position < batchSize; position++) {
        offsets[position + 1] = offsets[position] + lengths[position];
    }

    Type elementType = type.getTypeParameters().get(0);
    int totalElements = offsets[batchSize];

    Block elements;
    if (totalElements <= 0) {
        // nothing to read: use an empty block of the element type
        elements = elementType.createBlockBuilder(new BlockBuilderStatus(), 0).build();
    } else {
        elementStreamReader.prepareNextRead(totalElements);
        elements = elementStreamReader.readBlock(elementType);
    }

    ArrayBlock result = new ArrayBlock(batchSize, isNull, offsets, elements);
    readOffset = 0;
    nextBatchSize = 0;
    return result;
}

/**
 * Fails with a corruption error when list lengths are expected but the length stream is absent.
 */
private void requireLengthStream() throws OrcCorruptionException {
    if (lengthStream == null) {
        throw new OrcCorruptionException("Value is not null but data stream is not present");
    }
}
Use of com.facebook.presto.orc.OrcCorruptionException in the presto project by prestodb.
From the class LongStreamV2, method readPatchedBaseValues.
// This comes from the Apache Hive ORC code
/*
 * Decodes one "patched base" run and appends its values to literals, advancing numLiterals
 * by one for every decoded value.
 *
 * Header layout as read by this method:
 *   firstByte  : bits 1-5 hold the encoded width of each packed value, bit 0 is the high
 *                (ninth) bit of the run length
 *   next byte  : low eight bits of the run length (stored as length - 1)
 *   third byte : top three bits hold the base width in bytes (stored as width - 1),
 *                low five bits hold the encoded patch width
 *   fourth byte: top three bits hold the patch gap width (stored as width - 1),
 *                low five bits hold the number of patch list entries
 * The header is followed by the base value, the packed data values and the packed patch list.
 *
 * Each value is emitted as base + unpacked value; at patched positions the patch bits are
 * first OR-ed in above the low fb bits of the unpacked value.
 *
 * Throws OrcCorruptionException when patchWidth + patchGapWidth exceeds 64 and skipCorrupt
 * is false.
 *
 * NOTE(review): the results of input.read() are used without checking for an end-of-stream
 * marker — confirm that the input type signals truncation some other way.
 * NOTE(review): unpackedPatch[patchIndex] is indexed without a bounds check at the first
 * access and inside both gap-continuation loops; a zero-length patch list or a continuation
 * entry at the end of the list would raise ArrayIndexOutOfBoundsException — confirm whether
 * callers rely on that for corrupt input.
 */
private void readPatchedBaseValues(int firstByte) throws IOException {
// extract the number of fixed bits (bits 1-5 of the first byte, decoded to a bit width)
int fb = LongDecode.decodeBitWidth((firstByte >>> 1) & 0b1_1111);
// extract the run length of data blob: bit 0 of the first byte is the ninth bit,
// the following byte supplies the low eight bits
int length = (firstByte & 0b1) << 8;
length |= input.read();
// runs are always one off
length += 1;
// extract the number of bytes occupied by base (top three bits of the third byte)
int thirdByte = input.read();
int baseWidth = (thirdByte >>> 5) & 0b0111;
// base width is one off
baseWidth += 1;
// extract patch width (low five bits of the third byte, decoded to a bit width)
int patchWidth = LongDecode.decodeBitWidth(thirdByte & 0b1_1111);
// read fourth byte and extract patch gap width (top three bits)
int fourthByte = input.read();
int patchGapWidth = (fourthByte >>> 5) & 0b0111;
// patch gap width is one off
patchGapWidth += 1;
// extract the length of the patch list (low five bits of the fourth byte)
int patchListLength = fourthByte & 0b1_1111;
// read the next base width number of bytes to extract base value
long base = bytesToLongBE(input, baseWidth);
// mask selects the most significant bit of the baseWidth-byte value, used as the sign bit
long mask = (1L << ((baseWidth * 8) - 1));
// if MSB of base value is 1 then base is negative value else positive:
// clear the sign bit and negate the remaining magnitude
if ((base & mask) != 0) {
base = base & ~mask;
base = -base;
}
// unpack the data blob: length values of fb bits each
long[] unpacked = new long[length];
packer.unpack(unpacked, 0, length, fb, input);
// unpack the patch blob
long[] unpackedPatch = new long[patchListLength];
// a patch entry holds the gap and the patch together, so both widths must fit in 64 bits;
// the check is bypassed when skipCorrupt is set
if ((patchWidth + patchGapWidth) > 64 && !skipCorrupt) {
throw new OrcCorruptionException("ORC file is corrupt");
}
int bitSize = LongDecode.getClosestFixedBits(patchWidth + patchGapWidth);
packer.unpack(unpackedPatch, 0, patchListLength, bitSize, input);
// apply the patch directly when decoding the packed data
int patchIndex = 0;
long currentGap;
long currentPatch;
// each patch entry stores the patch in its low patchWidth bits and the gap above them
long patchMask = ((1L << patchWidth) - 1);
currentGap = unpackedPatch[patchIndex] >>> patchWidth;
currentPatch = unpackedPatch[patchIndex] & patchMask;
// actualGap is the index into unpacked of the next value that needs patching
long actualGap = 0;
// an entry with gap 255 and patch 0 is a continuation marker: it only adds 255 to the gap
// and the real patch follows in a later entry
while (currentGap == 255 && currentPatch == 0) {
actualGap += 255;
patchIndex++;
currentGap = unpackedPatch[patchIndex] >>> patchWidth;
currentPatch = unpackedPatch[patchIndex] & patchMask;
}
// add the left over gap
actualGap += currentGap;
// unpack data blob, patch it (if required), add base to get final result
for (int i = 0; i < unpacked.length; i++) {
if (i == actualGap) {
// extract the patch value: the patch bits are placed above the low fb bits
long patchedValue = unpacked[i] | (currentPatch << fb);
// add base to patched value
literals[numLiterals++] = base + patchedValue;
// increment the patch to point to next entry in patch list
patchIndex++;
if (patchIndex < patchListLength) {
// read the next gap and patch
currentGap = unpackedPatch[patchIndex] >>> patchWidth;
currentPatch = unpackedPatch[patchIndex] & patchMask;
actualGap = 0;
// skip continuation markers (gap 255, patch 0), accumulating 255 for each
while (currentGap == 255 && currentPatch == 0) {
actualGap += 255;
patchIndex++;
currentGap = unpackedPatch[patchIndex] >>> patchWidth;
currentPatch = unpackedPatch[patchIndex] & patchMask;
}
// add the left over gap
actualGap += currentGap;
// next gap is relative to the current gap
actualGap += i;
}
} else {
// no patching required. add base to unpacked value to get final value
literals[numLiterals++] = base + unpacked[i];
}
}
}
Use of com.facebook.presto.orc.OrcCorruptionException in the presto project by prestodb.
From the class OrcInputStream, method decompressZip.
// This comes from the Apache Hive ORC code
/**
 * Inflates the compressed bytes of {@code in} into {@code buffer} and returns how many
 * uncompressed bytes were produced.
 *
 * <p>The buffer is first sized to the compressed length times
 * {@code EXPECTED_COMPRESSION_RATIO}. While the inflater has not finished and the buffer is
 * still smaller than {@code maxBufferSize}, the buffer is doubled and inflation continues.
 *
 * @param in the compressed data; its base object is cast to {@code byte[]}
 * @return the number of uncompressed bytes written to {@code buffer}
 * @throws OrcCorruptionException if the data is not a valid compressed stream, or if it could
 *         not be fully inflated once the buffer reached {@code maxBufferSize}
 * @throws IllegalStateException if a grow request did not enlarge the buffer
 * @throws IOException declared for callers; corruption errors are reported through it
 */
private int decompressZip(Slice in) throws IOException {
    // "nowrap" mode: the input carries no ZLIB header or checksum fields
    Inflater inflater = new Inflater(true);
    try {
        byte[] compressed = (byte[]) in.getBase();
        int compressedOffset = (int) (in.getAddress() - ARRAY_BYTE_BASE_OFFSET);
        inflater.setInput(compressed, compressedOffset, in.length());
        allocateOrGrowBuffer(in.length() * EXPECTED_COMPRESSION_RATIO, false);

        // first attempt with the initially sized buffer
        int uncompressedLength = inflater.inflate(buffer, 0, buffer.length);

        // keep doubling the buffer until everything fits or the size limit is reached
        while (!inflater.finished() && buffer.length < maxBufferSize) {
            int previousSize = buffer.length;
            allocateOrGrowBuffer(previousSize * 2, true);
            if (buffer.length <= previousSize) {
                throw new IllegalStateException(String.format("Buffer failed to grow. Old size %d, current size %d", previousSize, buffer.length));
            }
            int remaining = buffer.length - uncompressedLength;
            uncompressedLength += inflater.inflate(buffer, uncompressedLength, remaining);
        }

        if (inflater.finished()) {
            return uncompressedLength;
        }
        throw new OrcCorruptionException("Could not decompress all input (output buffer too small?)");
    } catch (DataFormatException e) {
        throw new OrcCorruptionException(e, "Invalid compressed stream");
    } finally {
        // always release the inflater's resources
        inflater.end();
    }
}
Aggregations