use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class LongInputStreamV1 method next.
/**
 * Fills the first {@code inputItems} slots of {@code values} with decoded integers,
 * calling {@code readValues()} whenever the current run has been fully consumed.
 *
 * @param values destination array, written from index 0
 * @param inputItems number of values to produce
 * @throws OrcCorruptionException if a decoded value does not fit in a 32-bit int
 * @throws IOException if the underlying stream fails while decoding a new run
 */
@Override
public void next(int[] values, int inputItems) throws IOException {
    int remaining = inputItems;
    int written = 0;
    while (remaining > 0) {
        // current run exhausted: reset the counters and decode the next run
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int batch = min(numLiterals - used, remaining);
        for (int k = 0; k < batch; k++) {
            // a repeat run stores only its base in literals[0]; every other value is base + position * delta
            long decoded = repeat
                    ? literals[0] + ((used + k) * delta)
                    : literals[used + k];
            int narrowed = (int) decoded;
            // the narrowing cast must round-trip, otherwise the value overflowed 32 bits
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[written + k] = narrowed;
        }

        used += batch;
        written += batch;
        remaining -= batch;
    }
}
use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class LongInputStreamV1 method readValues.
// This comes from the Apache Hive ORC code
/**
 * Decodes the next run from {@code input} into the reader state.
 *
 * <p>The control byte selects the run kind: a value below {@code 0x80} starts a repeat run
 * of {@code control + MIN_REPEAT_SIZE} values described by a signed one-byte delta and a
 * base value stored in {@code literals[0]}; any other value starts a list of
 * {@code 0x100 - control} individually encoded literals.
 *
 * @throws OrcCorruptionException if the stream ends where the control or delta byte is expected
 * @throws IOException if reading from the underlying stream fails
 */
private void readValues() throws IOException {
    // remember where this run starts in the input
    lastReadInputCheckpoint = input.getCheckpoint();

    int control = input.read();
    if (control == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer");
    }

    used = 0;
    repeat = control < 0x80;

    if (!repeat) {
        // literal list: each value is stored as its own varint
        numLiterals = 0x100 - control;
        for (int index = 0; index < numLiterals; index++) {
            literals[index] = LongDecode.readVInt(signed, input);
        }
        return;
    }

    // repeat run: a delta byte followed by the base value
    numLiterals = control + MIN_REPEAT_SIZE;
    delta = input.read();
    if (delta == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "End of stream in RLE Integer");
    }
    // reinterpret the unsigned 0..255 byte as a signed -128..127 delta
    delta = (byte) delta;
    literals[0] = LongDecode.readVInt(signed, input);
}
use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class LongInputStreamV2 method readPatchedBaseValues.
// This comes from the Apache Hive ORC code
/**
 * Decodes one "patched base" run from {@code input} and appends the resulting values to
 * {@code literals}, advancing {@code numLiterals} by the run length.
 *
 * <p>Layout read here: the remaining three header bytes (run length, base width, patch
 * width, patch gap width, patch list length), the base value, the bit-packed data blob,
 * and the bit-packed patch list. Each patch entry holds a gap in its high bits and a patch
 * value in its low {@code patchWidth} bits; the patch value supplies the bits above
 * {@code fb} for the element the accumulated gap points at.
 *
 * @param firstByte the already-consumed first header byte of the run
 * @throws OrcCorruptionException if the stream ends inside the header, or if the combined
 *         patch and gap width exceeds 64 bits while {@code skipCorrupt} is false
 * @throws IOException if reading from the underlying stream fails
 */
private void readPatchedBaseValues(int firstByte) throws IOException {
    // extract the number of fixed bits
    int fb = LongDecode.decodeBitWidth((firstByte >>> 1) & 0b1_1111);
    // extract the run length of data blob
    int length = (firstByte & 0b1) << 8;
    int secondByte = input.read();
    // read() yields -1 at end of stream; OR-ing that into the length would corrupt it silently
    if (secondByte == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "End of stream in RLE Integer");
    }
    length |= secondByte;
    // runs are always one off
    length += 1;
    // extract the number of bytes occupied by base
    int thirdByte = input.read();
    if (thirdByte == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "End of stream in RLE Integer");
    }
    int baseWidth = (thirdByte >>> 5) & 0b0111;
    // base width is one off
    baseWidth += 1;
    // extract patch width
    int patchWidth = LongDecode.decodeBitWidth(thirdByte & 0b1_1111);
    // read fourth byte and extract patch gap width
    int fourthByte = input.read();
    if (fourthByte == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "End of stream in RLE Integer");
    }
    int patchGapWidth = (fourthByte >>> 5) & 0b0111;
    // patch gap width is one off
    patchGapWidth += 1;
    // extract the length of the patch list
    int patchListLength = fourthByte & 0b1_1111;
    // read the next base width number of bytes to extract base value
    long base = bytesToLongBE(input, baseWidth);
    long mask = (1L << ((baseWidth * 8) - 1));
    // if MSB of base value is 1 then base is negative value else positive
    if ((base & mask) != 0) {
        base = base & ~mask;
        base = -base;
    }
    // unpack the data blob
    long[] unpacked = new long[length];
    packer.unpack(unpacked, 0, length, fb, input);
    // unpack the patch blob
    long[] unpackedPatch = new long[patchListLength];
    if ((patchWidth + patchGapWidth) > 64 && !skipCorrupt) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Invalid RLEv2 encoded stream");
    }
    int bitSize = LongDecode.getClosestFixedBits(patchWidth + patchGapWidth);
    packer.unpack(unpackedPatch, 0, patchListLength, bitSize, input);
    // apply the patch directly when decoding the packed data
    // NOTE(review): a patchListLength of 0 makes the unpackedPatch[0] read below throw
    // ArrayIndexOutOfBoundsException - confirm whether a run without patches can occur.
    int patchIndex = 0;
    long currentGap;
    long currentPatch;
    long patchMask = ((1L << patchWidth) - 1);
    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
    currentPatch = unpackedPatch[patchIndex] & patchMask;
    long actualGap = 0;
    // if gap is <=255 then patch value cannot be 0
    // (an entry with gap 255 and patch 0 only extends the gap to the next real patch)
    while (currentGap == 255 && currentPatch == 0) {
        actualGap += 255;
        patchIndex++;
        currentGap = unpackedPatch[patchIndex] >>> patchWidth;
        currentPatch = unpackedPatch[patchIndex] & patchMask;
    }
    // add the left over gap
    actualGap += currentGap;
    // unpack data blob, patch it (if required), add base to get final result
    for (int i = 0; i < unpacked.length; i++) {
        if (i == actualGap) {
            // extract the patch value
            long patchedValue = unpacked[i] | (currentPatch << fb);
            // add base to patched value
            literals[numLiterals++] = base + patchedValue;
            // increment the patch to point to next entry in patch list
            patchIndex++;
            if (patchIndex < patchListLength) {
                // read the next gap and patch
                currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                currentPatch = unpackedPatch[patchIndex] & patchMask;
                actualGap = 0;
                // <=255 then patch cannot be 0
                while (currentGap == 255 && currentPatch == 0) {
                    actualGap += 255;
                    patchIndex++;
                    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                    currentPatch = unpackedPatch[patchIndex] & patchMask;
                }
                // add the left over gap
                actualGap += currentGap;
                // next gap is relative to the current gap
                actualGap += i;
            }
        } else {
            // no patching required. add base to unpacked value to get final value
            literals[numLiterals++] = base + unpacked[i];
        }
    }
}
use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class LongInputStreamV2 method next.
/**
 * Copies the next {@code inputItems} decoded values into {@code values}, starting at
 * index 0, calling {@code readValues()} each time the buffered literals run out.
 *
 * @param values destination array
 * @param inputItems number of values to produce
 * @throws OrcCorruptionException if a decoded value does not fit in a 32-bit int
 * @throws IOException if the underlying stream fails while decoding more values
 */
@Override
public void next(int[] values, int inputItems) throws IOException {
    int outputPosition = 0;
    int pending = inputItems;
    while (pending > 0) {
        // buffer drained: reset the counters and decode more literals
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int count = min(numLiterals - used, pending);
        int sourceEnd = used + count;
        for (int source = used, target = outputPosition; source < sourceEnd; source++, target++) {
            long decoded = literals[source];
            int narrowed = (int) decoded;
            // reject values that do not survive the long -> int round trip
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[target] = narrowed;
        }

        used = sourceEnd;
        outputPosition += count;
        pending -= count;
    }
}
use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class OrcInputStream method readFully.
/**
 * Reads exactly {@code length} bytes into {@code buffer}, starting at index
 * {@code inputOffset}, by calling {@code read} repeatedly until the request is satisfied.
 *
 * <p>Does nothing when {@code length} is zero or negative.
 *
 * @param buffer destination array
 * @param inputOffset index in {@code buffer} at which the first byte is stored
 * @param length number of bytes to read
 * @throws OrcCorruptionException if {@code read} reports end of stream before
 *         {@code length} bytes have been read
 * @throws IOException if the underlying read fails
 */
public void readFully(byte[] buffer, int inputOffset, int length) throws IOException {
    int offset = inputOffset;
    // Track the outstanding byte count separately from the write position. Comparing the
    // position against length treated length as an end index and under-read (or skipped
    // reading entirely) whenever inputOffset was non-zero.
    int remaining = length;
    while (remaining > 0) {
        int result = read(buffer, offset, remaining);
        if (result < 0) {
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
        }
        offset += result;
        remaining -= result;
    }
}
Aggregations