Search in sources :

Example 16 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class LongInputStreamV2 method readPatchedBaseValues.

// This comes from the Apache Hive ORC code
/**
 * Decodes one "patched base" run from {@code input} and appends the decoded values to
 * {@code literals}, incrementing {@code numLiterals} once per decoded value.
 *
 * <p>Header layout, as read by the code below:
 * <ul>
 * <li>{@code firstByte}: bits 1-5 hold the encoded width of each packed value; bit 0 is bit 8 of the run length</li>
 * <li>second byte: low 8 bits of the run length (stored as length - 1)</li>
 * <li>third byte: top 3 bits hold the base width in bytes (stored as width - 1); low 5 bits hold the encoded patch width</li>
 * <li>fourth byte: top 3 bits hold the patch gap width (stored as width - 1); low 5 bits hold the patch list length</li>
 * </ul>
 * The header is followed by the base value, the packed data blob, and the packed patch list.
 *
 * @param firstByte first header byte of the run; it is passed in rather than read here,
 *        so presumably the caller has already consumed it from {@code input} (confirm against caller)
 * @throws IOException propagated from reads on {@code input}
 * @throws OrcCorruptionException if patch width plus patch gap width exceeds 64 bits and {@code skipCorrupt} is false
 */
private void readPatchedBaseValues(int firstByte) throws IOException {
    // extract the number of fixed bits (5-bit code in bits 1-5, translated by decodeBitWidth)
    int fb = LongDecode.decodeBitWidth((firstByte >>> 1) & 0b1_1111);
    // extract the run length of data blob: bit 0 of the first byte is the 9th (high) bit,
    // the next byte supplies the low 8 bits
    // NOTE(review): the results of input.read() in this method are not checked for an
    // end-of-stream marker; confirm that input.read() throws instead of returning -1
    int length = (firstByte & 0b1) << 8;
    length |= input.read();
    // runs are always one off
    length += 1;
    // extract the number of bytes occupied by base (top 3 bits of the third byte)
    int thirdByte = input.read();
    int baseWidth = (thirdByte >>> 5) & 0b0111;
    // base width is one off
    baseWidth += 1;
    // extract patch width (5-bit code in the low bits of the third byte)
    int patchWidth = LongDecode.decodeBitWidth(thirdByte & 0b1_1111);
    // read fourth byte and extract patch gap width (top 3 bits)
    int fourthByte = input.read();
    int patchGapWidth = (fourthByte >>> 5) & 0b0111;
    // patch gap width is one off
    patchGapWidth += 1;
    // extract the length of the patch list (low 5 bits of the fourth byte)
    int patchListLength = fourthByte & 0b1_1111;
    // read the next base width number of bytes to extract base value
    long base = bytesToLongBE(input, baseWidth);
    // mask selecting the most significant bit of the baseWidth-byte value, used as the sign bit
    long mask = (1L << ((baseWidth * 8) - 1));
    // if MSB of base value is 1 then base is negative value else positive
    // (sign-magnitude: clear the sign bit, then negate the magnitude)
    if ((base & mask) != 0) {
        base = base & ~mask;
        base = -base;
    }
    // unpack the data blob: `length` values of `fb` bits each
    long[] unpacked = new long[length];
    packer.unpack(unpacked, 0, length, fb, input);
    // unpack the patch blob
    long[] unpackedPatch = new long[patchListLength];
    // a patch entry holds gap and patch side by side, so together they must fit in a long;
    // when skipCorrupt is set the check is bypassed and decoding continues
    if ((patchWidth + patchGapWidth) > 64 && !skipCorrupt) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Invalid RLEv2 encoded stream");
    }
    // patch entries are packed at the closest fixed bit width that holds gap + patch
    int bitSize = LongDecode.getClosestFixedBits(patchWidth + patchGapWidth);
    packer.unpack(unpackedPatch, 0, patchListLength, bitSize, input);
    // apply the patch directly when decoding the packed data
    int patchIndex = 0;
    long currentGap;
    long currentPatch;
    // low patchWidth bits of an entry are the patch; the bits above them are the gap
    long patchMask = ((1L << patchWidth) - 1);
    // NOTE(review): entry 0 is read unconditionally; a patchListLength of 0 would raise
    // ArrayIndexOutOfBoundsException here rather than OrcCorruptionException
    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
    currentPatch = unpackedPatch[patchIndex] & patchMask;
    // index into `unpacked` of the next value that needs patching
    long actualGap = 0;
    // an entry with gap 255 and patch 0 is a continuation marker: it only adds 255 to the
    // gap and the real patch follows in a later entry
    // NOTE(review): patchIndex is not bounds-checked inside this loop
    while (currentGap == 255 && currentPatch == 0) {
        actualGap += 255;
        patchIndex++;
        currentGap = unpackedPatch[patchIndex] >>> patchWidth;
        currentPatch = unpackedPatch[patchIndex] & patchMask;
    }
    // add the left over gap
    actualGap += currentGap;
    // unpack data blob, patch it (if required), add base to get final result
    for (int i = 0; i < unpacked.length; i++) {
        if (i == actualGap) {
            // extract the patch value: the patch supplies the bits above the fb low bits
            long patchedValue = unpacked[i] | (currentPatch << fb);
            // add base to patched value
            literals[numLiterals++] = base + patchedValue;
            // increment the patch to point to next entry in patch list
            patchIndex++;
            // once the patch list is exhausted actualGap stays at i, so no later index matches
            if (patchIndex < patchListLength) {
                // read the next gap and patch
                currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                currentPatch = unpackedPatch[patchIndex] & patchMask;
                actualGap = 0;
                // skip continuation markers (gap 255, patch 0), accumulating 255 for each
                // NOTE(review): patchIndex is not bounds-checked inside this loop
                while (currentGap == 255 && currentPatch == 0) {
                    actualGap += 255;
                    patchIndex++;
                    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                    currentPatch = unpackedPatch[patchIndex] & patchMask;
                }
                // add the left over gap
                actualGap += currentGap;
                // next gap is relative to the current gap
                actualGap += i;
            }
        } else {
            // no patching required. add base to unpacked value to get final value
            literals[numLiterals++] = base + unpacked[i];
        }
    }
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) LongStreamV2Checkpoint(com.facebook.presto.orc.checkpoint.LongStreamV2Checkpoint)

Example 17 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class LongInputStreamV2 method next.

/**
 * Fills {@code values[0 .. items)} with the next {@code items} decoded values, narrowing each
 * from {@code long} to {@code int}.
 *
 * <p>Whenever every buffered literal has been handed out, the literal buffer is reset and
 * refilled through {@code readValues()}.
 *
 * @param values destination array; written starting at index 0
 * @param items number of values to produce
 * @throws IOException propagated from {@code readValues()}
 * @throws OrcCorruptionException if a decoded value does not survive the narrowing to {@code int}
 */
@Override
public void next(int[] values, int items) throws IOException {
    int written = 0;
    int remaining = items;
    while (remaining > 0) {
        // all buffered literals consumed: start over with a freshly decoded run
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int batch = min(numLiterals - used, remaining);
        int end = written + batch;
        int source = used;
        for (int target = written; target < end; target++, source++) {
            long decoded = literals[source];
            int narrowed = (int) decoded;
            // the round trip through int must be lossless
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[target] = narrowed;
        }

        // the read cursor is only advanced once the whole batch has been copied
        used += batch;
        written = end;
        remaining -= batch;
    }
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) LongStreamV2Checkpoint(com.facebook.presto.orc.checkpoint.LongStreamV2Checkpoint)

Example 18 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class OrcInputStream method readVarint.

/**
 * Reads one base-128 variable-length integer starting at {@code position} in {@code buffer}.
 *
 * <p>Each byte contributes its low 7 bits, least significant group first; a set high bit
 * means another byte follows. When at least {@code 2 * Long.BYTES} bytes are available the
 * value is assembled from whole 8-byte words; otherwise it is read one byte at a time,
 * calling {@code advance()} whenever the current buffer runs out.
 *
 * @param signed when true the raw value is passed through {@code zigzagDecode} before being returned
 * @return the decoded value
 * @throws IOException propagated from {@code advance()}
 * @throws OrcCorruptionException if the stream ends in the middle of a value
 */
public long readVarint(boolean signed) throws IOException {
    long value = 0;
    int remaining = available();

    if (remaining >= 2 * Long.BYTES) {
        // fast path: enough bytes are buffered to decode from 8-byte words without bounds checks
        long word = ByteArrays.getLong(buffer, position);
        int consumed = 1;
        value = word & 0x7f;

        if ((word & 0x80) != 0) {
            boolean terminated = false;
            // the low byte of `continuation` is always the byte whose payload is being merged
            long continuation = word >>> 8;
            long lastGroupMask = 0x7fL << (7 * 7);

            // every pass shifts the word right by one more bit, which squeezes out the
            // continuation bit of the previous byte and lines the next 7-bit group up with groupMask
            for (long groupMask = 0x7fL << 7; ; groupMask <<= 7, continuation >>>= 8) {
                word >>>= 1;
                value |= word & groupMask;
                consumed++;
                if ((continuation & 0x80) == 0) {
                    terminated = true;
                    break;
                }
                if (groupMask == lastGroupMask) {
                    // all eight bytes of the first word have been used
                    break;
                }
            }

            if (!terminated) {
                // the value spills into a ninth (and possibly tenth) byte
                long spill = ByteArrays.getLong(buffer, position + 8);
                value |= (spill & 0x7f) << 56;
                if ((spill & 0x80) != 0) {
                    // a tenth byte can only supply the top bit of the result
                    value |= 1L << 63;
                    consumed += 2;
                }
                else {
                    consumed++;
                }
            }
        }
        position += consumed;
    }
    else {
        // slow path: byte at a time, refilling the buffer when it is exhausted
        int shift = 0;
        boolean more;
        do {
            if (remaining == 0) {
                advance();
                remaining = available();
                if (remaining == 0) {
                    throw new OrcCorruptionException(orcDataSourceId, "End of stream in RLE Integer");
                }
            }
            remaining--;
            byte current = buffer[position];
            value |= (long) (current & 0x7f) << shift;
            shift += 7;
            position++;
            more = (current & 0x80) != 0;
        }
        while (more);
    }

    return signed ? zigzagDecode(value) : value;
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) InputStreamCheckpoint.createInputStreamCheckpoint(com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)

Example 19 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class OrcInputStream method advance.

// This comes from the Apache Hive ORC code
/**
 * Loads the next chunk of the underlying compressed input into {@code buffer}.
 *
 * <p>When there is no compressed input left, the buffer is dropped and {@code position},
 * {@code length} and {@code uncompressedOffset} are zeroed. Otherwise a 3 byte chunk header is
 * read: bit 0 of the first byte marks a chunk stored without compression, and the remaining
 * 23 bits (little-endian) give the chunk length. The chunk is then read, decrypted when
 * {@code dwrfDecryptor} is present, and decompressed unless the header marks it as stored
 * uncompressed.
 *
 * @throws IOException propagated from reading, decrypting or decompressing the chunk
 * @throws OrcCorruptionException if the header announces a length that is negative or larger
 *         than what remains in the compressed input
 */
private void advance() throws IOException {
    boolean exhausted = compressedSliceInput == null || compressedSliceInput.remaining() == 0;
    if (exhausted) {
        buffer = null;
        position = 0;
        length = 0;
        uncompressedOffset = 0;
        memoryUsage.setBytes(getRetainedSizeInBytes());
        return;
    }

    // remember where this chunk starts before consuming its header
    currentCompressedBlockOffset = toIntExact(compressedSliceInput.position());

    // 3 byte header
    // NOTE: this must match BLOCK_HEADER_SIZE
    int header0 = compressedSliceInput.readUnsignedByte();
    int header1 = compressedSliceInput.readUnsignedByte();
    int header2 = compressedSliceInput.readUnsignedByte();

    int chunkLength = (header0 >>> 1) | (header1 << 7) | (header2 << 15);
    if (chunkLength < 0 || chunkLength > compressedSliceInput.remaining()) {
        throw new OrcCorruptionException(orcDataSourceId, "The chunkLength (%s) must not be negative or greater than remaining size (%s)", chunkLength, compressedSliceInput.remaining());
    }

    boolean storedUncompressed = (header0 & 0x01) == 1;
    if (!storedUncompressed) {
        // stage the raw chunk in the shared scratch buffer, then decompress via bufferAdapter
        sharedDecompressionBuffer.ensureCapacity(chunkLength);
        byte[] staged = sharedDecompressionBuffer.get();
        int stagedLength = compressedSliceInput.read(staged, 0, chunkLength);
        if (dwrfDecryptor.isPresent()) {
            staged = dwrfDecryptor.get().decrypt(staged, 0, chunkLength);
            stagedLength = staged.length;
        }
        length = decompressor.get().decompress(staged, 0, stagedLength, bufferAdapter);
    }
    else {
        // chunk payload is usable as-is (after optional decryption)
        buffer = ensureCapacity(buffer, chunkLength);
        length = compressedSliceInput.read(buffer, 0, chunkLength);
        if (dwrfDecryptor.isPresent()) {
            buffer = dwrfDecryptor.get().decrypt(buffer, 0, chunkLength);
            length = buffer.length;
        }
    }

    // a freshly loaded chunk is always read from its beginning
    position = 0;
    uncompressedOffset = position;
    memoryUsage.setBytes(getRetainedSizeInBytes());
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) InputStreamCheckpoint.createInputStreamCheckpoint(com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)

Example 20 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class OrcInputStream method seekToCheckpoint.

/**
 * Repositions this stream to the location encoded in {@code checkpoint}.
 *
 * <p>The checkpoint is split into a compressed block offset and an offset within the
 * decompressed block. If the block offset differs from the one currently loaded, the
 * compressed input is repositioned and the current buffer is replaced by an empty one; the
 * target block is then loaded through {@code advance()} as needed.
 *
 * @param checkpoint encoded position, decoded with {@code decodeCompressedBlockOffset} and
 *        {@code decodeDecompressedOffset}
 * @return true if the current buffer was discarded because the checkpoint is in another block
 * @throws IOException propagated from {@code advance()}
 * @throws OrcCorruptionException if the checkpoint names another block but the stream has
 *         neither a decompressor nor a decryptor
 */
public boolean seekToCheckpoint(long checkpoint) throws IOException {
    int targetBlockOffset = decodeCompressedBlockOffset(checkpoint);
    int offsetInBlock = decodeDecompressedOffset(checkpoint);

    boolean discardedBuffer = targetBlockOffset != currentCompressedBlockOffset;
    if (discardedBuffer) {
        boolean hasCodec = decompressor.isPresent() || dwrfDecryptor.isPresent();
        if (!hasCodec) {
            throw new OrcCorruptionException(orcDataSourceId, "Reset stream has a block offset but stream is not compressed or encrypted");
        }
        // point the raw input at the target block and forget the currently loaded data
        compressedSliceInput.setPosition(targetBlockOffset);
        buffer = new byte[0];
        memoryUsage.setBytes(getRetainedSizeInBytes());
        position = 0;
        length = 0;
        uncompressedOffset = 0;
    }

    boolean alreadyAtOffset = offsetInBlock == position - uncompressedOffset;
    if (alreadyAtOffset) {
        // nothing loaded yet: pull in the block before applying the offset
        if (length == 0) {
            advance();
            position += offsetInBlock;
        }
        return discardedBuffer;
    }

    // rewind to the start of the current data, loading the next chunk when the
    // requested offset lies beyond what is buffered
    position = uncompressedOffset;
    if (available() < offsetInBlock) {
        offsetInBlock -= available();
        advance();
    }
    position += offsetInBlock;
    return discardedBuffer;
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) InputStreamCheckpoint.createInputStreamCheckpoint(com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)

Aggregations

OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException)53 Block (com.facebook.presto.common.block.Block)12 LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)10 BlockBuilderStatus (com.facebook.presto.spi.block.BlockBuilderStatus)10 RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock)9 BlockBuilder (com.facebook.presto.spi.block.BlockBuilder)8 LongStreamV2Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV2Checkpoint)6 InputStreamCheckpoint.createInputStreamCheckpoint (com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)5 Slice (io.airlift.slice.Slice)5 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)4 LongStreamV1Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV1Checkpoint)4 ByteStreamCheckpoint (com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint)3 LongInputStream (com.facebook.presto.orc.stream.LongInputStream)3 Block (com.facebook.presto.spi.block.Block)3 ByteArrayBlock (com.facebook.presto.common.block.ByteArrayBlock)2 LongArrayBlock (com.facebook.presto.common.block.LongArrayBlock)2 VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock)2 DecimalStreamCheckpoint (com.facebook.presto.orc.checkpoint.DecimalStreamCheckpoint)2 DwrfProto (com.facebook.presto.orc.proto.DwrfProto)2 ByteArrayInputStream (com.facebook.presto.orc.stream.ByteArrayInputStream)2