Search in sources:

Example 1 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class DecimalStreamReader method readBlock.

/**
 * Builds a block containing the pending batch of {@code nextBatchSize} decimal values.
 *
 * <p>The row group is opened on first use, the reader is positioned via
 * {@code seekToOffset()}, and the working vectors are prepared via
 * {@code allocateVectors()}. When a present stream exists and reports every
 * position as null, only nulls are appended; otherwise the scale and decimal
 * streams are both required and are read (with the null vector when a present
 * stream exists).
 *
 * @param type the column type; must be a {@code DecimalType}
 * @return the block built from the batch
 * @throws OrcCorruptionException if values must be read but the decimal or scale stream is absent
 * @throws IOException if reading an underlying stream fails
 */
@Override
public Block readBlock(Type type) throws IOException {
    DecimalType decimalType = (DecimalType) type;
    if (!rowGroupOpen) {
        openRowGroup();
    }
    seekToOffset();
    allocateVectors();

    BlockBuilder builder = decimalType.createBlockBuilder(new BlockBuilderStatus(), nextBatchSize);

    // The present stream is only consulted when it exists (short-circuit evaluation).
    boolean hasPresentStream = presentStream != null;
    boolean everyValueNull = hasPresentStream
            && presentStream.getUnsetBits(nextBatchSize, nullVector) == nextBatchSize;

    if (everyValueNull) {
        // Nothing to decode: the whole batch is null.
        for (int remaining = nextBatchSize; remaining > 0; remaining--) {
            builder.appendNull();
        }
    } else {
        // At least one value has to be decoded, so both value streams are mandatory.
        if (decimalStream == null) {
            throw new OrcCorruptionException("Value is not null but decimal stream is not present");
        }
        if (scaleStream == null) {
            throw new OrcCorruptionException("Value is not null but scale stream is not present");
        }

        if (hasPresentStream) {
            scaleStream.nextLongVector(nextBatchSize, scaleVector, nullVector);
            if (decimalType.isShort()) {
                decimalStream.nextShortDecimalVector(nextBatchSize, builder, decimalType, scaleVector, nullVector);
            } else {
                decimalStream.nextLongDecimalVector(nextBatchSize, builder, decimalType, scaleVector, nullVector);
            }
        } else {
            // No present stream: mark every position as non-null before reading.
            Arrays.fill(nullVector, false);
            scaleStream.nextLongVector(nextBatchSize, scaleVector);
            if (decimalType.isShort()) {
                decimalStream.nextShortDecimalVector(nextBatchSize, builder, decimalType, scaleVector);
            } else {
                decimalStream.nextLongDecimalVector(nextBatchSize, builder, decimalType, scaleVector);
            }
        }
    }

    // The batch has been consumed; reset the pending-read state.
    readOffset = 0;
    nextBatchSize = 0;
    return builder.build();
}
Also used : DecimalType(com.facebook.presto.spi.type.DecimalType) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) BlockBuilder(com.facebook.presto.spi.block.BlockBuilder) BlockBuilderStatus(com.facebook.presto.spi.block.BlockBuilderStatus)

Example 2 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class DoubleStreamReader method readBlock.

/**
 * Builds a block containing the pending batch of {@code nextBatchSize} values.
 *
 * <p>Any pending {@code readOffset} is first skipped: when a present stream
 * exists, the offset is converted to the number of set bits it covers, and
 * that many entries are skipped in the data stream. The batch is then read,
 * appending only nulls when the present stream reports every position as null.
 *
 * @param type the type used to create the block builder and passed to the data stream
 * @return the block built from the batch
 * @throws OrcCorruptionException if values must be skipped or read but the data stream is absent
 * @throws IOException if reading an underlying stream fails
 */
@Override
public Block readBlock(Type type) throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Advance the present bit reader and use the number of set bits as the
    // amount to skip in the data reader.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        dataStream.skip(readOffset);
    }

    BlockBuilder builder = type.createBlockBuilder(new BlockBuilderStatus(), nextBatchSize);

    boolean hasPresentStream = presentStream != null;
    boolean everyValueNull = false;
    if (hasPresentStream) {
        // Grow the reusable null vector when the batch does not fit.
        if (nullVector.length < nextBatchSize) {
            nullVector = new boolean[nextBatchSize];
        }
        everyValueNull = presentStream.getUnsetBits(nextBatchSize, nullVector) == nextBatchSize;
    }

    if (everyValueNull) {
        for (int remaining = nextBatchSize; remaining > 0; remaining--) {
            builder.appendNull();
        }
    } else {
        if (dataStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        if (hasPresentStream) {
            dataStream.nextVector(type, nextBatchSize, builder, nullVector);
        } else {
            dataStream.nextVector(type, nextBatchSize, builder);
        }
    }

    // The batch has been consumed; reset the pending-read state.
    readOffset = 0;
    nextBatchSize = 0;
    return builder.build();
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) BlockBuilder(com.facebook.presto.spi.block.BlockBuilder) BlockBuilderStatus(com.facebook.presto.spi.block.BlockBuilderStatus)

Example 3 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class ListStreamReader method readBlock.

/**
 * Builds an {@code ArrayBlock} for the pending batch of {@code nextBatchSize} lists.
 *
 * <p>Any pending {@code readOffset} is skipped first: the offset is converted
 * to a count of set present bits (when a present stream exists), the lengths
 * of the skipped lists are summed, and the element reader is told to skip that
 * many elements. The list lengths for the batch are then read, turned into
 * cumulative offsets, and the elements are read through the element reader.
 *
 * @param type the list type; its first type parameter is used as the element type
 * @return the array block for the batch
 * @throws OrcCorruptionException if lengths must be skipped or read but the length stream is absent
 * @throws IOException if reading an underlying stream fails
 */
@Override
public Block readBlock(Type type) throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Advance the present bit reader and use the number of set bits as the
    // number of lists to skip in the length reader.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (lengthStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        long skippedElements = lengthStream.sum(readOffset);
        elementStreamReader.prepareNextRead(toIntExact(skippedElements));
    }

    // Fresh arrays are allocated per batch so that entries for null lists are
    // already zero; reusing and clearing a vector is likely just as expensive.
    int[] lengthVector = new int[nextBatchSize];
    boolean[] nullVector = new boolean[nextBatchSize];

    boolean hasPresentStream = presentStream != null;
    boolean mustReadLengths = true;
    if (hasPresentStream) {
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        mustReadLengths = nullValues != nextBatchSize;
    }
    if (mustReadLengths) {
        if (lengthStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        if (hasPresentStream) {
            lengthStream.nextIntVector(nextBatchSize, lengthVector, nullVector);
        } else {
            lengthStream.nextIntVector(nextBatchSize, lengthVector);
        }
    }

    // offsets[p + 1] is the running total of the lengths of lists 0..p.
    int[] offsets = new int[nextBatchSize + 1];
    for (int position = 0; position < nextBatchSize; position++) {
        offsets[position + 1] = offsets[position] + lengthVector[position];
    }

    Type elementType = type.getTypeParameters().get(0);
    int elementCount = offsets[offsets.length - 1];

    Block elements;
    if (elementCount <= 0) {
        // No elements in this batch: use an empty block of the element type.
        elements = elementType.createBlockBuilder(new BlockBuilderStatus(), 0).build();
    } else {
        elementStreamReader.prepareNextRead(elementCount);
        elements = elementStreamReader.readBlock(elementType);
    }

    ArrayBlock result = new ArrayBlock(nextBatchSize, nullVector, offsets, elements);

    // The batch has been consumed; reset the pending-read state.
    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : Type(com.facebook.presto.spi.type.Type) ArrayBlock(com.facebook.presto.spi.block.ArrayBlock) Block(com.facebook.presto.spi.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) BlockBuilderStatus(com.facebook.presto.spi.block.BlockBuilderStatus)

Example 4 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class LongStreamV2 method readPatchedBaseValues.

// This comes from the Apache Hive ORC code
//
// Decodes one "patched base" run and appends the decoded values to `literals`,
// advancing `numLiterals` once per value.
//
// Layout as consumed below:
//   - firstByte (already read by the caller): bits 1-5 encode the fixed bit width,
//     bit 0 is the high bit of the 9-bit run length
//   - second byte: low 8 bits of the run length (stored as length - 1)
//   - third byte: top 3 bits = base width in bytes (stored as width - 1),
//     low 5 bits = encoded patch width
//   - fourth byte: top 3 bits = patch gap width (stored as width - 1),
//     low 5 bits = patch list length
//   - base value (baseWidth bytes, big endian, sign carried in the top bit)
//   - packed data values, then packed patch list entries
//
// Throws OrcCorruptionException when patchWidth + patchGapWidth exceeds 64 and
// skipCorrupt is false.
//
// NOTE(review): the results of input.read() are used without checking for an
// end-of-stream marker — confirm how `input` signals a truncated stream.
private void readPatchedBaseValues(int firstByte) throws IOException {
    // extract the number of fixed bits
    int fb = LongDecode.decodeBitWidth((firstByte >>> 1) & 0b1_1111);
    // extract the run length of data blob
    int length = (firstByte & 0b1) << 8;
    length |= input.read();
    // runs are always one off
    length += 1;
    // extract the number of bytes occupied by base
    int thirdByte = input.read();
    int baseWidth = (thirdByte >>> 5) & 0b0111;
    // base width is one off
    baseWidth += 1;
    // extract patch width
    int patchWidth = LongDecode.decodeBitWidth(thirdByte & 0b1_1111);
    // read fourth byte and extract patch gap width
    int fourthByte = input.read();
    int patchGapWidth = (fourthByte >>> 5) & 0b0111;
    // patch gap width is one off
    patchGapWidth += 1;
    // extract the length of the patch list
    int patchListLength = fourthByte & 0b1_1111;
    // read the next base width number of bytes to extract base value
    long base = bytesToLongBE(input, baseWidth);
    // mask selects the most significant bit of the baseWidth-byte value
    long mask = (1L << ((baseWidth * 8) - 1));
    // if MSB of base value is 1 then base is negative value else positive
    if ((base & mask) != 0) {
        // sign-magnitude: clear the sign bit, then negate the magnitude
        base = base & ~mask;
        base = -base;
    }
    // unpack the data blob
    long[] unpacked = new long[length];
    packer.unpack(unpacked, 0, length, fb, input);
    // unpack the patch blob
    long[] unpackedPatch = new long[patchListLength];
    // a patch entry (gap bits + patch bits) is held in a single long, so it cannot exceed 64 bits
    if ((patchWidth + patchGapWidth) > 64 && !skipCorrupt) {
        throw new OrcCorruptionException("ORC file is corrupt");
    }
    int bitSize = LongDecode.getClosestFixedBits(patchWidth + patchGapWidth);
    packer.unpack(unpackedPatch, 0, patchListLength, bitSize, input);
    // apply the patch directly when decoding the packed data
    int patchIndex = 0;
    long currentGap;
    long currentPatch;
    // each patch entry holds the gap in the high bits and the patch in the low patchWidth bits
    long patchMask = ((1L << patchWidth) - 1);
    // NOTE(review): the first entry is read unconditionally; with patchListLength == 0
    // this indexes an empty array — confirm whether a zero-length patch list can occur
    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
    currentPatch = unpackedPatch[patchIndex] & patchMask;
    long actualGap = 0;
    // if gap is <=255 then patch value cannot be 0
    // (an entry with gap 255 and patch 0 only extends the gap; accumulate and move on)
    while (currentGap == 255 && currentPatch == 0) {
        actualGap += 255;
        patchIndex++;
        currentGap = unpackedPatch[patchIndex] >>> patchWidth;
        currentPatch = unpackedPatch[patchIndex] & patchMask;
    }
    // add the left over gap
    actualGap += currentGap;
    // unpack data blob, patch it (if required), add base to get final result
    for (int i = 0; i < unpacked.length; i++) {
        if (i == actualGap) {
            // extract the patch value
            // (the patch supplies the bits above the fb low bits stored in the data blob)
            long patchedValue = unpacked[i] | (currentPatch << fb);
            // add base to patched value
            literals[numLiterals++] = base + patchedValue;
            // increment the patch to point to next entry in patch list
            patchIndex++;
            if (patchIndex < patchListLength) {
                // read the next gap and patch
                currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                currentPatch = unpackedPatch[patchIndex] & patchMask;
                actualGap = 0;
                // <=255 then patch cannot be 0
                while (currentGap == 255 && currentPatch == 0) {
                    actualGap += 255;
                    patchIndex++;
                    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                    currentPatch = unpackedPatch[patchIndex] & patchMask;
                }
                // add the left over gap
                actualGap += currentGap;
                // next gap is relative to the current gap
                actualGap += i;
            }
        } else {
            // no patching required. add base to unpacked value to get final value
            literals[numLiterals++] = base + unpacked[i];
        }
    }
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) LongStreamV2Checkpoint(com.facebook.presto.orc.checkpoint.LongStreamV2Checkpoint)

Example 5 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class OrcInputStream method decompressZip.

// This comes from the Apache Hive ORC code
/**
 * Inflates the compressed bytes of {@code in} into {@code buffer} and returns
 * the number of bytes produced.
 *
 * <p>The buffer starts at {@code in.length() * EXPECTED_COMPRESSION_RATIO} and
 * is doubled (preserving its contents) until the inflater finishes or the
 * buffer has reached {@code maxBufferSize}.
 *
 * <p>NOTE(review): the slice base is cast to {@code byte[]}, so this presumably
 * expects a heap-backed slice — confirm against callers.
 *
 * @param in the compressed input
 * @return the number of uncompressed bytes written to {@code buffer}
 * @throws OrcCorruptionException if the input is malformed or cannot be fully
 *         inflated within the allowed buffer size
 * @throws IllegalStateException if the buffer does not grow when asked to
 * @throws IOException declared for callers; see the corruption cases above
 */
private int decompressZip(Slice in) throws IOException {
    // nowrap = true: the input carries no zlib header or checksum
    Inflater inflater = new Inflater(true);
    try {
        inflater.setInput((byte[]) in.getBase(), (int) (in.getAddress() - ARRAY_BYTE_BASE_OFFSET), in.length());
        allocateOrGrowBuffer(in.length() * EXPECTED_COMPRESSION_RATIO, false);

        // First attempt with the initial buffer.
        int bytesWritten = inflater.inflate(buffer, 0, buffer.length);

        // Keep doubling the buffer and inflating until done or the size cap is hit.
        while (!inflater.finished() && buffer.length < maxBufferSize) {
            int oldBufferSize = buffer.length;
            allocateOrGrowBuffer(oldBufferSize * 2, true);
            if (buffer.length <= oldBufferSize) {
                throw new IllegalStateException(String.format("Buffer failed to grow. Old size %d, current size %d", oldBufferSize, buffer.length));
            }
            bytesWritten += inflater.inflate(buffer, bytesWritten, buffer.length - bytesWritten);
        }

        if (inflater.finished()) {
            return bytesWritten;
        }
        throw new OrcCorruptionException("Could not decompress all input (output buffer too small?)");
    } catch (DataFormatException e) {
        throw new OrcCorruptionException(e, "Invalid compressed stream");
    } finally {
        // Always release the inflater's native resources.
        inflater.end();
    }
}
Also used : DataFormatException(java.util.zip.DataFormatException) Inflater(java.util.zip.Inflater) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) InputStreamCheckpoint.createInputStreamCheckpoint(com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)

Aggregations

OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException) 53; Block (com.facebook.presto.common.block.Block) 12; LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) 10; BlockBuilderStatus (com.facebook.presto.spi.block.BlockBuilderStatus) 10; RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock) 9; BlockBuilder (com.facebook.presto.spi.block.BlockBuilder) 8; LongStreamV2Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV2Checkpoint) 6; InputStreamCheckpoint.createInputStreamCheckpoint (com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint) 5; Slice (io.airlift.slice.Slice) 5; BlockBuilder (com.facebook.presto.common.block.BlockBuilder) 4; LongStreamV1Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV1Checkpoint) 4; ByteStreamCheckpoint (com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint) 3; LongInputStream (com.facebook.presto.orc.stream.LongInputStream) 3; Block (com.facebook.presto.spi.block.Block) 3; ByteArrayBlock (com.facebook.presto.common.block.ByteArrayBlock) 2; LongArrayBlock (com.facebook.presto.common.block.LongArrayBlock) 2; VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock) 2; DecimalStreamCheckpoint (com.facebook.presto.orc.checkpoint.DecimalStreamCheckpoint) 2; DwrfProto (com.facebook.presto.orc.proto.DwrfProto) 2; ByteArrayInputStream (com.facebook.presto.orc.stream.ByteArrayInputStream) 2