Search in sources :

Example 11 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class OrcInputStream method readFully.

/**
 * Reads exactly {@code length} bytes into {@code buffer}, starting at index {@code offset},
 * by calling {@code read} repeatedly until the requested range is filled.
 *
 * @param buffer destination array
 * @param offset index in {@code buffer} at which the first byte is stored
 * @param length number of bytes to read
 * @throws OrcCorruptionException if {@code read} reports end of stream before the range is filled
 * @throws IOException if the underlying read fails
 */
public void readFully(byte[] buffer, int offset, int length) throws IOException {
    // Index one past the last byte to fill. The loop must compare against this bound rather than
    // against `length` itself, otherwise a non-zero starting offset shortens (or skips) the read.
    int end = offset + length;
    while (offset < end) {
        int result = read(buffer, offset, end - offset);
        if (result < 0) {
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
        }
        offset += result;
    }
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) InputStreamCheckpoint.createInputStreamCheckpoint(io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)

Example 12 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class LongInputStreamV2 method readPatchedBaseValues.

// This comes from the Apache Hive ORC code
/*
 * Decodes one "patched base" run and appends the decoded values to {@code literals},
 * advancing {@code numLiterals} by the run length.
 *
 * Header layout as read by this method:
 *   - firstByte:  bits 1-5 hold the encoded width of each packed value (decoded into fb);
 *                 bit 0 is the high (9th) bit of the run length.
 *   - 2nd byte:   low 8 bits of the run length (stored as length - 1).
 *   - 3rd byte:   top 3 bits = base width in bytes minus 1; low 5 bits = encoded patch width.
 *   - 4th byte:   top 3 bits = patch gap width minus 1; low 5 bits = number of patch entries.
 * The header is followed by the base value, the packed data blob and the packed patch list.
 *
 * Throws OrcCorruptionException when patchWidth + patchGapWidth exceeds 64 and skipCorrupt
 * is not set.
 *
 * NOTE(review): the results of the three input.read() calls are not checked for -1
 * (end of stream) — confirm whether the caller guarantees the bytes are available.
 * NOTE(review): unpackedPatch[patchIndex] is read without a bounds check, both for the first
 * entry (fails if patchListLength is 0) and inside the gap-continuation loops — presumably
 * well-formed streams never trigger this; verify behavior on corrupt input.
 */
private void readPatchedBaseValues(int firstByte) throws IOException {
    // extract the number of fixed bits
    int fb = LongDecode.decodeBitWidth((firstByte >>> 1) & 0b1_1111);
    // extract the run length of data blob
    int length = (firstByte & 0b1) << 8;
    length |= input.read();
    // runs are always one off
    length += 1;
    // extract the number of bytes occupied by base
    int thirdByte = input.read();
    int baseWidth = (thirdByte >>> 5) & 0b0111;
    // base width is one off
    baseWidth += 1;
    // extract patch width
    int patchWidth = LongDecode.decodeBitWidth(thirdByte & 0b1_1111);
    // read fourth byte and extract patch gap width
    int fourthByte = input.read();
    int patchGapWidth = (fourthByte >>> 5) & 0b0111;
    // patch gap width is one off
    patchGapWidth += 1;
    // extract the length of the patch list
    int patchListLength = fourthByte & 0b1_1111;
    // read the next base width number of bytes to extract base value
    long base = bytesToLongBE(input, baseWidth);
    // mask selecting the most significant bit of the baseWidth-byte value (the sign bit)
    long mask = (1L << ((baseWidth * 8) - 1));
    // if MSB of base value is 1 then base is negative value else positive
    // (sign-magnitude: clear the sign bit, then negate the remaining magnitude)
    if ((base & mask) != 0) {
        base = base & ~mask;
        base = -base;
    }
    // unpack the data blob
    long[] unpacked = new long[length];
    packer.unpack(unpacked, 0, length, fb, input);
    // unpack the patch blob
    long[] unpackedPatch = new long[patchListLength];
    // each patch entry holds a gap and a patch value; together they must fit in a long
    if ((patchWidth + patchGapWidth) > 64 && !skipCorrupt) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Invalid RLEv2 encoded stream");
    }
    int bitSize = LongDecode.getClosestFixedBits(patchWidth + patchGapWidth);
    packer.unpack(unpackedPatch, 0, patchListLength, bitSize, input);
    // apply the patch directly when decoding the packed data
    int patchIndex = 0;
    long currentGap;
    long currentPatch;
    // low patchWidth bits of an entry are the patch value; the bits above them are the gap
    long patchMask = ((1L << patchWidth) - 1);
    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
    currentPatch = unpackedPatch[patchIndex] & patchMask;
    // index into unpacked of the next value that needs patching
    long actualGap = 0;
    // if gap is <=255 then patch value cannot be 0
    // an entry with gap == 255 and patch == 0 is a continuation: it only adds 255 to the
    // gap, and the following entry is consulted for the remainder
    while (currentGap == 255 && currentPatch == 0) {
        actualGap += 255;
        patchIndex++;
        currentGap = unpackedPatch[patchIndex] >>> patchWidth;
        currentPatch = unpackedPatch[patchIndex] & patchMask;
    }
    // add the left over gap
    actualGap += currentGap;
    // unpack data blob, patch it (if required), add base to get final result
    for (int i = 0; i < unpacked.length; i++) {
        if (i == actualGap) {
            // extract the patch value
            // the patch supplies the bits above the fb low bits stored in the data blob
            long patchedValue = unpacked[i] | (currentPatch << fb);
            // add base to patched value
            literals[numLiterals++] = base + patchedValue;
            // increment the patch to point to next entry in patch list
            patchIndex++;
            // when the patch list is exhausted actualGap stays at i, so no later index matches
            if (patchIndex < patchListLength) {
                // read the next gap and patch
                currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                currentPatch = unpackedPatch[patchIndex] & patchMask;
                actualGap = 0;
                // <=255 then patch cannot be 0
                // same continuation handling as for the first patch entry above
                while (currentGap == 255 && currentPatch == 0) {
                    actualGap += 255;
                    patchIndex++;
                    currentGap = unpackedPatch[patchIndex] >>> patchWidth;
                    currentPatch = unpackedPatch[patchIndex] & patchMask;
                }
                // add the left over gap
                actualGap += currentGap;
                // next gap is relative to the current gap
                actualGap += i;
            }
        } else {
            // no patching required. add base to unpacked value to get final value
            literals[numLiterals++] = base + unpacked[i];
        }
    }
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) LongStreamV2Checkpoint(io.trino.orc.checkpoint.LongStreamV2Checkpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint)

Example 13 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class LongInputStreamV2 method next.

/**
 * Copies the next {@code items} decoded values into {@code values}, starting at index 0.
 * Whenever the buffered literals are exhausted, another run is decoded via {@code readValues()}.
 *
 * @param values destination array
 * @param items number of values to produce
 * @throws OrcCorruptionException if a decoded value does not fit in a 32-bit integer
 * @throws IOException if decoding the underlying stream fails
 */
@Override
public void next(int[] values, int items) throws IOException {
    int written = 0;
    int remaining = items;
    while (remaining > 0) {
        // refill the literal buffer once every buffered value has been consumed
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }
        int batch = min(numLiterals - used, remaining);
        int stop = written + batch;
        for (int source = used, target = written; target < stop; source++, target++) {
            long decoded = literals[source];
            int narrowed = (int) decoded;
            // the narrowing cast must round-trip, otherwise the value overflowed an int
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[target] = narrowed;
        }
        used += batch;
        written = stop;
        remaining -= batch;
    }
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) LongStreamV2Checkpoint(io.trino.orc.checkpoint.LongStreamV2Checkpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint)

Example 14 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class ByteColumnReader method readBlock.

/**
 * Produces the block for the pending batch of {@code nextBatchSize} positions, after first
 * skipping {@code readOffset} pending positions, then resets both counters to zero.
 *
 * @return the block for the batch; a run-length encoded null block when every position is null
 * @throws OrcCorruptionException if a required stream (data or present) is missing
 * @throws IOException if reading the underlying streams fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // With a present stream, only the positions whose bit is set occupy space in the data
    // stream, so the skip distance is narrowed to that count.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    Block result;
    if (dataStream != null && presentStream == null) {
        // no null information: every position has a value
        result = readNonNullBlock();
    } else if (dataStream != null) {
        boolean[] nullFlags = new boolean[nextBatchSize];
        int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
        if (nulls == 0) {
            result = readNonNullBlock();
        } else if (nulls == nextBatchSize) {
            result = RunLengthEncodedBlock.create(TINYINT, null, nextBatchSize);
        } else {
            result = readNullBlock(nullFlags, nextBatchSize - nulls);
        }
    } else {
        // no data stream: the whole batch is null, which requires a present stream
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(TINYINT, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) ByteArrayBlock(io.trino.spi.block.ByteArrayBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 15 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class DecimalColumnReader method readBlock.

/**
 * Produces the block for the pending batch of {@code nextBatchSize} positions and then resets
 * {@code readOffset} and {@code nextBatchSize} to zero.
 *
 * <p>Every all-null result is a run-length encoded null block built with this reader's
 * {@code type}, so the block type is the same regardless of which branch detects the all-null case.
 *
 * @return the block for the batch
 * @throws OrcCorruptionException if both data streams are absent and the present stream is missing too
 * @throws IOException if reading the underlying streams fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    seekToOffset();
    Block block;
    if (decimalStream == null && scaleStream == null) {
        // no data streams at all: the whole batch is null, which requires a present stream
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        block = RunLengthEncodedBlock.create(type, null, nextBatchSize);
    } else if (presentStream == null) {
        checkDataStreamsArePresent();
        block = readNonNullBlock();
    } else {
        checkDataStreamsArePresent();
        boolean[] isNull = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNull);
        if (nullCount == 0) {
            block = readNonNullBlock();
        } else if (nullCount != nextBatchSize) {
            block = readNullBlock(isNull, nextBatchSize - nullCount);
        } else {
            // all positions are null: use the column's own type, matching the branch above
            block = RunLengthEncodedBlock.create(type, null, nextBatchSize);
        }
    }
    readOffset = 0;
    nextBatchSize = 0;
    return block;
}
Also used : Block(io.trino.spi.block.Block) LongArrayBlock(io.trino.spi.block.LongArrayBlock) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) Int128ArrayBlock(io.trino.spi.block.Int128ArrayBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Aggregations

OrcCorruptionException (io.trino.orc.OrcCorruptionException)33 Block (io.trino.spi.block.Block)13 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)11 LongStreamCheckpoint (io.trino.orc.checkpoint.LongStreamCheckpoint)7 DecimalStreamCheckpoint (io.trino.orc.checkpoint.DecimalStreamCheckpoint)4 InputStreamCheckpoint.createInputStreamCheckpoint (io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)4 LongStreamV2Checkpoint (io.trino.orc.checkpoint.LongStreamV2Checkpoint)4 ByteArrayBlock (io.trino.spi.block.ByteArrayBlock)4 LongArrayBlock (io.trino.spi.block.LongArrayBlock)4 Slice (io.airlift.slice.Slice)3 LongStreamV1Checkpoint (io.trino.orc.checkpoint.LongStreamV1Checkpoint)3 ByteStreamCheckpoint (io.trino.orc.checkpoint.ByteStreamCheckpoint)2 ReaderUtils.verifyStreamType (io.trino.orc.reader.ReaderUtils.verifyStreamType)2 IntArrayBlock (io.trino.spi.block.IntArrayBlock)2 LazyBlock (io.trino.spi.block.LazyBlock)2 RowBlock (io.trino.spi.block.RowBlock)2 VariableWidthBlock (io.trino.spi.block.VariableWidthBlock)2 RowType (io.trino.spi.type.RowType)2 Type (io.trino.spi.type.Type)2 ColumnReaders.createColumnReader (io.trino.orc.reader.ColumnReaders.createColumnReader)1