Search in sources :

Example 21 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class LongInputStreamV1 method readValues.

// This comes from the Apache Hive ORC code
/**
 * Loads the next run from {@code input} into the decoder state.
 * <p>
 * The first byte is a control byte. A control byte below {@code 0x80} starts a
 * repeated run of {@code control + MIN_REPEAT_SIZE} values, described by a signed
 * one-byte delta followed by a variable-length base value stored in
 * {@code literals[0]}. Any other control byte starts a run of {@code 0x100 - control}
 * variable-length values that are read into {@code literals}.
 *
 * @throws OrcCorruptionException if the stream ends where the control byte or the delta byte is expected
 * @throws IOException if reading from the underlying stream fails
 */
private void readValues() throws IOException {
    lastReadInputCheckpoint = input.getCheckpoint();

    int header = input.read();
    if (header == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer");
    }

    // Both run kinds restart consumption from the beginning of the run
    used = 0;
    repeat = header < 0x80;

    if (!repeat) {
        // literal run: every value is stored explicitly
        numLiterals = 0x100 - header;
        int index = 0;
        while (index < numLiterals) {
            literals[index] = LongDecode.readVInt(signed, input);
            index++;
        }
        return;
    }

    // repeated run: a base value plus a constant per-element delta
    numLiterals = header + MIN_REPEAT_SIZE;
    int rawDelta = input.read();
    if (rawDelta == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "End of stream in RLE Integer");
    }
    // narrowing to byte maps 0..255 onto -128..127
    this.delta = (byte) rawDelta;
    literals[0] = LongDecode.readVInt(signed, input);
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) LongStreamV1Checkpoint(io.trino.orc.checkpoint.LongStreamV1Checkpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint)

Example 22 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class LongInputStreamV1 method next.

/**
 * Decodes {@code items} values into {@code values}, filling it from index 0.
 * <p>
 * Values are taken from the current run; whenever the run is exhausted the next
 * one is loaded with {@code readValues()}. For a repeated run each value is
 * computed as {@code literals[0] + position * delta}; otherwise it is read from
 * {@code literals} directly.
 *
 * @param values destination array
 * @param items number of values to decode
 * @throws OrcCorruptionException if a decoded value does not fit in a {@code short}
 * @throws IOException if loading the next run fails
 */
@Override
public void next(short[] values, int items) throws IOException {
    int written = 0;
    while (written < items) {
        // current run fully consumed: reset the counters and load another run
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int runLength = min(numLiterals - used, items - written);
        for (int i = 0; i < runLength; i++) {
            int position = used + i;
            long decoded = repeat
                    ? literals[0] + (position * delta)
                    : literals[position];
            short narrowed = (short) decoded;
            // the round trip through short must be lossless
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
            }
            values[written + i] = narrowed;
        }

        used += runLength;
        written += runLength;
    }
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) LongStreamV1Checkpoint(io.trino.orc.checkpoint.LongStreamV1Checkpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint)

Example 23 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class LongInputStreamV2 method readValues.

// This comes from the Apache Hive ORC code
/**
 * Reads the header byte of the next run and dispatches to the reader for its encoding.
 * <p>
 * The encoding is taken from the top two bits of the header byte and compared
 * against the ordinals of {@code EncodingType}; any value that is not
 * {@code SHORT_REPEAT}, {@code DIRECT} or {@code PATCHED_BASE} is handled by
 * {@code readDeltaValues}.
 *
 * @throws OrcCorruptionException if the stream ends where the header byte is expected
 * @throws IOException if reading from the underlying stream fails
 */
private void readValues() throws IOException {
    lastReadInputCheckpoint = input.getCheckpoint();

    int header = input.read();
    if (header < 0) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer");
    }

    // the two most significant bits select the encoding
    int encoding = (header >>> 6) & 0x03;

    if (encoding == EncodingType.SHORT_REPEAT.ordinal()) {
        readShortRepeatValues(header);
        return;
    }
    if (encoding == EncodingType.DIRECT.ordinal()) {
        readDirectValues(header);
        return;
    }
    if (encoding == EncodingType.PATCHED_BASE.ordinal()) {
        readPatchedBaseValues(header);
        return;
    }
    readDeltaValues(header);
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) LongStreamV2Checkpoint(io.trino.orc.checkpoint.LongStreamV2Checkpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint)

Example 24 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class LongInputStreamV2 method next.

/**
 * Decodes {@code items} values into {@code values}, filling it from index 0.
 * <p>
 * Values are copied out of {@code literals}; whenever the buffered run is
 * exhausted the next one is loaded with {@code readValues()}.
 *
 * @param values destination array
 * @param items number of values to decode
 * @throws OrcCorruptionException if a decoded value does not fit in a {@code short}
 * @throws IOException if loading the next run fails
 */
@Override
public void next(short[] values, int items) throws IOException {
    int written = 0;
    while (written < items) {
        // buffered run fully consumed: reset the counters and load another run
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int runLength = min(numLiterals - used, items - written);
        int sourceEnd = used + runLength;
        for (int source = used, target = written; source < sourceEnd; source++, target++) {
            long decoded = literals[source];
            short narrowed = (short) decoded;
            // the round trip through short must be lossless
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
            }
            values[target] = narrowed;
        }

        used += runLength;
        written += runLength;
    }
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) LongStreamV2Checkpoint(io.trino.orc.checkpoint.LongStreamV2Checkpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint)

Example 25 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class ByteInputStream method readNextBlock.

// This is based on the Apache Hive ORC code
/**
 * Loads the next run of bytes from {@code input} into {@code buffer} and resets {@code offset}.
 * <p>
 * The first byte is a control byte. When its high bit is set, the run consists of
 * {@code 0x100 - control} literal bytes that are read straight into the buffer.
 * Otherwise the run is {@code control + MIN_REPEAT_SIZE} copies of the single byte
 * that follows.
 *
 * @throws OrcCorruptionException if the stream ends where the control byte or the repeated value is expected
 * @throws IOException if reading from the underlying stream fails
 */
private void readNextBlock() throws IOException {
    lastReadInputCheckpoint = input.getCheckpoint();

    int header = input.read();
    if (header == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of buffer RLE byte");
    }
    offset = 0;

    if ((header & 0x80) != 0) {
        // literal run: the count is the two's complement of the control byte
        length = 0x100 - header;
        input.readFully(buffer, 0, length);
        return;
    }

    // repeated run: one value replicated across the run
    length = header + MIN_REPEAT_SIZE;
    int repeated = input.read();
    if (repeated == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Reading RLE byte got EOF");
    }
    Arrays.fill(buffer, 0, length, (byte) repeated);
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) ByteStreamCheckpoint(io.trino.orc.checkpoint.ByteStreamCheckpoint)

Aggregations

OrcCorruptionException (io.trino.orc.OrcCorruptionException)33 Block (io.trino.spi.block.Block)13 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)11 LongStreamCheckpoint (io.trino.orc.checkpoint.LongStreamCheckpoint)7 DecimalStreamCheckpoint (io.trino.orc.checkpoint.DecimalStreamCheckpoint)4 InputStreamCheckpoint.createInputStreamCheckpoint (io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)4 LongStreamV2Checkpoint (io.trino.orc.checkpoint.LongStreamV2Checkpoint)4 ByteArrayBlock (io.trino.spi.block.ByteArrayBlock)4 LongArrayBlock (io.trino.spi.block.LongArrayBlock)4 Slice (io.airlift.slice.Slice)3 LongStreamV1Checkpoint (io.trino.orc.checkpoint.LongStreamV1Checkpoint)3 ByteStreamCheckpoint (io.trino.orc.checkpoint.ByteStreamCheckpoint)2 ReaderUtils.verifyStreamType (io.trino.orc.reader.ReaderUtils.verifyStreamType)2 IntArrayBlock (io.trino.spi.block.IntArrayBlock)2 LazyBlock (io.trino.spi.block.LazyBlock)2 RowBlock (io.trino.spi.block.RowBlock)2 VariableWidthBlock (io.trino.spi.block.VariableWidthBlock)2 RowType (io.trino.spi.type.RowType)2 Type (io.trino.spi.type.Type)2 ColumnReaders.createColumnReader (io.trino.orc.reader.ColumnReaders.createColumnReader)1