Search in sources :

Example 11 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class DecimalInputStream method nextLong.

/**
 * Reads one variable-length encoded value from {@code input} and decodes it to a {@code long}.
 *
 * <p>Bytes are consumed until one without the {@code 0x80} continuation bit is seen; the low
 * seven bits of each byte are accumulated least-significant group first. The lowest bit of the
 * accumulated number is then interpreted as the sign and the remaining bits as the magnitude.
 *
 * @return the decoded signed value
 * @throws OrcCorruptionException if the stream ends in the middle of a value, or if the encoded
 *         number needs more than 64 bits
 * @throws IOException if reading from the underlying stream fails
 */
public long nextLong() throws IOException {
    long accumulated = 0;
    int shift = 0;
    long current;
    do {
        current = input.read();
        if (current == -1) {
            throw new OrcCorruptionException(input.getOrcDataSourceId(), "Reading BigInteger past EOF");
        }
        long payload = current & 0x7f;
        // At shift 63 only a single bit is left in the long; anything beyond that cannot fit.
        if (shift > 63 || (shift == 63 && payload > 1)) {
            throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decimal does not fit long (invalid table schema?)");
        }
        accumulated |= payload << shift;
        shift += 7;
    } while (current >= 0x80);

    if ((accumulated & 0x01) == 0) {
        // Non-negative: drop the sign bit and clear whatever the arithmetic shift copied into the top bit.
        return (accumulated >> 1) & MAX_VALUE;
    }

    // Negative: same steps as for the magnitude above, then force the top bit on.
    accumulated += 1;
    accumulated = -accumulated;
    accumulated >>= 1;
    return accumulated | (0x01L << 63);
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) DecimalStreamCheckpoint(com.facebook.presto.orc.checkpoint.DecimalStreamCheckpoint)

Example 12 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class SliceDirectBatchStreamReader method readBlock.

/**
 * Reads the next batch of {@code nextBatchSize} values and returns them as a block.
 *
 * <p>The method proceeds in these steps:
 * <ol>
 *   <li>opens the row group if it is not open yet;</li>
 *   <li>skips the {@code readOffset} positions that were not consumed, advancing the present,
 *       length and data streams as needed;</li>
 *   <li>returns the result of {@code readAllNullsBlock()} when there is no length stream or
 *       when every position in the batch is null;</li>
 *   <li>otherwise reads the per-value lengths, reads the value bytes, converts the lengths to
 *       offsets in place (truncating values when {@code maxCodePointCount >= 0}) and wraps
 *       everything in a {@link VariableWidthBlock}.</li>
 * </ol>
 *
 * <p>{@code readOffset} and {@code nextBatchSize} are reset to zero on every return path, and
 * also before the size-limit and missing-data-stream checks can throw.
 *
 * @return the block holding the values of this batch
 * @throws OrcCorruptionException if a stream required to decode the batch is missing
 * @throws GenericInternalException if the batch holds more than {@code ONE_GIGABYTE} bytes of data
 * @throws IOException if reading from one of the underlying streams fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the length reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but length stream is not present");
            }
            // the sum of the skipped lengths is the number of bytes to skip in the data stream
            long dataSkipSize = lengthStream.sum(readOffset);
            if (dataSkipSize > 0) {
                if (dataStream == null) {
                    throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
                }
                dataStream.skip(dataSkipSize);
            }
        }
    }
    // without a length stream there are no values to decode, so the whole batch is returned as nulls
    if (lengthStream == null) {
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        Block nullValueBlock = readAllNullsBlock();
        readOffset = 0;
        nextBatchSize = 0;
        return nullValueBlock;
    }
    // create new isNullVector and offsetVector for VariableWidthBlock
    boolean[] isNullVector = null;
    // We will use the offsetVector as the buffer to read the length values from lengthStream,
    // and the length values will be converted in-place to an offset vector.
    int[] offsetVector = new int[nextBatchSize + 1];
    if (presentStream == null) {
        // no present stream: one length is read for every position
        lengthStream.next(offsetVector, nextBatchSize);
    } else {
        isNullVector = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNullVector);
        if (nullCount == nextBatchSize) {
            // all nulls
            Block nullValueBlock = readAllNullsBlock();
            readOffset = 0;
            nextBatchSize = 0;
            return nullValueBlock;
        }
        if (nullCount == 0) {
            // no nulls in this batch: drop the null vector so the block is built without one
            isNullVector = null;
            lengthStream.next(offsetVector, nextBatchSize);
        } else {
            // only the non-null positions have a length in the stream; read those and let
            // unpackLengthNulls place them according to isNullVector
            // (presumably null positions end up with length zero; confirm in unpackLengthNulls)
            lengthStream.next(offsetVector, nextBatchSize - nullCount);
            unpackLengthNulls(offsetVector, isNullVector, nextBatchSize - nullCount);
        }
    }
    // Calculate the total length for all entries. Note that the values in the offsetVector are still length values now.
    // The sum is kept in a long so that it cannot wrap before the size check below.
    long totalLength = 0;
    for (int i = 0; i < nextBatchSize; i++) {
        totalLength += offsetVector[i];
    }
    // remember the batch size, then reset the reader state before any of the exits below
    int currentBatchSize = nextBatchSize;
    readOffset = 0;
    nextBatchSize = 0;
    if (totalLength == 0) {
        // offsetVector is passed on without conversion here
        return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector));
    }
    if (totalLength > ONE_GIGABYTE) {
        throw new GenericInternalException(format("Values in column \"%s\" are too large to process for Presto. %s column values are larger than 1GB [%s]", streamDescriptor.getFieldName(), currentBatchSize, streamDescriptor.getOrcDataSourceId()));
    }
    if (dataStream == null) {
        throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is missing");
    }
    // allocate enough space to read
    byte[] data = new byte[toIntExact(totalLength)];
    Slice slice = Slices.wrappedBuffer(data);
    if (maxCodePointCount < 0) {
        // unbounded, simply read all data in one shot
        dataStream.next(data, 0, data.length);
        convertLengthVectorToOffsetVector(offsetVector);
    } else {
        // We do the following operations together in the for loop:
        // * truncate strings
        // * convert original length values in offsetVector into truncated offset values
        // currentLength always holds the original length of entry (i - 1); it is saved before
        // offsetVector[i - 1] was overwritten with an offset.
        int currentLength = offsetVector[0];
        offsetVector[0] = 0;
        for (int i = 1; i <= currentBatchSize; i++) {
            // save the length of entry i before slot i is overwritten with the end offset of entry (i - 1)
            int nextLength = offsetVector[i];
            if (isNullVector != null && isNullVector[i - 1]) {
                checkState(currentLength == 0, "Corruption in slice direct stream: length is non-zero for null entry");
                // a null entry occupies no bytes: its end offset equals its start offset
                offsetVector[i] = offsetVector[i - 1];
                currentLength = nextLength;
                continue;
            }
            int offset = offsetVector[i - 1];
            // read data without truncation
            // NOTE(review): the third argument is offset + currentLength, while the unbounded branch
            // above passes data.length together with offset 0. Confirm whether this parameter of
            // dataStream.next is an end position or a byte count.
            dataStream.next(data, offset, offset + currentLength);
            // adjust offsetVector with truncated length
            int truncatedLength = computeTruncatedLength(slice, offset, currentLength, maxCodePointCount, isCharType);
            verify(truncatedLength >= 0);
            offsetVector[i] = offset + truncatedLength;
            currentLength = nextLength;
        }
    }
    // this can lead to over-retention but unlikely to happen given truncation rarely happens
    return new VariableWidthBlock(currentBatchSize, slice, offsetVector, Optional.ofNullable(isNullVector));
}
Also used : GenericInternalException(com.facebook.presto.common.GenericInternalException) Slice(io.airlift.slice.Slice) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 13 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class LongInputStreamV1 method readHeader.

/**
 * Reads the header of the next run from {@code input} and resets the run state.
 *
 * <p>A control byte below {@code 0x80} starts a repeated run of
 * {@code control + MIN_REPEAT_SIZE} values, described by a signed one-byte delta and a varint
 * base value. Any other control byte starts a run of {@code 0x100 - control} values that are
 * not read here.
 *
 * @throws OrcCorruptionException if the stream ends before the control byte or the delta byte
 * @throws IOException if reading from the underlying stream fails
 */
private void readHeader() throws IOException {
    final int controlByte = input.read();
    if (controlByte == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer");
    }

    if (controlByte >= 0x80) {
        // run of individually encoded values; nothing more to read for the header
        numValuesInRun = 0x100 - controlByte;
        used = 0;
        repeat = false;
        return;
    }

    // repeated run: the run state is updated before the delta and base are read
    numValuesInRun = controlByte + MIN_REPEAT_SIZE;
    used = 0;
    repeat = true;

    delta = input.read();
    if (delta == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "End of stream in RLE Integer");
    }
    // reinterpret the unsigned byte (0 to 255) as a signed one (-128 to 127)
    delta = (byte) delta;
    repeatBase = input.readVarint(signed);
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongStreamV1Checkpoint(com.facebook.presto.orc.checkpoint.LongStreamV1Checkpoint) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)

Example 14 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class LongInputStreamV1 method next(int[], int).

/**
 * Decodes the next {@code items} values of the stream into {@code values}, filling the array
 * from index 0.
 *
 * <p>Whenever the current run is used up, a new run header is read. Values of a repeated run
 * are computed from {@code repeatBase} and {@code delta}; values of any other run are read
 * from {@code input} one varint at a time.
 *
 * @param values destination array; must have room for at least {@code items} elements
 * @param items number of values to decode
 * @throws OrcCorruptionException if a decoded value does not fit into an {@code int}
 * @throws IOException if reading from the underlying stream fails
 */
@Override
public void next(int[] values, int items) throws IOException {
    int written = 0;
    int remaining = items;
    while (remaining > 0) {
        if (used == numValuesInRun) {
            // current run is exhausted: clear the run state and load the next header
            numValuesInRun = 0;
            used = 0;
            readHeader();
        }

        int count = min(numValuesInRun - used, remaining);
        for (int k = 0; k < count; k++) {
            long decoded = repeat
                    ? repeatBase + ((used + k) * delta)
                    : input.readVarint(signed);
            int narrowed = (int) decoded;
            // the narrowing cast must round-trip, otherwise the value needs more than 32 bits
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[written + k] = narrowed;
        }

        used += count;
        written += count;
        remaining -= count;
    }
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongStreamV1Checkpoint(com.facebook.presto.orc.checkpoint.LongStreamV1Checkpoint) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)

Example 15 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class LongInputStreamV1 method next(short[], int).

/**
 * Decodes the next {@code items} values of the stream into {@code values}, filling the array
 * from index 0.
 *
 * <p>Whenever the current run is used up, a new run header is read. Values of a repeated run
 * are computed from {@code repeatBase} and {@code delta}; values of any other run are read
 * from {@code input} one varint at a time.
 *
 * @param values destination array; must have room for at least {@code items} elements
 * @param items number of values to decode
 * @throws OrcCorruptionException if a decoded value does not fit into a {@code short}
 * @throws IOException if reading from the underlying stream fails
 */
@Override
public void next(short[] values, int items) throws IOException {
    int written = 0;
    int remaining = items;
    while (remaining > 0) {
        if (used == numValuesInRun) {
            // current run is exhausted: clear the run state and load the next header
            numValuesInRun = 0;
            used = 0;
            readHeader();
        }

        int count = min(numValuesInRun - used, remaining);
        for (int k = 0; k < count; k++) {
            long decoded = repeat
                    ? repeatBase + ((used + k) * delta)
                    : input.readVarint(signed);
            short narrowed = (short) decoded;
            // the narrowing cast must round-trip, otherwise the value needs more than 16 bits
            if (narrowed != decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
            }
            values[written + k] = narrowed;
        }

        used += count;
        written += count;
        remaining -= count;
    }
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongStreamV1Checkpoint(com.facebook.presto.orc.checkpoint.LongStreamV1Checkpoint) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)

Aggregations

OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException) 53, Block (com.facebook.presto.common.block.Block) 12, LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) 10, BlockBuilderStatus (com.facebook.presto.spi.block.BlockBuilderStatus) 10, RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock) 9, BlockBuilder (com.facebook.presto.spi.block.BlockBuilder) 8, LongStreamV2Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV2Checkpoint) 6, InputStreamCheckpoint.createInputStreamCheckpoint (com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint) 5, Slice (io.airlift.slice.Slice) 5, BlockBuilder (com.facebook.presto.common.block.BlockBuilder) 4, LongStreamV1Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV1Checkpoint) 4, ByteStreamCheckpoint (com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint) 3, LongInputStream (com.facebook.presto.orc.stream.LongInputStream) 3, Block (com.facebook.presto.spi.block.Block) 3, ByteArrayBlock (com.facebook.presto.common.block.ByteArrayBlock) 2, LongArrayBlock (com.facebook.presto.common.block.LongArrayBlock) 2, VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock) 2, DecimalStreamCheckpoint (com.facebook.presto.orc.checkpoint.DecimalStreamCheckpoint) 2, DwrfProto (com.facebook.presto.orc.proto.DwrfProto) 2, ByteArrayInputStream (com.facebook.presto.orc.stream.ByteArrayInputStream) 2