Search in sources :

Example 46 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class LongDirectBatchStreamReader method readBlock.

/**
 * Produces the block for the pending batch of {@code nextBatchSize} positions.
 *
 * <p>Any pending {@code readOffset} is consumed first: when a present stream exists the
 * offset is replaced by the number of set bits it covers, and that many values are skipped
 * in the data stream. Both {@code readOffset} and {@code nextBatchSize} are reset to zero
 * before returning.
 *
 * @return the block for the batch; a {@link RunLengthEncodedBlock} created with a
 *         {@code null} value when there is no data stream or every position is null
 * @throws OrcCorruptionException if values must be skipped but the data stream is missing,
 *         or if both the data stream and the present stream are missing
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // skip ahead the present bit reader, but count the set bits
    // and use this as the skip size for the data reader
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    final Block result;
    if (dataStream != null) {
        if (presentStream != null) {
            // Mixed case: consult the present stream to find out how many positions are null.
            boolean[] nullFlags = new boolean[nextBatchSize];
            int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
            if (nulls == 0) {
                result = readNonNullBlock();
            }
            else if (nulls == nextBatchSize) {
                // Every position is null, so nothing is taken from the data stream.
                result = RunLengthEncodedBlock.create(type, null, nextBatchSize);
            }
            else {
                result = readNullBlock(nullFlags, nextBatchSize - nulls);
            }
        }
        else {
            // No present stream: this path reads the batch without any null flags.
            result = readNonNullBlock();
        }
    }
    else {
        // Without a data stream the batch is emitted as all nulls, which needs a present stream.
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(type, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) ShortArrayBlock(com.facebook.presto.common.block.ShortArrayBlock) IntArrayBlock(com.facebook.presto.common.block.IntArrayBlock) LongArrayBlock(com.facebook.presto.common.block.LongArrayBlock) Block(com.facebook.presto.common.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 47 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class MapDirectBatchStreamReader method readBlock.

/**
 * Produces the map block for the pending batch of {@code nextBatchSize} positions.
 *
 * <p>Any pending {@code readOffset} is consumed first: the number of map entries covered by
 * the skipped positions is summed from the length stream and forwarded to the key and value
 * readers via {@code prepareNextRead}. The per-position lengths for the batch are then read
 * into a vector, the key and value blocks are read, and the lengths are converted in place
 * into offsets. Both {@code readOffset} and {@code nextBatchSize} are reset to zero before
 * returning.
 *
 * @return the map block built from the key and value blocks
 * @throws OrcCorruptionException if lengths are required but the length stream is missing
 * @throws ArithmeticException if the number of entries to skip or to read does not fit in
 *         an {@code int}
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the data reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            // Narrow once, before touching either nested reader, so an oversized skip fails
            // without advancing one reader and not the other.
            int entrySkipSize = toIntExact(lengthStream.sum(readOffset));
            keyStreamReader.prepareNextRead(entrySkipSize);
            valueStreamReader.prepareNextRead(entrySkipSize);
        }
    }
    // We will use the offsetVector as the buffer to read the length values from lengthStream,
    // and the length values will be converted in-place to an offset vector.
    int[] offsetVector = new int[nextBatchSize + 1];
    boolean[] nullVector = null;
    if (presentStream == null) {
        if (lengthStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        lengthStream.next(offsetVector, nextBatchSize);
    }
    else {
        nullVector = new boolean[nextBatchSize];
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        if (nullValues != nextBatchSize) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            // Only the non-null positions have a length in the stream.
            lengthStream.next(offsetVector, nextBatchSize - nullValues);
            unpackLengthNulls(offsetVector, nullVector, nextBatchSize - nullValues);
        }
    }
    Type keyType = type.getKeyType();
    Type valueType = type.getValueType();
    // Calculate the entryCount. Note that the values in the offsetVector are still length values now.
    // The sum is accumulated in a long and narrowed with toIntExact so that a total that does
    // not fit in an int raises ArithmeticException instead of silently wrapping, matching the
    // guard already applied to the skip size above.
    long totalEntries = 0;
    for (int i = 0; i < offsetVector.length - 1; i++) {
        totalEntries += offsetVector[i];
    }
    int entryCount = toIntExact(totalEntries);
    Block keys;
    Block values;
    if (entryCount > 0) {
        keyStreamReader.prepareNextRead(entryCount);
        valueStreamReader.prepareNextRead(entryCount);
        keys = keyStreamReader.readBlock();
        values = valueStreamReader.readBlock();
    }
    else {
        // No entries in this batch: use empty key/value blocks without touching the nested readers.
        keys = keyType.createBlockBuilder(null, 0).build();
        values = valueType.createBlockBuilder(null, 1).build();
    }
    Block[] keyValueBlock = options.mapNullKeysEnabled() ? new Block[] {keys, values} : filterOutNullKeys(nextBatchSize, keys, values, offsetVector);
    convertLengthVectorToOffsetVector(offsetVector);
    Block block = type.createBlockFromKeyValue(nextBatchSize, Optional.ofNullable(nullVector), offsetVector, keyValueBlock[0], keyValueBlock[1]);
    readOffset = 0;
    nextBatchSize = 0;
    return block;
}
Also used : MapType(com.facebook.presto.common.type.MapType) ReaderUtils.verifyStreamType(com.facebook.presto.orc.reader.ReaderUtils.verifyStreamType) Type(com.facebook.presto.common.type.Type) Block(com.facebook.presto.common.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 48 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class ByteInputStream method readNextBlock.

// This is based on the Apache Hive ORC code
/**
 * Refills {@code buffer} with the next run from {@code input}, records the input checkpoint
 * taken before the run, resets {@code offset} to zero and sets {@code length} to the number
 * of bytes made available.
 *
 * @throws OrcCorruptionException if the input ends where a control byte or a repeated value
 *         is expected
 * @throws IOException declared for the underlying input operations
 */
private void readNextBlock() throws IOException {
    lastReadInputCheckpoint = input.getCheckpoint();

    int control = input.read();
    if (control == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of buffer RLE byte");
    }
    offset = 0;

    // a set high bit marks a literal sequence; a clear high bit marks a repetition
    boolean literalSequence = (control & 0x80) != 0;
    if (literalSequence) {
        // length is 2's complement of byte
        length = 0x100 - control;
        // read the literals into the buffer
        input.readFully(buffer, 0, length);
        return;
    }

    length = control + MIN_REPEAT_SIZE;
    // read the repeated value
    int value = input.read();
    if (value == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Reading RLE byte got EOF");
    }
    // fill buffer with the value
    Arrays.fill(buffer, 0, length, (byte) value);
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) ByteStreamCheckpoint(com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint)

Example 49 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class ByteInputStream method next.

/**
 * Copies the next {@code items} bytes of this stream into {@code values}, starting at
 * index 0, refilling the internal buffer through {@code readNextBlock()} whenever it has
 * been fully consumed.
 *
 * @param values destination array; must have room for at least {@code items} bytes
 * @param items number of bytes to copy
 * @throws OrcCorruptionException if the internal buffer is empty when bytes are still needed
 * @throws IOException declared for the underlying input operations
 */
public void next(byte[] values, int items) throws IOException {
    for (int written = 0; written < items; ) {
        // Buffer exhausted: pull in the next run before copying.
        if (offset == length) {
            readNextBlock();
        }
        if (length == 0) {
            throw new OrcCorruptionException(input.getOrcDataSourceId(), "Unexpected end of stream");
        }

        // Copy as much as both the caller still needs and the buffer still holds.
        int stillNeeded = items - written;
        int stillBuffered = length - offset;
        int copyCount = min(stillNeeded, stillBuffered);
        System.arraycopy(buffer, offset, values, written, copyCount);

        offset += copyCount;
        written += copyCount;
    }
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) ByteStreamCheckpoint(com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint)

Example 50 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class ByteBatchStreamReader method readBlock.

/**
 * Produces the {@code TINYINT} block for the pending batch of {@code nextBatchSize} positions.
 *
 * <p>A pending {@code readOffset} is applied before reading: with a present stream it is
 * first translated into the count of set bits it spans, and the data stream then skips that
 * many values. On return both {@code readOffset} and {@code nextBatchSize} are zero.
 *
 * @return the block for the batch; a {@link RunLengthEncodedBlock} created with a
 *         {@code null} value when there is no data stream or every position is null
 * @throws OrcCorruptionException if values must be skipped but the data stream is missing,
 *         or if neither a data stream nor a present stream is available
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // skip ahead the present bit reader, but count the set bits
    // and use this as the skip size for the data reader
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    final Block batch;
    if (dataStream != null) {
        if (presentStream == null) {
            // No present stream: this path reads the batch without any null flags.
            batch = readNonNullBlock();
        }
        else {
            boolean[] nullFlags = new boolean[nextBatchSize];
            int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
            if (nulls == nextBatchSize) {
                // Every position is null, so nothing is taken from the data stream.
                batch = RunLengthEncodedBlock.create(TINYINT, null, nextBatchSize);
            }
            else if (nulls == 0) {
                batch = readNonNullBlock();
            }
            else {
                batch = readNullBlock(nullFlags, nextBatchSize - nulls);
            }
        }
    }
    else {
        // Without a data stream the batch is emitted as all nulls, which needs a present stream.
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        batch = RunLengthEncodedBlock.create(TINYINT, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return batch;
}
Also used : RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) ByteArrayBlock(com.facebook.presto.common.block.ByteArrayBlock) Block(com.facebook.presto.common.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Aggregations

OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException)53 Block (com.facebook.presto.common.block.Block)12 LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)10 BlockBuilderStatus (com.facebook.presto.spi.block.BlockBuilderStatus)10 RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock)9 BlockBuilder (com.facebook.presto.spi.block.BlockBuilder)8 LongStreamV2Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV2Checkpoint)6 InputStreamCheckpoint.createInputStreamCheckpoint (com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)5 Slice (io.airlift.slice.Slice)5 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)4 LongStreamV1Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV1Checkpoint)4 ByteStreamCheckpoint (com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint)3 LongInputStream (com.facebook.presto.orc.stream.LongInputStream)3 Block (com.facebook.presto.spi.block.Block)3 ByteArrayBlock (com.facebook.presto.common.block.ByteArrayBlock)2 LongArrayBlock (com.facebook.presto.common.block.LongArrayBlock)2 VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock)2 DecimalStreamCheckpoint (com.facebook.presto.orc.checkpoint.DecimalStreamCheckpoint)2 DwrfProto (com.facebook.presto.orc.proto.DwrfProto)2 ByteArrayInputStream (com.facebook.presto.orc.stream.ByteArrayInputStream)2