Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
From the class UnionColumnReader, method readBlock():
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (dataStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
            }
            // count, per union tag, how many values must be skipped in each field reader
            int[] readOffsets = new int[fieldReaders.size()];
            for (byte tag : dataStream.next(readOffset)) {
                readOffsets[tag]++;
            }
            for (int i = 0; i < fieldReaders.size(); i++) {
                fieldReaders.get(i).prepareNextRead(readOffsets[i]);
            }
        }
    }
    boolean[] nullVector = null;
    Block[] blocks;
    if (presentStream == null) {
        blocks = getBlocks(nextBatchSize);
    } else {
        nullVector = new boolean[nextBatchSize];
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        if (nullValues != nextBatchSize) {
            blocks = getBlocks(nextBatchSize - nullValues);
        } else {
            // the whole batch is null: build an empty tag block plus one empty block per field
            List<Type> typeParameters = type.getTypeParameters();
            blocks = new Block[typeParameters.size() + 1];
            blocks[0] = TINYINT.createBlockBuilder(null, 0).build();
            for (int i = 0; i < typeParameters.size(); i++) {
                blocks[i + 1] = typeParameters.get(i).createBlockBuilder(null, 0).build();
            }
        }
    }
    // all field blocks must report the same position count
    verify(Arrays.stream(blocks).mapToInt(Block::getPositionCount).distinct().count() == 1);
    Block rowBlock = RowBlock.fromFieldBlocks(nextBatchSize, Optional.ofNullable(nullVector), blocks);
    readOffset = 0;
    nextBatchSize = 0;
    return rowBlock;
}
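When skipping already-read rows, each skipped union value consumes one position in exactly one field reader, selected by its tag byte; the loop above tallies the tags so each field reader advances by only its own share. A minimal standalone sketch of that tallying step (countPerFieldSkips, tags, and numFields are hypothetical stand-ins for the inlined loop over dataStream.next(readOffset)):

    // Hypothetical sketch: turn a batch of union tag bytes into per-field
    // skip counts, as readBlock() does before calling prepareNextRead().
    static int[] countPerFieldSkips(byte[] tags, int numFields) {
        int[] skips = new int[numFields];
        for (byte tag : tags) {
            skips[tag]++; // each value belongs to exactly one field, chosen by its tag
        }
        return skips;
    }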
Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
From the class SliceDictionaryColumnReader, method readBlock():
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the data reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (dataStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
            }
            dataStream.skip(readOffset);
        }
    }
    Block block;
    if (dataStream == null) {
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        block = readAllNullsBlock();
    } else if (presentStream == null) {
        block = readNonNullBlock();
    } else {
        boolean[] isNull = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNull);
        if (nullCount == 0) {
            block = readNonNullBlock();
        } else if (nullCount != nextBatchSize) {
            block = readNullBlock(isNull, nextBatchSize - nullCount);
        } else {
            block = readAllNullsBlock();
        }
    }
    readOffset = 0;
    nextBatchSize = 0;
    return block;
}
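The three-way branch on nullCount (no nulls, some nulls, all nulls) hinges on the contract of presentStream.getUnsetBits: it fills an isNull vector and returns how many bits were unset. A minimal sketch of that contract, assuming the present bits are modeled as a plain boolean array rather than the actual bit-packed stream:

    // Hypothetical sketch of the getUnsetBits contract: mark nulls where the
    // present bit is unset and return the number of nulls found.
    static int getUnsetBits(boolean[] present, boolean[] isNull) {
        int nullCount = 0;
        for (int i = 0; i < present.length; i++) {
            isNull[i] = !present[i]; // an unset present bit means the value is null
            if (isNull[i]) {
                nullCount++;
            }
        }
        return nullCount;
    }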
Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
From the class SliceDirectColumnReader, method readBlock():
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the length reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but length stream is missing");
            }
            long dataSkipSize = lengthStream.sum(readOffset);
            if (dataSkipSize > 0) {
                if (dataStream == null) {
                    throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
                }
                dataStream.skip(dataSkipSize);
            }
        }
    }
    if (lengthStream == null) {
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        Block nullValueBlock = readAllNullsBlock();
        readOffset = 0;
        nextBatchSize = 0;
        return nullValueBlock;
    }
    // create new isNullVector and offsetVector for VariableWidthBlock
    boolean[] isNullVector = null;
    // We will use the offsetVector as the buffer to read the length values from lengthStream,
    // and the length values will be converted in-place to an offset vector.
    int[] offsetVector = new int[nextBatchSize + 1];
    if (presentStream == null) {
        lengthStream.next(offsetVector, nextBatchSize);
    } else {
        isNullVector = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNullVector);
        if (nullCount == nextBatchSize) {
            // all nulls
            Block nullValueBlock = readAllNullsBlock();
            readOffset = 0;
            nextBatchSize = 0;
            return nullValueBlock;
        }
        if (lengthStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but length stream is missing");
        }
        if (nullCount == 0) {
            isNullVector = null;
            lengthStream.next(offsetVector, nextBatchSize);
        } else {
            lengthStream.next(offsetVector, nextBatchSize - nullCount);
            unpackLengthNulls(offsetVector, isNullVector, nextBatchSize - nullCount);
        }
    }
    // Calculate the total length for all entries. Note that the values in the offsetVector are still length values now.
    long totalLength = 0;
    for (int i = 0; i < nextBatchSize; i++) {
        totalLength += offsetVector[i];
    }
    int currentBatchSize = nextBatchSize;
    readOffset = 0;
    nextBatchSize = 0;
    if (totalLength == 0) {
        return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector));
    }
    if (totalLength > ONE_GIGABYTE) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, format("Values in column \"%s\" are too large to process for Trino. %s column values are larger than 1GB [%s]", column.getPath(), currentBatchSize, column.getOrcDataSourceId()));
    }
    if (dataStream == null) {
        throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
    }
    // allocate enough space to read
    byte[] data = new byte[toIntExact(totalLength)];
    Slice slice = Slices.wrappedBuffer(data);
    if (maxCodePointCount < 0) {
        // unbounded, simply read all data in one shot
        dataStream.next(data, 0, data.length);
        convertLengthVectorToOffsetVector(offsetVector);
    } else {
        // We do the following operations together in the for loop:
        // * truncate strings
        // * convert original length values in offsetVector into truncated offset values
        int currentLength = offsetVector[0];
        offsetVector[0] = 0;
        for (int i = 1; i <= currentBatchSize; i++) {
            int nextLength = offsetVector[i];
            if (isNullVector != null && isNullVector[i - 1]) {
                checkState(currentLength == 0, "Corruption in slice direct stream: length is non-zero for null entry");
                offsetVector[i] = offsetVector[i - 1];
                currentLength = nextLength;
                continue;
            }
            int offset = offsetVector[i - 1];
            // read data without truncation
            dataStream.next(data, offset, offset + currentLength);
            // adjust offsetVector with truncated length
            int truncatedLength = computeTruncatedLength(slice, offset, currentLength, maxCodePointCount, isCharType);
            verify(truncatedLength >= 0);
            offsetVector[i] = offset + truncatedLength;
            currentLength = nextLength;
        }
    }
    // this can lead to over-retention, but that is unlikely given that truncation rarely happens
    return new VariableWidthBlock(currentBatchSize, slice, offsetVector, Optional.ofNullable(isNullVector));
}
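The reader leans on two helpers whose behavior is worth spelling out: unpackLengthNulls scatters the densely-read lengths back to their original positions (nulls get length 0), and convertLengthVectorToOffsetVector turns lengths into cumulative offsets in place. A sketch of plausible implementations, matching the call sites above but not necessarily Trino's actual code:

    // Hypothetical sketch: scatter nonNullCount densely-packed lengths back to
    // their original positions, writing 0 at null positions. Iterating right to
    // left lets the unpacking happen in place without clobbering unread values.
    static void unpackLengthNulls(int[] values, boolean[] isNull, int nonNullCount) {
        int nullSuppressedPosition = nonNullCount - 1;
        for (int position = isNull.length - 1; position >= 0; position--) {
            if (isNull[position]) {
                values[position] = 0;
            } else {
                values[position] = values[nullSuppressedPosition];
                nullSuppressedPosition--;
            }
        }
    }

    // Hypothetical sketch: convert lengths to start offsets in place; on return,
    // vector[i] is the start of entry i and vector[vector.length - 1] is the end.
    static void convertLengthVectorToOffsetVector(int[] vector) {
        int currentLength = vector[0];
        vector[0] = 0;
        for (int i = 1; i < vector.length; i++) {
            int nextLength = vector[i];
            vector[i] = vector[i - 1] + currentLength;
            currentLength = nextLength;
        }
    }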
Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
From the class ByteInputStream, method next():
public void next(byte[] values, int items) throws IOException {
    int outputOffset = 0;
    while (outputOffset < items) {
        if (offset == length) {
            readNextBlock();
        }
        if (length == 0) {
            throw new OrcCorruptionException(input.getOrcDataSourceId(), "Unexpected end of stream");
        }
        int chunkSize = min(items - outputOffset, length - offset);
        System.arraycopy(buffer, offset, values, outputOffset, chunkSize);
        outputOffset += chunkSize;
        offset += chunkSize;
    }
}
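Because the loop refills the internal buffer and copies in chunks until the request is satisfied, a caller either receives exactly items bytes or gets an OrcCorruptionException; a short read is never returned. A hedged usage sketch (tagStream is a hypothetical, already-opened ByteInputStream):

    // Hypothetical usage: read exactly 1024 tag bytes in one call.
    byte[] tags = new byte[1024];
    tagStream.next(tags, tags.length); // throws OrcCorruptionException if the stream ends early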
Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
From the class CompressedOrcChunkLoader, method seekToCheckpoint():
@Override
public void seekToCheckpoint(long checkpoint) throws IOException {
    int compressedOffset = decodeCompressedBlockOffset(checkpoint);
    if (compressedOffset >= dataReader.getSize()) {
        throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Seek past end of stream");
    }
    // is the compressed offset within the current compressed buffer?
    if (compressedBufferStart <= compressedOffset && compressedOffset < compressedBufferStart + compressedBufferStream.length()) {
        compressedBufferStream.setPosition(compressedOffset - compressedBufferStart);
    } else {
        compressedBufferStart = compressedOffset;
        compressedBufferStream = EMPTY_SLICE.getInput();
    }
    nextUncompressedOffset = decodeDecompressedOffset(checkpoint);
    lastCheckpoint = checkpoint;
}
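A checkpoint for a compressed stream packs two coordinates into a single long: the offset of the compressed block within the stream and the decompressed offset inside that block. A sketch of one packing consistent with the two decode calls above; the bit layout shown is an assumption, not necessarily what Trino's checkpoint helpers actually use:

    // Hypothetical sketch: compressed block offset in the high 32 bits,
    // decompressed offset within the block in the low 32 bits.
    static long createCheckpoint(int compressedBlockOffset, int decompressedOffset) {
        return (((long) compressedBlockOffset) << 32) | (decompressedOffset & 0xFFFFFFFFL);
    }

    static int decodeCompressedBlockOffset(long checkpoint) {
        return (int) (checkpoint >>> 32);
    }

    static int decodeDecompressedOffset(long checkpoint) {
        return (int) checkpoint;
    }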