Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
The readBlock method of class DoubleColumnReader.
/**
 * Produces the block for the pending batch of {@code nextBatchSize} positions,
 * first consuming any positions recorded in {@code readOffset}.
 *
 * <p>Both {@code readOffset} and {@code nextBatchSize} are reset to zero before returning.
 *
 * @return the block for the pending batch; a run-length encoded null block when
 *         every position in the batch is null
 * @throws OrcCorruptionException if a stream required for the requested positions is missing
 * @throws IOException propagated from the underlying streams
 */
@Override
public Block readBlock()
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Translate the pending row skip into a value skip: only positions whose
    // present bit is set have an entry in the data stream.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    final int batchSize = nextBatchSize;
    Block result;
    if (dataStream != null && presentStream == null) {
        // no present stream: every position has a value
        result = readNonNullBlock();
    }
    else if (dataStream != null) {
        boolean[] nullFlags = new boolean[batchSize];
        int nulls = presentStream.getUnsetBits(batchSize, nullFlags);
        // the zero check must come first so that an empty batch takes the non-null path
        if (nulls == 0) {
            result = readNonNullBlock();
        }
        else if (nulls == batchSize) {
            result = RunLengthEncodedBlock.create(DOUBLE, null, batchSize);
        }
        else {
            result = readNullBlock(nullFlags, batchSize - nulls);
        }
    }
    else {
        // no data stream: the whole batch is null, which is only legal with a present stream
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(batchSize);
        result = RunLengthEncodedBlock.create(DOUBLE, null, batchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
The readBlock method of class LongColumnReader.
/**
 * Produces the block for the pending batch of {@code nextBatchSize} positions,
 * after consuming any positions recorded in {@code readOffset}.
 *
 * <p>Both {@code readOffset} and {@code nextBatchSize} are reset to zero before returning.
 *
 * @return the block for the pending batch; a run-length encoded null block of
 *         {@code type} when every position in the batch is null
 * @throws OrcCorruptionException if a stream required for the requested positions is missing
 * @throws IOException propagated from the underlying streams
 */
@Override
public Block readBlock()
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // With a present stream, the number of values to skip is the number of
    // set bits among the skipped positions.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    final int positions = nextBatchSize;
    final boolean hasData = dataStream != null;
    final boolean hasPresent = presentStream != null;

    Block output;
    if (!hasData) {
        // without a data stream every position must be null
        if (!hasPresent) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(positions);
        output = RunLengthEncodedBlock.create(type, null, positions);
    }
    else if (!hasPresent) {
        output = readNonNullBlock();
    }
    else {
        boolean[] nullMask = new boolean[positions];
        int nullPositions = presentStream.getUnsetBits(positions, nullMask);
        // check for "no nulls" before "all nulls" so an empty batch uses the non-null path
        if (nullPositions == 0) {
            output = readNonNullBlock();
        }
        else if (nullPositions == positions) {
            output = RunLengthEncodedBlock.create(type, null, positions);
        }
        else {
            output = readNullBlock(nullMask, positions - nullPositions);
        }
    }

    readOffset = 0;
    nextBatchSize = 0;
    return output;
}
Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
The readBlock method of class MapColumnReader.
/**
 * Produces the map block for the pending batch of {@code nextBatchSize} positions,
 * after advancing the key and value readers past any positions recorded in {@code readOffset}.
 *
 * <p>Both {@code readOffset} and {@code nextBatchSize} are reset to zero before the
 * resulting block is created.
 *
 * @return the map block built from the key block, value block, offsets and optional null vector
 * @throws OrcCorruptionException if the length stream is missing while non-null maps are expected
 * @throws IOException propagated from the underlying streams and child readers
 */
@Override
public Block readBlock()
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // With a present stream, only positions whose bit is set have a length entry to skip.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (lengthStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        // the summed lengths of the skipped maps give the number of entries to skip in both children
        int skippedEntries = toIntExact(lengthStream.sum(readOffset));
        keyColumnReader.prepareNextRead(skippedEntries);
        valueColumnReader.prepareNextRead(skippedEntries);
    }

    final int batchSize = nextBatchSize;

    // This array first receives the per-map lengths and is later converted in place into offsets.
    int[] lengthsThenOffsets = new int[batchSize + 1];
    boolean[] isNull = null;
    if (presentStream != null) {
        isNull = new boolean[batchSize];
        int nonNullCount = batchSize - presentStream.getUnsetBits(batchSize, isNull);
        if (nonNullCount != 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            lengthStream.next(lengthsThenOffsets, nonNullCount);
            unpackLengthNulls(lengthsThenOffsets, isNull, nonNullCount);
        }
    }
    else {
        if (lengthStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        lengthStream.next(lengthsThenOffsets, batchSize);
    }

    // The array still holds lengths here; the trailing slot is excluded from the sum.
    int totalEntries = 0;
    for (int position = 0; position < batchSize; position++) {
        totalEntries += lengthsThenOffsets[position];
    }

    Block keys;
    Block values;
    if (totalEntries <= 0) {
        keys = type.getKeyType().createBlockBuilder(null, 0).build();
        values = type.getValueType().createBlockBuilder(null, 1).build();
    }
    else {
        keyColumnReader.prepareNextRead(totalEntries);
        valueColumnReader.prepareNextRead(totalEntries);
        keys = keyColumnReader.readBlock();
        values = blockFactory.createBlock(totalEntries, valueColumnReader::readBlock, true);
    }

    // createKeyValueBlock is called while the array still holds lengths; convert to offsets afterwards
    Block[] keyValuePair = createKeyValueBlock(batchSize, keys, values, lengthsThenOffsets);
    convertLengthVectorToOffsetVector(lengthsThenOffsets);

    readOffset = 0;
    nextBatchSize = 0;

    return type.createBlockFromKeyValue(Optional.ofNullable(isNull), lengthsThenOffsets, keyValuePair[0], keyValuePair[1]);
}
Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
The openRowGroup method of class SliceDictionaryColumnReader.
/**
 * Loads the dictionary if it has not been loaded yet, publishes it via
 * {@code setDictionaryBlockData}, and opens the present and data streams.
 *
 * @throws OrcCorruptionException if the dictionary is non-empty but its length stream is missing
 * @throws IOException propagated from the underlying streams
 */
private void openRowGroup()
        throws IOException
{
    if (!dictionaryOpen) {
        if (dictionarySize <= 0) {
            dictionaryData = EMPTY_DICTIONARY_DATA;
            dictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
        }
        else {
            // grow the reusable lengths buffer only when it is too small
            if (dictionarySize > dictionaryLength.length) {
                dictionaryLength = new int[dictionarySize];
            }

            LongInputStream lengths = dictionaryLengthStreamSource.openStream();
            if (lengths == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is missing");
            }
            lengths.next(dictionaryLength, dictionarySize);

            // accumulate in a long so that an oversized total is rejected by toIntExact below
            long totalBytes = 0;
            for (int entry = 0; entry < dictionarySize; entry++) {
                totalBytes += dictionaryLength[entry];
            }

            // a fresh array is always allocated because the previous dictionary may still be referenced
            dictionaryData = new byte[toIntExact(totalBytes)];
            // one extra entry is reserved for null
            dictionaryOffsetVector = new int[dictionarySize + 2];

            ByteArrayInputStream dictionaryBytes = dictionaryDataStreamSource.openStream();
            readDictionary(dictionaryBytes, dictionarySize, dictionaryLength, 0, dictionaryData, dictionaryOffsetVector, maxCodePointCount, isCharType);
        }
    }
    dictionaryOpen = true;
    setDictionaryBlockData(dictionaryData, dictionaryOffsetVector, dictionarySize + 1);

    presentStream = presentStreamSource.openStream();
    dataStream = dataStreamSource.openStream();

    rowGroupOpen = true;
}
Use of io.trino.orc.OrcCorruptionException in project trino by trinodb.
The next method of class LongInputStreamV1.
/**
 * Decodes the next {@code items} values into {@code values}, starting at index 0.
 *
 * @param values destination array; receives one decoded value per requested item
 * @param items number of values to decode
 * @throws OrcCorruptionException if a decoded value does not fit in a 32-bit integer
 * @throws IOException propagated from reading the underlying input
 */
@Override
public void next(int[] values, int items)
        throws IOException
{
    int written = 0;
    int remaining = items;
    while (remaining > 0) {
        // refill the decode buffer once the current run has been fully consumed
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int run = min(numLiterals - used, remaining);
        for (int i = 0; i < run; i++) {
            // a repeat run is base + index * delta; otherwise values are stored individually
            long decoded = repeat
                    ? literals[0] + ((used + i) * delta)
                    : literals[used + i];
            int narrowed = (int) decoded;
            if (decoded != narrowed) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[written + i] = narrowed;
        }

        used += run;
        written += run;
        remaining -= run;
    }
}
Aggregations