use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class LongDirectBatchStreamReader method readBlock.
/**
 * Reads the next {@code nextBatchSize} positions into a {@link Block}.
 *
 * <p>Any pending skip recorded in {@code readOffset} is applied first. When a present
 * stream exists, the skip is translated into the number of set bits it covers, and only
 * that many entries are skipped in the data stream.
 *
 * <p>Both {@code readOffset} and {@code nextBatchSize} are reset to zero before returning.
 *
 * @return the block for the current batch
 * @throws OrcCorruptionException if a value must be read or skipped but the data stream is
 *         missing, or if both the data stream and the present stream are missing
 * @throws IOException if an underlying stream read fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Translate the pending row skip into a count of set present bits, since only those
    // positions have an entry in the data stream.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    final Block result;
    if (dataStream != null && presentStream == null) {
        // No present stream: every position in the batch carries a value.
        result = readNonNullBlock();
    } else if (dataStream != null) {
        final boolean[] nullFlags = new boolean[nextBatchSize];
        final int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
        if (nulls == 0) {
            result = readNonNullBlock();
        } else if (nulls == nextBatchSize) {
            // Every position is null, so nothing has to be read from the data stream.
            result = RunLengthEncodedBlock.create(type, null, nextBatchSize);
        } else {
            result = readNullBlock(nullFlags, nextBatchSize - nulls);
        }
    } else {
        // No data stream: the whole batch is null, which requires a present stream.
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(type, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class MapDirectBatchStreamReader method readBlock.
/**
 * Reads the next {@code nextBatchSize} map positions into a {@link Block}.
 *
 * <p>Any pending skip recorded in {@code readOffset} is applied first: the skipped
 * positions are converted into a number of map entries via the length stream, and the key
 * and value readers are told to skip that many entries.
 *
 * <p>The per-position lengths are then read into {@code offsetVector}, the key and value
 * blocks are read for the total number of entries, and the lengths are converted in place
 * into offsets before the map block is built.
 *
 * <p>Both {@code readOffset} and {@code nextBatchSize} are reset to zero before returning.
 *
 * @return the map block for the current batch
 * @throws OrcCorruptionException if a non-null value is expected but the length stream is
 *         missing
 * @throws ArithmeticException if the number of entries to skip, or the total number of
 *         entries in the batch, does not fit in an {@code int}
 * @throws IOException if an underlying stream read fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the data reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            long entrySkipSize = lengthStream.sum(readOffset);
            keyStreamReader.prepareNextRead(toIntExact(entrySkipSize));
            valueStreamReader.prepareNextRead(toIntExact(entrySkipSize));
        }
    }
    // We will use the offsetVector as the buffer to read the length values from lengthStream,
    // and the length values will be converted in-place to an offset vector.
    int[] offsetVector = new int[nextBatchSize + 1];
    boolean[] nullVector = null;
    if (presentStream == null) {
        if (lengthStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        lengthStream.next(offsetVector, nextBatchSize);
    } else {
        nullVector = new boolean[nextBatchSize];
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        if (nullValues != nextBatchSize) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            lengthStream.next(offsetVector, nextBatchSize - nullValues);
            unpackLengthNulls(offsetVector, nullVector, nextBatchSize - nullValues);
        }
    }
    Type keyType = type.getKeyType();
    Type valueType = type.getValueType();
    // Calculate the entryCount. Note that the values in the offsetVector are still length values now.
    // The lengths come from the file, so accumulate in a long and narrow with toIntExact:
    // a plain int sum could wrap around silently and make a huge batch look empty.
    // This mirrors the overflow check already applied to the skip size above.
    long totalEntries = 0;
    for (int i = 0; i < offsetVector.length - 1; i++) {
        totalEntries += offsetVector[i];
    }
    int entryCount = toIntExact(totalEntries);
    Block keys;
    Block values;
    if (entryCount > 0) {
        keyStreamReader.prepareNextRead(entryCount);
        valueStreamReader.prepareNextRead(entryCount);
        keys = keyStreamReader.readBlock();
        values = valueStreamReader.readBlock();
    } else {
        // No entries in this batch: use empty key and value blocks.
        keys = keyType.createBlockBuilder(null, 0).build();
        values = valueType.createBlockBuilder(null, 1).build();
    }
    // When null keys are not enabled, entries with null keys are filtered out (this also
    // receives the length vector, so it must run before the conversion to offsets).
    Block[] keyValueBlock = options.mapNullKeysEnabled() ? new Block[] { keys, values } : filterOutNullKeys(nextBatchSize, keys, values, offsetVector);
    convertLengthVectorToOffsetVector(offsetVector);
    Block block = type.createBlockFromKeyValue(nextBatchSize, Optional.ofNullable(nullVector), offsetVector, keyValueBlock[0], keyValueBlock[1]);
    readOffset = 0;
    nextBatchSize = 0;
    return block;
}
use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class ByteInputStream method readNextBlock.
/**
 * Decodes the next run from {@code input} into {@code buffer}, resetting {@code offset} to
 * zero and setting {@code length} to the number of decoded bytes.
 *
 * <p>The first byte read is a control byte. If its high bit is clear, the run is a
 * repetition of a single value, {@code control + MIN_REPEAT_SIZE} bytes long. Otherwise the
 * run is a sequence of {@code 0x100 - control} literal bytes.
 *
 * <p>This is based on the Apache Hive ORC code.
 *
 * @throws OrcCorruptionException if the input ends where the control byte or the repeated
 *         value is expected
 * @throws IOException if reading from the input fails
 */
private void readNextBlock() throws IOException {
    lastReadInputCheckpoint = input.getCheckpoint();

    final int header = input.read();
    if (header == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of buffer RLE byte");
    }
    offset = 0;

    final boolean literalRun = (header & 0x80) != 0;
    if (literalRun) {
        // the literal count is the 2's complement of the control byte
        length = 0x100 - header;
        input.readFully(buffer, 0, length);
        return;
    }

    // repetition: one value byte follows and is expanded into the buffer
    length = header + MIN_REPEAT_SIZE;
    final int repeated = input.read();
    if (repeated == -1) {
        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Reading RLE byte got EOF");
    }
    Arrays.fill(buffer, 0, length, (byte) repeated);
}
use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class ByteInputStream method next.
/**
 * Copies the next {@code items} decoded bytes into {@code values}, starting at index 0.
 *
 * <p>Bytes are taken from the current decoded run in {@code buffer}; whenever that run is
 * used up, the next one is decoded with {@code readNextBlock()}.
 *
 * @param values destination array
 * @param items number of bytes to copy
 * @throws OrcCorruptionException if the current run has zero length
 * @throws IOException if decoding the next run fails
 */
public void next(byte[] values, int items) throws IOException {
    for (int written = 0; written < items; ) {
        if (offset == length) {
            readNextBlock();
        }
        if (length == 0) {
            throw new OrcCorruptionException(input.getOrcDataSourceId(), "Unexpected end of stream");
        }

        // Copy as much as is both still wanted and still available in the current run.
        final int wanted = items - written;
        final int available = length - offset;
        final int copyCount = Math.min(wanted, available);
        System.arraycopy(buffer, offset, values, written, copyCount);

        offset += copyCount;
        written += copyCount;
    }
}
use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
the class ByteBatchStreamReader method readBlock.
/**
 * Reads the next {@code nextBatchSize} positions into a {@link Block}.
 *
 * <p>Any pending skip recorded in {@code readOffset} is applied first. When a present
 * stream exists, the skip is translated into the number of set bits it covers, and only
 * that many entries are skipped in the data stream.
 *
 * <p>Both {@code readOffset} and {@code nextBatchSize} are reset to zero before returning.
 *
 * @return the block for the current batch
 * @throws OrcCorruptionException if a value must be read or skipped but the data stream is
 *         missing, or if both the data stream and the present stream are missing
 * @throws IOException if an underlying stream read fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Only positions whose present bit is set have an entry in the data stream, so the
    // row skip is first converted into a count of set bits.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    final Block result;
    if (dataStream != null && presentStream == null) {
        // No present stream: every position in the batch carries a value.
        result = readNonNullBlock();
    } else if (dataStream != null) {
        final boolean[] nullFlags = new boolean[nextBatchSize];
        final int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
        if (nulls == 0) {
            result = readNonNullBlock();
        } else if (nulls == nextBatchSize) {
            // Every position is null, so nothing has to be read from the data stream.
            result = RunLengthEncodedBlock.create(TINYINT, null, nextBatchSize);
        } else {
            result = readNullBlock(nullFlags, nextBatchSize - nulls);
        }
    } else {
        // No data stream: the whole batch is null, which requires a present stream.
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(TINYINT, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Aggregations