Use of com.facebook.presto.orc.OrcCorruptionException in the project presto by prestodb.
Class MapStreamReader, method readBlock.
/**
 * Reads the next {@code nextBatchSize} map values and returns them as an {@link ArrayBlock}
 * whose element block interleaves keys and values.
 *
 * <p>Any pending {@code readOffset} is first consumed by skipping the present and length
 * streams and telling the key and value readers how many entries to skip. On return both
 * {@code readOffset} and {@code nextBatchSize} are reset to zero.
 *
 * @param type the map type; its first type parameter is used as the key type and the second
 *         as the value type
 * @return an array block with one position per map in the batch
 * @throws OrcCorruptionException if non-null values are expected but the length stream is missing
 * @throws ArithmeticException if the number of entries to skip or read, or an offset into the
 *         key/value block, does not fit in an {@code int}
 * @throws IOException if reading an underlying stream fails
 */
@Override
public Block readBlock(Type type)
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the length reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            long entrySkipSize = lengthStream.sum(readOffset);
            keyStreamReader.prepareNextRead(toIntExact(entrySkipSize));
            valueStreamReader.prepareNextRead(toIntExact(entrySkipSize));
        }
    }

    // The length vector could be reused, but this simplifies the code below by
    // taking advantage of null entries being initialized to zero. The vector
    // could be reinitialized for each loop, but that is likely just as expensive
    // as allocating a new array
    int[] lengthVector = new int[nextBatchSize];
    boolean[] nullVector = new boolean[nextBatchSize];
    if (presentStream == null) {
        if (lengthStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        lengthStream.nextIntVector(nextBatchSize, lengthVector);
    }
    else {
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        if (nullValues != nextBatchSize) {
            if (lengthStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            lengthStream.nextIntVector(nextBatchSize, lengthVector, nullVector);
        }
    }

    Type keyType = type.getTypeParameters().get(0);
    Type valueType = type.getTypeParameters().get(1);

    // Sum in a long and narrow explicitly so that an oversized total fails loudly,
    // matching the toIntExact guard already used on the skip path above, instead of
    // silently wrapping around.
    long totalEntries = 0;
    for (int length : lengthVector) {
        totalEntries += length;
    }
    int entryCount = toIntExact(totalEntries);

    Block keys;
    Block values;
    if (entryCount > 0) {
        keyStreamReader.prepareNextRead(entryCount);
        valueStreamReader.prepareNextRead(entryCount);
        keys = keyStreamReader.readBlock(keyType);
        values = valueStreamReader.readBlock(valueType);
    }
    else {
        // nothing to read: use empty key and value blocks
        keys = keyType.createBlockBuilder(new BlockBuilderStatus(), 0).build();
        values = valueType.createBlockBuilder(new BlockBuilderStatus(), 1).build();
    }

    InterleavedBlock keyValueBlock = createKeyValueBlock(nextBatchSize, keys, values, lengthVector);

    // convert lengths into offsets into the keyValueBlock (e.g., two positions per entry)
    int[] offsets = new int[nextBatchSize + 1];
    for (int i = 1; i < offsets.length; i++) {
        // widen before doubling and adding so an offset past Integer.MAX_VALUE is detected
        offsets[i] = toIntExact(offsets[i - 1] + 2L * lengthVector[i - 1]);
    }

    ArrayBlock arrayBlock = new ArrayBlock(nextBatchSize, nullVector, offsets, keyValueBlock);

    readOffset = 0;
    nextBatchSize = 0;
    return arrayBlock;
}
Use of com.facebook.presto.orc.OrcCorruptionException in the project presto by prestodb.
Class SliceDictionaryStreamReader, method openRowGroup.
/**
 * Opens the streams needed to read the current row group.
 *
 * <p>If the stripe dictionary has not been opened yet it is read first. Then, when a row
 * group dictionary length stream exists, a combined dictionary array is built from the
 * stripe entries followed by the row group entries; otherwise the stripe dictionary is
 * used as is. Finally the present, in-dictionary and data streams are opened and the row
 * group is marked open.
 *
 * @param type the type passed through to {@code readDictionary}
 * @throws OrcCorruptionException if the stripe dictionary is non-empty but its length
 *         stream is missing
 * @throws IOException if reading an underlying stream fails
 */
private void openRowGroup(Type type)
        throws IOException
{
    // load the stripe level dictionary the first time through
    if (!stripeDictionaryOpen) {
        // Always allocate a fresh array, since an earlier dictionary may still be referenced.
        // One slot beyond the dictionary size is reserved for null.
        stripeDictionary = new Slice[stripeDictionarySize + 1];
        if (stripeDictionarySize > 0) {
            int[] stripeEntryLengths = new int[stripeDictionarySize];

            // entry lengths
            LongStream stripeLengths = stripeDictionaryLengthStreamSource.openStream();
            if (stripeLengths == null) {
                throw new OrcCorruptionException("Dictionary is not empty but dictionary length stream is not present");
            }
            stripeLengths.nextIntVector(stripeDictionarySize, stripeEntryLengths);

            // entry bytes
            ByteArrayStream stripeData = stripeDictionaryDataStreamSource.openStream();
            readDictionary(stripeData, stripeDictionarySize, stripeEntryLengths, 0, stripeDictionary, type);
        }
    }
    stripeDictionaryOpen = true;

    // row group level dictionary, if any
    RowGroupDictionaryLengthStream rowGroupLengths = rowGroupDictionaryLengthStreamSource.openStream();
    if (rowGroupLengths == null) {
        // no row group dictionary, so the stripe dictionary is used directly
        setDictionaryBlockData(stripeDictionary);
    }
    else {
        int rowGroupEntryCount = rowGroupLengths.getEntryCount();

        // Again a fresh array, because the previous one may still be referenced.
        // Layout: stripe dictionary entries, then row group entries, then a null slot.
        int combinedSize = stripeDictionarySize + rowGroupEntryCount + 1;
        rowGroupDictionary = Arrays.copyOf(stripeDictionary, combinedSize);
        setDictionaryBlockData(rowGroupDictionary);

        // grow the reusable lengths buffer only when it is too small
        if (rowGroupDictionaryLength.length < rowGroupEntryCount) {
            rowGroupDictionaryLength = new int[rowGroupEntryCount];
        }

        // entry lengths
        rowGroupLengths.nextIntVector(rowGroupEntryCount, rowGroupDictionaryLength);

        // entry bytes, written after the stripe dictionary entries
        ByteArrayStream rowGroupData = rowGroupDictionaryDataStreamSource.openStream();
        readDictionary(rowGroupData, rowGroupEntryCount, rowGroupDictionaryLength, stripeDictionarySize, rowGroupDictionary, type);
    }

    presentStream = presentStreamSource.openStream();
    inDictionaryStream = inDictionaryStreamSource.openStream();
    dataStream = dataStreamSource.openStream();

    rowGroupOpen = true;
}
Use of com.facebook.presto.orc.OrcCorruptionException in the project presto by prestodb.
Class SliceDictionaryStreamReader, method readBlock.
/**
 * Reads the next {@code nextBatchSize} values as a {@link DictionaryBlock} over the
 * current dictionary block.
 *
 * <p>Pending rows in {@code readOffset} are skipped first. The ids read from the data
 * stream are then adjusted: null positions point at the last dictionary position, and
 * positions flagged as not being in the stripe dictionary are shifted by
 * {@code stripeDictionarySize}. On return {@code readOffset} and {@code nextBatchSize}
 * are reset to zero.
 *
 * @param type the type handed to {@code openRowGroup} when the row group is not yet open
 * @return a dictionary block with one id per position in the batch
 * @throws OrcCorruptionException if non-null values are expected but the data stream is missing
 * @throws IOException if reading an underlying stream fails
 */
@Override
public Block readBlock(Type type)
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup(type);
    }

    if (readOffset > 0) {
        if (presentStream != null) {
            // advance the present bits; the number of set bits among them is
            // how many actual values have to be skipped in the other streams
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (dataStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            if (inDictionaryStream != null) {
                inDictionaryStream.skip(readOffset);
            }
            dataStream.skip(readOffset);
        }
    }

    // the null flags buffer is reused across calls and only grown when needed
    if (isNullVector.length < nextBatchSize) {
        isNullVector = new boolean[nextBatchSize];
    }
    // the id array is allocated per call and handed to the returned block
    int[] dataVector = new int[nextBatchSize];

    if (presentStream != null) {
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNullVector);
        boolean hasNonNullValues = nullCount != nextBatchSize;
        if (hasNonNullValues) {
            if (dataStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            dataStream.nextIntVector(nextBatchSize, dataVector, isNullVector);
        }
    }
    else {
        // no present stream, so every position carries a value
        if (dataStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        Arrays.fill(isNullVector, false);
        dataStream.nextIntVector(nextBatchSize, dataVector);
    }

    // the in-dictionary flags buffer is reused and grown the same way
    if (inDictionary.length < nextBatchSize) {
        inDictionary = new boolean[nextBatchSize];
    }
    if (inDictionaryStream != null) {
        inDictionaryStream.getSetBits(nextBatchSize, inDictionary, isNullVector);
    }
    else {
        Arrays.fill(inDictionary, true);
    }

    // turn the raw stream values into ids within the dictionary block
    for (int position = 0; position < nextBatchSize; position++) {
        if (isNullVector[position]) {
            // null is the last entry in the slice dictionary
            dataVector[position] = dictionaryBlock.getPositionCount() - 1;
        }
        else if (!inDictionary[position]) {
            // row group dictionary elements are after the main dictionary
            dataVector[position] += stripeDictionarySize;
        }
        // otherwise the value is a stripe dictionary id and is used unchanged
    }

    Block result = new DictionaryBlock(nextBatchSize, dictionaryBlock, dataVector);

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Use of com.facebook.presto.orc.OrcCorruptionException in the project presto by prestodb.
Class SliceDirectStreamReader, method readBlock.
/**
 * Reads the next {@code nextBatchSize} variable-length values and returns them as a
 * {@link SliceArrayBlock}; null positions are left as {@code null} entries.
 *
 * <p>Pending rows in {@code readOffset} are skipped first by summing their lengths and
 * skipping that many bytes of the data stream. All non-null values of the batch are then
 * read with a single call on the data stream and sliced out by length. Values of a
 * VARCHAR type are passed through {@code truncateToLength} and values of a CHAR type
 * through {@code trimSpacesAndTruncateToLength}. On return {@code readOffset} and
 * {@code nextBatchSize} are reset to zero.
 *
 * @param type the type of the values being read
 * @return a slice array block with one position per row in the batch
 * @throws OrcCorruptionException if non-null values are expected but the length or data
 *         stream is missing
 * @throws ArithmeticException if the combined length of the batch does not fit in an {@code int}
 * @throws IOException if reading an underlying stream fails
 */
@Override
public Block readBlock(Type type)
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the length reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException("Value is not null but length stream is not present");
            }
            long dataSkipSize = lengthStream.sum(readOffset);
            if (dataSkipSize > 0) {
                if (dataStream == null) {
                    throw new OrcCorruptionException("Value is not null but data stream is not present");
                }
                dataStream.skip(dataSkipSize);
            }
        }
    }

    // both vectors are reused across calls and only grown when the batch is larger
    if (isNullVector.length < nextBatchSize) {
        isNullVector = new boolean[nextBatchSize];
    }
    if (lengthVector.length < nextBatchSize) {
        lengthVector = new int[nextBatchSize];
    }

    if (presentStream == null) {
        if (lengthStream == null) {
            throw new OrcCorruptionException("Value is not null but length stream is not present");
        }
        Arrays.fill(isNullVector, false);
        lengthStream.nextIntVector(nextBatchSize, lengthVector);
    }
    else {
        int nullValues = presentStream.getUnsetBits(nextBatchSize, isNullVector);
        if (nullValues != nextBatchSize) {
            if (lengthStream == null) {
                throw new OrcCorruptionException("Value is not null but length stream is not present");
            }
            lengthStream.nextIntVector(nextBatchSize, lengthVector, isNullVector);
        }
    }

    // Only non-null positions contribute: the length vector is reused, so entries at
    // null positions may hold stale values from an earlier batch.
    int totalLength = 0;
    for (int i = 0; i < nextBatchSize; i++) {
        if (!isNullVector[i]) {
            // addExact fails fast rather than letting the total wrap around, which
            // would otherwise cause the wrong number of bytes to be read below
            totalLength = Math.addExact(totalLength, lengthVector[i]);
        }
    }

    byte[] data = EMPTY_BYTE_ARRAY;
    if (totalLength > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        data = dataStream.next(totalLength);
    }

    // carve the individual values out of the shared byte array
    Slice[] sliceVector = new Slice[nextBatchSize];
    int offset = 0;
    for (int i = 0; i < nextBatchSize; i++) {
        if (!isNullVector[i]) {
            int length = lengthVector[i];
            Slice value = Slices.wrappedBuffer(data, offset, length);
            if (isVarcharType(type)) {
                value = truncateToLength(value, type);
            }
            if (isCharType(type)) {
                value = trimSpacesAndTruncateToLength(value, type);
            }
            sliceVector[i] = value;
            // advance by the stored length, not by the possibly truncated value length
            offset += length;
        }
    }

    readOffset = 0;
    nextBatchSize = 0;
    return new SliceArrayBlock(sliceVector.length, sliceVector);
}
Use of com.facebook.presto.orc.OrcCorruptionException in the project presto by prestodb.
Class BooleanStreamReader, method readBlock.
/**
 * Reads the next {@code nextBatchSize} boolean values into a block built from
 * {@code type}'s block builder.
 *
 * <p>Pending rows in {@code readOffset} are skipped first. When a present stream exists,
 * null positions are taken from it; if the whole batch is null, only nulls are appended
 * and the data stream is not touched. On return {@code readOffset} and
 * {@code nextBatchSize} are reset to zero.
 *
 * @param type the type used to create the block builder and passed to the data stream
 * @return the block built for this batch
 * @throws OrcCorruptionException if non-null values are expected but the data stream is missing
 * @throws IOException if reading an underlying stream fails
 */
@Override
public Block readBlock(Type type)
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    if (readOffset > 0) {
        if (presentStream != null) {
            // advance the present bits; the number of set bits among them is
            // how many actual values have to be skipped in the data stream
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (dataStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            dataStream.skip(readOffset);
        }
    }

    BlockBuilder builder = type.createBlockBuilder(new BlockBuilderStatus(), nextBatchSize);

    if (presentStream != null) {
        // the null flags buffer is reused across calls and only grown when needed
        if (nullVector.length < nextBatchSize) {
            nullVector = new boolean[nextBatchSize];
        }
        int nullCount = presentStream.getUnsetBits(nextBatchSize, nullVector);
        if (nullCount == nextBatchSize) {
            // the entire batch is null, so there is nothing to read from the data stream
            for (int remaining = nextBatchSize; remaining > 0; remaining--) {
                builder.appendNull();
            }
        }
        else {
            if (dataStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            dataStream.getSetBits(type, nextBatchSize, builder, nullVector);
        }
    }
    else {
        // no present stream, so every position carries a value
        if (dataStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        dataStream.getSetBits(type, nextBatchSize, builder);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return builder.build();
}
Aggregations