Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
Class BooleanBatchStreamReader, method readBlock.
/**
 * Reads the next {@code nextBatchSize} positions of this boolean column into a block.
 *
 * <p>The row group is opened on first use. Rows counted in {@code readOffset} are skipped
 * before reading: the present stream (when there is one) is advanced and the number of set
 * bits it reports becomes the number of values to skip in the data stream. After the block
 * has been produced, {@code readOffset} and {@code nextBatchSize} are reset to zero.
 *
 * <p>The "no data stream" case is handled in exactly one place; a missing data stream is
 * only legal when a present stream exists, in which case the whole batch is null.
 *
 * @return a block with {@code nextBatchSize} positions; a run-length encoded null block
 *         when every position in the batch is null
 * @throws OrcCorruptionException if values must be skipped but there is no data stream, or
 *         if both the data stream and the present stream are missing
 * @throws IOException if {@code openRowGroup} or one of the stream operations fails
 */
@Override
public Block readBlock()
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the data reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (dataStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            dataStream.skip(readOffset);
        }
    }

    Block block;
    if (dataStream == null) {
        // without a data stream there are no values, so the batch can only be all nulls
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        block = RunLengthEncodedBlock.create(BOOLEAN, null, nextBatchSize);
    } else if (presentStream == null) {
        // no present stream: every position holds a value
        block = readNonNullBlock();
    } else {
        boolean[] isNull = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNull);
        if (nullCount == 0) {
            block = readNonNullBlock();
        } else if (nullCount != nextBatchSize) {
            // mixed batch: the second argument is the number of non-null values
            block = readNullBlock(isNull, nextBatchSize - nullCount);
        } else {
            block = RunLengthEncodedBlock.create(BOOLEAN, null, nextBatchSize);
        }
    }

    readOffset = 0;
    nextBatchSize = 0;
    return block;
}
Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
Class SliceDictionaryBatchStreamReader, method openRowGroup.
/**
 * Prepares this reader for reading a row group.
 *
 * <p>If {@code stripeDictionaryOpen} is not yet set, the stripe dictionary is loaded first:
 * entry lengths are read, fresh data and offset arrays are allocated, and the values are
 * decoded with {@code readDictionary}. An empty stripe dictionary uses the shared empty
 * constants instead. Afterwards the row group dictionary, when its length stream exists, is
 * appended after the stripe dictionary entries; otherwise the stripe dictionary is used on
 * its own. Finally the present, in-dictionary and data streams are opened and
 * {@code rowGroupOpen} is set.
 *
 * @throws OrcCorruptionException if the stripe dictionary is declared non-empty but its
 *         length stream is missing
 * @throws ArithmeticException if the summed dictionary entry lengths do not fit in an int
 * @throws IOException if opening or reading one of the streams fails
 */
private void openRowGroup()
        throws IOException
{
    // read the dictionary
    if (!stripeDictionaryOpen) {
        if (stripeDictionarySize > 0) {
            // resize the dictionary lengths array if necessary
            if (stripeDictionaryLength.length < stripeDictionarySize) {
                stripeDictionaryLength = new int[stripeDictionarySize];
            }

            // read the lengths
            LongInputStream lengthStream = stripeDictionaryLengthStreamSource.openStream();
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is not present");
            }
            lengthStream.next(stripeDictionaryLength, stripeDictionarySize);

            // only the first stripeDictionarySize entries are valid; the array may be larger
            long dataLength = 0;
            for (int i = 0; i < stripeDictionarySize; i++) {
                dataLength += stripeDictionaryLength[i];
            }

            // we must always create a new dictionary array because the previous dictionary may still be referenced
            stripeDictionaryData = new byte[toIntExact(dataLength)];
            // add one extra entry for null
            stripeDictionaryOffsetVector = new int[stripeDictionarySize + 2];

            // setBytes records an absolute value rather than a delta, so the three stripe
            // dictionary arrays are reported together in one call; reporting them one after
            // another would leave only the last array accounted for
            systemMemoryContext.setBytes(sizeOf(stripeDictionaryLength) + sizeOf(stripeDictionaryData) + sizeOf(stripeDictionaryOffsetVector));

            // read dictionary values
            ByteArrayInputStream dictionaryDataStream = stripeDictionaryDataStreamSource.openStream();
            readDictionary(dictionaryDataStream, stripeDictionarySize, stripeDictionaryLength, 0, stripeDictionaryData, stripeDictionaryOffsetVector, maxCodePointCount, isCharType);
        } else {
            stripeDictionaryData = EMPTY_DICTIONARY_DATA;
            stripeDictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
        }
    }
    stripeDictionaryOpen = true;

    // read row group dictionary
    RowGroupDictionaryLengthInputStream dictionaryLengthStream = rowGroupDictionaryLengthStreamSource.openStream();
    if (dictionaryLengthStream != null) {
        int rowGroupDictionarySize = dictionaryLengthStream.getEntryCount();

        // resize the dictionary lengths array if necessary
        if (rowGroupDictionaryLength.length < rowGroupDictionarySize) {
            rowGroupDictionaryLength = new int[rowGroupDictionarySize];
        }

        // read the lengths
        dictionaryLengthStream.next(rowGroupDictionaryLength, rowGroupDictionarySize);
        long dataLength = 0;
        for (int i = 0; i < rowGroupDictionarySize; i++) {
            dataLength += rowGroupDictionaryLength[i];
        }

        // We must always create a new dictionary array because the previous dictionary may still be referenced
        // The first elements of the dictionary are from the stripe dictionary, then the row group dictionary elements, and then a null
        byte[] rowGroupDictionaryData = Arrays.copyOf(stripeDictionaryData, stripeDictionaryOffsetVector[stripeDictionarySize] + toIntExact(dataLength));
        int[] rowGroupDictionaryOffsetVector = Arrays.copyOf(stripeDictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 2);

        // read dictionary values, writing them after the stripe dictionary entries
        ByteArrayInputStream dictionaryDataStream = rowGroupDictionaryDataStreamSource.openStream();
        readDictionary(dictionaryDataStream, rowGroupDictionarySize, rowGroupDictionaryLength, stripeDictionarySize, rowGroupDictionaryData, rowGroupDictionaryOffsetVector, maxCodePointCount, isCharType);
        setDictionaryBlockData(rowGroupDictionaryData, rowGroupDictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 1);
    } else {
        // there is no row group dictionary so use the stripe dictionary
        setDictionaryBlockData(stripeDictionaryData, stripeDictionaryOffsetVector, stripeDictionarySize + 1);
    }

    presentStream = presentStreamSource.openStream();
    inDictionaryStream = inDictionaryStreamSource.openStream();
    dataStream = dataStreamSource.openStream();

    rowGroupOpen = true;
}
Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
Class SliceDictionaryBatchStreamReader, method readBlock.
/**
 * Produces a dictionary block covering the next {@code nextBatchSize} positions.
 *
 * <p>Each position is mapped to an id into {@code dictionaryBlock}. Ids read from the data
 * stream are shifted by {@code stripeDictionarySize} when the in-dictionary stream reports a
 * cleared bit, and null positions point at the last dictionary entry. Rows counted in
 * {@code readOffset} are skipped beforehand, and both {@code readOffset} and
 * {@code nextBatchSize} are zeroed once the block has been built.
 *
 * @return a {@code DictionaryBlock} over {@code dictionaryBlock} with one id per position
 * @throws OrcCorruptionException if a non-null value is expected but no data stream exists
 * @throws IOException if {@code openRowGroup} or a stream operation fails
 */
@Override
public Block readBlock()
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Translate skipped rows into skipped values: only rows whose present bit is set
    // consume an entry from the data (and in-dictionary) stream.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        if (inDictionaryStream != null) {
            inDictionaryStream.skip(readOffset);
        }
        dataStream.skip(readOffset);
    }

    int batchSize = nextBatchSize;
    int[] ids = new int[batchSize];

    if (presentStream != null) {
        // The column may contain nulls
        if (dataStream != null) {
            for (int position = 0; position < batchSize; position++) {
                if (presentStream.nextBit()) {
                    int id = toIntExact(dataStream.next());
                    if (inDictionaryStream != null && !inDictionaryStream.nextBit()) {
                        // entries of the row group dictionary follow the stripe dictionary
                        id += stripeDictionarySize;
                    }
                    ids[position] = id;
                } else {
                    // the null entry sits at the end of the slice dictionary
                    ids[position] = dictionaryBlock.getPositionCount() - 1;
                }
            }
        } else {
            // A missing data stream is acceptable only if the whole batch is null. The
            // dictionary then holds nothing but the null entry, so the zero-initialised
            // ids already reference it and need no further update.
            int nullPositions = presentStream.getUnsetBits(batchSize);
            if (nullPositions != batchSize) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
        }
    } else {
        // The column has no nulls, so every position needs an id from the data stream
        if (dataStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        if (inDictionaryStream != null) {
            for (int position = 0; position < batchSize; position++) {
                int id = toIntExact(dataStream.next());
                if (!inDictionaryStream.nextBit()) {
                    // entries of the row group dictionary follow the stripe dictionary
                    id += stripeDictionarySize;
                }
                ids[position] = id;
            }
        } else {
            dataStream.next(ids, batchSize);
        }
    }

    Block result = new DictionaryBlock(batchSize, dictionaryBlock, ids);
    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
Class TimestampBatchStreamReader, method readBlock.
/**
 * Reads the next {@code nextBatchSize} timestamp positions into a block.
 *
 * <p>Pending rows in {@code readOffset} are skipped first; with a present stream, only the
 * rows whose bit is set are skipped in the seconds and nanos streams. When neither value
 * stream exists the batch must be entirely null. {@code readOffset} and
 * {@code nextBatchSize} are reset to zero before returning.
 *
 * @return a block with {@code nextBatchSize} positions; a run-length encoded null block
 *         when every position in the batch is null
 * @throws OrcCorruptionException if values must be skipped while the seconds or nanos
 *         stream is missing, or if all of the value and present streams are missing
 * @throws IOException if {@code openRowGroup} or a stream operation fails
 */
@Override
public Block readBlock()
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Convert the number of rows to skip into the number of values to skip by counting
    // the set bits while advancing the present stream.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (secondsStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but seconds stream is missing");
        }
        if (nanosStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but nanos stream is missing");
        }
        secondsStream.skip(readOffset);
        nanosStream.skip(readOffset);
    }

    Block result;
    boolean hasValueStream = secondsStream != null || nanosStream != null;
    if (hasValueStream) {
        if (presentStream != null) {
            boolean[] nullFlags = new boolean[nextBatchSize];
            int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
            if (nulls == 0) {
                // checked first so that an empty batch also takes the non-null path
                result = readNonNullBlock();
            } else if (nulls == nextBatchSize) {
                result = RunLengthEncodedBlock.create(TIMESTAMP, null, nextBatchSize);
            } else {
                result = readNullBlock(nullFlags);
            }
        } else {
            // no present stream: every position holds a value
            result = readNonNullBlock();
        }
    } else {
        // no value streams at all: only an all-null batch is possible
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(TIMESTAMP, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
Class LongDictionaryProvider, method loadDictionary.
/**
 * Reads {@code items} long values from the given dictionary stream source.
 *
 * <p>The supplied buffer is reused when it is non-null and has at least {@code items}
 * slots; otherwise a new array of exactly {@code items} slots is allocated.
 *
 * @param streamDescriptor descriptor used for the data source id and the error message
 * @param dictionaryDataStream source from which the dictionary stream is opened
 * @param dictionaryBuffer optional buffer to reuse; may be {@code null}
 * @param items number of dictionary entries to read
 * @return a {@code DictionaryResult} wrapping the filled buffer, constructed with its
 *         boolean flag set to {@code true} (NOTE(review): meaning of the flag is defined
 *         by {@code DictionaryResult} — confirm there)
 * @throws OrcCorruptionException if the source yields no stream
 * @throws IOException if opening or reading the stream fails
 */
private DictionaryResult loadDictionary(StreamDescriptor streamDescriptor, InputStreamSource<LongInputStream> dictionaryDataStream, long[] dictionaryBuffer, int items)
        throws IOException
{
    // The stream is opened and consumed exactly once per stream descriptor per stripe,
    // so there is little point in caching it.
    LongInputStream stream = dictionaryDataStream.openStream();
    if (stream == null) {
        throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but data stream is not present for %s", streamDescriptor);
    }

    boolean reusable = dictionaryBuffer != null && dictionaryBuffer.length >= items;
    long[] values = reusable ? dictionaryBuffer : new long[items];
    stream.next(values, items);
    return new DictionaryResult(values, true);
}
Aggregations