use of io.prestosql.orc.OrcCorruptionException in project boostkit-bigdata by kunpengcompute.
the class OrcDeletedRows method isDeleted.
private boolean isDeleted(OrcAcidRowId sourcePageRowId) {
if (sortedRowsIterator == null) {
for (WriteIdInfo deleteDeltaInfo : deleteDeltaLocations.getDeleteDeltas()) {
Path path = createPath(deleteDeltaLocations.getPartitionLocation(), deleteDeltaInfo, sourceFileName);
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
FileStatus fileStatus = hdfsEnvironment.doAs(sessionUser, () -> fileSystem.getFileStatus(path));
pageSources.add(pageSourceFactory.createPageSource(fileStatus.getPath(), fileStatus.getLen(), fileStatus.getModificationTime()));
} catch (FileNotFoundException ignored) {
// source file does not have a delta delete file in this location
continue;
} catch (PrestoException e) {
throw e;
} catch (OrcCorruptionException e) {
throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, format("Failed to read ORC file: %s", path), e);
} catch (RuntimeException | IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_CURSOR_ERROR, format("Failed to read ORC file: %s", path), e);
}
}
List<Type> columnTypes = ImmutableList.of(BigintType.BIGINT, IntegerType.INTEGER, BigintType.BIGINT);
// Last index for rowIdHandle
List<Integer> sortFields = ImmutableList.of(0, 1, 2);
List<SortOrder> sortOrders = ImmutableList.of(SortOrder.ASC_NULLS_FIRST, SortOrder.ASC_NULLS_FIRST, SortOrder.ASC_NULLS_FIRST);
sortedRowsIterator = HiveUtil.getMergeSortedPages(pageSources, columnTypes, sortFields, sortOrders);
}
do {
if (currentPage == null || currentPageOffset >= currentPage.getPositionCount()) {
currentPage = null;
currentPageOffset = 0;
if (sortedRowsIterator.hasNext()) {
currentPage = sortedRowsIterator.next();
} else {
// No more entries in deleted_delta
return false;
}
}
do {
deletedRowId.set(currentPage, currentPageOffset);
if (deletedRowId.compareTo(sourcePageRowId) == 0) {
// source row is deleted.
return true;
} else if (deletedRowId.compareTo(sourcePageRowId) > 0) {
// So current source row is not deleted.
return false;
}
currentPageOffset++;
} while (currentPageOffset < currentPage.getPositionCount());
} while (sortedRowsIterator.hasNext());
// No more entries;
return false;
}
use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class SliceDirectColumnReader method readBlock.
@Override
public Block readBlock() throws IOException {
if (!rowGroupOpen) {
openRowGroup();
}
if (readOffset > 0) {
if (presentStream != null) {
// skip ahead the present bit reader, but count the set bits
// and use this as the skip size for the length reader
readOffset = presentStream.countBitsSet(readOffset);
}
if (readOffset > 0) {
if (lengthStream == null) {
throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but length stream is missing");
}
long dataSkipSize = lengthStream.sum(readOffset);
if (dataSkipSize > 0) {
if (dataStream == null) {
throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
}
dataStream.skip(dataSkipSize);
}
}
}
if (lengthStream == null) {
if (presentStream == null) {
throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
}
presentStream.skip(nextBatchSize);
Block nullValueBlock = readAllNullsBlock();
readOffset = 0;
nextBatchSize = 0;
return nullValueBlock;
}
// create new isNullVector and offsetVector for VariableWidthBlock
boolean[] isNullVector = null;
// We will use the offsetVector as the buffer to read the length values from lengthStream,
// and the length values will be converted in-place to an offset vector.
int[] offsetVector = new int[nextBatchSize + 1];
if (presentStream == null) {
lengthStream.next(offsetVector, nextBatchSize);
} else {
isNullVector = new boolean[nextBatchSize];
int nullCount = presentStream.getUnsetBits(nextBatchSize, isNullVector);
if (nullCount == nextBatchSize) {
// all nulls
Block nullValueBlock = readAllNullsBlock();
readOffset = 0;
nextBatchSize = 0;
return nullValueBlock;
}
if (lengthStream == null) {
throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but length stream is missing");
}
if (nullCount == 0) {
isNullVector = null;
lengthStream.next(offsetVector, nextBatchSize);
} else {
lengthStream.next(offsetVector, nextBatchSize - nullCount);
unpackLengthNulls(offsetVector, isNullVector, nextBatchSize - nullCount);
}
}
// Calculate the total length for all entries. Note that the values in the offsetVector are still length values now.
long totalLength = 0;
for (int i = 0; i < nextBatchSize; i++) {
totalLength += offsetVector[i];
}
int currentBatchSize = nextBatchSize;
readOffset = 0;
nextBatchSize = 0;
if (totalLength == 0) {
return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector));
}
if (totalLength > ONE_GIGABYTE) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Values in column \"%s\" are too large to process for Presto. %s column values are larger than 1GB [%s]", column.getPath(), nextBatchSize, column.getOrcDataSourceId()));
}
if (dataStream == null) {
throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
}
// allocate enough space to read
byte[] data = new byte[toIntExact(totalLength)];
Slice slice = Slices.wrappedBuffer(data);
if (maxCodePointCount < 0) {
// unbounded, simply read all data in on shot
dataStream.next(data, 0, data.length);
convertLengthVectorToOffsetVector(offsetVector);
} else {
// We do the following operations together in the for loop:
// * truncate strings
// * convert original length values in offsetVector into truncated offset values
int currentLength = offsetVector[0];
offsetVector[0] = 0;
for (int i = 1; i <= currentBatchSize; i++) {
int nextLength = offsetVector[i];
if (isNullVector != null && isNullVector[i - 1]) {
checkState(currentLength == 0, "Corruption in slice direct stream: length is non-zero for null entry");
offsetVector[i] = offsetVector[i - 1];
currentLength = nextLength;
continue;
}
int offset = offsetVector[i - 1];
// read data without truncation
dataStream.next(data, offset, offset + currentLength);
// adjust offsetVector with truncated length
int truncatedLength = computeTruncatedLength(slice, offset, currentLength, maxCodePointCount, isCharType);
verify(truncatedLength >= 0);
offsetVector[i] = offset + truncatedLength;
currentLength = nextLength;
}
}
// this can lead to over-retention but unlikely to happen given truncation rarely happens
return new VariableWidthBlock(currentBatchSize, slice, offsetVector, Optional.ofNullable(isNullVector));
}
use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class LongInputStreamV1 method next.
@Override
public void next(short[] values, int inputItems) throws IOException {
int items = inputItems;
int offset = 0;
while (items > 0) {
if (used == numLiterals) {
numLiterals = 0;
used = 0;
readValues();
}
int chunkSize = min(numLiterals - used, items);
if (repeat) {
for (int i = 0; i < chunkSize; i++) {
long literal = literals[0] + ((used + i) * delta);
short value = (short) literal;
if (literal != value) {
throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
}
values[offset + i] = value;
}
} else {
for (int i = 0; i < chunkSize; i++) {
long literal = literals[used + i];
short value = (short) literal;
if (literal != value) {
throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
}
values[offset + i] = value;
}
}
used += chunkSize;
offset += chunkSize;
items -= chunkSize;
}
}
use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class LongInputStreamV2 method readValues.
// This comes from the Apache Hive ORC code
private void readValues() throws IOException {
lastReadInputCheckpoint = input.getCheckpoint();
// read the first 2 bits and determine the encoding type
int firstByte = input.read();
if (firstByte < 0) {
throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer");
}
int enc = (firstByte >>> 6) & 0x03;
if (EncodingType.SHORT_REPEAT.ordinal() == enc) {
readShortRepeatValues(firstByte);
} else if (EncodingType.DIRECT.ordinal() == enc) {
readDirectValues(firstByte);
} else if (EncodingType.PATCHED_BASE.ordinal() == enc) {
readPatchedBaseValues(firstByte);
} else {
readDeltaValues(firstByte);
}
}
use of io.prestosql.orc.OrcCorruptionException in project hetu-core by openlookeng.
the class LongInputStreamV2 method next.
@Override
public void next(short[] values, int inputItems) throws IOException {
int items = inputItems;
int offset = 0;
while (items > 0) {
if (used == numLiterals) {
numLiterals = 0;
used = 0;
readValues();
}
int chunkSize = min(numLiterals - used, items);
for (int i = 0; i < chunkSize; i++) {
long literal = literals[used + i];
short value = (short) literal;
if (literal != value) {
throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
}
values[offset + i] = value;
}
used += chunkSize;
offset += chunkSize;
items -= chunkSize;
}
}
Aggregations