Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
The class DecimalStream, method nextLong.
public long nextLong() throws IOException {
    long result = 0;
    int offset = 0;
    long b;
    do {
        b = input.read();
        if (b == -1) {
            throw new OrcCorruptionException("Reading BigInteger past EOF from " + input);
        }
        // the low 7 bits of each byte carry data; the high bit marks a continuation byte
        long work = 0x7f & b;
        if (offset >= 63 && (offset != 63 || work > 1)) {
            throw new OrcCorruptionException("Decimal does not fit long (invalid table schema?)");
        }
        result |= work << offset;
        offset += 7;
    } while (b >= 0x80);
    // undo the zigzag encoding: the lowest bit holds the sign
    boolean isNegative = (result & 0x01) != 0;
    if (isNegative) {
        result += 1;
        result = -result;
        result = result >> 1;
        result |= 0x01L << 63;
    } else {
        result = result >> 1;
        result &= MAX_VALUE;
    }
    return result;
}
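The method decodes a base-128 varint (seven data bits per byte, high bit as the continuation flag) and then undoes the zigzag encoding that stores the sign in the lowest bit. The standalone sketch below reproduces that decoding from an in-memory byte array using the equivalent one-line zigzag formula; the class and method names are illustrative and not part of Presto.

// Illustrative sketch, not Presto code: decode a zigzag base-128 varint from a byte array.
final class ZigZagVarint {
    private ZigZagVarint() {}

    static long decode(byte[] bytes) {
        long result = 0;
        int offset = 0;
        int position = 0;
        long b;
        do {
            b = bytes[position++] & 0xff;       // read one unsigned byte
            result |= (0x7f & b) << offset;     // low 7 bits carry data
            offset += 7;
        } while (b >= 0x80);                    // high bit set means more bytes follow
        // zigzag decode: bit 0 is the sign, the remaining bits are the magnitude
        return (result >>> 1) ^ -(result & 1);
    }

    public static void main(String[] args) {
        System.out.println(decode(new byte[] {5}));                  // -3 (zigzag of -3 is 5)
        System.out.println(decode(new byte[] {(byte) 0xD8, 0x04}));  // 300 (zigzag of 300 is 600)
    }
}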
Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
The class LongInputStreamV2, method next.
@Override
public void next(short[] values, int items) throws IOException {
    int offset = 0;
    while (items > 0) {
        if (used == numLiterals) {
            // the decoded literal buffer is exhausted; read the next run from the stream
            numLiterals = 0;
            used = 0;
            readValues();
        }
        int chunkSize = min(numLiterals - used, items);
        for (int i = 0; i < chunkSize; i++) {
            long literal = literals[used + i];
            short value = (short) literal;
            // a narrowing cast that changes the value means the literal does not fit in 16 bits
            if (literal != value) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
            }
            values[offset + i] = value;
        }
        used += chunkSize;
        offset += chunkSize;
        items -= chunkSize;
    }
}
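The overflow check above relies on a narrowing cast: if casting the decoded long to short and comparing it back changes the value, the literal cannot be represented in 16 bits. A minimal standalone illustration of that pattern, with hypothetical names that are not part of the Presto API:

// Illustrative sketch: reject values that do not survive the long-to-short narrowing cast.
final class NarrowingCheck {
    private NarrowingCheck() {}

    static short toShortExact(long literal) {
        short value = (short) literal;
        if (literal != value) {
            // the round trip lost information, so the value is out of range
            throw new IllegalArgumentException("Decoded value out of range for a 16bit number: " + literal);
        }
        return value;
    }

    public static void main(String[] args) {
        System.out.println(toShortExact(32767));   // ok: Short.MAX_VALUE
        System.out.println(toShortExact(-40000));  // throws: does not fit in 16 bits
    }
}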
Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
The class OrcInputStream, method readFully.
public void readFully(byte[] buffer, int offset, int length) throws IOException {
    while (offset < length) {
        int result = read(buffer, offset, length - offset);
        if (result < 0) {
            throw new OrcCorruptionException(orcDataSourceId, "Unexpected end of stream");
        }
        offset += result;
    }
}
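readFully exists because a single read call may legally return fewer bytes than requested; only a negative return value signals end of stream, which here is treated as corruption. As a hedged, standalone equivalent over a plain java.io.InputStream (not the Presto implementation), the same loop looks like this:

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

// Illustrative sketch: fill buffer[offset .. offset + length) or fail if the stream ends early.
final class ReadFully {
    private ReadFully() {}

    static void readFully(InputStream in, byte[] buffer, int offset, int length) throws IOException {
        int read = 0;
        while (read < length) {
            int result = in.read(buffer, offset + read, length - read);
            if (result < 0) {
                throw new EOFException("Unexpected end of stream");
            }
            read += result;
        }
    }
}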
Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
The class OrcBatchPageSource, method getNextPage.
@Override
public Page getNextPage() {
    try {
        batchId++;
        int batchSize = recordReader.nextBatch();
        if (batchSize <= 0) {
            close();
            return null;
        }
        completedPositions += batchSize;
        Block[] blocks = new Block[hiveColumnIndexes.length];
        for (int fieldId = 0; fieldId < blocks.length; fieldId++) {
            if (constantBlocks[fieldId] != null) {
                blocks[fieldId] = constantBlocks[fieldId].getRegion(0, batchSize);
            } else {
                blocks[fieldId] = new LazyBlock(batchSize, new OrcBlockLoader(hiveColumnIndexes[fieldId]));
            }
        }
        return new Page(batchSize, blocks);
    } catch (PrestoException e) {
        closeWithSuppression(e);
        throw e;
    } catch (OrcCorruptionException e) {
        closeWithSuppression(e);
        throw new PrestoException(HIVE_BAD_DATA, e);
    } catch (IOException | RuntimeException e) {
        closeWithSuppression(e);
        throw new PrestoException(HIVE_CURSOR_ERROR, format("Failed to read ORC file: %s", orcDataSource.getId()), e);
    }
}
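Each catch clause closes the page source before rethrowing, and closeWithSuppression is responsible for attaching any failure raised during cleanup to the exception that is already propagating instead of masking it. The helper below is a minimal sketch of that pattern under the assumption that it wraps a plain AutoCloseable; it is not the Presto implementation.

// Illustrative sketch: close a resource and attach cleanup failures to the primary exception.
final class Suppression {
    private Suppression() {}

    static void closeWithSuppression(AutoCloseable resource, Throwable primary) {
        try {
            resource.close();
        } catch (Exception e) {
            // never let a cleanup failure replace the original error
            if (e != primary) {
                primary.addSuppressed(e);
            }
        }
    }
}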
Use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.
The class StorageOrcFileTailSource, method getOrcFileTail.
@Override
public OrcFileTail getOrcFileTail(OrcDataSource orcDataSource, MetadataReader metadataReader, Optional<OrcWriteValidation> writeValidation, boolean cacheable) throws IOException {
    long size = orcDataSource.getSize();
    if (size <= MAGIC.length()) {
        throw new OrcCorruptionException(orcDataSource.getId(), "Invalid file size %s", size);
    }
    // Read the tail of the file
    byte[] buffer = new byte[toIntExact(min(size, expectedFooterSizeInBytes))];
    orcDataSource.readFully(size - buffer.length, buffer);
    // get length of PostScript - last byte of the file
    int postScriptSize = buffer[buffer.length - SIZE_OF_BYTE] & 0xff;
    if (postScriptSize >= buffer.length) {
        throw new OrcCorruptionException(orcDataSource.getId(), "Invalid postscript length %s", postScriptSize);
    }
    // decode the post script
    PostScript postScript;
    try {
        postScript = metadataReader.readPostScript(buffer, buffer.length - SIZE_OF_BYTE - postScriptSize, postScriptSize);
    } catch (OrcCorruptionException e) {
        // check if this is an ORC file and not an RCFile or something else
        if (!isValidHeaderMagic(orcDataSource)) {
            throw new OrcCorruptionException(orcDataSource.getId(), "Not an ORC file");
        }
        throw e;
    }
    // verify this is a supported version
    checkOrcVersion(orcDataSource, postScript.getVersion());
    validateWrite(writeValidation, orcDataSource, validation -> validation.getVersion().equals(postScript.getVersion()), "Unexpected version");
    int bufferSize = toIntExact(postScript.getCompressionBlockSize());
    // check compression codec is supported
    CompressionKind compressionKind = postScript.getCompression();
    validateWrite(writeValidation, orcDataSource, validation -> validation.getCompression() == compressionKind, "Unexpected compression");
    PostScript.HiveWriterVersion hiveWriterVersion = postScript.getHiveWriterVersion();
    int footerSize = toIntExact(postScript.getFooterLength());
    int metadataSize = toIntExact(postScript.getMetadataLength());
    if (footerSize < 0) {
        throw new OrcCorruptionException(orcDataSource.getId(), "Invalid footer length %s", footerSize);
    }
    if (metadataSize < 0) {
        throw new OrcCorruptionException(orcDataSource.getId(), "Invalid metadata length %s", metadataSize);
    }
    // read DWRF stripe cache only if this feature is enabled and it has meaningful data
    boolean readDwrfStripeCache = dwrfStripeCacheEnabled && postScript.getDwrfStripeCacheLength().isPresent() && postScript.getDwrfStripeCacheMode().isPresent() && postScript.getDwrfStripeCacheMode().get() != DwrfStripeCacheMode.NONE;
    int dwrfStripeCacheSize = 0;
    if (readDwrfStripeCache) {
        dwrfStripeCacheSize = postScript.getDwrfStripeCacheLength().getAsInt();
        checkSizes(orcDataSource, metadataSize, dwrfStripeCacheSize);
    }
    // check if extra bytes need to be read
    Slice completeFooterSlice;
    int completeFooterSize = dwrfStripeCacheSize + metadataSize + footerSize + postScriptSize + SIZE_OF_BYTE;
    if (completeFooterSize > buffer.length) {
        // allocate a new buffer large enough for the complete footer
        byte[] newBuffer = new byte[completeFooterSize];
        completeFooterSlice = Slices.wrappedBuffer(newBuffer);
        // initial read was not large enough, so read missing section
        orcDataSource.readFully(size - completeFooterSize, newBuffer, 0, completeFooterSize - buffer.length);
        // copy already read bytes into the new buffer
        completeFooterSlice.setBytes(completeFooterSize - buffer.length, buffer);
    } else {
        // footer is already in the bytes in buffer, just adjust position, length
        completeFooterSlice = Slices.wrappedBuffer(buffer, buffer.length - completeFooterSize, completeFooterSize);
    }
    // metadataSize is set only for ORC files, dwrfStripeCacheSize is set only for DWRF files
    // it should be safe to sum them up to find footer offset
    // TAIL: [ ORC_METADATA{0,1} | DWRF_STRIPE_CACHE {0,1} ] + FOOTER + POST_SCRIPT + POST_SCRIPT_SIZE (1 byte)
    int footerSliceOffset = metadataSize + dwrfStripeCacheSize;
    Slice footerSlice = completeFooterSlice.slice(footerSliceOffset, footerSize);
    Slice metadataSlice = completeFooterSlice.slice(0, metadataSize);
    // set DwrfStripeCacheData only if the stripe cache feature is enabled and the file has the stripe cache
    Optional<DwrfStripeCacheData> dwrfStripeCacheData = Optional.empty();
    if (readDwrfStripeCache) {
        Slice dwrfStripeCacheSlice = completeFooterSlice.slice(0, dwrfStripeCacheSize);
        DwrfStripeCacheMode stripeCacheMode = postScript.getDwrfStripeCacheMode().get();
        dwrfStripeCacheData = Optional.of(new DwrfStripeCacheData(dwrfStripeCacheSlice, dwrfStripeCacheSize, stripeCacheMode));
    }
    return new OrcFileTail(hiveWriterVersion, bufferSize, compressionKind, footerSlice, footerSize, metadataSlice, metadataSize, dwrfStripeCacheData);
}
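The slicing at the end follows the tail layout spelled out in the comment: optional ORC metadata or DWRF stripe cache first, then the footer, the postscript, and one trailing byte holding the postscript length. The hypothetical helper below only replays that offset arithmetic over the complete-footer buffer to make the slice calls easier to follow; it is not part of Presto, and the sizes in main are made-up examples.

// Illustrative sketch: byte ranges inside the complete-footer buffer, following
// [ metadata | stripe cache ] + footer + postscript + postscript-size byte (1 byte).
final class TailLayout {
    private TailLayout() {}

    static void printOffsets(int metadataSize, int dwrfStripeCacheSize, int footerSize, int postScriptSize) {
        int completeFooterSize = dwrfStripeCacheSize + metadataSize + footerSize + postScriptSize + 1;
        int footerOffset = metadataSize + dwrfStripeCacheSize;   // footer starts after metadata / stripe cache
        int postScriptOffset = footerOffset + footerSize;        // postscript follows the footer
        System.out.printf("complete footer:        %d bytes%n", completeFooterSize);
        System.out.printf("metadata/stripe cache:  [0, %d)%n", footerOffset);
        System.out.printf("footer:                 [%d, %d)%n", footerOffset, postScriptOffset);
        System.out.printf("postscript:             [%d, %d)%n", postScriptOffset, postScriptOffset + postScriptSize);
        System.out.printf("postscript size byte:   [%d, %d)%n", postScriptOffset + postScriptSize, completeFooterSize);
    }

    public static void main(String[] args) {
        printOffsets(120, 0, 2048, 23);  // an ORC file: metadata present, no DWRF stripe cache
    }
}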