use of org.apache.parquet.crypto.InternalFileDecryptor in project parquet-mr by apache.
the class ParquetFileReader method readFooter.
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException {
long fileLen = file.getLength();
String filePath = file.toString();
LOG.debug("File length {}", fileLen);
int FOOTER_LENGTH_SIZE = 4;
if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) {
// MAGIC + data + footer + footerIndex + MAGIC
throw new RuntimeException(filePath + " is not a Parquet file (length is too low: " + fileLen + ")");
}
// Read footer length and magic string - with a single seek
byte[] magic = new byte[MAGIC.length];
long fileMetadataLengthIndex = fileLen - magic.length - FOOTER_LENGTH_SIZE;
LOG.debug("reading footer index at {}", fileMetadataLengthIndex);
f.seek(fileMetadataLengthIndex);
int fileMetadataLength = readIntLittleEndian(f);
f.readFully(magic);
boolean encryptedFooterMode;
if (Arrays.equals(MAGIC, magic)) {
encryptedFooterMode = false;
} else if (Arrays.equals(EFMAGIC, magic)) {
encryptedFooterMode = true;
} else {
throw new RuntimeException(filePath + " is not a Parquet file. Expected magic number at tail, but found " + Arrays.toString(magic));
}
long fileMetadataIndex = fileMetadataLengthIndex - fileMetadataLength;
LOG.debug("read footer length: {}, footer index: {}", fileMetadataLength, fileMetadataIndex);
if (fileMetadataIndex < magic.length || fileMetadataIndex >= fileMetadataLengthIndex) {
throw new RuntimeException("corrupted file: the footer index is not within the file: " + fileMetadataIndex);
}
f.seek(fileMetadataIndex);
FileDecryptionProperties fileDecryptionProperties = options.getDecryptionProperties();
InternalFileDecryptor fileDecryptor = null;
if (null != fileDecryptionProperties) {
fileDecryptor = new InternalFileDecryptor(fileDecryptionProperties);
}
// Read all the footer bytes in one time to avoid multiple read operations,
// since it can be pretty time consuming for a single read operation in HDFS.
ByteBuffer footerBytesBuffer = ByteBuffer.allocate(fileMetadataLength);
f.readFully(footerBytesBuffer);
LOG.debug("Finished to read all footer bytes.");
footerBytesBuffer.flip();
InputStream footerBytesStream = ByteBufferInputStream.wrap(footerBytesBuffer);
// Regular file, or encrypted file with plaintext footer
if (!encryptedFooterMode) {
return converter.readParquetMetadata(footerBytesStream, options.getMetadataFilter(), fileDecryptor, false, fileMetadataLength);
}
// Encrypted file with encrypted footer
if (null == fileDecryptor) {
throw new ParquetCryptoRuntimeException("Trying to read file with encrypted footer. No keys available");
}
FileCryptoMetaData fileCryptoMetaData = readFileCryptoMetaData(footerBytesStream);
fileDecryptor.setFileCryptoMetaData(fileCryptoMetaData.getEncryption_algorithm(), true, fileCryptoMetaData.getKey_metadata());
// footer length is required only for signed plaintext footers
return converter.readParquetMetadata(footerBytesStream, options.getMetadataFilter(), fileDecryptor, true, 0);
}
Aggregations