Search in sources :

Example 1 with InternalFileDecryptor

Use of org.apache.parquet.crypto.InternalFileDecryptor in the parquet-mr project by Apache.

From the class ParquetFileReader, method readFooter.

/**
 * Reads and decodes the footer metadata located at the tail of {@code file}.
 *
 * <p>The last bytes of the file are expected to be a little-endian int holding the footer
 * length, followed by a magic marker. The marker selects between a plaintext footer
 * ({@code MAGIC}) and an encrypted footer ({@code EFMAGIC}). The footer bytes are then read in
 * one call and handed to {@code converter}.
 *
 * @param file      the input file; used here for its length and for its string form in messages
 * @param options   supplies the optional decryption properties and the metadata filter
 * @param f         an open seekable stream over {@code file}; repositioned by this method
 * @param converter decodes the raw footer bytes into {@link ParquetMetadata}
 * @return the metadata produced by {@code converter}
 * @throws IOException if seeking or reading the stream fails
 * @throws RuntimeException if the file is too short, the tail magic is not recognised, or the
 *         computed footer position falls outside the file
 * @throws ParquetCryptoRuntimeException if the footer is encrypted and {@code options} carries
 *         no decryption properties
 */
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException {
    final long totalLength = file.getLength();
    final String path = file.toString();
    LOG.debug("File length {}", totalLength);

    // Smallest possible layout: leading MAGIC + footer length field + trailing MAGIC.
    final int footerLengthBytes = 4;
    final int minimumLength = 2 * MAGIC.length + footerLengthBytes;
    if (totalLength < minimumLength) {
        throw new RuntimeException(path + " is not a Parquet file (length is too low: " + totalLength + ")");
    }

    // The footer length and the tail magic are adjacent, so one seek covers both reads.
    final byte[] tailMagic = new byte[MAGIC.length];
    final long lengthFieldOffset = totalLength - tailMagic.length - footerLengthBytes;
    LOG.debug("reading footer index at {}", lengthFieldOffset);
    f.seek(lengthFieldOffset);
    final int footerLength = readIntLittleEndian(f);
    f.readFully(tailMagic);

    // MAGIC means a plaintext footer, EFMAGIC an encrypted one; anything else is rejected.
    final boolean plaintextFooter = Arrays.equals(MAGIC, tailMagic);
    if (!plaintextFooter && !Arrays.equals(EFMAGIC, tailMagic)) {
        throw new RuntimeException(path + " is not a Parquet file. Expected magic number at tail, but found " + Arrays.toString(tailMagic));
    }
    final boolean encryptedFooter = !plaintextFooter;

    final long footerOffset = lengthFieldOffset - footerLength;
    LOG.debug("read footer length: {}, footer index: {}", footerLength, footerOffset);
    // The footer must start after the leading magic and strictly before the length field.
    final boolean footerInsideFile = footerOffset >= tailMagic.length && footerOffset < lengthFieldOffset;
    if (!footerInsideFile) {
        throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerOffset);
    }
    f.seek(footerOffset);

    final FileDecryptionProperties decryptionProperties = options.getDecryptionProperties();
    final InternalFileDecryptor decryptor =
        (decryptionProperties == null) ? null : new InternalFileDecryptor(decryptionProperties);

    // Pull the whole footer into memory with a single read; individual reads can be
    // expensive on file systems such as HDFS.
    final ByteBuffer footerBuffer = ByteBuffer.allocate(footerLength);
    f.readFully(footerBuffer);
    LOG.debug("Finished to read all footer bytes.");
    footerBuffer.flip();
    final InputStream footerStream = ByteBufferInputStream.wrap(footerBuffer);

    if (encryptedFooter) {
        // Encrypted file with encrypted footer: keys are mandatory.
        if (decryptor == null) {
            throw new ParquetCryptoRuntimeException("Trying to read file with encrypted footer. No keys available");
        }
        final FileCryptoMetaData cryptoMetaData = readFileCryptoMetaData(footerStream);
        decryptor.setFileCryptoMetaData(cryptoMetaData.getEncryption_algorithm(), true, cryptoMetaData.getKey_metadata());
        // footer length is required only for signed plaintext footers
        return converter.readParquetMetadata(footerStream, options.getMetadataFilter(), decryptor, true, 0);
    }

    // Regular file, or encrypted file with plaintext footer.
    return converter.readParquetMetadata(footerStream, options.getMetadataFilter(), decryptor, false, footerLength);
}
Also used : ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) SeekableInputStream(org.apache.parquet.io.SeekableInputStream) SequenceInputStream(java.io.SequenceInputStream) InputStream(java.io.InputStream) InternalFileDecryptor(org.apache.parquet.crypto.InternalFileDecryptor) ByteBuffer(java.nio.ByteBuffer) FileCryptoMetaData(org.apache.parquet.format.FileCryptoMetaData) Util.readFileCryptoMetaData(org.apache.parquet.format.Util.readFileCryptoMetaData) FileDecryptionProperties(org.apache.parquet.crypto.FileDecryptionProperties)

Aggregations

InputStream (java.io.InputStream)1 SequenceInputStream (java.io.SequenceInputStream)1 ByteBuffer (java.nio.ByteBuffer)1 ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream)1 FileDecryptionProperties (org.apache.parquet.crypto.FileDecryptionProperties)1 InternalFileDecryptor (org.apache.parquet.crypto.InternalFileDecryptor)1 ParquetCryptoRuntimeException (org.apache.parquet.crypto.ParquetCryptoRuntimeException)1 FileCryptoMetaData (org.apache.parquet.format.FileCryptoMetaData)1 Util.readFileCryptoMetaData (org.apache.parquet.format.Util.readFileCryptoMetaData)1 SeekableInputStream (org.apache.parquet.io.SeekableInputStream)1