Search in sources:

Example 1 with InternalColumnDecryptionSetup

Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the project parquet-mr by apache.

From the class EncryptedColumnChunkMetaData, method decryptIfNeeded.

/**
 * Lazily decrypts this chunk's ColumnMetaData on first use and builds the
 * plaintext "shadow" ColumnChunkMetaData backing this object.
 *
 * @throws ParquetCryptoRuntimeException if no file decryptor is available or
 *         if the column metadata cannot be decrypted/parsed
 */
@Override
protected void decryptIfNeeded() {
    // Already decrypted on a previous call — nothing to do.
    if (decrypted)
        return;
    if (null == fileDecryptor) {
        throw new ParquetCryptoRuntimeException(path + ". Null File Decryptor");
    }
    // Decrypt the ColumnMetaData
    InternalColumnDecryptionSetup columnDecryptionSetup = fileDecryptor.setColumnCryptoMetadata(path, true, false, columnKeyMetadata, columnOrdinal);
    ColumnMetaData metaData;
    ByteArrayInputStream tempInputStream = new ByteArrayInputStream(encryptedMetadata);
    // Module AAD binds the ciphertext to this file, row group and column ordinal.
    byte[] columnMetaDataAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.ColumnMetaData, rowGroupOrdinal, columnOrdinal, -1);
    try {
        metaData = readColumnMetaData(tempInputStream, columnDecryptionSetup.getMetaDataDecryptor(), columnMetaDataAAD);
    } catch (IOException e) {
        throw new ParquetCryptoRuntimeException(path + ". Failed to decrypt column metadata", e);
    }
    shadowColumnChunkMetaData = parquetMetadataConverter.buildColumnChunkMetaData(metaData, path, primitiveType, createdBy);
    this.encodingStats = shadowColumnChunkMetaData.encodingStats;
    this.properties = shadowColumnChunkMetaData.properties;
    // Flip the flag only after the shadow metadata is fully built; setting it
    // earlier would leave the object marked decrypted with a null shadow if
    // buildColumnChunkMetaData threw, breaking every subsequent access.
    decrypted = true;
    if (metaData.isSetBloom_filter_offset()) {
        setBloomFilterOffset(metaData.getBloom_filter_offset());
    }
}
Also used : ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) InternalColumnDecryptionSetup(org.apache.parquet.crypto.InternalColumnDecryptionSetup) ByteArrayInputStream(java.io.ByteArrayInputStream) IOException(java.io.IOException) Util.readColumnMetaData(org.apache.parquet.format.Util.readColumnMetaData) ColumnMetaData(org.apache.parquet.format.ColumnMetaData)

Example 2 with InternalColumnDecryptionSetup

Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the project parquet-mr by apache.

From the class ParquetFileReader, method readColumnIndex.

/**
 * @param column
 *          the column chunk which the column index is to be returned for
 * @return the column index for the specified column chunk or {@code null} if there is no index
 * @throws IOException
 *           if any I/O error occurs during reading the file
 */
@Private
public ColumnIndex readColumnIndex(ColumnChunkMetaData column) throws IOException {
    // No index reference recorded means no column index was written.
    IndexReference ref = column.getColumnIndexReference();
    if (ref == null) {
        return null;
    }
    f.seek(ref.getOffset());

    // Decryption material is only needed for encrypted columns of encrypted files.
    BlockCipher.Decryptor indexDecryptor = null;
    byte[] indexAAD = null;
    boolean encryptedFile = (null != fileDecryptor) && !fileDecryptor.plaintextFile();
    if (encryptedFile) {
        InternalColumnDecryptionSetup setup = fileDecryptor.getColumnSetup(column.getPath());
        if (setup.isEncrypted()) {
            indexDecryptor = setup.getMetaDataDecryptor();
            indexAAD = AesCipher.createModuleAAD(
                fileDecryptor.getFileAAD(),
                ModuleType.ColumnIndex,
                column.getRowGroupOrdinal(),
                setup.getOrdinal(),
                -1);
        }
    }
    return ParquetMetadataConverter.fromParquetColumnIndex(
        column.getPrimitiveType(),
        Util.readColumnIndex(f, indexDecryptor, indexAAD));
}
Also used : BlockCipher(org.apache.parquet.format.BlockCipher) InternalColumnDecryptionSetup(org.apache.parquet.crypto.InternalColumnDecryptionSetup) IndexReference(org.apache.parquet.internal.hadoop.metadata.IndexReference) Private(org.apache.yetus.audience.InterfaceAudience.Private)

Example 3 with InternalColumnDecryptionSetup

Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the project parquet-mr by apache.

From the class ParquetFileReader, method readDictionary.

/**
 * Reads and decompresses a dictionary page for the given column chunk.
 *
 * Returns null if the given column chunk has no dictionary page.
 *
 * @param meta a column's ColumnChunkMetaData to read the dictionary from
 * @return an uncompressed DictionaryPage or null
 * @throws IOException if there is an error while reading the dictionary
 */
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
    if (!meta.hasDictionaryPage()) {
        return null;
    }
    // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
    if (f.getPos() != meta.getStartingPos()) {
        f.seek(meta.getStartingPos());
    }

    // Resolve the column's decryption setup when the file itself is encrypted.
    InternalColumnDecryptionSetup setup = null;
    boolean columnIsEncrypted = false;
    if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
        setup = fileDecryptor.getColumnSetup(meta.getPath());
        columnIsEncrypted = setup.isEncrypted();
    }

    BlockCipher.Decryptor pageDecryptor = null;
    byte[] dictionaryPageAAD = null;
    PageHeader pageHeader;
    if (columnIsEncrypted) {
        // Header and page bytes are distinct encryption modules with distinct AADs.
        byte[] headerAAD = AesCipher.createModuleAAD(
            fileDecryptor.getFileAAD(), ModuleType.DictionaryPageHeader,
            meta.getRowGroupOrdinal(), setup.getOrdinal(), -1);
        pageHeader = Util.readPageHeader(f, setup.getMetaDataDecryptor(), headerAAD);
        dictionaryPageAAD = AesCipher.createModuleAAD(
            fileDecryptor.getFileAAD(), ModuleType.DictionaryPage,
            meta.getRowGroupOrdinal(), setup.getOrdinal(), -1);
        pageDecryptor = setup.getDataDecryptor();
    } else {
        pageHeader = Util.readPageHeader(f);
    }

    if (!pageHeader.isSetDictionary_page_header()) {
        // TODO: should this complain?
        return null;
    }
    DictionaryPage compressed = readCompressedDictionary(pageHeader, f, pageDecryptor, dictionaryPageAAD);
    BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec());
    return new DictionaryPage(
        decompressor.decompress(compressed.getBytes(), compressed.getUncompressedSize()),
        compressed.getDictionarySize(),
        compressed.getEncoding());
}
Also used : InternalColumnDecryptionSetup(org.apache.parquet.crypto.InternalColumnDecryptionSetup) BlockCipher(org.apache.parquet.format.BlockCipher) PageHeader(org.apache.parquet.format.PageHeader) DictionaryPageHeader(org.apache.parquet.format.DictionaryPageHeader) DataPageHeader(org.apache.parquet.format.DataPageHeader) BytesInputDecompressor(org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor) DictionaryPage(org.apache.parquet.column.page.DictionaryPage)

Example 4 with InternalColumnDecryptionSetup

Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the project parquet-mr by apache.

From the class ParquetFileReader, method readChunkPages.

/**
 * Reads all pages of the given chunk into the row group, decrypting them
 * when the file and the specific column are encrypted.
 */
private void readChunkPages(Chunk chunk, BlockMetaData block, ColumnChunkPageReadStore rowGroup) throws IOException {
    // Plaintext file: no per-column decryption setup is needed at all.
    if (null == fileDecryptor || fileDecryptor.plaintextFile()) {
        rowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
        return;
    }
    // Encrypted file: the column itself may still be stored in plaintext.
    ColumnPath columnPath = ColumnPath.get(chunk.descriptor.col.getPath());
    InternalColumnDecryptionSetup setup = fileDecryptor.getColumnSetup(columnPath);
    if (setup.isEncrypted()) {
        // encrypted column
        rowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages(
            setup.getMetaDataDecryptor(),
            setup.getDataDecryptor(),
            fileDecryptor.getFileAAD(),
            block.getOrdinal(),
            setup.getOrdinal()));
    } else {
        // plaintext column
        rowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
    }
}
Also used : InternalColumnDecryptionSetup(org.apache.parquet.crypto.InternalColumnDecryptionSetup) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath)

Example 5 with InternalColumnDecryptionSetup

Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the project parquet-mr by apache.

From the class ParquetFileReader, method readBloomFilter.

/**
 * Reads Bloom filter data for the given column chunk.
 *
 * @param meta a column's ColumnChunkMetaData to read the Bloom filter from
 * @return a BloomFilter object, or {@code null} if the column has no Bloom filter,
 *         its header cannot be read, or its encoding is unsupported
 * @throws IOException if there is an error while reading the Bloom filter.
 */
public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
    long bloomFilterOffset = meta.getBloomFilterOffset();
    // A negative offset means no Bloom filter was written for this chunk.
    if (bloomFilterOffset < 0) {
        return null;
    }
    // Prepare to decrypt Bloom filter (for encrypted columns)
    BlockCipher.Decryptor bloomFilterDecryptor = null;
    byte[] bloomFilterHeaderAAD = null;
    byte[] bloomFilterBitsetAAD = null;
    if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
        InternalColumnDecryptionSetup columnDecryptionSetup = fileDecryptor.getColumnSetup(meta.getPath());
        if (columnDecryptionSetup.isEncrypted()) {
            bloomFilterDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
            // Header and bitset are separate encryption modules, each with its own AAD.
            bloomFilterHeaderAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.BloomFilterHeader, meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
            bloomFilterBitsetAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.BloomFilterBitset, meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
        }
    }
    // Read Bloom filter data header.
    f.seek(bloomFilterOffset);
    BloomFilterHeader bloomFilterHeader;
    try {
        bloomFilterHeader = Util.readBloomFilterHeader(f, bloomFilterDecryptor, bloomFilterHeaderAAD);
    } catch (IOException e) {
        // An unreadable header is treated as "no Bloom filter" rather than a fatal
        // error, but log the cause instead of silently discarding it.
        LOG.warn("read no bloom filter", e);
        return null;
    }
    int numBytes = bloomFilterHeader.getNumBytes();
    if (numBytes <= 0 || numBytes > BlockSplitBloomFilter.UPPER_BOUND_BYTES) {
        LOG.warn("the read bloom filter size is wrong, size is {}", bloomFilterHeader.getNumBytes());
        return null;
    }
    // Only the XXHASH hash / BLOCK algorithm / UNCOMPRESSED combination is supported.
    if (!bloomFilterHeader.getHash().isSetXXHASH() || !bloomFilterHeader.getAlgorithm().isSetBLOCK() || !bloomFilterHeader.getCompression().isSetUNCOMPRESSED()) {
        LOG.warn("the read bloom filter is not supported yet,  algorithm = {}, hash = {}, compression = {}", bloomFilterHeader.getAlgorithm(), bloomFilterHeader.getHash(), bloomFilterHeader.getCompression());
        return null;
    }
    byte[] bitset;
    if (null == bloomFilterDecryptor) {
        bitset = new byte[numBytes];
        f.readFully(bitset);
    } else {
        bitset = bloomFilterDecryptor.decrypt(f, bloomFilterBitsetAAD);
        if (bitset.length != numBytes) {
            throw new ParquetCryptoRuntimeException("Wrong length of decrypted bloom filter bitset");
        }
    }
    return new BlockSplitBloomFilter(bitset);
}
Also used : BlockSplitBloomFilter(org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter) BlockCipher(org.apache.parquet.format.BlockCipher) InternalColumnDecryptionSetup(org.apache.parquet.crypto.InternalColumnDecryptionSetup) ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) BloomFilterHeader(org.apache.parquet.format.BloomFilterHeader) IOException(java.io.IOException)

Aggregations

InternalColumnDecryptionSetup (org.apache.parquet.crypto.InternalColumnDecryptionSetup)6 BlockCipher (org.apache.parquet.format.BlockCipher)4 IOException (java.io.IOException)2 ParquetCryptoRuntimeException (org.apache.parquet.crypto.ParquetCryptoRuntimeException)2 IndexReference (org.apache.parquet.internal.hadoop.metadata.IndexReference)2 Private (org.apache.yetus.audience.InterfaceAudience.Private)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DictionaryPage (org.apache.parquet.column.page.DictionaryPage)1 BlockSplitBloomFilter (org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter)1 BytesInputDecompressor (org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor)1 BloomFilterHeader (org.apache.parquet.format.BloomFilterHeader)1 ColumnMetaData (org.apache.parquet.format.ColumnMetaData)1 DataPageHeader (org.apache.parquet.format.DataPageHeader)1 DictionaryPageHeader (org.apache.parquet.format.DictionaryPageHeader)1 PageHeader (org.apache.parquet.format.PageHeader)1 Util.readColumnMetaData (org.apache.parquet.format.Util.readColumnMetaData)1 ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath)1