Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the apache/parquet-mr project: class EncryptedColumnChunkMetaData, method decryptIfNeeded.
@Override
protected void decryptIfNeeded() {
  // Metadata is decrypted lazily, exactly once; later calls are no-ops.
  if (decrypted) {
    return;
  }
  if (fileDecryptor == null) {
    throw new ParquetCryptoRuntimeException(path + ". Null File Decryptor");
  }

  // Resolve the decryptor for this column (registers its crypto metadata).
  InternalColumnDecryptionSetup columnSetup =
      fileDecryptor.setColumnCryptoMetadata(path, true, false, columnKeyMetadata, columnOrdinal);

  // The AAD ties the ciphertext to this file, module type, row group and column.
  byte[] metaDataAAD = AesCipher.createModuleAAD(
      fileDecryptor.getFileAAD(), ModuleType.ColumnMetaData, rowGroupOrdinal, columnOrdinal, -1);

  // Decrypt and deserialize the Thrift ColumnMetaData from the stored ciphertext.
  ByteArrayInputStream encryptedStream = new ByteArrayInputStream(encryptedMetadata);
  ColumnMetaData plainMetaData;
  try {
    plainMetaData =
        readColumnMetaData(encryptedStream, columnSetup.getMetaDataDecryptor(), metaDataAAD);
  } catch (IOException e) {
    throw new ParquetCryptoRuntimeException(path + ". Failed to decrypt column metadata", e);
  }
  decrypted = true;

  // Build the plaintext shadow metadata object and mirror its derived fields.
  shadowColumnChunkMetaData =
      parquetMetadataConverter.buildColumnChunkMetaData(plainMetaData, path, primitiveType, createdBy);
  this.encodingStats = shadowColumnChunkMetaData.encodingStats;
  this.properties = shadowColumnChunkMetaData.properties;
  if (plainMetaData.isSetBloom_filter_offset()) {
    setBloomFilterOffset(plainMetaData.getBloom_filter_offset());
  }
}
Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the apache/parquet-mr project: class ParquetFileReader, method readColumnIndex.
/**
 * Reads (and, for encrypted columns, decrypts) the column index of a column chunk.
 *
 * @param column
 *          the column chunk which the column index is to be returned for
 * @return the column index for the specified column chunk or {@code null} if there is no index
 * @throws IOException
 *           if any I/O error occurs during reading the file
 */
@Private
public ColumnIndex readColumnIndex(ColumnChunkMetaData column) throws IOException {
  IndexReference ref = column.getColumnIndexReference();
  if (ref == null) {
    // No column index was written for this chunk.
    return null;
  }
  f.seek(ref.getOffset());

  // Plaintext file (or no decryptor): read the index as-is.
  BlockCipher.Decryptor indexDecryptor = null;
  byte[] indexAAD = null;
  if (fileDecryptor != null && !fileDecryptor.plaintextFile()) {
    InternalColumnDecryptionSetup setup = fileDecryptor.getColumnSetup(column.getPath());
    if (setup.isEncrypted()) {
      // Encrypted column: decrypt the index with the metadata decryptor and a
      // module AAD bound to this row group / column.
      indexDecryptor = setup.getMetaDataDecryptor();
      indexAAD = AesCipher.createModuleAAD(
          fileDecryptor.getFileAAD(),
          ModuleType.ColumnIndex,
          column.getRowGroupOrdinal(),
          setup.getOrdinal(),
          -1);
    }
  }
  return ParquetMetadataConverter.fromParquetColumnIndex(
      column.getPrimitiveType(), Util.readColumnIndex(f, indexDecryptor, indexAAD));
}
Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the apache/parquet-mr project: class ParquetFileReader, method readDictionary.
/**
 * Reads and decompresses a dictionary page for the given column chunk.
 *
 * Returns null if the given column chunk has no dictionary page.
 *
 * @param meta a column's ColumnChunkMetaData to read the dictionary from
 * @return an uncompressed DictionaryPage or null
 * @throws IOException if there is an error while reading the dictionary
 */
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
  if (!meta.hasDictionaryPage()) {
    return null;
  }
  // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
  if (f.getPos() != meta.getStartingPos()) {
    f.seek(meta.getStartingPos());
  }

  // Determine whether this particular column is encrypted (a decrypted file
  // may still contain plaintext columns).
  InternalColumnDecryptionSetup decryptionSetup = null;
  boolean columnEncrypted = false;
  if (fileDecryptor != null && !fileDecryptor.plaintextFile()) {
    decryptionSetup = fileDecryptor.getColumnSetup(meta.getPath());
    columnEncrypted = decryptionSetup.isEncrypted();
  }

  PageHeader pageHeader;
  BlockCipher.Decryptor pageDecryptor = null;
  byte[] pageAAD = null;
  if (columnEncrypted) {
    // Header and page payload each use their own module AAD.
    byte[] headerAAD = AesCipher.createModuleAAD(
        fileDecryptor.getFileAAD(), ModuleType.DictionaryPageHeader,
        meta.getRowGroupOrdinal(), decryptionSetup.getOrdinal(), -1);
    pageHeader = Util.readPageHeader(f, decryptionSetup.getMetaDataDecryptor(), headerAAD);
    pageAAD = AesCipher.createModuleAAD(
        fileDecryptor.getFileAAD(), ModuleType.DictionaryPage,
        meta.getRowGroupOrdinal(), decryptionSetup.getOrdinal(), -1);
    pageDecryptor = decryptionSetup.getDataDecryptor();
  } else {
    pageHeader = Util.readPageHeader(f);
  }

  if (!pageHeader.isSetDictionary_page_header()) {
    // TODO: should this complain?
    return null;
  }

  // Read the (possibly encrypted) compressed dictionary, then decompress it.
  DictionaryPage compressed = readCompressedDictionary(pageHeader, f, pageDecryptor, pageAAD);
  BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec());
  return new DictionaryPage(
      decompressor.decompress(compressed.getBytes(), compressed.getUncompressedSize()),
      compressed.getDictionarySize(),
      compressed.getEncoding());
}
Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the apache/parquet-mr project: class ParquetFileReader, method readChunkPages.
/**
 * Reads all pages of the given chunk into the row group, passing the column's
 * decryptors when the file and this column are encrypted.
 */
private void readChunkPages(Chunk chunk, BlockMetaData block, ColumnChunkPageReadStore rowGroup) throws IOException {
  // Plaintext file: no decryption needed for any column.
  if (fileDecryptor == null || fileDecryptor.plaintextFile()) {
    rowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
    return;
  }
  // Encrypted file: individual columns may still be plaintext.
  ColumnPath columnPath = ColumnPath.get(chunk.descriptor.col.getPath());
  InternalColumnDecryptionSetup setup = fileDecryptor.getColumnSetup(columnPath);
  if (setup.isEncrypted()) {
    // Encrypted column: supply both metadata and data decryptors plus the AAD inputs.
    rowGroup.addColumn(
        chunk.descriptor.col,
        chunk.readAllPages(
            setup.getMetaDataDecryptor(),
            setup.getDataDecryptor(),
            fileDecryptor.getFileAAD(),
            block.getOrdinal(),
            setup.getOrdinal()));
  } else {
    // Plaintext column inside an encrypted file.
    rowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
  }
}
Use of org.apache.parquet.crypto.InternalColumnDecryptionSetup in the apache/parquet-mr project: class ParquetFileReader, method readBloomFilter.
/**
 * Reads Bloom filter data for the given column chunk, decrypting the header
 * and bitset when the column is encrypted.
 *
 * @param meta a column's ColumnChunkMetaData to read the Bloom filter from
 * @return a BloomFilter object, or {@code null} if the chunk has none or it cannot be read
 * @throws IOException if there is an error while reading the Bloom filter.
 */
public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
  long bloomFilterOffset = meta.getBloomFilterOffset();
  if (bloomFilterOffset < 0) {
    // Negative offset means no Bloom filter was written for this chunk.
    return null;
  }

  // Prepare to decrypt Bloom filter (for encrypted columns). Header and bitset
  // are separate modules, so each gets its own AAD.
  BlockCipher.Decryptor bloomFilterDecryptor = null;
  byte[] bloomFilterHeaderAAD = null;
  byte[] bloomFilterBitsetAAD = null;
  if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
    InternalColumnDecryptionSetup columnDecryptionSetup = fileDecryptor.getColumnSetup(meta.getPath());
    if (columnDecryptionSetup.isEncrypted()) {
      bloomFilterDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
      bloomFilterHeaderAAD = AesCipher.createModuleAAD(
          fileDecryptor.getFileAAD(), ModuleType.BloomFilterHeader,
          meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
      bloomFilterBitsetAAD = AesCipher.createModuleAAD(
          fileDecryptor.getFileAAD(), ModuleType.BloomFilterBitset,
          meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
    }
  }

  // Read Bloom filter data header.
  f.seek(bloomFilterOffset);
  BloomFilterHeader bloomFilterHeader;
  try {
    bloomFilterHeader = Util.readBloomFilterHeader(f, bloomFilterDecryptor, bloomFilterHeaderAAD);
  } catch (IOException e) {
    // FIX: the original logged a bare "read no bloom filter", dropping the
    // exception and all context; keep the best-effort null return but log
    // the column, offset and cause so the failure is diagnosable.
    LOG.warn("Unable to read Bloom filter header for column {} at offset {}; skipping Bloom filter",
        meta.getPath(), bloomFilterOffset, e);
    return null;
  }

  int numBytes = bloomFilterHeader.getNumBytes();
  if (numBytes <= 0 || numBytes > BlockSplitBloomFilter.UPPER_BOUND_BYTES) {
    LOG.warn("the read bloom filter size is wrong, size is {}", numBytes);
    return null;
  }
  // Only the uncompressed, XXHASH, block-split variant is supported.
  if (!bloomFilterHeader.getHash().isSetXXHASH() || !bloomFilterHeader.getAlgorithm().isSetBLOCK() || !bloomFilterHeader.getCompression().isSetUNCOMPRESSED()) {
    LOG.warn("the read bloom filter is not supported yet, algorithm = {}, hash = {}, compression = {}", bloomFilterHeader.getAlgorithm(), bloomFilterHeader.getHash(), bloomFilterHeader.getCompression());
    return null;
  }

  // Read the bitset: plaintext read, or authenticated decryption with length check.
  byte[] bitset;
  if (null == bloomFilterDecryptor) {
    bitset = new byte[numBytes];
    f.readFully(bitset);
  } else {
    bitset = bloomFilterDecryptor.decrypt(f, bloomFilterBitsetAAD);
    if (bitset.length != numBytes) {
      throw new ParquetCryptoRuntimeException("Wrong length of decrypted bloom filter bitset");
    }
  }
  return new BlockSplitBloomFilter(bitset);
}
Aggregations