Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.
The class HadoopFSKeyMaterialStore, method moveMaterialTo.
@Override
public void moveMaterialTo(FileKeyMaterialStore keyMaterialStore) throws ParquetCryptoRuntimeException {
  // Currently supports only moving to a HadoopFSKeyMaterialStore
  HadoopFSKeyMaterialStore targetStore;
  try {
    targetStore = (HadoopFSKeyMaterialStore) keyMaterialStore;
  } catch (ClassCastException e) {
    throw new IllegalArgumentException(
        "Currently supports only moving to HadoopFSKeyMaterialStore, not to " + keyMaterialStore.getClass(), e);
  }
  Path targetKeyMaterialFile = targetStore.getStorageFilePath();
  try {
    hadoopFileSystem.rename(keyMaterialFile, targetKeyMaterialFile);
  } catch (IOException e) {
    throw new ParquetCryptoRuntimeException("Failed to rename file " + keyMaterialFile, e);
  }
}
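A minimal caller-side sketch of how moveMaterialTo might be used, for example when a temporary key material store is promoted over the original during key rotation. The promoteTempStore helper and both store variables are hypothetical, and the keytools package path for the imports is assumed; only the FileKeyMaterialStore interface and the exceptions thrown above come from the snippet.

import org.apache.parquet.crypto.ParquetCryptoRuntimeException;
import org.apache.parquet.crypto.keytools.FileKeyMaterialStore;

public class KeyMaterialMoveExample {
  // Hypothetical helper: moves (renames) the temporary key material file over the target one.
  static void promoteTempStore(FileKeyMaterialStore tempStore, FileKeyMaterialStore targetStore) {
    try {
      // Throws IllegalArgumentException if targetStore is not a HadoopFSKeyMaterialStore
      tempStore.moveMaterialTo(targetStore);
    } catch (ParquetCryptoRuntimeException e) {
      // The underlying rename on the Hadoop file system failed; surface the failure to the caller
      throw new RuntimeException("Could not promote temporary key material store", e);
    }
  }
}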
Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.
The class ParquetFileReader, method readFooter.
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options,
    SeekableInputStream f, ParquetMetadataConverter converter) throws IOException {
  long fileLen = file.getLength();
  String filePath = file.toString();
  LOG.debug("File length {}", fileLen);
  int FOOTER_LENGTH_SIZE = 4;
  if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) {
    // MAGIC + data + footer + footerIndex + MAGIC
    throw new RuntimeException(filePath + " is not a Parquet file (length is too low: " + fileLen + ")");
  }
  // Read footer length and magic string - with a single seek
  byte[] magic = new byte[MAGIC.length];
  long fileMetadataLengthIndex = fileLen - magic.length - FOOTER_LENGTH_SIZE;
  LOG.debug("reading footer index at {}", fileMetadataLengthIndex);
  f.seek(fileMetadataLengthIndex);
  int fileMetadataLength = readIntLittleEndian(f);
  f.readFully(magic);
  boolean encryptedFooterMode;
  if (Arrays.equals(MAGIC, magic)) {
    encryptedFooterMode = false;
  } else if (Arrays.equals(EFMAGIC, magic)) {
    encryptedFooterMode = true;
  } else {
    throw new RuntimeException(
        filePath + " is not a Parquet file. Expected magic number at tail, but found " + Arrays.toString(magic));
  }
  long fileMetadataIndex = fileMetadataLengthIndex - fileMetadataLength;
  LOG.debug("read footer length: {}, footer index: {}", fileMetadataLength, fileMetadataIndex);
  if (fileMetadataIndex < magic.length || fileMetadataIndex >= fileMetadataLengthIndex) {
    throw new RuntimeException("corrupted file: the footer index is not within the file: " + fileMetadataIndex);
  }
  f.seek(fileMetadataIndex);
  FileDecryptionProperties fileDecryptionProperties = options.getDecryptionProperties();
  InternalFileDecryptor fileDecryptor = null;
  if (null != fileDecryptionProperties) {
    fileDecryptor = new InternalFileDecryptor(fileDecryptionProperties);
  }
  // Read all the footer bytes at once, to avoid multiple read operations;
  // a single read can be pretty time consuming in HDFS.
  ByteBuffer footerBytesBuffer = ByteBuffer.allocate(fileMetadataLength);
  f.readFully(footerBytesBuffer);
  LOG.debug("Finished to read all footer bytes.");
  footerBytesBuffer.flip();
  InputStream footerBytesStream = ByteBufferInputStream.wrap(footerBytesBuffer);
  // Regular file, or encrypted file with plaintext footer
  if (!encryptedFooterMode) {
    return converter.readParquetMetadata(footerBytesStream, options.getMetadataFilter(), fileDecryptor, false,
        fileMetadataLength);
  }
  // Encrypted file with encrypted footer
  if (null == fileDecryptor) {
    throw new ParquetCryptoRuntimeException("Trying to read file with encrypted footer. No keys available");
  }
  FileCryptoMetaData fileCryptoMetaData = readFileCryptoMetaData(footerBytesStream);
  fileDecryptor.setFileCryptoMetaData(fileCryptoMetaData.getEncryption_algorithm(), true,
      fileCryptoMetaData.getKey_metadata());
  // footer length is required only for signed plaintext footers
  return converter.readParquetMetadata(footerBytesStream, options.getMetadataFilter(), fileDecryptor, true, 0);
}
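For context, a hedged sketch of reading a file whose footer is encrypted, so that readFooter above takes the encrypted-footer branch instead of throwing ParquetCryptoRuntimeException. The builder methods withFooterKey and withDecryption are assumed from the columnar-encryption API in recent parquet-mr releases, and the 16-byte key is a placeholder.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.ParquetReadOptions;
import org.apache.parquet.crypto.FileDecryptionProperties;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class EncryptedFooterReadExample {
  public static void main(String[] args) throws Exception {
    byte[] footerKey = new byte[16]; // placeholder 128-bit key; use the real footer key in practice
    FileDecryptionProperties decryptionProperties = FileDecryptionProperties.builder()
        .withFooterKey(footerKey)
        .build();
    ParquetReadOptions options = ParquetReadOptions.builder()
        .withDecryption(decryptionProperties)   // assumed builder method for decryption properties
        .build();
    Configuration conf = new Configuration();
    try (ParquetFileReader reader = ParquetFileReader.open(
        HadoopInputFile.fromPath(new Path(args[0]), conf), options)) {
      // readFooter above runs inside open(); without decryption properties an
      // encrypted-footer file fails with ParquetCryptoRuntimeException
      ParquetMetadata footer = reader.getFooter();
      System.out.println("Row groups: " + footer.getBlocks().size());
    }
  }
}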
Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.
The class ParquetFileReader, method readBloomFilter.
/**
 * Reads Bloom filter data for the given column chunk.
 *
 * @param meta a column's ColumnChunkMetaData to read the Bloom filter from
 * @return a BloomFilter object, or null if no Bloom filter could be read
 * @throws IOException if there is an error while reading the Bloom filter.
 */
public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
  long bloomFilterOffset = meta.getBloomFilterOffset();
  if (bloomFilterOffset < 0) {
    return null;
  }
  // Prepare to decrypt Bloom filter (for encrypted columns)
  BlockCipher.Decryptor bloomFilterDecryptor = null;
  byte[] bloomFilterHeaderAAD = null;
  byte[] bloomFilterBitsetAAD = null;
  if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
    InternalColumnDecryptionSetup columnDecryptionSetup = fileDecryptor.getColumnSetup(meta.getPath());
    if (columnDecryptionSetup.isEncrypted()) {
      bloomFilterDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
      bloomFilterHeaderAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.BloomFilterHeader,
          meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
      bloomFilterBitsetAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.BloomFilterBitset,
          meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
    }
  }
  // Read Bloom filter data header.
  f.seek(bloomFilterOffset);
  BloomFilterHeader bloomFilterHeader;
  try {
    bloomFilterHeader = Util.readBloomFilterHeader(f, bloomFilterDecryptor, bloomFilterHeaderAAD);
  } catch (IOException e) {
    LOG.warn("read no bloom filter");
    return null;
  }
  int numBytes = bloomFilterHeader.getNumBytes();
  if (numBytes <= 0 || numBytes > BlockSplitBloomFilter.UPPER_BOUND_BYTES) {
    LOG.warn("the read bloom filter size is wrong, size is {}", bloomFilterHeader.getNumBytes());
    return null;
  }
  if (!bloomFilterHeader.getHash().isSetXXHASH() || !bloomFilterHeader.getAlgorithm().isSetBLOCK()
      || !bloomFilterHeader.getCompression().isSetUNCOMPRESSED()) {
    LOG.warn("the read bloom filter is not supported yet, algorithm = {}, hash = {}, compression = {}",
        bloomFilterHeader.getAlgorithm(), bloomFilterHeader.getHash(), bloomFilterHeader.getCompression());
    return null;
  }
  byte[] bitset;
  if (null == bloomFilterDecryptor) {
    bitset = new byte[numBytes];
    f.readFully(bitset);
  } else {
    bitset = bloomFilterDecryptor.decrypt(f, bloomFilterBitsetAAD);
    if (bitset.length != numBytes) {
      throw new ParquetCryptoRuntimeException("Wrong length of decrypted bloom filter bitset");
    }
  }
  return new BlockSplitBloomFilter(bitset);
}
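A short sketch of probing the returned filter for a long value. The mightContain helper is hypothetical, and the hash/findHash call pattern is assumed from the org.apache.parquet.column.values.bloomfilter.BloomFilter interface.

import java.io.IOException;
import org.apache.parquet.column.values.bloomfilter.BloomFilter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;

public class BloomFilterProbeExample {
  // Returns true if the row group MAY contain the value, false if it definitely does not.
  static boolean mightContain(ParquetFileReader reader, BlockMetaData rowGroup,
                              int columnIndex, long value) throws IOException {
    ColumnChunkMetaData column = rowGroup.getColumns().get(columnIndex);
    BloomFilter bloomFilter = reader.readBloomFilter(column);
    if (bloomFilter == null) {
      return true; // no filter stored (or unreadable), so the value cannot be ruled out
    }
    return bloomFilter.findHash(bloomFilter.hash(value));
  }
}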
Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.
The class ParquetMetadataConverter, method addRowGroup.
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block,
    InternalFileEncryptor fileEncryptor) {
  // rowGroup.total_byte_size = ;
  List<ColumnChunkMetaData> columns = block.getColumns();
  List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>();
  int rowGroupOrdinal = rowGroups.size();
  int columnOrdinal = -1;
  ByteArrayOutputStream tempOutStream = null;
  for (ColumnChunkMetaData columnMetaData : columns) {
    // verify this is the right offset
    ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset());
    // they are in the same file for now
    columnChunk.file_path = block.getPath();
    InternalColumnEncryptionSetup columnSetup = null;
    boolean writeCryptoMetadata = false;
    boolean encryptMetaData = false;
    ColumnPath path = columnMetaData.getPath();
    if (null != fileEncryptor) {
      columnOrdinal++;
      columnSetup = fileEncryptor.getColumnSetup(path, false, columnOrdinal);
      writeCryptoMetadata = columnSetup.isEncrypted();
      encryptMetaData = fileEncryptor.encryptColumnMetaData(columnSetup);
    }
    ColumnMetaData metaData = new ColumnMetaData(
        getType(columnMetaData.getType()),
        toFormatEncodings(columnMetaData.getEncodings()),
        Arrays.asList(columnMetaData.getPath().toArray()),
        toFormatCodec(columnMetaData.getCodec()),
        columnMetaData.getValueCount(),
        columnMetaData.getTotalUncompressedSize(),
        columnMetaData.getTotalSize(),
        columnMetaData.getFirstDataPageOffset());
    if (columnMetaData.getEncodingStats() != null && columnMetaData.getEncodingStats().hasDictionaryPages()) {
      metaData.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset());
    }
    long bloomFilterOffset = columnMetaData.getBloomFilterOffset();
    if (bloomFilterOffset >= 0) {
      metaData.setBloom_filter_offset(bloomFilterOffset);
    }
    if (columnMetaData.getStatistics() != null && !columnMetaData.getStatistics().isEmpty()) {
      metaData.setStatistics(toParquetStatistics(columnMetaData.getStatistics(), this.statisticsTruncateLength));
    }
    if (columnMetaData.getEncodingStats() != null) {
      metaData.setEncoding_stats(convertEncodingStats(columnMetaData.getEncodingStats()));
    }
    if (!encryptMetaData) {
      columnChunk.setMeta_data(metaData);
    } else {
      // Serialize and encrypt ColumnMetadata separately
      byte[] columnMetaDataAAD = AesCipher.createModuleAAD(fileEncryptor.getFileAAD(), ModuleType.ColumnMetaData,
          rowGroupOrdinal, columnSetup.getOrdinal(), -1);
      if (null == tempOutStream) {
        tempOutStream = new ByteArrayOutputStream();
      } else {
        tempOutStream.reset();
      }
      try {
        writeColumnMetaData(metaData, tempOutStream, columnSetup.getMetaDataEncryptor(), columnMetaDataAAD);
      } catch (IOException e) {
        throw new ParquetCryptoRuntimeException(
            "Failed to serialize and encrypt ColumnMetadata for " + columnMetaData.getPath(), e);
      }
      columnChunk.setEncrypted_column_metadata(tempOutStream.toByteArray());
      // Keep redacted metadata version for old readers
      if (!fileEncryptor.isFooterEncrypted()) {
        ColumnMetaData metaDataRedacted = metaData.deepCopy();
        if (metaDataRedacted.isSetStatistics()) metaDataRedacted.unsetStatistics();
        if (metaDataRedacted.isSetEncoding_stats()) metaDataRedacted.unsetEncoding_stats();
        columnChunk.setMeta_data(metaDataRedacted);
      }
    }
    if (writeCryptoMetadata) {
      columnChunk.setCrypto_metadata(columnSetup.getColumnCryptoMetaData());
    }
    // columnChunk.meta_data.index_page_offset = ;
    // columnChunk.meta_data.key_value_metadata = ; // nothing yet
    IndexReference columnIndexRef = columnMetaData.getColumnIndexReference();
    if (columnIndexRef != null) {
      columnChunk.setColumn_index_offset(columnIndexRef.getOffset());
      columnChunk.setColumn_index_length(columnIndexRef.getLength());
    }
    IndexReference offsetIndexRef = columnMetaData.getOffsetIndexReference();
    if (offsetIndexRef != null) {
      columnChunk.setOffset_index_offset(offsetIndexRef.getOffset());
      columnChunk.setOffset_index_length(offsetIndexRef.getLength());
    }
    parquetColumns.add(columnChunk);
  }
  RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
  rowGroup.setFile_offset(block.getStartingPos());
  rowGroup.setTotal_compressed_size(block.getCompressedSize());
  rowGroup.setOrdinal((short) rowGroupOrdinal);
  rowGroups.add(rowGroup);
}
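For context on when encryptMetaData is true above, a hedged sketch of building file encryption properties with a per-column key and a plaintext footer. The builder names (ColumnEncryptionProperties.builder, withKey, withEncryptedColumns, withPlaintextFooter) are assumptions based on the parquet-mr columnar encryption API, the "ssn" column is hypothetical, and the keys are placeholders.

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.apache.parquet.crypto.ColumnEncryptionProperties;
import org.apache.parquet.crypto.FileEncryptionProperties;
import org.apache.parquet.hadoop.metadata.ColumnPath;

public class EncryptionPropertiesExample {
  static FileEncryptionProperties buildProperties() {
    byte[] footerKey = "0123456789012345".getBytes(StandardCharsets.UTF_8); // placeholder 16-byte key
    byte[] columnKey = "1234567890123450".getBytes(StandardCharsets.UTF_8); // placeholder 16-byte key

    Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns = new HashMap<>();
    encryptedColumns.put(ColumnPath.fromDotString("ssn"),
        ColumnEncryptionProperties.builder("ssn").withKey(columnKey).build());

    // With a plaintext footer, addRowGroup above writes encrypted_column_metadata for "ssn"
    // plus a redacted plaintext ColumnMetaData (no statistics or encoding stats) for old readers.
    return FileEncryptionProperties.builder(footerKey)
        .withEncryptedColumns(encryptedColumns)
        .withPlaintextFooter()
        .build();
  }
}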
Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.
The class KeyMetadata, method parse.
static KeyMetadata parse(byte[] keyMetadataBytes) {
  String keyMetaDataString = new String(keyMetadataBytes, StandardCharsets.UTF_8);
  Map<String, Object> keyMetadataJson = null;
  try {
    keyMetadataJson = OBJECT_MAPPER.readValue(new StringReader(keyMetaDataString),
        new TypeReference<Map<String, Object>>() {});
  } catch (IOException e) {
    throw new ParquetCryptoRuntimeException("Failed to parse key metadata " + keyMetaDataString, e);
  }
  // 1. Extract "key material type", and make sure it is supported
  String keyMaterialType = (String) keyMetadataJson.get(KeyMaterial.KEY_MATERIAL_TYPE_FIELD);
  if (!KeyMaterial.KEY_MATERIAL_TYPE1.equals(keyMaterialType)) {
    throw new ParquetCryptoRuntimeException(
        "Wrong key material type: " + keyMaterialType + " vs " + KeyMaterial.KEY_MATERIAL_TYPE1);
  }
  // 2. Check if "key material" is stored internally in Parquet file key metadata, or is stored externally
  Boolean isInternalStorage = (Boolean) keyMetadataJson.get(KEY_MATERIAL_INTERNAL_STORAGE_FIELD);
  String keyReference;
  KeyMaterial keyMaterial;
  if (isInternalStorage) {
    // 3.1 "key material" is stored internally, inside "key metadata" - parse it
    keyMaterial = KeyMaterial.parse(keyMetadataJson);
    keyReference = null;
  } else {
    // 3.2 "key material" is stored externally. "key metadata" keeps a reference to it
    keyReference = (String) keyMetadataJson.get(KEY_REFERENCE_FIELD);
    keyMaterial = null;
  }
  return new KeyMetadata(isInternalStorage, keyReference, keyMaterial);
}
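For reference, a hedged sketch of the two key metadata JSON shapes this parser distinguishes. The literal field names and the PKMT1 type tag mirror the constants referenced above as defined by the Parquet key management tooling, but treat the exact strings and example values as illustrative assumptions.

public class KeyMetadataJsonShapes {
  // External storage: key metadata carries only a reference; the key material itself lives in a
  // separate store (for example the HadoopFSKeyMaterialStore shown earlier).
  static final String EXTERNAL_STORAGE_EXAMPLE =
      "{\"keyMaterialType\":\"PKMT1\",\"internalStorage\":false,\"keyReference\":\"kr1\"}";

  // Internal storage: internalStorage is true and the same JSON object also carries the key
  // material fields, which KeyMetadata.parse hands off to KeyMaterial.parse (fields omitted here).
  static final String INTERNAL_STORAGE_PREFIX =
      "{\"keyMaterialType\":\"PKMT1\",\"internalStorage\":true";
}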