Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by Apache.
Class KeyToolkit, method createAndInitKmsClient.
/**
 * Instantiates the {@code KmsClient} implementation configured under
 * {@code KMS_CLIENT_CLASS_PROPERTY_NAME} and initializes it with the given KMS parameters.
 *
 * @param configuration  configuration the client class is looked up in; also handed to the client's initialize call
 * @param kmsInstanceID  passed through unchanged to the client's initialize call
 * @param kmsInstanceURL passed through unchanged to the client's initialize call
 * @param accessToken    passed through unchanged to the client's initialize call
 * @return the newly created and initialized client
 * @throws ParquetCryptoRuntimeException if the property is not set, or if the configured class
 *         cannot be instantiated (no accessible no-arg constructor, abstract class, constructor failure,
 *         or a configuration error reported by the class lookup)
 */
private static KmsClient createAndInitKmsClient(Configuration configuration, String kmsInstanceID, String kmsInstanceURL, String accessToken) {
  Class<?> kmsClientClass = null;
  KmsClient kmsClient = null;
  try {
    kmsClientClass = ConfigurationUtil.getClassFromConfig(configuration, KMS_CLIENT_CLASS_PROPERTY_NAME, KmsClient.class);
    if (null == kmsClientClass) {
      throw new ParquetCryptoRuntimeException("Unspecified " + KMS_CLIENT_CLASS_PROPERTY_NAME);
    }
    // Class.newInstance() is deprecated: it rethrows whatever the constructor throws, including
    // checked exceptions, bypassing compile-time checking. Going through the Constructor wraps such
    // failures in InvocationTargetException, which is handled below together with the other
    // reflective failures.
    kmsClient = (KmsClient) kmsClientClass.getDeclaredConstructor().newInstance();
  } catch (ReflectiveOperationException | BadConfigurationException e) {
    throw new ParquetCryptoRuntimeException("Could not instantiate KmsClient class: " + kmsClientClass, e);
  }
  kmsClient.initialize(configuration, kmsInstanceID, kmsInstanceURL, accessToken);
  return kmsClient;
}
Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by Apache.
Class ParquetMetadataConverter, method fromParquetMetadata.
/**
 * Converts a format-level {@code FileMetaData} into a {@code ParquetMetadata}: one
 * {@code BlockMetaData} per row group, one {@code ColumnChunkMetaData} per column chunk, plus the
 * file's key/value metadata.
 *
 * <p>For each column chunk the crypto metadata decides the handling:
 * <ul>
 *   <li>no crypto metadata: plaintext column; it is registered as plaintext on the decryptor when a
 *       decryptor is present and the file is not a plaintext file;</li>
 *   <li>encrypted with the footer key: registered on the decryptor immediately;</li>
 *   <li>encrypted with a column key: registration is deferred and the still-encrypted metadata
 *       buffer is carried in the resulting column chunk.</li>
 * </ul>
 *
 * @param parquetMetadata the format-level file metadata to convert
 * @param fileDecryptor   decryptor for the file; may be null
 * @param encryptedFooter whether the file footer is encrypted
 * @return the converted metadata
 * @throws IOException declared by the signature
 * @throws ParquetDecodingException if the column chunks of one row group disagree on their file path
 * @throws ParquetCryptoRuntimeException if a footer-key-encrypted column appears in a file with a
 *         plaintext footer, has no column metadata, or no decryptor is available
 */
public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata, InternalFileDecryptor fileDecryptor, boolean encryptedFooter) throws IOException {
  final MessageType schema = fromParquetSchema(parquetMetadata.getSchema(), parquetMetadata.getColumn_orders());
  final List<BlockMetaData> convertedBlocks = new ArrayList<>();
  final List<RowGroup> rowGroups = parquetMetadata.getRow_groups();
  if (rowGroups != null) {
    for (RowGroup rowGroup : rowGroups) {
      final BlockMetaData block = new BlockMetaData();
      block.setRowCount(rowGroup.getNum_rows());
      block.setTotalByteSize(rowGroup.getTotal_byte_size());
      // not set in legacy files
      if (rowGroup.isSetOrdinal()) {
        block.setOrdinal(rowGroup.getOrdinal());
      }
      final List<ColumnChunk> chunks = rowGroup.getColumns();
      // The first chunk's path is the reference every other chunk of this row group must match.
      final String rowGroupFilePath = chunks.get(0).getFile_path();
      int columnOrdinal = 0;
      for (ColumnChunk chunk : chunks) {
        final String chunkFilePath = chunk.getFile_path();
        final boolean sameFile = (rowGroupFilePath == null)
            ? (chunkFilePath == null)
            : rowGroupFilePath.equals(chunkFilePath);
        if (!sameFile) {
          throw new ParquetDecodingException("all column chunks of the same row group must be in the same file for now");
        }

        final ColumnMetaData chunkMetaData = chunk.meta_data;
        final ColumnCryptoMetaData cryptoMetaData = chunk.getCrypto_metadata();
        ColumnPath columnPath = null;
        boolean metadataEncryptedWithColumnKey = false;

        if (cryptoMetaData == null) {
          // Plaintext column
          columnPath = getPath(chunkMetaData);
          if (fileDecryptor != null && !fileDecryptor.plaintextFile()) {
            // mark this column as plaintext in encrypted file decryptor
            fileDecryptor.setColumnCryptoMetadata(columnPath, false, false, (byte[]) null, columnOrdinal);
          }
        } else if (cryptoMetaData.isSetENCRYPTION_WITH_FOOTER_KEY()) {
          // Column encrypted with footer key
          if (!encryptedFooter) {
            throw new ParquetCryptoRuntimeException("Column encrypted with footer key in file with plaintext footer");
          }
          if (chunkMetaData == null) {
            throw new ParquetCryptoRuntimeException("ColumnMetaData not set in Encryption with Footer key");
          }
          if (fileDecryptor == null) {
            throw new ParquetCryptoRuntimeException("Column encrypted with footer key: No keys available");
          }
          columnPath = getPath(chunkMetaData);
          fileDecryptor.setColumnCryptoMetadata(columnPath, true, true, (byte[]) null, columnOrdinal);
        } else {
          // Column encrypted with column key
          // setColumnCryptoMetadata triggers KMS interaction, hence delayed until this column is projected
          metadataEncryptedWithColumnKey = true;
        }

        final String createdBy = parquetMetadata.getCreated_by();
        final ColumnChunkMetaData column;
        if (metadataEncryptedWithColumnKey) {
          // column encrypted with column key
          // Metadata will be decrypted later, if this column is accessed
          final EncryptionWithColumnKey columnKeyStruct = cryptoMetaData.getENCRYPTION_WITH_COLUMN_KEY();
          final List<String> pathInSchema = columnKeyStruct.getPath_in_schema();
          final byte[] columnKeyMetadata = columnKeyStruct.getKey_metadata();
          columnPath = ColumnPath.get(pathInSchema.toArray(new String[0]));
          final byte[] encryptedMetadataBuffer = chunk.getEncrypted_column_metadata();
          column = ColumnChunkMetaData.getWithEncryptedMetadata(
              this,
              columnPath,
              schema.getType(columnPath.toArray()).asPrimitiveType(),
              encryptedMetadataBuffer,
              columnKeyMetadata,
              fileDecryptor,
              rowGroup.getOrdinal(),
              columnOrdinal,
              createdBy);
        } else {
          // unencrypted column, or encrypted with footer key
          column = buildColumnChunkMetaData(
              chunkMetaData,
              columnPath,
              schema.getType(columnPath.toArray()).asPrimitiveType(),
              createdBy);
          column.setRowGroupOrdinal(rowGroup.getOrdinal());
          if (chunkMetaData.isSetBloom_filter_offset()) {
            column.setBloomFilterOffset(chunkMetaData.getBloom_filter_offset());
          }
        }
        column.setColumnIndexReference(toColumnIndexReference(chunk));
        column.setOffsetIndexReference(toOffsetIndexReference(chunk));
        // TODO
        // index_page_offset
        // key_value_metadata
        block.addColumn(column);
        columnOrdinal++;
      }
      block.setPath(rowGroupFilePath);
      convertedBlocks.add(block);
    }
  }

  final Map<String, String> keyValues = new HashMap<>();
  final List<KeyValue> rawKeyValues = parquetMetadata.getKey_value_metadata();
  if (rawKeyValues != null) {
    for (KeyValue entry : rawKeyValues) {
      keyValues.put(entry.key, entry.value);
    }
  }
  final org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
      new org.apache.parquet.hadoop.metadata.FileMetaData(schema, keyValues, parquetMetadata.getCreated_by(), fileDecryptor);
  return new ParquetMetadata(fileMetaData, convertedBlocks);
}
Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by Apache.
Class ParquetMetadataConverter, method readParquetMetadata.
/**
 * Reads the file metadata from {@code from} according to {@code filter}, applies the
 * encryption-related checks for files with a plaintext footer, and converts the result via
 * {@code fromParquetMetadata}.
 *
 * @param from                 stream the footer is read from
 * @param filter               selects how the footer is read (all, skip, by offset, by range)
 * @param fileDecryptor        decryptor for the file; may be null when the footer is not encrypted
 * @param encryptedFooter      whether the footer is encrypted; when true, {@code fileDecryptor} is dereferenced
 * @param combinedFooterLength passed to the footer integrity verification
 * @return the converted metadata
 * @throws IOException if reading the footer fails
 * @throws ParquetCryptoRuntimeException if a decryptor is applied to a plaintext file and plaintext
 *         files are not allowed
 */
public ParquetMetadata readParquetMetadata(final InputStream from, MetadataFilter filter, final InternalFileDecryptor fileDecryptor, final boolean encryptedFooter, final int combinedFooterLength) throws IOException {
  // Both values are only meaningful for an encrypted footer; otherwise they stay null.
  final BlockCipher.Decryptor footerDecryptor;
  final byte[] encryptedFooterAAD;
  if (encryptedFooter) {
    footerDecryptor = fileDecryptor.fetchFooterDecryptor();
    encryptedFooterAAD = AesCipher.createFooterAAD(fileDecryptor.getFileAAD());
  } else {
    footerDecryptor = null;
    encryptedFooterAAD = null;
  }

  final FileMetaData footer = filter.accept(new MetadataFilterVisitor<FileMetaData, IOException>() {
    @Override
    public FileMetaData visit(NoFilter noFilter) throws IOException {
      return readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
    }

    @Override
    public FileMetaData visit(SkipMetadataFilter skipFilter) throws IOException {
      return readFileMetaData(from, true, footerDecryptor, encryptedFooterAAD);
    }

    @Override
    public FileMetaData visit(OffsetMetadataFilter offsetFilter) throws IOException {
      final FileMetaData full = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
      return filterFileMetaDataByStart(full, offsetFilter);
    }

    @Override
    public FileMetaData visit(RangeMetadataFilter rangeFilter) throws IOException {
      final FileMetaData full = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
      return filterFileMetaDataByMidpoint(full, rangeFilter);
    }
  });
  LOG.debug("{}", footer);

  if (null != fileDecryptor && !encryptedFooter) {
    if (footer.isSetEncryption_algorithm()) {
      // Encrypted file with plaintext footer
      // if no fileDecryptor, can still read plaintext columns
      fileDecryptor.setFileCryptoMetaData(footer.getEncryption_algorithm(), false, footer.getFooter_signing_key_metadata());
      if (fileDecryptor.checkFooterIntegrity()) {
        verifyFooterIntegrity(from, fileDecryptor, combinedFooterLength);
      }
    } else {
      // Plaintext file
      fileDecryptor.setPlaintextFile();
      // Done to detect files that were not encrypted by mistake
      if (!fileDecryptor.plaintextFilesAllowed()) {
        throw new ParquetCryptoRuntimeException("Applying decryptor on plaintext file");
      }
    }
  }

  final ParquetMetadata converted = fromParquetMetadata(footer, fileDecryptor, encryptedFooter);
  if (LOG.isDebugEnabled()) {
    LOG.debug(ParquetMetadata.toPrettyJSON(converted));
  }
  return converted;
}
Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by Apache.
Class KeyMetadata, method createSerializedForExternalMaterial.
/**
 * Serializes key metadata that only references externally stored key material.
 * For external material only; for internal material, create serialized KeyMaterial directly.
 *
 * @param keyReference reference to the externally stored key material
 * @return the key metadata as produced by {@code OBJECT_MAPPER.writeValueAsString}
 * @throws ParquetCryptoRuntimeException if serialization fails
 */
static String createSerializedForExternalMaterial(String keyReference) {
  final Map<String, Object> fields = new HashMap<>(3);
  // Field 1: the key material type
  fields.put(KeyMaterial.KEY_MATERIAL_TYPE_FIELD, KeyMaterial.KEY_MATERIAL_TYPE1);
  // Field 2: internal storage flag, false here
  fields.put(KEY_MATERIAL_INTERNAL_STORAGE_FIELD, Boolean.FALSE);
  // Field 3: with externally stored key material, the key metadata keeps only a reference to it
  fields.put(KEY_REFERENCE_FIELD, keyReference);

  final String serialized;
  try {
    serialized = OBJECT_MAPPER.writeValueAsString(fields);
  } catch (IOException e) {
    throw new ParquetCryptoRuntimeException("Failed to serialize key metadata", e);
  }
  return serialized;
}
Use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by Apache.
Class PropertiesDrivenCryptoFactory, method getColumnEncryptionProperties.
/**
 * Parses a column-keys specification of the form {@code keyId:col,col;keyId:col,...} and builds
 * the encryption properties for every listed column. A fresh key of {@code dekLength} bytes is
 * drawn from {@code RANDOM} for each column, and its key metadata is obtained from {@code keyWrapper}.
 *
 * @param dekLength  length in bytes of each generated column key
 * @param columnKeys the mapping string; entries are separated by ';', key id and columns by ':',
 *                   column names by ','; blank entries are skipped
 * @param keyWrapper produces the key metadata for each generated column key
 * @return encryption properties keyed by column path; never empty
 * @throws ParquetCryptoRuntimeException on a malformed entry, an empty key id or column name,
 *         a column listed more than once, or when no column ends up configured
 */
private Map<ColumnPath, ColumnEncryptionProperties> getColumnEncryptionProperties(int dekLength, String columnKeys, FileKeyWrapper keyWrapper) throws ParquetCryptoRuntimeException {
  final Map<ColumnPath, ColumnEncryptionProperties> propertiesByColumn = new HashMap<>();

  for (String rawMapping : columnKeys.split(";")) {
    final String mapping = rawMapping.trim();
    if (mapping.isEmpty()) {
      continue;
    }

    final String[] keyAndColumns = mapping.split(":");
    if (keyAndColumns.length != 2) {
      throw new ParquetCryptoRuntimeException("Incorrect key to columns mapping in " + COLUMN_KEYS_PROPERTY_NAME + ": [" + mapping + "]");
    }

    final String columnKeyId = keyAndColumns[0].trim();
    if (columnKeyId.isEmpty()) {
      throw new ParquetCryptoRuntimeException("Empty key name in " + COLUMN_KEYS_PROPERTY_NAME);
    }

    final String[] columnNames = keyAndColumns[1].trim().split(",");
    if (columnNames.length == 0) {
      throw new ParquetCryptoRuntimeException("No columns to encrypt defined for key: " + columnKeyId);
    }

    for (String rawColumnName : columnNames) {
      final String columnName = rawColumnName.trim();
      if (columnName.isEmpty()) {
        throw new ParquetCryptoRuntimeException("Empty column name in " + COLUMN_KEYS_PROPERTY_NAME + " for key: " + columnKeyId);
      }

      final ColumnPath columnPath = ColumnPath.fromDotString(columnName);
      if (propertiesByColumn.containsKey(columnPath)) {
        throw new ParquetCryptoRuntimeException("Multiple keys defined for the same column: " + columnName);
      }

      // Each column gets its own randomly generated key.
      final byte[] keyBytes = new byte[dekLength];
      RANDOM.nextBytes(keyBytes);
      final byte[] keyMetadata = keyWrapper.getEncryptionKeyMetadata(keyBytes, columnKeyId, false);

      propertiesByColumn.put(
          columnPath,
          ColumnEncryptionProperties.builder(columnPath)
              .withKey(keyBytes)
              .withKeyMetaData(keyMetadata)
              .build());
    }
  }

  if (propertiesByColumn.isEmpty()) {
    throw new ParquetCryptoRuntimeException("No column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
  }
  return propertiesByColumn;
}
Aggregations