Use of org.apache.parquet.crypto.FileEncryptionProperties in project parquet-mr by apache.
The class TestColumnIndexFiltering, method createFiles.
@BeforeClass
public static void createFiles() throws IOException {
  writePhoneBookToFile(FILE_V1, WriterVersion.PARQUET_1_0, null);
  writePhoneBookToFile(FILE_V2, WriterVersion.PARQUET_2_0, null);
  FileEncryptionProperties encryptionProperties = getFileEncryptionProperties();
  writePhoneBookToFile(FILE_V1_E, WriterVersion.PARQUET_1_0, encryptionProperties);
  writePhoneBookToFile(FILE_V2_E, WriterVersion.PARQUET_2_0, encryptionProperties);
}
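For reference, reading these encrypted files back requires matching FileDecryptionProperties. The following is a minimal sketch, not the verbatim test code: it assumes the key and key-id constants from the getFileEncryptionProperties fixture (shown in the next snippet), and that withKeyID stored each key id string as UTF-8 key metadata.

// Assumes imports from org.apache.parquet.crypto and java.nio.charset.StandardCharsets
DecryptionKeyRetriever keyRetriever = new DecryptionKeyRetriever() {
  @Override
  public byte[] getKey(byte[] keyMetadata) {
    // The fixture set the key id string as key metadata via withKeyID(...)
    String keyId = new String(keyMetadata, StandardCharsets.UTF_8);
    if (FOOTER_ENCRYPTION_KEY_ID.equals(keyId)) return FOOTER_ENCRYPTION_KEY;
    if (COLUMN_ENCRYPTION_KEY1_ID.equals(keyId)) return COLUMN_ENCRYPTION_KEY1;
    if (COLUMN_ENCRYPTION_KEY2_ID.equals(keyId)) return COLUMN_ENCRYPTION_KEY2;
    throw new ParquetCryptoRuntimeException("Unknown key id: " + keyId);
  }
};
FileDecryptionProperties decryptionProperties = FileDecryptionProperties.builder()
    .withKeyRetriever(keyRetriever)
    .build();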
Use of org.apache.parquet.crypto.FileEncryptionProperties in project parquet-mr by apache.
The class TestBloomFiltering, method getFileEncryptionProperties.
private static FileEncryptionProperties getFileEncryptionProperties() {
  ColumnEncryptionProperties columnProperties1 = ColumnEncryptionProperties.builder("id")
      .withKey(COLUMN_ENCRYPTION_KEY1)
      .withKeyID(COLUMN_ENCRYPTION_KEY1_ID)
      .build();
  ColumnEncryptionProperties columnProperties2 = ColumnEncryptionProperties.builder("name")
      .withKey(COLUMN_ENCRYPTION_KEY2)
      .withKeyID(COLUMN_ENCRYPTION_KEY2_ID)
      .build();
  Map<ColumnPath, ColumnEncryptionProperties> columnPropertiesMap = new HashMap<>();
  columnPropertiesMap.put(columnProperties1.getPath(), columnProperties1);
  columnPropertiesMap.put(columnProperties2.getPath(), columnProperties2);
  return FileEncryptionProperties.builder(FOOTER_ENCRYPTION_KEY)
      .withFooterKeyID(FOOTER_ENCRYPTION_KEY_ID)
      .withEncryptedColumns(columnPropertiesMap)
      .build();
}
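As a usage sketch (not part of the test itself), properties built this way can be handed to a writer; ParquetWriter.Builder exposes withEncryption(...) for this. The schema and output path below are placeholders chosen to match the "id" and "name" columns above.

// Hypothetical write path; schema and file location are assumptions
MessageType schema = Types.buildMessage()
    .required(PrimitiveTypeName.INT64).named("id")
    .required(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("name")
    .named("phonebook");
ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path("/tmp/encrypted.parquet"))
    .withConf(new Configuration())
    .withType(schema)
    .withEncryption(getFileEncryptionProperties()) // encrypts "id", "name" and the footer
    .build();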
Use of org.apache.parquet.crypto.FileEncryptionProperties in project parquet-mr by apache.
The class ParquetOutputFormat, method getRecordWriter.
public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, CompressionCodecName codec, Mode mode) throws IOException, InterruptedException {
  final WriteSupport<T> writeSupport = getWriteSupport(conf);
  ParquetProperties.Builder propsBuilder = ParquetProperties.builder()
      .withPageSize(getPageSize(conf))
      .withDictionaryPageSize(getDictionaryPageSize(conf))
      .withDictionaryEncoding(getEnableDictionary(conf))
      .withWriterVersion(getWriterVersion(conf))
      .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf))
      .withMinRowCountForPageSizeCheck(getMinRowCountForPageSizeCheck(conf))
      .withMaxRowCountForPageSizeCheck(getMaxRowCountForPageSizeCheck(conf))
      .withColumnIndexTruncateLength(getColumnIndexTruncateLength(conf))
      .withStatisticsTruncateLength(getStatisticsTruncateLength(conf))
      .withMaxBloomFilterBytes(getBloomFilterMaxBytes(conf))
      .withBloomFilterEnabled(getBloomFilterEnabled(conf))
      .withPageRowCountLimit(getPageRowCountLimit(conf))
      .withPageWriteChecksumEnabled(getPageWriteChecksumEnabled(conf));
  // Per-column overrides for dictionary encoding and bloom filter settings
  new ColumnConfigParser()
      .withColumnConfig(ENABLE_DICTIONARY, key -> conf.getBoolean(key, false), propsBuilder::withDictionaryEncoding)
      .withColumnConfig(BLOOM_FILTER_ENABLED, key -> conf.getBoolean(key, false), propsBuilder::withBloomFilterEnabled)
      .withColumnConfig(BLOOM_FILTER_EXPECTED_NDV, key -> conf.getLong(key, -1L), propsBuilder::withBloomFilterNDV)
      .parseConfig(conf);
  ParquetProperties props = propsBuilder.build();
  long blockSize = getLongBlockSize(conf);
  int maxPaddingSize = getMaxPaddingSize(conf);
  boolean validating = getValidation(conf);
  LOG.info("ParquetRecordWriter [block size: {}b, row group padding size: {}b, validating: {}]", blockSize, maxPaddingSize, validating);
  LOG.debug("Parquet properties are:\n{}", props);
  WriteContext fileWriteContext = writeSupport.init(conf);
  // Resolved via the EncryptionPropertiesFactory configured in conf; null means a plaintext file
  FileEncryptionProperties encryptionProperties = createEncryptionProperties(conf, file, fileWriteContext);
  ParquetFileWriter w = new ParquetFileWriter(HadoopOutputFile.fromPath(file, conf),
      fileWriteContext.getSchema(), mode, blockSize, maxPaddingSize,
      props.getColumnIndexTruncateLength(), props.getStatisticsTruncateLength(),
      props.getPageWriteChecksumEnabled(), encryptionProperties);
  w.start();
  float maxLoad = conf.getFloat(ParquetOutputFormat.MEMORY_POOL_RATIO, MemoryManager.DEFAULT_MEMORY_POOL_RATIO);
  long minAllocation = conf.getLong(ParquetOutputFormat.MIN_MEMORY_ALLOCATION, MemoryManager.DEFAULT_MIN_MEMORY_ALLOCATION);
  synchronized (ParquetOutputFormat.class) {
    if (memoryManager == null) {
      memoryManager = new MemoryManager(maxLoad, minAllocation);
    }
  }
  if (memoryManager.getMemoryPoolRatio() != maxLoad) {
    LOG.warn("The configuration " + MEMORY_POOL_RATIO + " has been set. It should not be reset by the new value: " + maxLoad);
  }
  return new ParquetRecordWriter<T>(w, writeSupport, fileWriteContext.getSchema(), fileWriteContext.getExtraMetaData(), blockSize, codec, validating, props, memoryManager, conf);
}
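The createEncryptionProperties(conf, file, fileWriteContext) call delegates to an EncryptionPropertiesFactory loaded from the configuration, so a job switches encryption on purely through properties. A hedged sketch of such a configuration, with placeholder key ids and KMS client class:

Configuration conf = new Configuration();
// Factory that turns the parquet.encryption.* properties into FileEncryptionProperties
conf.set("parquet.crypto.factory.class",
    "org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory");
// Placeholder KMS client and master key ids; these values are deployment-specific
conf.set("parquet.encryption.kms.client.class", "com.example.InMemoryKmsClient");
conf.set("parquet.encryption.footer.key", "footerMasterKeyId");
conf.set("parquet.encryption.column.keys", "columnMasterKeyId:id,name");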
Use of org.apache.parquet.crypto.FileEncryptionProperties in project parquet-mr by apache.
The class EncDecProperties, method getFileEncryptionProperties.
public static FileEncryptionProperties getFileEncryptionProperties(String[] encryptColumns, ParquetCipher cipher, Boolean encryptFooter) {
  if (encryptColumns.length == 0) {
    return null;
  }
  Map<ColumnPath, ColumnEncryptionProperties> columnPropertyMap = new HashMap<>();
  for (String encryptColumn : encryptColumns) {
    ColumnPath columnPath = ColumnPath.fromDotString(encryptColumn);
    ColumnEncryptionProperties columnEncryptionProperties = ColumnEncryptionProperties.builder(columnPath)
        .withKey(COL_KEY)
        .withKeyMetaData(COL_KEY_METADATA)
        .build();
    columnPropertyMap.put(columnPath, columnEncryptionProperties);
  }
  FileEncryptionProperties.Builder encryptionPropertiesBuilder = FileEncryptionProperties.builder(FOOTER_KEY)
      .withFooterKeyMetadata(FOOTER_KEY_METADATA)
      .withAlgorithm(cipher)
      .withEncryptedColumns(columnPropertyMap);
  if (!encryptFooter) {
    encryptionPropertiesBuilder.withPlaintextFooter();
  }
  return encryptionPropertiesBuilder.build();
}
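A possible call site for this helper (the column names here are examples, not taken from the benchmark code):

// Encrypt the "id" and "name" columns with AES_GCM_CTR_V1; keep the footer readable in plaintext
FileEncryptionProperties encryptionProperties = EncDecProperties.getFileEncryptionProperties(
    new String[] { "id", "name" }, ParquetCipher.AES_GCM_CTR_V1, false);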
Use of org.apache.parquet.crypto.FileEncryptionProperties in project parquet-mr by apache.
The class PropertiesDrivenCryptoFactory, method getFileEncryptionProperties.
@Override
public FileEncryptionProperties getFileEncryptionProperties(Configuration fileHadoopConfig, Path tempFilePath, WriteContext fileWriteContext) throws ParquetCryptoRuntimeException {
  String footerKeyId = fileHadoopConfig.getTrimmed(FOOTER_KEY_PROPERTY_NAME);
  String columnKeysStr = fileHadoopConfig.getTrimmed(COLUMN_KEYS_PROPERTY_NAME);
  String uniformKeyId = fileHadoopConfig.getTrimmed(UNIFORM_KEY_PROPERTY_NAME);
  boolean emptyFooterKeyId = stringIsEmpty(footerKeyId);
  boolean emptyColumnKeyIds = stringIsEmpty(columnKeysStr);
  boolean emptyUniformKeyId = stringIsEmpty(uniformKeyId);
  // No keys configured: the file shouldn't be encrypted
  if (emptyFooterKeyId && emptyColumnKeyIds && emptyUniformKeyId) {
    LOG.debug("Unencrypted file: {}", tempFilePath);
    return null;
  }
  if (emptyUniformKeyId) {
    // Non-uniform encryption. Must have both footer and column key ids
    if (emptyFooterKeyId) {
      throw new ParquetCryptoRuntimeException("No footer key configured in " + FOOTER_KEY_PROPERTY_NAME);
    }
    if (emptyColumnKeyIds) {
      throw new ParquetCryptoRuntimeException("No column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
    }
  } else {
    // Uniform encryption. Can't also have separate footer or column key ids configured
    if (!emptyFooterKeyId) {
      throw new ParquetCryptoRuntimeException("Uniform encryption. Can't have footer key configured in " + FOOTER_KEY_PROPERTY_NAME);
    }
    if (!emptyColumnKeyIds) {
      throw new ParquetCryptoRuntimeException("Uniform encryption. Can't have column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
    }
    // The uniform key doubles as the footer key
    footerKeyId = uniformKeyId;
  }
  FileKeyMaterialStore keyMaterialStore = null;
  boolean keyMaterialInternalStorage = fileHadoopConfig.getBoolean(KeyToolkit.KEY_MATERIAL_INTERNAL_PROPERTY_NAME, KeyToolkit.KEY_MATERIAL_INTERNAL_DEFAULT);
  if (!keyMaterialInternalStorage) {
    // External key material: stored in a separate file next to the Parquet file
    if (tempFilePath == null) {
      throw new ParquetCryptoRuntimeException("Output file path cannot be null");
    }
    try {
      keyMaterialStore = new HadoopFSKeyMaterialStore(tempFilePath.getFileSystem(fileHadoopConfig));
      keyMaterialStore.initialize(tempFilePath, fileHadoopConfig, false);
    } catch (IOException e) {
      throw new ParquetCryptoRuntimeException("Failed to get key material store", e);
    }
  }
  FileKeyWrapper keyWrapper = new FileKeyWrapper(fileHadoopConfig, keyMaterialStore);
  String algo = fileHadoopConfig.getTrimmed(ENCRYPTION_ALGORITHM_PROPERTY_NAME, ENCRYPTION_ALGORITHM_DEFAULT);
  ParquetCipher cipher;
  try {
    cipher = ParquetCipher.valueOf(algo);
  } catch (IllegalArgumentException e) {
    throw new ParquetCryptoRuntimeException("Wrong encryption algorithm: " + algo);
  }
  int dekLengthBits = fileHadoopConfig.getInt(KeyToolkit.DATA_KEY_LENGTH_PROPERTY_NAME, KeyToolkit.DATA_KEY_LENGTH_DEFAULT);
  if (Arrays.binarySearch(ACCEPTABLE_DATA_KEY_LENGTHS, dekLengthBits) < 0) {
    throw new ParquetCryptoRuntimeException("Wrong data key length: " + dekLengthBits);
  }
  int dekLength = dekLengthBits / 8;
  // Generate a random footer data encryption key and wrap it with the configured master key
  byte[] footerKeyBytes = new byte[dekLength];
  RANDOM.nextBytes(footerKeyBytes);
  byte[] footerKeyMetadata = keyWrapper.getEncryptionKeyMetadata(footerKeyBytes, footerKeyId, true);
  boolean plaintextFooter = fileHadoopConfig.getBoolean(PLAINTEXT_FOOTER_PROPERTY_NAME, PLAINTEXT_FOOTER_DEFAULT);
  FileEncryptionProperties.Builder propertiesBuilder = FileEncryptionProperties.builder(footerKeyBytes)
      .withFooterKeyMetadata(footerKeyMetadata)
      .withAlgorithm(cipher);
  if (emptyUniformKeyId) {
    Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns = getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper);
    propertiesBuilder = propertiesBuilder.withEncryptedColumns(encryptedColumns);
  }
  if (plaintextFooter) {
    propertiesBuilder = propertiesBuilder.withPlaintextFooter();
  }
  if (null != keyMaterialStore) {
    keyMaterialStore.saveMaterial();
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("File encryption properties for {} - algo: {}; footer key id: {}; uniform key id: {}; " +
        "plaintext footer: {}; internal key material: {}; encrypted columns: {}",
        tempFilePath, cipher, footerKeyId, uniformKeyId, plaintextFooter, keyMaterialInternalStorage, columnKeysStr);
  }
  return propertiesBuilder.build();
}
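On the read path, the mirror of this interface is DecryptionPropertiesFactory. A minimal sketch of resolving decryption properties from the same Hadoop configuration; the hadoopConfig and filePath variables are assumed to be in scope:

DecryptionPropertiesFactory factory = DecryptionPropertiesFactory.loadFactory(hadoopConfig);
FileDecryptionProperties decryptionProperties = factory.getFileDecryptionProperties(hadoopConfig, filePath);
ParquetReadOptions options = HadoopReadOptions.builder(hadoopConfig)
    .withDecryption(decryptionProperties)
    .build();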