Search in sources :

Example 6 with FileEncryptionProperties

Use of org.apache.parquet.crypto.FileEncryptionProperties in the project parquet-mr, by Apache.

From the class TestColumnIndexFiltering, the method createFiles.

/**
 * Writes the phone-book fixture files once for the whole test class:
 * plaintext and encrypted variants for both Parquet writer versions
 * (v1 and v2), so filtering tests cover all four combinations.
 *
 * @throws IOException if writing any fixture file fails
 */
@BeforeClass
public static void createFiles() throws IOException {
    // Plaintext files (no encryption properties).
    writePhoneBookToFile(FILE_V1, WriterVersion.PARQUET_1_0, null);
    writePhoneBookToFile(FILE_V2, WriterVersion.PARQUET_2_0, null);
    FileEncryptionProperties encryptionProperties = getFileEncryptionProperties();
    // Encrypted files; use the imported WriterVersion consistently instead of
    // the fully-qualified ParquetProperties.WriterVersion used before.
    writePhoneBookToFile(FILE_V1_E, WriterVersion.PARQUET_1_0, encryptionProperties);
    writePhoneBookToFile(FILE_V2_E, WriterVersion.PARQUET_2_0, encryptionProperties);
}
Also used : FileEncryptionProperties(org.apache.parquet.crypto.FileEncryptionProperties) BeforeClass(org.junit.BeforeClass)

Example 7 with FileEncryptionProperties

Use of org.apache.parquet.crypto.FileEncryptionProperties in the project parquet-mr, by Apache.

From the class TestBloomFiltering, the method getFileEncryptionProperties.

/**
 * Builds file encryption properties for the bloom-filter tests: the "id" and
 * "name" columns are each encrypted with their own key, and the footer is
 * encrypted with the footer key.
 */
private static FileEncryptionProperties getFileEncryptionProperties() {
    ColumnEncryptionProperties idColumn = ColumnEncryptionProperties.builder("id").withKey(COLUMN_ENCRYPTION_KEY1).withKeyID(COLUMN_ENCRYPTION_KEY1_ID).build();
    ColumnEncryptionProperties nameColumn = ColumnEncryptionProperties.builder("name").withKey(COLUMN_ENCRYPTION_KEY2).withKeyID(COLUMN_ENCRYPTION_KEY2_ID).build();
    // Map each encrypted column by its path so the writer can look it up.
    Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns = new HashMap<>();
    encryptedColumns.put(idColumn.getPath(), idColumn);
    encryptedColumns.put(nameColumn.getPath(), nameColumn);
    return FileEncryptionProperties.builder(FOOTER_ENCRYPTION_KEY)
        .withFooterKeyID(FOOTER_ENCRYPTION_KEY_ID)
        .withEncryptedColumns(encryptedColumns)
        .build();
}
Also used : HashMap(java.util.HashMap) FileEncryptionProperties(org.apache.parquet.crypto.FileEncryptionProperties) ColumnEncryptionProperties(org.apache.parquet.crypto.ColumnEncryptionProperties) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath)

Example 8 with FileEncryptionProperties

Use of org.apache.parquet.crypto.FileEncryptionProperties in the project parquet-mr, by Apache.

From the class ParquetOutputFormat, the method getRecordWriter.

/**
 * Creates a {@link ParquetRecordWriter} for the given output file.
 *
 * <p>Assembles all writer tuning options from the Hadoop configuration,
 * resolves per-file encryption properties, opens and starts the underlying
 * {@link ParquetFileWriter}, and registers the writer with the shared
 * memory manager.
 *
 * @param conf  Hadoop configuration holding all writer options
 * @param file  destination path of the Parquet file
 * @param codec compression codec for the data pages
 * @param mode  file-creation mode (e.g. create vs. overwrite)
 * @throws IOException if the output file cannot be created or started
 * @throws InterruptedException declared by the RecordWriter contract
 */
public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, CompressionCodecName codec, Mode mode) throws IOException, InterruptedException {
    // User-provided WriteSupport converts application records into Parquet pages.
    final WriteSupport<T> writeSupport = getWriteSupport(conf);
    // Collect the file-wide writer knobs (page sizes, dictionary, bloom filters,
    // index truncation, checksums, ...) from the configuration.
    ParquetProperties.Builder propsBuilder = ParquetProperties.builder().withPageSize(getPageSize(conf)).withDictionaryPageSize(getDictionaryPageSize(conf)).withDictionaryEncoding(getEnableDictionary(conf)).withWriterVersion(getWriterVersion(conf)).estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf)).withMinRowCountForPageSizeCheck(getMinRowCountForPageSizeCheck(conf)).withMaxRowCountForPageSizeCheck(getMaxRowCountForPageSizeCheck(conf)).withColumnIndexTruncateLength(getColumnIndexTruncateLength(conf)).withStatisticsTruncateLength(getStatisticsTruncateLength(conf)).withMaxBloomFilterBytes(getBloomFilterMaxBytes(conf)).withBloomFilterEnabled(getBloomFilterEnabled(conf)).withPageRowCountLimit(getPageRowCountLimit(conf)).withPageWriteChecksumEnabled(getPageWriteChecksumEnabled(conf));
    // Overlay per-column overrides (dictionary encoding, bloom filter enabled/NDV)
    // parsed from column-suffixed configuration keys.
    new ColumnConfigParser().withColumnConfig(ENABLE_DICTIONARY, key -> conf.getBoolean(key, false), propsBuilder::withDictionaryEncoding).withColumnConfig(BLOOM_FILTER_ENABLED, key -> conf.getBoolean(key, false), propsBuilder::withBloomFilterEnabled).withColumnConfig(BLOOM_FILTER_EXPECTED_NDV, key -> conf.getLong(key, -1L), propsBuilder::withBloomFilterNDV).parseConfig(conf);
    ParquetProperties props = propsBuilder.build();
    long blockSize = getLongBlockSize(conf);
    int maxPaddingSize = getMaxPaddingSize(conf);
    boolean validating = getValidation(conf);
    LOG.info("ParquetRecordWriter [block size: {}b, row group padding size: {}b, validating: {}]", blockSize, maxPaddingSize, validating);
    LOG.debug("Parquet properties are:\n{}", props);
    // Let the WriteSupport derive the schema and extra metadata for this file.
    WriteContext fileWriteContext = writeSupport.init(conf);
    // May be null, in which case the file is written unencrypted.
    FileEncryptionProperties encryptionProperties = createEncryptionProperties(conf, file, fileWriteContext);
    ParquetFileWriter w = new ParquetFileWriter(HadoopOutputFile.fromPath(file, conf), fileWriteContext.getSchema(), mode, blockSize, maxPaddingSize, props.getColumnIndexTruncateLength(), props.getStatisticsTruncateLength(), props.getPageWriteChecksumEnabled(), encryptionProperties);
    // Writes the file header; the file is now open on disk.
    w.start();
    float maxLoad = conf.getFloat(ParquetOutputFormat.MEMORY_POOL_RATIO, MemoryManager.DEFAULT_MEMORY_POOL_RATIO);
    long minAllocation = conf.getLong(ParquetOutputFormat.MIN_MEMORY_ALLOCATION, MemoryManager.DEFAULT_MIN_MEMORY_ALLOCATION);
    // Lazily initialize the shared (presumably static) memory manager exactly once.
    synchronized (ParquetOutputFormat.class) {
        if (memoryManager == null) {
            memoryManager = new MemoryManager(maxLoad, minAllocation);
        }
    }
    // NOTE(review): this read happens outside the synchronized block and uses an
    // exact float != comparison; it only emits a warning, so it is benign, but
    // confirm the intended semantics if memoryManager init ever changes.
    if (memoryManager.getMemoryPoolRatio() != maxLoad) {
        LOG.warn("The configuration " + MEMORY_POOL_RATIO + " has been set. It should not " + "be reset by the new value: " + maxLoad);
    }
    return new ParquetRecordWriter<T>(w, writeSupport, fileWriteContext.getSchema(), fileWriteContext.getExtraMetaData(), blockSize, codec, validating, props, memoryManager, conf);
}
Also used : RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) LoggerFactory(org.slf4j.LoggerFactory) EncryptionPropertiesFactory(org.apache.parquet.crypto.EncryptionPropertiesFactory) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) ConfigurationUtil(org.apache.parquet.hadoop.util.ConfigurationUtil) Mode(org.apache.parquet.hadoop.ParquetFileWriter.Mode) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) ParquetProperties(org.apache.parquet.column.ParquetProperties) FileEncryptionProperties(org.apache.parquet.crypto.FileEncryptionProperties) CodecConfig(org.apache.parquet.hadoop.codec.CodecConfig) Logger(org.slf4j.Logger) WriterVersion(org.apache.parquet.column.ParquetProperties.WriterVersion) HadoopOutputFile(org.apache.parquet.hadoop.util.HadoopOutputFile) IOException(java.io.IOException) WriteContext(org.apache.parquet.hadoop.api.WriteSupport.WriteContext) DEFAULT_BLOOM_FILTER_ENABLED(org.apache.parquet.column.ParquetProperties.DEFAULT_BLOOM_FILTER_ENABLED) Objects(java.util.Objects) JobConf(org.apache.hadoop.mapred.JobConf) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Job(org.apache.hadoop.mapreduce.Job) JobContext(org.apache.hadoop.mapreduce.JobContext) ContextUtil.getConfiguration(org.apache.parquet.hadoop.util.ContextUtil.getConfiguration) WriteSupport(org.apache.parquet.hadoop.api.WriteSupport) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) DEFAULT_BLOCK_SIZE(org.apache.parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE) ParquetProperties(org.apache.parquet.column.ParquetProperties) WriteContext(org.apache.parquet.hadoop.api.WriteSupport.WriteContext) FileEncryptionProperties(org.apache.parquet.crypto.FileEncryptionProperties)

Example 9 with FileEncryptionProperties

Use of org.apache.parquet.crypto.FileEncryptionProperties in the project parquet-mr, by Apache.

From the class EncDecProperties, the method getFileEncryptionProperties.

/**
 * Builds {@link FileEncryptionProperties} that encrypt the given columns with
 * the shared column key and the footer with the footer key.
 *
 * @param encryptColumns dot-string paths of the columns to encrypt; an empty
 *                       array means the file is written unencrypted
 * @param cipher         the Parquet cipher (AES mode) to use
 * @param encryptFooter  whether the footer is encrypted; when {@code false}
 *                       the footer is written in plaintext
 * @return the encryption properties, or {@code null} when no columns are encrypted
 * @throws NullPointerException if {@code encryptFooter} is {@code null}
 */
public static FileEncryptionProperties getFileEncryptionProperties(String[] encryptColumns, ParquetCipher cipher, Boolean encryptFooter) {
    if (encryptColumns.length == 0) {
        // Nothing to encrypt: the caller writes a plaintext file.
        return null;
    }
    // Fail fast with a clear message instead of an opaque NPE at the unboxing below.
    if (encryptFooter == null) {
        throw new NullPointerException("encryptFooter must not be null");
    }
    Map<ColumnPath, ColumnEncryptionProperties> columnPropertyMap = new HashMap<>();
    for (String encryptColumn : encryptColumns) {
        ColumnPath columnPath = ColumnPath.fromDotString(encryptColumn);
        ColumnEncryptionProperties columnEncryptionProperties = ColumnEncryptionProperties.builder(columnPath).withKey(COL_KEY).withKeyMetaData(COL_KEY_METADATA).build();
        columnPropertyMap.put(columnPath, columnEncryptionProperties);
    }
    FileEncryptionProperties.Builder encryptionPropertiesBuilder = FileEncryptionProperties.builder(FOOTER_KEY)
        .withFooterKeyMetadata(FOOTER_KEY_METADATA)
        .withAlgorithm(cipher)
        .withEncryptedColumns(columnPropertyMap);
    if (!encryptFooter) {
        encryptionPropertiesBuilder.withPlaintextFooter();
    }
    return encryptionPropertiesBuilder.build();
}
Also used : HashMap(java.util.HashMap) FileEncryptionProperties(org.apache.parquet.crypto.FileEncryptionProperties) ColumnEncryptionProperties(org.apache.parquet.crypto.ColumnEncryptionProperties) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath)

Example 10 with FileEncryptionProperties

Use of org.apache.parquet.crypto.FileEncryptionProperties in the project parquet-mr, by Apache.

From the class PropertiesDrivenCryptoFactory, the method getFileEncryptionProperties.

/**
 * Derives {@link FileEncryptionProperties} for the file about to be written,
 * driven entirely by Hadoop configuration properties.
 *
 * <p>Three modes are supported based on which key-id properties are set:
 * no keys at all (returns {@code null}: plaintext file), per-column keys
 * plus a footer key, or a single uniform key used for the whole file.
 *
 * @param fileHadoopConfig configuration carrying the encryption properties
 * @param tempFilePath     output file path; required (non-null) only when key
 *                         material is stored externally next to the file
 * @param fileWriteContext write context of the file (not consulted here)
 * @return encryption properties for the file, or {@code null} if unencrypted
 * @throws ParquetCryptoRuntimeException on inconsistent key configuration,
 *         unknown algorithm, bad data-key length, or key-store I/O failure
 */
@Override
public FileEncryptionProperties getFileEncryptionProperties(Configuration fileHadoopConfig, Path tempFilePath, WriteContext fileWriteContext) throws ParquetCryptoRuntimeException {
    String footerKeyId = fileHadoopConfig.getTrimmed(FOOTER_KEY_PROPERTY_NAME);
    String columnKeysStr = fileHadoopConfig.getTrimmed(COLUMN_KEYS_PROPERTY_NAME);
    String uniformKeyId = fileHadoopConfig.getTrimmed(UNIFORM_KEY_PROPERTY_NAME);
    boolean emptyFooterKeyId = stringIsEmpty(footerKeyId);
    boolean emptyColumnKeyIds = stringIsEmpty(columnKeysStr);
    boolean emptyUniformKeyId = stringIsEmpty(uniformKeyId);
    // No key configuration at all: file shouldn't be encrypted.
    if (emptyFooterKeyId && emptyColumnKeyIds && emptyUniformKeyId) {
        LOG.debug("Unencrypted file: {}", tempFilePath);
        return null;
    }
    if (emptyUniformKeyId) {
        // Non-uniform encryption: must have both footer and column key ids.
        if (emptyFooterKeyId) {
            throw new ParquetCryptoRuntimeException("No footer key configured in " + FOOTER_KEY_PROPERTY_NAME);
        }
        if (emptyColumnKeyIds) {
            throw new ParquetCryptoRuntimeException("No column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
        }
    } else {
        // Uniform encryption: footer and column key ids must NOT be configured,
        // since the single uniform key covers everything.
        if (!emptyFooterKeyId) {
            throw new ParquetCryptoRuntimeException("Uniform encryption. Cant have footer key configured in " + FOOTER_KEY_PROPERTY_NAME);
        }
        if (!emptyColumnKeyIds) {
            throw new ParquetCryptoRuntimeException("Uniform encryption. Cant have column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
        }
        // The uniform key doubles as the footer key from here on.
        footerKeyId = uniformKeyId;
    }
    FileKeyMaterialStore keyMaterialStore = null;
    boolean keyMaterialInternalStorage = fileHadoopConfig.getBoolean(KeyToolkit.KEY_MATERIAL_INTERNAL_PROPERTY_NAME, KeyToolkit.KEY_MATERIAL_INTERNAL_DEFAULT);
    if (!keyMaterialInternalStorage) {
        // External key material is stored in a side file next to the data file,
        // so a concrete output path is mandatory.
        if (tempFilePath == null) {
            throw new ParquetCryptoRuntimeException("Output file path cannot be null");
        }
        try {
            keyMaterialStore = new HadoopFSKeyMaterialStore(tempFilePath.getFileSystem(fileHadoopConfig));
            keyMaterialStore.initialize(tempFilePath, fileHadoopConfig, false);
        } catch (IOException e) {
            throw new ParquetCryptoRuntimeException("Failed to get key material store", e);
        }
    }
    FileKeyWrapper keyWrapper = new FileKeyWrapper(fileHadoopConfig, keyMaterialStore);
    String algo = fileHadoopConfig.getTrimmed(ENCRYPTION_ALGORITHM_PROPERTY_NAME, ENCRYPTION_ALGORITHM_DEFAULT);
    ParquetCipher cipher;
    try {
        cipher = ParquetCipher.valueOf(algo);
    } catch (IllegalArgumentException e) {
        // Translate the enum lookup failure into the crypto exception type callers expect.
        throw new ParquetCryptoRuntimeException("Wrong encryption algorithm: " + algo);
    }
    // Data encryption key (DEK) length is configured in bits and validated
    // against the whitelist of acceptable lengths.
    int dekLengthBits = fileHadoopConfig.getInt(KeyToolkit.DATA_KEY_LENGTH_PROPERTY_NAME, KeyToolkit.DATA_KEY_LENGTH_DEFAULT);
    if (Arrays.binarySearch(ACCEPTABLE_DATA_KEY_LENGTHS, dekLengthBits) < 0) {
        throw new ParquetCryptoRuntimeException("Wrong data key length : " + dekLengthBits);
    }
    int dekLength = dekLengthBits / 8;
    // Generate a fresh random footer DEK and wrap it with the master key
    // identified by footerKeyId; the wrapped form goes into the key metadata.
    byte[] footerKeyBytes = new byte[dekLength];
    RANDOM.nextBytes(footerKeyBytes);
    byte[] footerKeyMetadata = keyWrapper.getEncryptionKeyMetadata(footerKeyBytes, footerKeyId, true);
    boolean plaintextFooter = fileHadoopConfig.getBoolean(PLAINTEXT_FOOTER_PROPERTY_NAME, PLAINTEXT_FOOTER_DEFAULT);
    FileEncryptionProperties.Builder propertiesBuilder = FileEncryptionProperties.builder(footerKeyBytes).withFooterKeyMetadata(footerKeyMetadata).withAlgorithm(cipher);
    if (emptyUniformKeyId) {
        // Non-uniform mode: generate and wrap a DEK per configured column.
        Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns = getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper);
        propertiesBuilder = propertiesBuilder.withEncryptedColumns(encryptedColumns);
    }
    if (plaintextFooter) {
        propertiesBuilder = propertiesBuilder.withPlaintextFooter();
    }
    // Persist externally stored key material only after all keys were wrapped.
    if (null != keyMaterialStore) {
        keyMaterialStore.saveMaterial();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("File encryption properties for {} - algo: {}; footer key id: {}; uniform key id: {}; " + "" + "plaintext footer: {}; internal key material: {}; encrypted columns: {}", tempFilePath, cipher, footerKeyId, uniformKeyId, plaintextFooter, keyMaterialInternalStorage, columnKeysStr);
    }
    return propertiesBuilder.build();
}
Also used : ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) ColumnEncryptionProperties(org.apache.parquet.crypto.ColumnEncryptionProperties) IOException(java.io.IOException) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) FileEncryptionProperties(org.apache.parquet.crypto.FileEncryptionProperties) ParquetCipher(org.apache.parquet.crypto.ParquetCipher)

Aggregations

FileEncryptionProperties (org.apache.parquet.crypto.FileEncryptionProperties)10 ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath)6 ColumnEncryptionProperties (org.apache.parquet.crypto.ColumnEncryptionProperties)5 HashMap (java.util.HashMap)4 Path (org.apache.hadoop.fs.Path)3 IOException (java.io.IOException)2 Configuration (org.apache.hadoop.conf.Configuration)2 ParquetCryptoRuntimeException (org.apache.parquet.crypto.ParquetCryptoRuntimeException)2 BeforeClass (org.junit.BeforeClass)2 Objects (java.util.Objects)1 JobConf (org.apache.hadoop.mapred.JobConf)1 Job (org.apache.hadoop.mapreduce.Job)1 JobContext (org.apache.hadoop.mapreduce.JobContext)1 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)1 RecordWriter (org.apache.hadoop.mapreduce.RecordWriter)1 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)1 FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat)1 ParquetProperties (org.apache.parquet.column.ParquetProperties)1 DEFAULT_BLOOM_FILTER_ENABLED (org.apache.parquet.column.ParquetProperties.DEFAULT_BLOOM_FILTER_ENABLED)1 WriterVersion (org.apache.parquet.column.ParquetProperties.WriterVersion)1