Search in sources :

Example 6 with ParquetCryptoRuntimeException

use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.

the class KeyToolkit method createAndInitKmsClient.

/**
 * Instantiates the KMS client class named in the configuration and initializes it.
 *
 * @param configuration  configuration searched for {@code KMS_CLIENT_CLASS_PROPERTY_NAME}; also passed to the client
 * @param kmsInstanceID  passed through to {@code KmsClient.initialize}
 * @param kmsInstanceURL passed through to {@code KmsClient.initialize}
 * @param accessToken    passed through to {@code KmsClient.initialize}
 * @return the newly created and initialized client
 * @throws ParquetCryptoRuntimeException if no client class is configured, or if the class cannot be instantiated
 */
private static KmsClient createAndInitKmsClient(Configuration configuration, String kmsInstanceID, String kmsInstanceURL, String accessToken) {
    Class<?> kmsClientClass = null;
    KmsClient kmsClient = null;
    try {
        kmsClientClass = ConfigurationUtil.getClassFromConfig(configuration, KMS_CLIENT_CLASS_PROPERTY_NAME, KmsClient.class);
        if (null == kmsClientClass) {
            throw new ParquetCryptoRuntimeException("Unspecified " + KMS_CLIENT_CLASS_PROPERTY_NAME);
        }
        // Class.newInstance() is deprecated: it rethrows checked exceptions thrown by the
        // constructor without wrapping them. Going through the Constructor object reports a
        // missing no-arg constructor (NoSuchMethodException) and constructor failures
        // (InvocationTargetException) as ReflectiveOperationException, handled below.
        kmsClient = (KmsClient) kmsClientClass.getDeclaredConstructor().newInstance();
    } catch (ReflectiveOperationException | BadConfigurationException e) {
        throw new ParquetCryptoRuntimeException("Could not instantiate KmsClient class: " + kmsClientClass, e);
    }
    kmsClient.initialize(configuration, kmsInstanceID, kmsInstanceURL, accessToken);
    return kmsClient;
}
Also used : ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) BadConfigurationException(org.apache.parquet.hadoop.BadConfigurationException)

Example 7 with ParquetCryptoRuntimeException

use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.

the class ParquetMetadataConverter method fromParquetMetadata.

/**
 * Converts a format-level {@code FileMetaData} footer into a {@link ParquetMetadata} object.
 *
 * <p>For every row group a {@code BlockMetaData} is built, holding one {@code ColumnChunkMetaData}
 * per column chunk. Column chunks fall into three cases:
 * <ul>
 *   <li>no crypto metadata: plaintext column; if a decryptor is present and the file is not a
 *       plaintext file, the column is registered with the decryptor as plaintext;</li>
 *   <li>encrypted with the footer key: validated and registered with the decryptor right away;</li>
 *   <li>encrypted with a column key: the column metadata is kept encrypted and handed to
 *       {@code ColumnChunkMetaData.getWithEncryptedMetadata} for later decryption.</li>
 * </ul>
 *
 * @param parquetMetadata the format-level file metadata to convert
 * @param fileDecryptor   decryptor for the file; may be null (null is checked for explicitly below)
 * @param encryptedFooter whether the file footer is encrypted
 * @return the converted metadata, including schema, key/value metadata, created-by string and blocks
 * @throws IOException declared by the signature; presumably propagated from helper calls (not visible here)
 * @throws ParquetDecodingException if column chunks of one row group refer to different file paths
 * @throws ParquetCryptoRuntimeException if a footer-key-encrypted column is found in a file with a
 *         plaintext footer, has no column metadata, or no decryptor is available
 */
public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata, InternalFileDecryptor fileDecryptor, boolean encryptedFooter) throws IOException {
    MessageType messageType = fromParquetSchema(parquetMetadata.getSchema(), parquetMetadata.getColumn_orders());
    List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
    List<RowGroup> row_groups = parquetMetadata.getRow_groups();
    if (row_groups != null) {
        for (RowGroup rowGroup : row_groups) {
            BlockMetaData blockMetaData = new BlockMetaData();
            blockMetaData.setRowCount(rowGroup.getNum_rows());
            blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
            // not set in legacy files
            if (rowGroup.isSetOrdinal()) {
                blockMetaData.setOrdinal(rowGroup.getOrdinal());
            }
            List<ColumnChunk> columns = rowGroup.getColumns();
            // The first chunk's file path is the reference all other chunks are compared against.
            // NOTE(review): assumes every row group has at least one column chunk; get(0) fails otherwise - confirm
            String filePath = columns.get(0).getFile_path();
            // Zero-based position of the chunk within its row group; incremented at the top of the loop
            int columnOrdinal = -1;
            for (ColumnChunk columnChunk : columns) {
                columnOrdinal++;
                // Null-safe inequality check between the reference path and this chunk's path
                if ((filePath == null && columnChunk.getFile_path() != null) || (filePath != null && !filePath.equals(columnChunk.getFile_path()))) {
                    throw new ParquetDecodingException("all column chunks of the same row group must be in the same file for now");
                }
                ColumnMetaData metaData = columnChunk.meta_data;
                ColumnCryptoMetaData cryptoMetaData = columnChunk.getCrypto_metadata();
                ColumnChunkMetaData column = null;
                ColumnPath columnPath = null;
                // True only for columns encrypted with a column key (metadata stays encrypted for now)
                boolean encryptedMetadata = false;
                if (null == cryptoMetaData) {
                    // Plaintext column
                    columnPath = getPath(metaData);
                    if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
                        // mark this column as plaintext in encrypted file decryptor
                        fileDecryptor.setColumnCryptoMetadata(columnPath, false, false, (byte[]) null, columnOrdinal);
                    }
                } else {
                    // Encrypted column
                    boolean encryptedWithFooterKey = cryptoMetaData.isSetENCRYPTION_WITH_FOOTER_KEY();
                    if (encryptedWithFooterKey) {
                        // Column encrypted with footer key
                        if (!encryptedFooter) {
                            throw new ParquetCryptoRuntimeException("Column encrypted with footer key in file with plaintext footer");
                        }
                        if (null == metaData) {
                            throw new ParquetCryptoRuntimeException("ColumnMetaData not set in Encryption with Footer key");
                        }
                        if (null == fileDecryptor) {
                            throw new ParquetCryptoRuntimeException("Column encrypted with footer key: No keys available");
                        }
                        columnPath = getPath(metaData);
                        fileDecryptor.setColumnCryptoMetadata(columnPath, true, true, (byte[]) null, columnOrdinal);
                    } else {
                        // Column encrypted with column key
                        // setColumnCryptoMetadata triggers KMS interaction, hence delayed until this column is projected
                        encryptedMetadata = true;
                    }
                }
                String createdBy = parquetMetadata.getCreated_by();
                if (!encryptedMetadata) {
                    // unencrypted column, or encrypted with footer key
                    column = buildColumnChunkMetaData(metaData, columnPath, messageType.getType(columnPath.toArray()).asPrimitiveType(), createdBy);
                    // NOTE(review): getOrdinal() is read here without the isSetOrdinal() guard used above - confirm intended
                    column.setRowGroupOrdinal(rowGroup.getOrdinal());
                    if (metaData.isSetBloom_filter_offset()) {
                        column.setBloomFilterOffset(metaData.getBloom_filter_offset());
                    }
                } else {
                    // column encrypted with column key
                    // Metadata will be decrypted later, if this column is accessed
                    EncryptionWithColumnKey columnKeyStruct = cryptoMetaData.getENCRYPTION_WITH_COLUMN_KEY();
                    // The column path comes from the crypto struct because the column metadata is still encrypted
                    List<String> pathList = columnKeyStruct.getPath_in_schema();
                    byte[] columnKeyMetadata = columnKeyStruct.getKey_metadata();
                    columnPath = ColumnPath.get(pathList.toArray(new String[pathList.size()]));
                    byte[] encryptedMetadataBuffer = columnChunk.getEncrypted_column_metadata();
                    column = ColumnChunkMetaData.getWithEncryptedMetadata(this, columnPath, messageType.getType(columnPath.toArray()).asPrimitiveType(), encryptedMetadataBuffer, columnKeyMetadata, fileDecryptor, rowGroup.getOrdinal(), columnOrdinal, createdBy);
                }
                column.setColumnIndexReference(toColumnIndexReference(columnChunk));
                column.setOffsetIndexReference(toOffsetIndexReference(columnChunk));
                // TODO
                // index_page_offset
                // key_value_metadata
                blockMetaData.addColumn(column);
            }
            blockMetaData.setPath(filePath);
            blocks.add(blockMetaData);
        }
    }
    // Copy the footer's key/value pairs into a plain map (later duplicates of a key overwrite earlier ones)
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    List<KeyValue> key_value_metadata = parquetMetadata.getKey_value_metadata();
    if (key_value_metadata != null) {
        for (KeyValue keyValue : key_value_metadata) {
            keyValueMetaData.put(keyValue.key, keyValue.value);
        }
    }
    return new ParquetMetadata(new org.apache.parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, parquetMetadata.getCreated_by(), fileDecryptor), blocks);
}
Also used : BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) KeyValue(org.apache.parquet.format.KeyValue) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) RowGroup(org.apache.parquet.format.RowGroup) ArrayList(java.util.ArrayList) ColumnChunk(org.apache.parquet.format.ColumnChunk) Util.writeColumnMetaData(org.apache.parquet.format.Util.writeColumnMetaData) ColumnMetaData(org.apache.parquet.format.ColumnMetaData) MessageType(org.apache.parquet.schema.MessageType) ColumnCryptoMetaData(org.apache.parquet.format.ColumnCryptoMetaData) EncryptionWithColumnKey(org.apache.parquet.format.EncryptionWithColumnKey) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath)

Example 8 with ParquetCryptoRuntimeException

use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.

the class ParquetMetadataConverter method readParquetMetadata.

/**
 * Reads the file footer from a stream, applies the given metadata filter and converts the
 * result to {@link ParquetMetadata}.
 *
 * <p>When the footer is not encrypted but a decryptor is supplied, the decryptor is either
 * marked as handling a plaintext file, or configured from the footer's encryption algorithm
 * (optionally verifying the footer's integrity).
 *
 * @param from                 stream positioned at the footer
 * @param filter               selects how much of the footer is read / retained
 * @param fileDecryptor        decryptor for the file; may be null when the footer is not encrypted
 * @param encryptedFooter      whether the footer itself is encrypted
 * @param combinedFooterLength passed on to the footer integrity verification
 * @return the converted metadata
 * @throws IOException if reading the footer fails
 * @throws ParquetCryptoRuntimeException if a decryptor is applied to a plaintext file and the
 *         decryptor does not allow plaintext files
 */
public ParquetMetadata readParquetMetadata(final InputStream from, MetadataFilter filter, final InternalFileDecryptor fileDecryptor, final boolean encryptedFooter, final int combinedFooterLength) throws IOException {
    // Decryption material is only fetched when the footer is actually encrypted
    final BlockCipher.Decryptor decryptor;
    final byte[] footerAAD;
    if (encryptedFooter) {
        decryptor = fileDecryptor.fetchFooterDecryptor();
        footerAAD = AesCipher.createFooterAAD(fileDecryptor.getFileAAD());
    } else {
        decryptor = null;
        footerAAD = null;
    }

    // One reading strategy per filter kind
    MetadataFilterVisitor<FileMetaData, IOException> footerReader = new MetadataFilterVisitor<FileMetaData, IOException>() {

        @Override
        public FileMetaData visit(NoFilter noFilter) throws IOException {
            return readFileMetaData(from, decryptor, footerAAD);
        }

        @Override
        public FileMetaData visit(SkipMetadataFilter skipFilter) throws IOException {
            return readFileMetaData(from, true, decryptor, footerAAD);
        }

        @Override
        public FileMetaData visit(OffsetMetadataFilter offsetFilter) throws IOException {
            FileMetaData complete = readFileMetaData(from, decryptor, footerAAD);
            return filterFileMetaDataByStart(complete, offsetFilter);
        }

        @Override
        public FileMetaData visit(RangeMetadataFilter rangeFilter) throws IOException {
            FileMetaData complete = readFileMetaData(from, decryptor, footerAAD);
            return filterFileMetaDataByMidpoint(complete, rangeFilter);
        }
    };
    FileMetaData footer = filter.accept(footerReader);
    LOG.debug("{}", footer);

    if (!encryptedFooter && null != fileDecryptor) {
        if (footer.isSetEncryption_algorithm()) {
            // Encrypted file with plaintext footer
            // if no fileDecryptor, can still read plaintext columns
            fileDecryptor.setFileCryptoMetaData(footer.getEncryption_algorithm(), false, footer.getFooter_signing_key_metadata());
            if (fileDecryptor.checkFooterIntegrity()) {
                verifyFooterIntegrity(from, fileDecryptor, combinedFooterLength);
            }
        } else {
            // Plaintext file
            fileDecryptor.setPlaintextFile();
            // Done to detect files that were not encrypted by mistake
            if (!fileDecryptor.plaintextFilesAllowed()) {
                throw new ParquetCryptoRuntimeException("Applying decryptor on plaintext file");
            }
        }
    }

    ParquetMetadata converted = fromParquetMetadata(footer, fileDecryptor, encryptedFooter);
    // Guarded because building the pretty JSON is only needed for debug output
    if (LOG.isDebugEnabled()) {
        LOG.debug(ParquetMetadata.toPrettyJSON(converted));
    }
    return converted;
}
Also used : BlockCipher(org.apache.parquet.format.BlockCipher) ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) IOException(java.io.IOException) Util.readFileMetaData(org.apache.parquet.format.Util.readFileMetaData) FileMetaData(org.apache.parquet.format.FileMetaData)

Example 9 with ParquetCryptoRuntimeException

use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.

the class KeyMetadata method createSerializedForExternalMaterial.

/**
 * Builds the serialized key metadata for externally stored key material.
 *
 * <p>For external material only. For internal material, create serialized KeyMaterial directly.
 * The metadata holds three fields: the key material type, an internal-storage flag set to
 * {@code false}, and the reference to the external key material.
 *
 * @param keyReference reference to the externally stored key material
 * @return the metadata map serialized to a string by {@code OBJECT_MAPPER}
 * @throws ParquetCryptoRuntimeException if serialization fails
 */
static String createSerializedForExternalMaterial(String keyReference) {
    final Map<String, Object> fields = new HashMap<String, Object>(3);
    // Field 1: the key material type
    fields.put(KeyMaterial.KEY_MATERIAL_TYPE_FIELD, KeyMaterial.KEY_MATERIAL_TYPE1);
    // Field 2: the material is not stored internally
    fields.put(KEY_MATERIAL_INTERNAL_STORAGE_FIELD, Boolean.FALSE);
    // Field 3: only a reference to the externally stored key material is kept here
    fields.put(KEY_REFERENCE_FIELD, keyReference);

    final String serialized;
    try {
        serialized = OBJECT_MAPPER.writeValueAsString(fields);
    } catch (IOException e) {
        throw new ParquetCryptoRuntimeException("Failed to serialize key metadata", e);
    }
    return serialized;
}
Also used : HashMap(java.util.HashMap) ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) IOException(java.io.IOException)

Example 10 with ParquetCryptoRuntimeException

use of org.apache.parquet.crypto.ParquetCryptoRuntimeException in project parquet-mr by apache.

the class PropertiesDrivenCryptoFactory method getColumnEncryptionProperties.

/**
 * Parses the column-keys property and creates encryption properties for every listed column.
 *
 * <p>The expected format is {@code keyId:col1,col2;otherKeyId:col3}: mappings are separated by
 * {@code ';'}, the key id is separated from its columns by {@code ':'}, and columns are
 * separated by {@code ','}. Surrounding whitespace is trimmed and blank mappings are skipped.
 * For each column a fresh random key of {@code dekLength} bytes is generated and its key
 * metadata is obtained from the key wrapper.
 *
 * @param dekLength  length in bytes of each generated column key
 * @param columnKeys the raw key-to-columns mapping string
 * @param keyWrapper produces the key metadata for each generated column key
 * @return encryption properties keyed by column path; never empty
 * @throws ParquetCryptoRuntimeException if the mapping is malformed, a key id or column name is
 *         empty, a column is assigned more than once, or no column ends up configured
 */
private Map<ColumnPath, ColumnEncryptionProperties> getColumnEncryptionProperties(int dekLength, String columnKeys, FileKeyWrapper keyWrapper) throws ParquetCryptoRuntimeException {
    Map<ColumnPath, ColumnEncryptionProperties> propertiesByColumn = new HashMap<ColumnPath, ColumnEncryptionProperties>();

    for (String rawMapping : columnKeys.split(";")) {
        final String mapping = rawMapping.trim();
        if (mapping.isEmpty()) {
            // tolerate blank entries such as a trailing ';'
            continue;
        }

        String[] keyAndColumns = mapping.split(":");
        if (keyAndColumns.length != 2) {
            throw new ParquetCryptoRuntimeException("Incorrect key to columns mapping in " + COLUMN_KEYS_PROPERTY_NAME + ": [" + mapping + "]");
        }

        String keyId = keyAndColumns[0].trim();
        if (keyId.isEmpty()) {
            throw new ParquetCryptoRuntimeException("Empty key name in " + COLUMN_KEYS_PROPERTY_NAME);
        }

        String[] listedColumns = keyAndColumns[1].trim().split(",");
        if (listedColumns.length == 0) {
            throw new ParquetCryptoRuntimeException("No columns to encrypt defined for key: " + keyId);
        }

        for (String rawColumn : listedColumns) {
            final String columnName = rawColumn.trim();
            if (columnName.isEmpty()) {
                throw new ParquetCryptoRuntimeException("Empty column name in " + COLUMN_KEYS_PROPERTY_NAME + " for key: " + keyId);
            }

            final ColumnPath path = ColumnPath.fromDotString(columnName);
            if (propertiesByColumn.containsKey(path)) {
                throw new ParquetCryptoRuntimeException("Multiple keys defined for the same column: " + columnName);
            }

            // A fresh random key per column; the wrapper turns it into storable key metadata
            byte[] keyBytes = new byte[dekLength];
            RANDOM.nextBytes(keyBytes);
            byte[] keyMetadata = keyWrapper.getEncryptionKeyMetadata(keyBytes, keyId, false);

            ColumnEncryptionProperties columnProperties = ColumnEncryptionProperties.builder(path)
                .withKey(keyBytes)
                .withKeyMetaData(keyMetadata)
                .build();
            propertiesByColumn.put(path, columnProperties);
        }
    }

    if (propertiesByColumn.isEmpty()) {
        throw new ParquetCryptoRuntimeException("No column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
    }
    return propertiesByColumn;
}
Also used : HashMap(java.util.HashMap) ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) ColumnEncryptionProperties(org.apache.parquet.crypto.ColumnEncryptionProperties) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath)

Aggregations

ParquetCryptoRuntimeException (org.apache.parquet.crypto.ParquetCryptoRuntimeException): 22; IOException (java.io.IOException): 15; ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath): 7; HashMap (java.util.HashMap): 5; Path (org.apache.hadoop.fs.Path): 4; TypeReference (com.fasterxml.jackson.core.type.TypeReference): 3; ColumnEncryptionProperties (org.apache.parquet.crypto.ColumnEncryptionProperties): 3; FileDecryptionProperties (org.apache.parquet.crypto.FileDecryptionProperties): 3; ColumnMetaData (org.apache.parquet.format.ColumnMetaData): 3; StringReader (java.io.StringReader): 2; ArrayList (java.util.ArrayList): 2; Configuration (org.apache.hadoop.conf.Configuration): 2; FileEncryptionProperties (org.apache.parquet.crypto.FileEncryptionProperties): 2; InternalColumnDecryptionSetup (org.apache.parquet.crypto.InternalColumnDecryptionSetup): 2; SingleRow (org.apache.parquet.crypto.SingleRow): 2; Group (org.apache.parquet.example.data.Group): 2; BlockCipher (org.apache.parquet.format.BlockCipher): 2; ColumnChunk (org.apache.parquet.format.ColumnChunk): 2; RowGroup (org.apache.parquet.format.RowGroup): 2; Util.writeColumnMetaData (org.apache.parquet.format.Util.writeColumnMetaData): 2