use of org.apache.parquet.crypto.FileDecryptionProperties in project parquet-mr by apache.
the class TestEncryptionOptions method testInteropReadEncryptedParquetFiles.
/**
 * Reads the interop files under {@code root} with every {@link DecryptionConfiguration} and
 * compares each row that is read against the expected rows in {@code data}.
 *
 * <p>Every combination of decryption and encryption configuration is tried. Value mismatches and
 * unexpected exceptions are recorded through {@code addErrorToErrorCollectorAndLog} instead of
 * aborting, so one run reports all failing combinations.
 *
 * @param root directory that holds one file per encryption configuration
 * @param readOnlyEncrypted when {@code true}, the {@code NO_ENCRYPTION} file is skipped
 * @param data expected rows, in the order they are stored in the files
 * @throws IOException declared for callers; failures inside the read loop are caught and recorded
 */
private void testInteropReadEncryptedParquetFiles(Path root, boolean readOnlyEncrypted, List<SingleRow> data) throws IOException {
  Configuration conf = new Configuration();
  for (DecryptionConfiguration decryptionConfiguration : DecryptionConfiguration.values()) {
    for (EncryptionConfiguration encryptionConfiguration : EncryptionConfiguration.values()) {
      if (readOnlyEncrypted && (EncryptionConfiguration.NO_ENCRYPTION == encryptionConfiguration)) {
        continue;
      }
      Path file = new Path(root, getFileName(encryptionConfiguration));
      LOG.info("==> Decryption configuration {}", decryptionConfiguration);
      // Fetched once per file, exactly as before; the properties object is not shared across reads.
      FileDecryptionProperties fileDecryptionProperties = decryptionConfiguration.getDecryptionProperties();
      LOG.info("--> Read file {} {}", file, encryptionConfiguration);

      // Without keys, a plaintext-footer file can only be read through its non-encrypted columns,
      // so the requested schema is narrowed to those columns for this one combination.
      boolean plaintextColumnsOnly = (decryptionConfiguration == DecryptionConfiguration.NO_DECRYPTION)
          && (encryptionConfiguration == EncryptionConfiguration.ENCRYPT_COLUMNS_PLAINTEXT_FOOTER);
      if (plaintextColumnsOnly) {
        conf.set("parquet.read.schema", Types.buildMessage().required(BOOLEAN).named(SingleRow.BOOLEAN_FIELD_NAME).required(INT32).named(SingleRow.INT32_FIELD_NAME).named("FormatTestObject").toString());
      }

      int rowNum = 0;
      try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).withConf(conf).withDecryption(fileDecryptionProperties).build()) {
        for (Group group = reader.read(); group != null; group = reader.read()) {
          SingleRow rowExpected = data.get(rowNum++);
          // plaintext columns
          if (rowExpected.boolean_field != group.getBoolean(SingleRow.BOOLEAN_FIELD_NAME, 0)) {
            addErrorToErrorCollectorAndLog("Wrong bool", encryptionConfiguration, decryptionConfiguration);
          }
          if (rowExpected.int32_field != group.getInteger(SingleRow.INT32_FIELD_NAME, 0)) {
            addErrorToErrorCollectorAndLog("Wrong int", encryptionConfiguration, decryptionConfiguration);
          }
          // encrypted columns
          if (decryptionConfiguration != DecryptionConfiguration.NO_DECRYPTION) {
            if (rowExpected.float_field != group.getFloat(SingleRow.FLOAT_FIELD_NAME, 0)) {
              addErrorToErrorCollectorAndLog("Wrong float", encryptionConfiguration, decryptionConfiguration);
            }
            if (rowExpected.double_field != group.getDouble(SingleRow.DOUBLE_FIELD_NAME, 0)) {
              addErrorToErrorCollectorAndLog("Wrong double", encryptionConfiguration, decryptionConfiguration);
            }
          }
        }
      } catch (ParquetCryptoRuntimeException e) {
        // Crypto failures are handed to checkResult together with the file name and the
        // decryption configuration (presumably to tell expected failures from real ones).
        checkResult(file.getName(), decryptionConfiguration, e);
      } catch (Exception e) {
        // Route the stack trace through the logger instead of printStackTrace() so it ends up
        // next to the rest of the test output.
        LOG.error("Unexpected exception while reading " + file.getName(), e);
        addErrorToErrorCollectorAndLog("Unexpected exception: " + e.getClass().getName() + " with message: " + e.getMessage(), encryptionConfiguration, decryptionConfiguration);
      }
      // Reset the projection so it does not leak into the next combination.
      conf.unset("parquet.read.schema");
    }
  }
}
use of org.apache.parquet.crypto.FileDecryptionProperties in project parquet-mr by apache.
the class TestEncryptionOptions method testReadEncryptedParquetFiles.
/**
 * Reads the files under {@code root} with every {@link DecryptionConfiguration} and compares
 * each row that is read against the expected rows in {@code data}.
 *
 * <p>Every combination of decryption and encryption configuration is tried. Value mismatches and
 * unexpected exceptions are recorded through {@code addErrorToErrorCollectorAndLog} instead of
 * aborting, so one run reports all failing combinations.
 *
 * @param root directory that holds one file per encryption configuration
 * @param data expected rows, in the order they are stored in the files
 */
private void testReadEncryptedParquetFiles(Path root, List<SingleRow> data) {
  Configuration conf = new Configuration();
  for (DecryptionConfiguration decryptionConfiguration : DecryptionConfiguration.values()) {
    for (EncryptionConfiguration encryptionConfiguration : EncryptionConfiguration.values()) {
      Path file = new Path(root, getFileName(encryptionConfiguration));
      LOG.info("==> Decryption configuration {}", decryptionConfiguration);
      // Fetched once per file, exactly as before; the properties object is not shared across reads.
      FileDecryptionProperties fileDecryptionProperties = decryptionConfiguration.getDecryptionProperties();
      LOG.info("--> Read file {} {}", file, encryptionConfiguration);

      // Without keys, a plaintext-footer file can only be read through its non-encrypted column,
      // so the requested schema is narrowed to that column for this one combination.
      boolean plaintextColumnsOnly = (decryptionConfiguration == DecryptionConfiguration.NO_DECRYPTION)
          && (encryptionConfiguration == EncryptionConfiguration.ENCRYPT_COLUMNS_PLAINTEXT_FOOTER);
      if (plaintextColumnsOnly) {
        conf.set("parquet.read.schema", Types.buildMessage().optional(INT32).named(SingleRow.PLAINTEXT_INT32_FIELD_NAME).named("FormatTestObject").toString());
      }

      int rowNum = 0;
      try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).withConf(conf).withDecryption(fileDecryptionProperties).build()) {
        for (Group group = reader.read(); group != null; group = reader.read()) {
          SingleRow rowExpected = data.get(rowNum++);
          // plaintext columns
          if (rowExpected.plaintext_int32_field != group.getInteger(SingleRow.PLAINTEXT_INT32_FIELD_NAME, 0)) {
            addErrorToErrorCollectorAndLog("Wrong int", encryptionConfiguration, decryptionConfiguration);
          }
          // encrypted columns
          if (decryptionConfiguration != DecryptionConfiguration.NO_DECRYPTION) {
            if (rowExpected.boolean_field != group.getBoolean(SingleRow.BOOLEAN_FIELD_NAME, 0)) {
              addErrorToErrorCollectorAndLog("Wrong bool", encryptionConfiguration, decryptionConfiguration);
            }
            if (rowExpected.int32_field != group.getInteger(SingleRow.INT32_FIELD_NAME, 0)) {
              addErrorToErrorCollectorAndLog("Wrong int", encryptionConfiguration, decryptionConfiguration);
            }
            if (rowExpected.float_field != group.getFloat(SingleRow.FLOAT_FIELD_NAME, 0)) {
              addErrorToErrorCollectorAndLog("Wrong float", encryptionConfiguration, decryptionConfiguration);
            }
            if (rowExpected.double_field != group.getDouble(SingleRow.DOUBLE_FIELD_NAME, 0)) {
              addErrorToErrorCollectorAndLog("Wrong double", encryptionConfiguration, decryptionConfiguration);
            }
            // The byte-array column is only compared when an expected value exists.
            if ((null != rowExpected.ba_field) && !Arrays.equals(rowExpected.ba_field, group.getBinary(SingleRow.BINARY_FIELD_NAME, 0).getBytes())) {
              addErrorToErrorCollectorAndLog("Wrong byte array", encryptionConfiguration, decryptionConfiguration);
            }
            if (!Arrays.equals(rowExpected.flba_field, group.getBinary(SingleRow.FIXED_LENGTH_BINARY_FIELD_NAME, 0).getBytes())) {
              addErrorToErrorCollectorAndLog("Wrong fixed-length byte array", encryptionConfiguration, decryptionConfiguration);
            }
          }
        }
      } catch (ParquetCryptoRuntimeException e) {
        // Crypto failures are handed to checkResult together with the file name and the
        // decryption configuration (presumably to tell expected failures from real ones).
        checkResult(file.getName(), decryptionConfiguration, e);
      } catch (Exception e) {
        // Route the stack trace through the logger instead of printStackTrace() so it ends up
        // next to the rest of the test output.
        LOG.error("Unexpected exception while reading " + file.getName(), e);
        addErrorToErrorCollectorAndLog("Unexpected exception: " + e.getClass().getName() + " with message: " + e.getMessage(), encryptionConfiguration, decryptionConfiguration);
      }
      // Reset the projection so it does not leak into the next combination.
      conf.unset("parquet.read.schema");
    }
  }
}
use of org.apache.parquet.crypto.FileDecryptionProperties in project parquet-mr by apache.
the class ParquetFileReader method readFooter.
/**
 * Locates the footer at the tail of {@code file}, reads it in a single pass and converts it to
 * {@link ParquetMetadata}.
 *
 * <p>The last bytes of the file are read as a little-endian footer length followed by a magic
 * marker. {@code MAGIC} selects the plaintext-footer path and {@code EFMAGIC} the
 * encrypted-footer path; anything else is rejected.
 *
 * @param file the file being read; used for its length and for error messages
 * @param options supplies the metadata filter and the optional decryption properties
 * @param f stream over {@code file}; it is repositioned by this method
 * @param converter performs the actual metadata parsing
 * @return the parsed footer metadata
 * @throws IOException if seeking or reading the stream fails
 * @throws RuntimeException if the file is too short, has an unknown tail marker, or the footer
 *     position computed from the stored length falls outside the file
 * @throws ParquetCryptoRuntimeException if the footer is encrypted and no decryption properties
 *     were supplied
 */
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException {
  final long fileLen = file.getLength();
  final String filePath = file.toString();
  LOG.debug("File length {}", fileLen);

  final int footerLengthSize = 4;
  // MAGIC + data + footer + footerIndex + MAGIC
  final int smallestPossibleFile = MAGIC.length + footerLengthSize + MAGIC.length;
  if (fileLen < smallestPossibleFile) {
    throw new RuntimeException(filePath + " is not a Parquet file (length is too low: " + fileLen + ")");
  }

  // The footer length and the tail marker sit next to each other, so one seek covers both.
  final byte[] tailMagic = new byte[MAGIC.length];
  final long footerLengthPos = fileLen - tailMagic.length - footerLengthSize;
  LOG.debug("reading footer index at {}", footerLengthPos);
  f.seek(footerLengthPos);
  final int footerLength = readIntLittleEndian(f);
  f.readFully(tailMagic);

  final boolean plaintextFooter = Arrays.equals(MAGIC, tailMagic);
  if (!plaintextFooter && !Arrays.equals(EFMAGIC, tailMagic)) {
    throw new RuntimeException(filePath + " is not a Parquet file. Expected magic number at tail, but found " + Arrays.toString(tailMagic));
  }
  final boolean encryptedFooterMode = !plaintextFooter;

  final long footerPos = footerLengthPos - footerLength;
  LOG.debug("read footer length: {}, footer index: {}", footerLength, footerPos);
  final boolean footerInsideFile = footerPos >= tailMagic.length && footerPos < footerLengthPos;
  if (!footerInsideFile) {
    throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerPos);
  }
  f.seek(footerPos);

  final FileDecryptionProperties decryptionProperties = options.getDecryptionProperties();
  final InternalFileDecryptor fileDecryptor =
      (decryptionProperties == null) ? null : new InternalFileDecryptor(decryptionProperties);

  // Pull the whole footer into memory with one read; per the original note, individual reads
  // can be slow on HDFS.
  final ByteBuffer footerBytes = ByteBuffer.allocate(footerLength);
  f.readFully(footerBytes);
  LOG.debug("Finished to read all footer bytes.");
  footerBytes.flip();
  final InputStream footerStream = ByteBufferInputStream.wrap(footerBytes);

  if (encryptedFooterMode) {
    // Encrypted file with encrypted footer
    if (fileDecryptor == null) {
      throw new ParquetCryptoRuntimeException("Trying to read file with encrypted footer. No keys available");
    }
    final FileCryptoMetaData cryptoMetaData = readFileCryptoMetaData(footerStream);
    fileDecryptor.setFileCryptoMetaData(cryptoMetaData.getEncryption_algorithm(), true, cryptoMetaData.getKey_metadata());
    // footer length is required only for signed plaintext footers
    return converter.readParquetMetadata(footerStream, options.getMetadataFilter(), fileDecryptor, true, 0);
  }

  // Regular file, or encrypted file with plaintext footer
  return converter.readParquetMetadata(footerStream, options.getMetadataFilter(), fileDecryptor, false, footerLength);
}
use of org.apache.parquet.crypto.FileDecryptionProperties in project parquet-mr by apache.
the class TestBloomFiltering method readUsers.
/**
 * Reads the users from {@code file} through a reader configured with the given predicate and
 * filter switches.
 *
 * @param filter predicate wrapped with {@code FilterCompat.get} and passed to the reader
 * @param useOtherFiltering toggles the dictionary, stats, record and column-index filters together
 * @param useBloomFilter toggles the bloom filter
 * @return the users returned by {@code PhoneBookWriter.readUsers}
 * @throws IOException if reading fails
 */
private List<PhoneBookWriter.User> readUsers(FilterPredicate filter, boolean useOtherFiltering, boolean useBloomFilter) throws IOException {
  // Decryption properties are only built for encrypted runs; otherwise null is passed through.
  final FileDecryptionProperties decryption;
  if (!isEncrypted) {
    decryption = null;
  } else {
    final DecryptionKeyRetrieverMock keyRetriever = new DecryptionKeyRetrieverMock()
        .putKey(FOOTER_ENCRYPTION_KEY_ID, FOOTER_ENCRYPTION_KEY)
        .putKey(COLUMN_ENCRYPTION_KEY1_ID, COLUMN_ENCRYPTION_KEY1)
        .putKey(COLUMN_ENCRYPTION_KEY2_ID, COLUMN_ENCRYPTION_KEY2);
    decryption = FileDecryptionProperties.builder()
        .withKeyRetriever(keyRetriever)
        .build();
  }

  return PhoneBookWriter.readUsers(
      ParquetReader.builder(new GroupReadSupport(), file)
          .withFilter(FilterCompat.get(filter))
          .withDecryption(decryption)
          .useDictionaryFilter(useOtherFiltering)
          .useStatsFilter(useOtherFiltering)
          .useRecordFilter(useOtherFiltering)
          .useBloomFilter(useBloomFilter)
          .useColumnIndexFilter(useOtherFiltering));
}
use of org.apache.parquet.crypto.FileDecryptionProperties in project parquet-mr by apache.
the class TestColumnIndexFiltering method getFileDecryptionProperties.
/**
 * Builds the decryption properties for the current test run.
 *
 * @return properties backed by a mock key retriever holding the footer key and both column keys
 *     when {@code isEncrypted} is set; {@code null} otherwise
 */
private FileDecryptionProperties getFileDecryptionProperties() {
  if (!isEncrypted) {
    // Plaintext run: there is nothing to decrypt.
    return null;
  }
  final DecryptionKeyRetrieverMock keyRetriever = new DecryptionKeyRetrieverMock()
      .putKey(FOOTER_ENCRYPTION_KEY_ID, FOOTER_ENCRYPTION_KEY)
      .putKey(COLUMN_ENCRYPTION_KEY1_ID, COLUMN_ENCRYPTION_KEY1)
      .putKey(COLUMN_ENCRYPTION_KEY2_ID, COLUMN_ENCRYPTION_KEY2);
  return FileDecryptionProperties.builder()
      .withKeyRetriever(keyRetriever)
      .build();
}
Aggregations