Search in sources:

Example 1 with SingleRow

Use of org.apache.parquet.crypto.SingleRow in project parquet-mr by apache.

From class TestEncryptionOptions, method testWriteEncryptedParquetFiles.

private void testWriteEncryptedParquetFiles(Path root, List<SingleRow> data) throws IOException {
    Configuration conf = new Configuration();
    // Ensure that several pages will be created
    int pageSize = data.size() / 10;
    // Ensure that multiple row groups will be created
    int rowGroupSize = pageSize * 6 * 5;
    SimpleGroupFactory f = new SimpleGroupFactory(SCHEMA);
    EncryptionConfiguration[] encryptionConfigurations = EncryptionConfiguration.values();
    for (EncryptionConfiguration encryptionConfiguration : encryptionConfigurations) {
        Path file = new Path(root, getFileName(encryptionConfiguration));
        FileEncryptionProperties encryptionProperties = encryptionConfiguration.getEncryptionProperties();
        LOG.info("\nWrite " + file.toString());
        try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
                .withWriteMode(OVERWRITE)
                .withRowGroupSize(rowGroupSize)
                .withPageSize(pageSize)
                .withType(SCHEMA)
                .withConf(conf)
                .withEncryption(encryptionProperties)
                .build()) {
            for (SingleRow singleRow : data) {
                writer.write(f.newGroup()
                        .append(SingleRow.BOOLEAN_FIELD_NAME, singleRow.boolean_field)
                        .append(SingleRow.INT32_FIELD_NAME, singleRow.int32_field)
                        .append(SingleRow.FLOAT_FIELD_NAME, singleRow.float_field)
                        .append(SingleRow.DOUBLE_FIELD_NAME, singleRow.double_field)
                        .append(SingleRow.BINARY_FIELD_NAME, Binary.fromConstantByteArray(singleRow.ba_field))
                        .append(SingleRow.FIXED_LENGTH_BINARY_FIELD_NAME, Binary.fromConstantByteArray(singleRow.flba_field))
                        .append(SingleRow.PLAINTEXT_INT32_FIELD_NAME, singleRow.plaintext_int32_field));
            }
        }
    }
}
Also used: ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath), Path (org.apache.hadoop.fs.Path), Group (org.apache.parquet.example.data.Group), Configuration (org.apache.hadoop.conf.Configuration), FileEncryptionProperties (org.apache.parquet.crypto.FileEncryptionProperties), SimpleGroupFactory (org.apache.parquet.example.data.simple.SimpleGroupFactory), SingleRow (org.apache.parquet.crypto.SingleRow)
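
The EncryptionConfiguration enum hides the construction of the FileEncryptionProperties passed to withEncryption(...). For orientation, a minimal sketch using the parquet-mr crypto builders is shown below; the key bytes and the "double_field" column name are illustrative placeholders, not the values used by the test enum.

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import org.apache.parquet.crypto.ColumnEncryptionProperties;
import org.apache.parquet.crypto.FileEncryptionProperties;
import org.apache.parquet.hadoop.metadata.ColumnPath;

public class EncryptionPropertiesSketch {
    // Builds illustrative file encryption properties: one explicitly encrypted
    // column plus a footer key (which also protects the footer).
    public static FileEncryptionProperties buildSampleProperties() {
        byte[] footerKey = "0123456789012345".getBytes(StandardCharsets.UTF_8); // 16-byte AES key
        byte[] columnKey = "1234567890123450".getBytes(StandardCharsets.UTF_8); // 16-byte AES key

        // "double_field" is a placeholder column name, not taken from SingleRow
        ColumnEncryptionProperties columnProperties =
                ColumnEncryptionProperties.builder("double_field").withKey(columnKey).build();

        Map<ColumnPath, ColumnEncryptionProperties> columnPropertiesMap = new HashMap<>();
        columnPropertiesMap.put(columnProperties.getPath(), columnProperties);

        return FileEncryptionProperties.builder(footerKey)
                .withEncryptedColumns(columnPropertiesMap)
                .build();
    }
}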

Example 2 with SingleRow

Use of org.apache.parquet.crypto.SingleRow in project parquet-mr by apache.

From class TestEncryptionOptions, method testInteropReadEncryptedParquetFiles.

private void testInteropReadEncryptedParquetFiles(Path root, boolean readOnlyEncrypted, List<SingleRow> data) throws IOException {
    Configuration conf = new Configuration();
    DecryptionConfiguration[] decryptionConfigurations = DecryptionConfiguration.values();
    for (DecryptionConfiguration decryptionConfiguration : decryptionConfigurations) {
        EncryptionConfiguration[] encryptionConfigurations = EncryptionConfiguration.values();
        for (EncryptionConfiguration encryptionConfiguration : encryptionConfigurations) {
            if (readOnlyEncrypted && (EncryptionConfiguration.NO_ENCRYPTION == encryptionConfiguration)) {
                continue;
            }
            Path file = new Path(root, getFileName(encryptionConfiguration));
            LOG.info("==> Decryption configuration {}", decryptionConfiguration);
            FileDecryptionProperties fileDecryptionProperties = decryptionConfiguration.getDecryptionProperties();
            LOG.info("--> Read file {} {}", file.toString(), encryptionConfiguration);
            // Read only the non-encrypted columns
            if ((decryptionConfiguration == DecryptionConfiguration.NO_DECRYPTION) && (encryptionConfiguration == EncryptionConfiguration.ENCRYPT_COLUMNS_PLAINTEXT_FOOTER)) {
                conf.set("parquet.read.schema", Types.buildMessage().required(BOOLEAN).named(SingleRow.BOOLEAN_FIELD_NAME).required(INT32).named(SingleRow.INT32_FIELD_NAME).named("FormatTestObject").toString());
            }
            int rowNum = 0;
            try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file)
                    .withConf(conf)
                    .withDecryption(fileDecryptionProperties)
                    .build()) {
                for (Group group = reader.read(); group != null; group = reader.read()) {
                    SingleRow rowExpected = data.get(rowNum++);
                    // plaintext columns
                    if (rowExpected.boolean_field != group.getBoolean(SingleRow.BOOLEAN_FIELD_NAME, 0)) {
                        addErrorToErrorCollectorAndLog("Wrong bool", encryptionConfiguration, decryptionConfiguration);
                    }
                    if (rowExpected.int32_field != group.getInteger(SingleRow.INT32_FIELD_NAME, 0)) {
                        addErrorToErrorCollectorAndLog("Wrong int", encryptionConfiguration, decryptionConfiguration);
                    }
                    // encrypted columns
                    if (decryptionConfiguration != DecryptionConfiguration.NO_DECRYPTION) {
                        if (rowExpected.float_field != group.getFloat(SingleRow.FLOAT_FIELD_NAME, 0)) {
                            addErrorToErrorCollectorAndLog("Wrong float", encryptionConfiguration, decryptionConfiguration);
                        }
                        if (rowExpected.double_field != group.getDouble(SingleRow.DOUBLE_FIELD_NAME, 0)) {
                            addErrorToErrorCollectorAndLog("Wrong double", encryptionConfiguration, decryptionConfiguration);
                        }
                    }
                }
            } catch (ParquetCryptoRuntimeException e) {
                checkResult(file.getName(), decryptionConfiguration, e);
            } catch (Exception e) {
                e.printStackTrace();
                addErrorToErrorCollectorAndLog("Unexpected exception: " + e.getClass().getName() + " with message: " + e.getMessage(), encryptionConfiguration, decryptionConfiguration);
            }
            conf.unset("parquet.read.schema");
        }
    }
}
Also used: ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath), Path (org.apache.hadoop.fs.Path), Group (org.apache.parquet.example.data.Group), GroupReadSupport (org.apache.parquet.hadoop.example.GroupReadSupport), Configuration (org.apache.hadoop.conf.Configuration), ParquetCryptoRuntimeException (org.apache.parquet.crypto.ParquetCryptoRuntimeException), IOException (java.io.IOException), FileDecryptionProperties (org.apache.parquet.crypto.FileDecryptionProperties), SingleRow (org.apache.parquet.crypto.SingleRow)
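
Similarly, DecryptionConfiguration.getDecryptionProperties() returns the FileDecryptionProperties passed to withDecryption(...). A minimal hand-rolled equivalent might look like the sketch below, using explicit footer and column keys (a DecryptionKeyRetriever would be another option); the key bytes and the "double_field" column name are again placeholders, and the keys must match those used when the file was written.

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import org.apache.parquet.crypto.ColumnDecryptionProperties;
import org.apache.parquet.crypto.FileDecryptionProperties;
import org.apache.parquet.hadoop.metadata.ColumnPath;

public class DecryptionPropertiesSketch {
    // Builds illustrative decryption properties with an explicit footer key
    // and one explicit column key.
    public static FileDecryptionProperties buildSampleProperties() {
        byte[] footerKey = "0123456789012345".getBytes(StandardCharsets.UTF_8);
        byte[] columnKey = "1234567890123450".getBytes(StandardCharsets.UTF_8);

        // Placeholder column name, not taken from SingleRow
        ColumnDecryptionProperties columnProperties =
                ColumnDecryptionProperties.builder("double_field").withKey(columnKey).build();

        Map<ColumnPath, ColumnDecryptionProperties> columnKeyMap = new HashMap<>();
        columnKeyMap.put(columnProperties.getPath(), columnProperties);

        return FileDecryptionProperties.builder()
                .withFooterKey(footerKey)
                .withColumnKeys(columnKeyMap)
                .build();
    }
}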

Example 3 with SingleRow

Use of org.apache.parquet.crypto.SingleRow in project parquet-mr by apache.

From class TestEncryptionOptions, method testReadEncryptedParquetFiles.

private void testReadEncryptedParquetFiles(Path root, List<SingleRow> data) {
    Configuration conf = new Configuration();
    DecryptionConfiguration[] decryptionConfigurations = DecryptionConfiguration.values();
    for (DecryptionConfiguration decryptionConfiguration : decryptionConfigurations) {
        EncryptionConfiguration[] encryptionConfigurations = EncryptionConfiguration.values();
        for (EncryptionConfiguration encryptionConfiguration : encryptionConfigurations) {
            Path file = new Path(root, getFileName(encryptionConfiguration));
            LOG.info("==> Decryption configuration {}", decryptionConfiguration);
            FileDecryptionProperties fileDecryptionProperties = decryptionConfiguration.getDecryptionProperties();
            LOG.info("--> Read file {} {}", file.toString(), encryptionConfiguration);
            // Read only the non-encrypted columns
            if ((decryptionConfiguration == DecryptionConfiguration.NO_DECRYPTION) && (encryptionConfiguration == EncryptionConfiguration.ENCRYPT_COLUMNS_PLAINTEXT_FOOTER)) {
                conf.set("parquet.read.schema", Types.buildMessage().optional(INT32).named(SingleRow.PLAINTEXT_INT32_FIELD_NAME).named("FormatTestObject").toString());
            }
            int rowNum = 0;
            try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file)
                    .withConf(conf)
                    .withDecryption(fileDecryptionProperties)
                    .build()) {
                for (Group group = reader.read(); group != null; group = reader.read()) {
                    SingleRow rowExpected = data.get(rowNum++);
                    // plaintext columns
                    if (rowExpected.plaintext_int32_field != group.getInteger(SingleRow.PLAINTEXT_INT32_FIELD_NAME, 0)) {
                        addErrorToErrorCollectorAndLog("Wrong int", encryptionConfiguration, decryptionConfiguration);
                    }
                    // encrypted columns
                    if (decryptionConfiguration != DecryptionConfiguration.NO_DECRYPTION) {
                        if (rowExpected.boolean_field != group.getBoolean(SingleRow.BOOLEAN_FIELD_NAME, 0)) {
                            addErrorToErrorCollectorAndLog("Wrong bool", encryptionConfiguration, decryptionConfiguration);
                        }
                        if (rowExpected.int32_field != group.getInteger(SingleRow.INT32_FIELD_NAME, 0)) {
                            addErrorToErrorCollectorAndLog("Wrong int", encryptionConfiguration, decryptionConfiguration);
                        }
                        if (rowExpected.float_field != group.getFloat(SingleRow.FLOAT_FIELD_NAME, 0)) {
                            addErrorToErrorCollectorAndLog("Wrong float", encryptionConfiguration, decryptionConfiguration);
                        }
                        if (rowExpected.double_field != group.getDouble(SingleRow.DOUBLE_FIELD_NAME, 0)) {
                            addErrorToErrorCollectorAndLog("Wrong double", encryptionConfiguration, decryptionConfiguration);
                        }
                        if ((null != rowExpected.ba_field) && !Arrays.equals(rowExpected.ba_field, group.getBinary(SingleRow.BINARY_FIELD_NAME, 0).getBytes())) {
                            addErrorToErrorCollectorAndLog("Wrong byte array", encryptionConfiguration, decryptionConfiguration);
                        }
                        if (!Arrays.equals(rowExpected.flba_field, group.getBinary(SingleRow.FIXED_LENGTH_BINARY_FIELD_NAME, 0).getBytes())) {
                            addErrorToErrorCollectorAndLog("Wrong fixed-length byte array", encryptionConfiguration, decryptionConfiguration);
                        }
                    }
                }
            } catch (ParquetCryptoRuntimeException e) {
                checkResult(file.getName(), decryptionConfiguration, e);
            } catch (Exception e) {
                e.printStackTrace();
                addErrorToErrorCollectorAndLog("Unexpected exception: " + e.getClass().getName() + " with message: " + e.getMessage(), encryptionConfiguration, decryptionConfiguration);
            }
            conf.unset("parquet.read.schema");
        }
    }
}
Also used: ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath), Path (org.apache.hadoop.fs.Path), Group (org.apache.parquet.example.data.Group), GroupReadSupport (org.apache.parquet.hadoop.example.GroupReadSupport), Configuration (org.apache.hadoop.conf.Configuration), ParquetCryptoRuntimeException (org.apache.parquet.crypto.ParquetCryptoRuntimeException), IOException (java.io.IOException), FileDecryptionProperties (org.apache.parquet.crypto.FileDecryptionProperties), SingleRow (org.apache.parquet.crypto.SingleRow)
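
For context, the private helpers above are typically driven from a public test method that first writes one file per EncryptionConfiguration and then reads every file back under every DecryptionConfiguration. A hypothetical JUnit 4 driver is sketched below; it assumes it lives in the same TestEncryptionOptions class as the helpers, that a generateRandomRows helper produces the test data, and that the usual org.junit.Rule, org.junit.Test, and org.junit.rules.TemporaryFolder imports are present.

    @Rule
    public TemporaryFolder tempFolder = new TemporaryFolder();

    @Test
    public void writeReadEncryptedParquetFiles() throws IOException {
        Path rootPath = new Path(tempFolder.getRoot().getPath());
        // generateRandomRows is an assumed helper, not shown in this listing
        List<SingleRow> data = generateRandomRows(10000);
        // Write one file per EncryptionConfiguration, then read each file back
        // under every DecryptionConfiguration
        testWriteEncryptedParquetFiles(rootPath, data);
        testReadEncryptedParquetFiles(rootPath, data);
    }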

Aggregations

Configuration (org.apache.hadoop.conf.Configuration): 3
Path (org.apache.hadoop.fs.Path): 3
SingleRow (org.apache.parquet.crypto.SingleRow): 3
Group (org.apache.parquet.example.data.Group): 3
ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath): 3
IOException (java.io.IOException): 2
FileDecryptionProperties (org.apache.parquet.crypto.FileDecryptionProperties): 2
ParquetCryptoRuntimeException (org.apache.parquet.crypto.ParquetCryptoRuntimeException): 2
GroupReadSupport (org.apache.parquet.hadoop.example.GroupReadSupport): 2
FileEncryptionProperties (org.apache.parquet.crypto.FileEncryptionProperties): 1
SimpleGroupFactory (org.apache.parquet.example.data.simple.SimpleGroupFactory): 1