Search in sources :

Example 1 with DwrfDataEncryptor

use of com.facebook.presto.orc.DwrfDataEncryptor in project presto by prestodb.

The method decryptAndCombineFileStatistics of the class DwrfMetadataReader.

/**
 * Overlays decrypted column statistics onto a copy of the given file statistics.
 *
 * <p>For every encryption group, each node that has an entry in {@code nodeToIntermediateKeys}
 * gets its encrypted {@code FileStatistics} decrypted and parsed, and the resulting statistics are
 * written into the copy starting at index {@code nodeId}. Nodes without an intermediate key are
 * left untouched.
 *
 * @param hiveWriterVersion passed through to {@code toColumnStatistics}
 * @param dwrfEncryption source of the encryption groups; must not be null
 * @param encryptionLibrary used to decrypt the data key and to build the decryptor; must not be null
 * @param fileStats the statistics to start from; returned as-is when it or
 *        {@code nodeToIntermediateKeys} is empty
 * @param fileStripes only the first stripe is consulted, as a fallback source of data keys
 * @param nodeToIntermediateKeys intermediate keys, keyed by node id, for the nodes to decrypt
 * @param orcDataSource supplies the data source id for the input stream and for error reporting
 * @param decompressor handed to the {@code OrcInputStream} that reads the encrypted statistics
 * @return {@code fileStats} itself when there is nothing to decrypt, otherwise an immutable copy
 *         with the decrypted entries substituted
 * @throws OrcCorruptionException if reading or decrypting a node's statistics fails with an
 *         {@code IOException}
 */
private List<ColumnStatistics> decryptAndCombineFileStatistics(HiveWriterVersion hiveWriterVersion, DwrfEncryption dwrfEncryption, EncryptionLibrary encryptionLibrary, List<ColumnStatistics> fileStats, List<StripeInformation> fileStripes, Map<Integer, Slice> nodeToIntermediateKeys, OrcDataSource orcDataSource, Optional<OrcDecompressor> decompressor) {
    requireNonNull(dwrfEncryption, "dwrfEncryption is null");
    requireNonNull(encryptionLibrary, "encryptionLibrary is null");
    // Without keys or without statistics there is nothing to decrypt.
    if (nodeToIntermediateKeys.isEmpty() || fileStats.isEmpty()) {
        return fileStats;
    }

    ColumnStatistics[] mergedStats = fileStats.toArray(new ColumnStatistics[0]);
    List<EncryptionGroup> groups = dwrfEncryption.getEncryptionGroups();

    // Keys recorded in the first stripe (if any) must line up one-to-one with the encryption groups.
    List<byte[]> firstStripeKeys = null;
    if (!fileStripes.isEmpty()) {
        List<byte[]> candidateKeys = fileStripes.get(0).getKeyMetadata();
        if (!candidateKeys.isEmpty()) {
            checkState(candidateKeys.size() == groups.size(), "Number of keys in the first stripe must be the same as the number of encryption groups");
            firstStripeKeys = candidateKeys;
        }
    }

    for (int groupIndex = 0; groupIndex < groups.size(); groupIndex++) {
        EncryptionGroup group = groups.get(groupIndex);
        List<Integer> groupNodes = group.getNodes();
        // Created lazily on the first requested node and then reused for the rest of the group.
        DwrfDataEncryptor groupDecryptor = null;

        for (int position = 0; position < groupNodes.size(); position++) {
            Integer node = groupNodes.get(position);
            // Only nodes that were requested (i.e. have an intermediate key) are decrypted.
            if (!nodeToIntermediateKeys.containsKey(node)) {
                continue;
            }

            if (groupDecryptor == null) {
                // The DEK for the FileStats may live in the footer, in the first stripe, or in both;
                // the footer copy wins when present.
                byte[] wrappedDataKey;
                if (group.getKeyMetadata().isPresent()) {
                    wrappedDataKey = group.getKeyMetadata().get().byteArray();
                } else if (firstStripeKeys != null) {
                    wrappedDataKey = firstStripeKeys.get(groupIndex);
                } else {
                    wrappedDataKey = null;
                }
                checkState(wrappedDataKey != null, "DEK for %s encryption group is null", groupIndex);

                // The DEK is encrypted with the IEK that was passed in for this node.
                byte[] nodeIntermediateKey = nodeToIntermediateKeys.get(node).byteArray();
                byte[] plainDataKey = encryptionLibrary.decryptKey(nodeIntermediateKey, wrappedDataKey, 0, wrappedDataKey.length);
                groupDecryptor = new DwrfDataEncryptor(plainDataKey, encryptionLibrary);
            }

            // The statistics list is indexed by the node's position within the group.
            Slice encryptedStats = group.getStatistics().get(position);
            // Memory is not accounted as the buffer is expected to be tiny and will be immediately discarded
            try (OrcInputStream statsStream = new OrcInputStream(
                    orcDataSource.getId(),
                    new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                    new BasicSliceInput(encryptedStats),
                    decompressor,
                    Optional.of(groupDecryptor),
                    NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                    encryptedStats.length())) {
                DwrfProto.FileStatistics subtreeStats = DwrfProto.FileStatistics.parseFrom(CodedInputStream.newInstance(statsStream));
                // The parsed message carries statistics for the node and its whole subtree,
                // which are stored at consecutive indexes starting at the node id.
                int subtreeSize = subtreeStats.getStatisticsCount();
                for (int offset = 0; offset < subtreeSize; offset++) {
                    mergedStats[node + offset] = toColumnStatistics(hiveWriterVersion, subtreeStats.getStatistics(offset), false, null);
                }
            } catch (IOException e) {
                throw new OrcCorruptionException(e, orcDataSource.getId(), "Failed to read or decrypt FileStatistics for node %s", node);
            }
        }
    }
    return ImmutableList.copyOf(mergedStats);
}
Also used : ColumnStatistics.createColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.createColumnStatistics) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) DwrfDataEncryptor(com.facebook.presto.orc.DwrfDataEncryptor) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) IOException(java.io.IOException) BasicSliceInput(io.airlift.slice.BasicSliceInput) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) OrcMetadataReader.byteStringToSlice(com.facebook.presto.orc.metadata.OrcMetadataReader.byteStringToSlice) Slice(io.airlift.slice.Slice) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 2 with DwrfDataEncryptor

use of com.facebook.presto.orc.DwrfDataEncryptor in project presto by prestodb.

The method createColumnWriter of the class ColumnWriters.

/**
 * Builds the {@link ColumnWriter} for the node at {@code columnIndex} in {@code orcTypes}.
 *
 * <p>The writer implementation is chosen from the node's ORC type kind. LIST, MAP and STRUCT
 * nodes build writers for their child nodes by calling this method recursively. The encryptor
 * looked up for {@code columnIndex} is handed to every writer except the decimal writer.
 *
 * @param columnIndex index of the node in {@code orcTypes}; also used to look up its encryptor
 * @param orcTypes the ORC types, indexed by node
 * @param type the engine type of the column; must not be null
 * @param columnWriterOptions options passed to the writers; also controls dictionary encoding
 *        and the string statistics limit
 * @param orcEncoding target encoding; DATE, DECIMAL and CHAR are rejected for DWRF
 * @param hiveStorageTimeZone time zone passed to the timestamp writer
 * @param dwrfEncryptors lookup of optional encryptors by node id
 * @param metadataWriter passed to every created writer
 * @return the writer for the requested node
 * @throws IllegalArgumentException if the ORC type kind is not handled by the switch
 */
public static ColumnWriter createColumnWriter(int columnIndex, List<OrcType> orcTypes, Type type, ColumnWriterOptions columnWriterOptions, OrcEncoding orcEncoding, DateTimeZone hiveStorageTimeZone, DwrfEncryptionInfo dwrfEncryptors, MetadataWriter metadataWriter) {
    requireNonNull(type, "type is null");
    OrcType orcType = orcTypes.get(columnIndex);
    Optional<DwrfDataEncryptor> dwrfEncryptor = dwrfEncryptors.getEncryptorByNodeId(columnIndex);
    switch (orcType.getOrcTypeKind()) {
        case BOOLEAN:
            return new BooleanColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, metadataWriter);
        case FLOAT:
            return new FloatColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, metadataWriter);
        case DOUBLE:
            return new DoubleColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, metadataWriter);
        case BYTE:
            return new ByteColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, metadataWriter);
        case DATE:
            checkArgument(orcEncoding != DWRF, "DWRF does not support %s type", type);
            return new LongColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, DateStatisticsBuilder::new, metadataWriter);
        case INT:
        case LONG:
            // ORC V1 does not support Integer Dictionary encoding. DWRF supports Integer dictionary encoding.
            if (columnWriterOptions.isIntegerDictionaryEncodingEnabled() && orcEncoding == DWRF) {
                return new LongDictionaryColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, metadataWriter);
            }
        // fall through: without dictionary encoding INT and LONG are written exactly like SHORT
        case SHORT:
            return new LongColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, IntegerStatisticsBuilder::new, metadataWriter);
        case DECIMAL:
            checkArgument(orcEncoding != DWRF, "DWRF does not support %s type", type);
            return new DecimalColumnWriter(columnIndex, type, columnWriterOptions, orcEncoding, metadataWriter);
        case TIMESTAMP:
            return new TimestampColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, hiveStorageTimeZone, metadataWriter);
        case BINARY:
            return new SliceDirectColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, BinaryStatisticsBuilder::new, metadataWriter);
        case CHAR:
            checkArgument(orcEncoding != DWRF, "DWRF does not support %s type", type);
        // fall through: once validated, CHAR is written like the other string kinds
        case VARCHAR:
        case STRING:
            if (columnWriterOptions.isStringDictionaryEncodingEnabled()) {
                return new SliceDictionaryColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, metadataWriter);
            } else {
                int statisticsLimit = columnWriterOptions.getStringStatisticsLimit();
                return new SliceDirectColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, () -> new StringStatisticsBuilder(statisticsLimit), metadataWriter);
            }
        case LIST:
            {
                // Field 0 of a LIST node is its element.
                ColumnWriter elementWriter = createColumnWriter(orcType.getFieldTypeIndex(0), orcTypes, type.getTypeParameters().get(0), columnWriterOptions, orcEncoding, hiveStorageTimeZone, dwrfEncryptors, metadataWriter);
                return new ListColumnWriter(columnIndex, columnWriterOptions, dwrfEncryptor, orcEncoding, elementWriter, metadataWriter);
            }
        case MAP:
            {
                // Field 0 is the key, field 1 is the value; the key writer is built first.
                int keyColumnIndex = orcType.getFieldTypeIndex(0);
                Type keyType = type.getTypeParameters().get(0);
                ColumnWriter keyWriter = createColumnWriter(keyColumnIndex, orcTypes, keyType, columnWriterOptions, orcEncoding, hiveStorageTimeZone, dwrfEncryptors, metadataWriter);

                int valueColumnIndex = orcType.getFieldTypeIndex(1);
                Type valueType = type.getTypeParameters().get(1);
                ColumnWriter valueWriter = createColumnWriter(valueColumnIndex, orcTypes, valueType, columnWriterOptions, orcEncoding, hiveStorageTimeZone, dwrfEncryptors, metadataWriter);

                return new MapColumnWriter(columnIndex, columnWriterOptions, dwrfEncryptor, orcEncoding, keyWriter, valueWriter, metadataWriter);
            }
        case STRUCT:
            {
                // One child writer per struct field, in field order.
                ImmutableList.Builder<ColumnWriter> childWriters = ImmutableList.builder();
                int fieldCount = orcType.getFieldCount();
                for (int field = 0; field < fieldCount; field++) {
                    int childColumnIndex = orcType.getFieldTypeIndex(field);
                    Type childType = type.getTypeParameters().get(field);
                    ColumnWriter childWriter = createColumnWriter(childColumnIndex, orcTypes, childType, columnWriterOptions, orcEncoding, hiveStorageTimeZone, dwrfEncryptors, metadataWriter);
                    childWriters.add(childWriter);
                }
                return new StructColumnWriter(columnIndex, columnWriterOptions, dwrfEncryptor, childWriters.build(), metadataWriter);
            }
        default:
            throw new IllegalArgumentException("Unsupported type: " + type);
    }
}
Also used : StringStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.StringStatisticsBuilder) DateStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.DateStatisticsBuilder) IntegerStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.IntegerStatisticsBuilder) StringStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.StringStatisticsBuilder) BinaryStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.BinaryStatisticsBuilder) IntegerStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.IntegerStatisticsBuilder) DateStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.DateStatisticsBuilder) BinaryStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.BinaryStatisticsBuilder) DwrfDataEncryptor(com.facebook.presto.orc.DwrfDataEncryptor) OrcType(com.facebook.presto.orc.metadata.OrcType) Type(com.facebook.presto.common.type.Type) OrcType(com.facebook.presto.orc.metadata.OrcType)

Aggregations

DwrfDataEncryptor (com.facebook.presto.orc.DwrfDataEncryptor)2 Type (com.facebook.presto.common.type.Type)1 OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException)1 OrcMetadataReader.byteStringToSlice (com.facebook.presto.orc.metadata.OrcMetadataReader.byteStringToSlice)1 OrcType (com.facebook.presto.orc.metadata.OrcType)1 BinaryStatisticsBuilder (com.facebook.presto.orc.metadata.statistics.BinaryStatisticsBuilder)1 ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics)1 ColumnStatistics.createColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics.createColumnStatistics)1 DateStatisticsBuilder (com.facebook.presto.orc.metadata.statistics.DateStatisticsBuilder)1 IntegerStatisticsBuilder (com.facebook.presto.orc.metadata.statistics.IntegerStatisticsBuilder)1 StringStatisticsBuilder (com.facebook.presto.orc.metadata.statistics.StringStatisticsBuilder)1 DwrfProto (com.facebook.presto.orc.proto.DwrfProto)1 CodedInputStream (com.facebook.presto.orc.protobuf.CodedInputStream)1 OrcInputStream (com.facebook.presto.orc.stream.OrcInputStream)1 SharedBuffer (com.facebook.presto.orc.stream.SharedBuffer)1 BasicSliceInput (io.airlift.slice.BasicSliceInput)1 Slice (io.airlift.slice.Slice)1 IOException (java.io.IOException)1