Search in sources :

Example 6 with CodedInputStream

Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

From the class DwrfMetadataReader, method decryptAndCombineFileStatistics.

/**
 * Overlays decrypted file statistics of the requested encrypted nodes onto the given file statistics.
 *
 * <p>If {@code nodeToIntermediateKeys} or {@code fileStats} is empty, {@code fileStats} is returned as is.
 * Otherwise every encryption group is scanned, and for each of its nodes that has an entry in
 * {@code nodeToIntermediateKeys} the encrypted statistics are decrypted, parsed and written into a copy
 * of {@code fileStats}, starting at the index equal to the node id.
 *
 * @param hiveWriterVersion passed through to the column statistics conversion
 * @param dwrfEncryption source of the encryption groups; must not be null
 * @param encryptionLibrary used to decrypt the data key and the statistics; must not be null
 * @param fileStats file statistics read from the footer
 * @param fileStripes stripes of the file; the first stripe may supply the data keys
 * @param nodeToIntermediateKeys intermediate keys by node id; only these nodes are decrypted
 * @param orcDataSource data source whose id is used for the input stream and for error reporting
 * @param decompressor optional decompressor handed to the input stream
 * @return {@code fileStats} itself on the early exit, otherwise an immutable list with the decrypted entries filled in
 * @throws OrcCorruptionException if an {@link IOException} occurs while reading or parsing the statistics
 */
private List<ColumnStatistics> decryptAndCombineFileStatistics(HiveWriterVersion hiveWriterVersion, DwrfEncryption dwrfEncryption, EncryptionLibrary encryptionLibrary, List<ColumnStatistics> fileStats, List<StripeInformation> fileStripes, Map<Integer, Slice> nodeToIntermediateKeys, OrcDataSource orcDataSource, Optional<OrcDecompressor> decompressor) {
    requireNonNull(dwrfEncryption, "dwrfEncryption is null");
    requireNonNull(encryptionLibrary, "encryptionLibrary is null");

    // Nothing to decrypt when no keys were supplied or there are no statistics at all.
    if (nodeToIntermediateKeys.isEmpty() || fileStats.isEmpty()) {
        return fileStats;
    }

    ColumnStatistics[] combinedStats = fileStats.toArray(new ColumnStatistics[0]);
    List<EncryptionGroup> groups = dwrfEncryption.getEncryptionGroups();

    // Keys kept in the first stripe; they serve as a fallback when a group has no key of its own.
    List<byte[]> firstStripeKeys = null;
    if (!fileStripes.isEmpty()) {
        List<byte[]> candidateKeys = fileStripes.get(0).getKeyMetadata();
        if (!candidateKeys.isEmpty()) {
            checkState(candidateKeys.size() == groups.size(), "Number of keys in the first stripe must be the same as the number of encryption groups");
            firstStripeKeys = candidateKeys;
        }
    }

    for (int groupIndex = 0; groupIndex < groups.size(); groupIndex++) {
        EncryptionGroup group = groups.get(groupIndex);
        List<Integer> groupNodes = group.getNodes();
        // Created lazily, once per group, on the first node that actually has to be decrypted.
        DwrfDataEncryptor groupDecryptor = null;

        for (int nodeIndex = 0; nodeIndex < groupNodes.size(); nodeIndex++) {
            Integer nodeId = groupNodes.get(nodeIndex);

            // Only nodes with an intermediate key (i.e. the requested ones) are decrypted.
            if (!nodeToIntermediateKeys.containsKey(nodeId)) {
                continue;
            }

            if (groupDecryptor == null) {
                // The DEK for the FileStats may live in the footer and/or in the first stripe;
                // the footer key wins when both are available.
                byte[] encryptedDek = null;
                if (group.getKeyMetadata().isPresent()) {
                    encryptedDek = group.getKeyMetadata().get().byteArray();
                } else if (firstStripeKeys != null) {
                    encryptedDek = firstStripeKeys.get(groupIndex);
                }
                checkState(encryptedDek != null, "DEK for %s encryption group is null", groupIndex);

                // The DEK is encrypted with the IEK that was passed in for this node.
                byte[] iek = nodeToIntermediateKeys.get(nodeId).byteArray();
                byte[] dek = encryptionLibrary.decryptKey(iek, encryptedDek, 0, encryptedDek.length);
                groupDecryptor = new DwrfDataEncryptor(dek, encryptionLibrary);
            }

            Slice encryptedStats = group.getStatistics().get(nodeIndex);
            // Memory is not accounted as the buffer is expected to be tiny and will be immediately discarded
            try (OrcInputStream decryptingStream = new OrcInputStream(
                    orcDataSource.getId(),
                    new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                    new BasicSliceInput(encryptedStats),
                    decompressor,
                    Optional.of(groupDecryptor),
                    NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                    encryptedStats.length())) {
                DwrfProto.FileStatistics subtreeStats = DwrfProto.FileStatistics.parseFrom(CodedInputStream.newInstance(decryptingStream));
                // The parsed message holds statistics for the node and its whole subtree,
                // stored at consecutive positions starting from the node id.
                int subtreeSize = subtreeStats.getStatisticsCount();
                for (int offset = 0; offset < subtreeSize; offset++) {
                    combinedStats[nodeId + offset] = toColumnStatistics(hiveWriterVersion, subtreeStats.getStatistics(offset), false, null);
                }
            } catch (IOException e) {
                throw new OrcCorruptionException(e, orcDataSource.getId(), "Failed to read or decrypt FileStatistics for node %s", nodeId);
            }
        }
    }
    return ImmutableList.copyOf(combinedStats);
}
Also used : ColumnStatistics.createColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.createColumnStatistics) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) DwrfDataEncryptor(com.facebook.presto.orc.DwrfDataEncryptor) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) IOException(java.io.IOException) BasicSliceInput(io.airlift.slice.BasicSliceInput) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) OrcMetadataReader.byteStringToSlice(com.facebook.presto.orc.metadata.OrcMetadataReader.byteStringToSlice) Slice(io.airlift.slice.Slice) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 7 with CodedInputStream

Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

From the class DwrfMetadataReader, method readFooter.

/**
 * Parses a DWRF footer from the given stream and converts it into a {@link Footer}.
 *
 * <p>When the parsed footer has an encryption section, intermediate keys are requested from
 * {@code dwrfKeyProvider} and the file statistics are replaced with the result of
 * {@code decryptAndCombineFileStatistics}. The thread CPU time spent up to that point is
 * recorded under the {@code DwrfReadFooterTimeNanos} metric.
 *
 * @param hiveWriterVersion passed through to the statistics conversion
 * @param inputStream stream positioned at the serialized footer
 * @param dwrfEncryptionProvider supplies the encryption library for the footer's key provider
 * @param dwrfKeyProvider supplies the intermediate keys for the footer's types
 * @param orcDataSource data source handed to the statistics decryption
 * @param decompressor optional decompressor handed to the statistics decryption
 * @return the converted footer
 * @throws IOException if the footer cannot be parsed
 */
@Override
public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputStream, DwrfEncryptionProvider dwrfEncryptionProvider, DwrfKeyProvider dwrfKeyProvider, OrcDataSource orcDataSource, Optional<OrcDecompressor> decompressor) throws IOException {
    long startCpuNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime();

    DwrfProto.Footer footer = DwrfProto.Footer.parseFrom(CodedInputStream.newInstance(inputStream));

    List<ColumnStatistics> fileStats = toColumnStatistics(hiveWriterVersion, footer.getStatisticsList(), false);
    List<StripeInformation> fileStripes = toStripeInformation(footer.getStripesList());
    List<OrcType> types = toType(footer.getTypesList());

    Optional<DwrfEncryption> encryption;
    if (footer.hasEncryption()) {
        encryption = Optional.of(toEncryption(footer.getEncryption()));
    } else {
        encryption = Optional.empty();
    }

    Optional<List<Integer>> stripeCacheOffsets = Optional.of(footer.getStripeCacheOffsetsList());

    if (encryption.isPresent()) {
        DwrfEncryption dwrfEncryption = encryption.get();
        Map<Integer, Slice> intermediateKeys = dwrfKeyProvider.getIntermediateKeys(types);
        EncryptionLibrary encryptionLibrary = dwrfEncryptionProvider.getEncryptionLibrary(dwrfEncryption.getKeyProvider());
        fileStats = decryptAndCombineFileStatistics(hiveWriterVersion, dwrfEncryption, encryptionLibrary, fileStats, fileStripes, intermediateKeys, orcDataSource, decompressor);
    }

    long elapsedCpuNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime() - startCpuNanos;
    runtimeStats.addMetricValue("DwrfReadFooterTimeNanos", elapsedCpuNanos);

    OptionalLong rawSize;
    if (footer.hasRawDataSize()) {
        rawSize = OptionalLong.of(footer.getRawDataSize());
    } else {
        rawSize = OptionalLong.empty();
    }

    return new Footer(
            footer.getNumberOfRows(),
            footer.getRowIndexStride(),
            rawSize,
            fileStripes,
            types,
            fileStats,
            toUserMetadata(footer.getMetadataList()),
            encryption,
            stripeCacheOffsets);
}
Also used : ColumnStatistics.createColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.createColumnStatistics) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) OrcMetadataReader.byteStringToSlice(com.facebook.presto.orc.metadata.OrcMetadataReader.byteStringToSlice) Slice(io.airlift.slice.Slice) OptionalLong(java.util.OptionalLong) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList)

Example 8 with CodedInputStream

Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

From the class DwrfMetadataReader, method readStripeFooter.

/**
 * Parses a DWRF stripe footer from the given stream and converts it into a {@link StripeFooter}.
 *
 * <p>The thread CPU time spent on parsing is recorded under the
 * {@code DwrfReadStripeFooterTimeNanos} metric before the result is assembled.
 *
 * @param orcDataSourceId passed through to the stream conversion
 * @param types passed through to the column encoding conversion
 * @param inputStream stream positioned at the serialized stripe footer
 * @return the converted stripe footer, including its encrypted groups as slices
 * @throws IOException if the stripe footer cannot be parsed
 */
@Override
public StripeFooter readStripeFooter(OrcDataSourceId orcDataSourceId, List<OrcType> types, InputStream inputStream) throws IOException {
    long startCpuNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime();

    DwrfProto.StripeFooter parsedFooter = DwrfProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));

    long elapsedCpuNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime() - startCpuNanos;
    runtimeStats.addMetricValue("DwrfReadStripeFooterTimeNanos", elapsedCpuNanos);

    return new StripeFooter(
            toStream(orcDataSourceId, parsedFooter.getStreamsList()),
            toColumnEncoding(types, parsedFooter.getColumnsList()),
            parsedFooter.getEncryptedGroupsList().stream()
                    .map(encryptedGroup -> OrcMetadataReader.byteStringToSlice(encryptedGroup))
                    .collect(toImmutableList()));
}
Also used : CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) DwrfProto(com.facebook.presto.orc.proto.DwrfProto)

Example 9 with CodedInputStream

Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

From the class OrcMetadataReader, method readStripeFooter.

/**
 * Parses an ORC stripe footer from the given stream and converts it into a {@link StripeFooter}.
 *
 * <p>{@code hiveWriterVersion} and {@code types} are not used by this implementation.
 *
 * @param inputStream stream positioned at the serialized stripe footer
 * @return the stripe footer built from the parsed streams and column encodings
 * @throws IOException if the stripe footer cannot be parsed
 */
@Override
public StripeFooter readStripeFooter(HiveWriterVersion hiveWriterVersion, List<OrcType> types, InputStream inputStream) throws IOException {
    OrcProto.StripeFooter parsedFooter = OrcProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));
    return new StripeFooter(
            toStream(parsedFooter.getStreamsList()),
            toColumnEncoding(parsedFooter.getColumnsList()));
}
Also used : CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) OrcProto(com.facebook.presto.orc.proto.OrcProto)

Example 10 with CodedInputStream

Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

From the class OrcMetadataReader, method readRowIndexes.

/**
 * Parses an ORC row index from the given stream and converts each of its entries into a
 * {@link RowGroupIndex}.
 *
 * @param hiveWriterVersion passed through to the per-entry conversion
 * @param inputStream stream positioned at the serialized row index
 * @return an immutable list with one row group index per entry, in entry order
 * @throws IOException if the row index cannot be parsed
 */
@Override
public List<RowGroupIndex> readRowIndexes(HiveWriterVersion hiveWriterVersion, InputStream inputStream) throws IOException {
    OrcProto.RowIndex parsedIndex = OrcProto.RowIndex.parseFrom(CodedInputStream.newInstance(inputStream));

    ImmutableList.Builder<RowGroupIndex> rowGroupIndexes = ImmutableList.builder();
    for (OrcProto.RowIndexEntry entry : parsedIndex.getEntryList()) {
        rowGroupIndexes.add(toRowGroupIndex(hiveWriterVersion, entry));
    }
    return rowGroupIndexes.build();
}
Also used : ORIGINAL(com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion.ORIGINAL) Iterables(com.google.common.collect.Iterables) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) OrcTypeKind(com.facebook.presto.orc.metadata.OrcType.OrcTypeKind) BigDecimal(java.math.BigDecimal) ZLIB(com.facebook.presto.orc.metadata.CompressionKind.ZLIB) GIGABYTE(io.airlift.units.DataSize.Unit.GIGABYTE) ImmutableList(com.google.common.collect.ImmutableList) HiveWriterVersion(com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion) Slices(io.airlift.slice.Slices) Map(java.util.Map) Math.toIntExact(java.lang.Math.toIntExact) StreamKind(com.facebook.presto.orc.metadata.Stream.StreamKind) ORC_HIVE_8732(com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion.ORC_HIVE_8732) UNCOMPRESSED(com.facebook.presto.orc.metadata.CompressionKind.UNCOMPRESSED) ImmutableMap(com.google.common.collect.ImmutableMap) ColumnEncodingKind(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind) RowIndexEntry(com.facebook.presto.orc.proto.OrcProto.RowIndexEntry) SNAPPY(com.facebook.presto.orc.metadata.CompressionKind.SNAPPY) IOException(java.io.IOException) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) List(java.util.List) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) OrcProto(com.facebook.presto.orc.proto.OrcProto) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) MIN_SURROGATE(java.lang.Character.MIN_SURROGATE) InputStream(java.io.InputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) OrcProto(com.facebook.presto.orc.proto.OrcProto)

Aggregations

CodedInputStream (com.facebook.presto.orc.protobuf.CodedInputStream)20 DwrfProto (com.facebook.presto.orc.proto.DwrfProto)10 OrcProto (com.facebook.presto.orc.proto.OrcProto)10 InputStream (java.io.InputStream)7 ImmutableList (com.google.common.collect.ImmutableList)6 Slice (io.airlift.slice.Slice)6 IOException (java.io.IOException)6 HiveWriterVersion (com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion)5 List (java.util.List)5 ColumnEncodingKind (com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind)4 SNAPPY (com.facebook.presto.orc.metadata.CompressionKind.SNAPPY)4 ZLIB (com.facebook.presto.orc.metadata.CompressionKind.ZLIB)4 OrcTypeKind (com.facebook.presto.orc.metadata.OrcType.OrcTypeKind)4 ORIGINAL (com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion.ORIGINAL)4 StreamKind (com.facebook.presto.orc.metadata.Stream.StreamKind)4 HiveBloomFilter (com.facebook.presto.orc.metadata.statistics.HiveBloomFilter)4 OrcInputStream (com.facebook.presto.orc.stream.OrcInputStream)4 Preconditions.checkState (com.google.common.base.Preconditions.checkState)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 Math.toIntExact (java.lang.Math.toIntExact)4