Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class DwrfMetadataReader, method decryptAndCombineFileStatistics.
/**
 * Overlays decrypted file-level column statistics onto the statistics read from the footer.
 *
 * <p>When {@code nodeToIntermediateKeys} or {@code fileStats} is empty, {@code fileStats} is
 * returned unchanged. Otherwise {@code fileStats} is copied into an array, and for every node
 * of every encryption group that has an entry in {@code nodeToIntermediateKeys}, the group's
 * encrypted statistics for that node are read through a decrypting {@code OrcInputStream},
 * parsed as {@code DwrfProto.FileStatistics}, and written into the array starting at the
 * node's index.
 *
 * @param hiveWriterVersion forwarded to {@code toColumnStatistics}
 * @param dwrfEncryption source of the encryption groups; must not be null
 * @param encryptionLibrary used to decrypt the data key and to build the decryptor; must not be null
 * @param fileStats statistics read from the footer
 * @param fileStripes stripes of the file; the key metadata of the first one is the fallback key source
 * @param nodeToIntermediateKeys intermediate key per node; only nodes present here are decrypted
 * @param orcDataSource supplies the data source id used for the stream and for error reporting
 * @param decompressor forwarded to the {@code OrcInputStream}
 * @return {@code fileStats} itself when there is nothing to decrypt, otherwise an immutable copy
 *         with the decrypted entries filled in
 */
private List<ColumnStatistics> decryptAndCombineFileStatistics(HiveWriterVersion hiveWriterVersion, DwrfEncryption dwrfEncryption, EncryptionLibrary encryptionLibrary, List<ColumnStatistics> fileStats, List<StripeInformation> fileStripes, Map<Integer, Slice> nodeToIntermediateKeys, OrcDataSource orcDataSource, Optional<OrcDecompressor> decompressor) {
    requireNonNull(dwrfEncryption, "dwrfEncryption is null");
    requireNonNull(encryptionLibrary, "encryptionLibrary is null");

    // Nothing to decrypt: no intermediate keys were supplied or there are no statistics at all.
    if (nodeToIntermediateKeys.isEmpty() || fileStats.isEmpty()) {
        return fileStats;
    }

    ColumnStatistics[] combinedStats = fileStats.toArray(new ColumnStatistics[0]);
    List<EncryptionGroup> groups = dwrfEncryption.getEncryptionGroups();

    // Key metadata of the first stripe is the fallback DEK source; null means it is not available.
    List<byte[]> firstStripeKeys = null;
    if (!fileStripes.isEmpty()) {
        List<byte[]> candidateKeys = fileStripes.get(0).getKeyMetadata();
        if (!candidateKeys.isEmpty()) {
            checkState(candidateKeys.size() == groups.size(), "Number of keys in the first stripe must be the same as the number of encryption groups");
            firstStripeKeys = candidateKeys;
        }
    }

    // Each decrypted FileStatistics covers a node together with its subtree (see the write loop
    // below), so a group lists a node once and the statistics of its children follow it.
    for (int groupIndex = 0; groupIndex < groups.size(); groupIndex++) {
        EncryptionGroup group = groups.get(groupIndex);
        List<Integer> groupNodes = group.getNodes();
        // Built lazily, on the first requested node of the group, and then reused for the group.
        DwrfDataEncryptor groupDecryptor = null;

        for (int position = 0; position < groupNodes.size(); position++) {
            Integer node = groupNodes.get(position);

            // Only nodes that were requested (part of the projection) are decrypted.
            if (!nodeToIntermediateKeys.containsKey(node)) {
                continue;
            }

            if (groupDecryptor == null) {
                // The DEK for the FileStats can live in the footer and/or in the first stripe;
                // the footer copy wins when both are present.
                byte[] wrappedDataKey;
                if (group.getKeyMetadata().isPresent()) {
                    wrappedDataKey = group.getKeyMetadata().get().byteArray();
                } else {
                    wrappedDataKey = firstStripeKeys == null ? null : firstStripeKeys.get(groupIndex);
                }
                checkState(wrappedDataKey != null, "DEK for %s encryption group is null", groupIndex);

                // Unwrap the DEK with the IEK that was handed to the record reader for this node.
                byte[] intermediateKey = nodeToIntermediateKeys.get(node).byteArray();
                byte[] dataKey = encryptionLibrary.decryptKey(intermediateKey, wrappedDataKey, 0, wrappedDataKey.length);
                groupDecryptor = new DwrfDataEncryptor(dataKey, encryptionLibrary);
            }

            Slice encryptedStats = group.getStatistics().get(position);
            // Memory is not accounted as the buffer is expected to be tiny and will be immediately discarded.
            try (OrcInputStream statsStream = new OrcInputStream(
                    orcDataSource.getId(),
                    new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                    new BasicSliceInput(encryptedStats),
                    decompressor,
                    Optional.of(groupDecryptor),
                    NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                    encryptedStats.length())) {
                DwrfProto.FileStatistics subtreeStats = DwrfProto.FileStatistics.parseFrom(CodedInputStream.newInstance(statsStream));

                // Entry k of the decrypted statistics replaces the slot at (node + k).
                int subtreeSize = subtreeStats.getStatisticsCount();
                for (int offset = 0; offset < subtreeSize; offset++) {
                    combinedStats[node + offset] = toColumnStatistics(hiveWriterVersion, subtreeStats.getStatistics(offset), false, null);
                }
            } catch (IOException e) {
                throw new OrcCorruptionException(e, orcDataSource.getId(), "Failed to read or decrypt FileStatistics for node %s", node);
            }
        }
    }

    return ImmutableList.copyOf(combinedStats);
}
Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class DwrfMetadataReader, method readFooter.
/**
 * Parses a {@code DwrfProto.Footer} from {@code inputStream} and converts it into a {@link Footer}.
 *
 * <p>If the parsed footer carries encryption information, the file statistics are passed through
 * {@code decryptAndCombineFileStatistics} using the intermediate keys obtained from
 * {@code dwrfKeyProvider} and the encryption library obtained from {@code dwrfEncryptionProvider}.
 * The difference in {@code THREAD_MX_BEAN.getCurrentThreadCpuTime()} across parsing and decryption
 * is reported to {@code runtimeStats} under {@code "DwrfReadFooterTimeNanos"}.
 *
 * @param hiveWriterVersion forwarded to the statistics conversion
 * @param inputStream stream positioned at the serialized footer
 * @param dwrfEncryptionProvider consulted only when the footer has encryption information
 * @param dwrfKeyProvider consulted only when the footer has encryption information
 * @param orcDataSource forwarded to the statistics decryption
 * @param decompressor forwarded to the statistics decryption
 * @return the converted footer
 * @throws IOException declared for failures while reading or parsing the footer
 */
@Override
public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputStream, DwrfEncryptionProvider dwrfEncryptionProvider, DwrfKeyProvider dwrfKeyProvider, OrcDataSource orcDataSource, Optional<OrcDecompressor> decompressor) throws IOException {
    long startCpuTime = THREAD_MX_BEAN.getCurrentThreadCpuTime();

    DwrfProto.Footer footer = DwrfProto.Footer.parseFrom(CodedInputStream.newInstance(inputStream));

    List<ColumnStatistics> fileStats = toColumnStatistics(hiveWriterVersion, footer.getStatisticsList(), false);
    List<StripeInformation> fileStripes = toStripeInformation(footer.getStripesList());
    List<OrcType> types = toType(footer.getTypesList());

    Optional<DwrfEncryption> encryption;
    if (footer.hasEncryption()) {
        encryption = Optional.of(toEncryption(footer.getEncryption()));
    } else {
        encryption = Optional.empty();
    }
    Optional<List<Integer>> stripeCacheOffsets = Optional.of(footer.getStripeCacheOffsetsList());

    // Encrypted files keep the statistics of encrypted nodes separately; merge them in here.
    if (encryption.isPresent()) {
        DwrfEncryption dwrfEncryption = encryption.get();
        Map<Integer, Slice> intermediateKeys = dwrfKeyProvider.getIntermediateKeys(types);
        EncryptionLibrary encryptionLibrary = dwrfEncryptionProvider.getEncryptionLibrary(dwrfEncryption.getKeyProvider());
        fileStats = decryptAndCombineFileStatistics(hiveWriterVersion, dwrfEncryption, encryptionLibrary, fileStats, fileStripes, intermediateKeys, orcDataSource, decompressor);
    }

    // Recorded before the Footer object is assembled, so the remaining conversions are not included.
    long elapsedCpuTime = THREAD_MX_BEAN.getCurrentThreadCpuTime() - startCpuTime;
    runtimeStats.addMetricValue("DwrfReadFooterTimeNanos", elapsedCpuTime);

    OptionalLong rawSize;
    if (footer.hasRawDataSize()) {
        rawSize = OptionalLong.of(footer.getRawDataSize());
    } else {
        rawSize = OptionalLong.empty();
    }

    return new Footer(
            footer.getNumberOfRows(),
            footer.getRowIndexStride(),
            rawSize,
            fileStripes,
            types,
            fileStats,
            toUserMetadata(footer.getMetadataList()),
            encryption,
            stripeCacheOffsets);
}
Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class DwrfMetadataReader, method readStripeFooter.
/**
 * Parses a {@code DwrfProto.StripeFooter} from {@code inputStream} and converts it into a
 * {@link StripeFooter} made of its streams, its column encodings and its encrypted groups.
 *
 * <p>The difference in {@code THREAD_MX_BEAN.getCurrentThreadCpuTime()} across the parse is
 * reported to {@code runtimeStats} under {@code "DwrfReadStripeFooterTimeNanos"}.
 *
 * @param orcDataSourceId forwarded to the stream conversion
 * @param types forwarded to the column-encoding conversion
 * @param inputStream stream positioned at the serialized stripe footer
 * @return the converted stripe footer
 * @throws IOException declared for failures while reading or parsing the stripe footer
 */
@Override
public StripeFooter readStripeFooter(OrcDataSourceId orcDataSourceId, List<OrcType> types, InputStream inputStream) throws IOException {
    long startCpuTime = THREAD_MX_BEAN.getCurrentThreadCpuTime();

    DwrfProto.StripeFooter parsedFooter = DwrfProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));

    // Only the protobuf parse is timed; the conversions below happen after the metric is recorded.
    long elapsedCpuTime = THREAD_MX_BEAN.getCurrentThreadCpuTime() - startCpuTime;
    runtimeStats.addMetricValue("DwrfReadStripeFooterTimeNanos", elapsedCpuTime);

    return new StripeFooter(
            toStream(orcDataSourceId, parsedFooter.getStreamsList()),
            toColumnEncoding(types, parsedFooter.getColumnsList()),
            parsedFooter.getEncryptedGroupsList()
                    .stream()
                    .map(OrcMetadataReader::byteStringToSlice)
                    .collect(toImmutableList()));
}
Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class OrcMetadataReader, method readStripeFooter.
/**
 * Parses an {@code OrcProto.StripeFooter} from {@code inputStream} and converts its streams and
 * column encodings into a {@link StripeFooter}.
 *
 * @param hiveWriterVersion not used by this method
 * @param types not used by this method
 * @param inputStream stream positioned at the serialized stripe footer
 * @return the converted stripe footer
 * @throws IOException declared for failures while reading or parsing the stripe footer
 */
@Override
public StripeFooter readStripeFooter(HiveWriterVersion hiveWriterVersion, List<OrcType> types, InputStream inputStream) throws IOException {
    OrcProto.StripeFooter parsedFooter = OrcProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));
    return new StripeFooter(
            toStream(parsedFooter.getStreamsList()),
            toColumnEncoding(parsedFooter.getColumnsList()));
}
Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class OrcMetadataReader, method readRowIndexes.
/**
 * Parses an {@code OrcProto.RowIndex} from {@code inputStream} and converts each of its entries,
 * in order, into a {@link RowGroupIndex}.
 *
 * @param hiveWriterVersion forwarded to {@code toRowGroupIndex} for every entry
 * @param inputStream stream positioned at the serialized row index
 * @return an immutable list with one {@link RowGroupIndex} per row index entry
 * @throws IOException declared for failures while reading or parsing the row index
 */
@Override
public List<RowGroupIndex> readRowIndexes(HiveWriterVersion hiveWriterVersion, InputStream inputStream) throws IOException {
    OrcProto.RowIndex parsedIndex = OrcProto.RowIndex.parseFrom(CodedInputStream.newInstance(inputStream));

    ImmutableList.Builder<RowGroupIndex> rowGroupIndexes = ImmutableList.builder();
    for (OrcProto.RowIndexEntry entry : parsedIndex.getEntryList()) {
        rowGroupIndexes.add(toRowGroupIndex(hiveWriterVersion, entry));
    }
    return rowGroupIndexes.build();
}
Aggregations