Use of com.facebook.presto.orc.DwrfDataEncryptor in project presto by prestodb.
The class DwrfMetadataReader, method decryptAndCombineFileStatistics:
private List<ColumnStatistics> decryptAndCombineFileStatistics(
        HiveWriterVersion hiveWriterVersion,
        DwrfEncryption dwrfEncryption,
        EncryptionLibrary encryptionLibrary,
        List<ColumnStatistics> fileStats,
        List<StripeInformation> fileStripes,
        Map<Integer, Slice> nodeToIntermediateKeys,
        OrcDataSource orcDataSource,
        Optional<OrcDecompressor> decompressor)
{
    requireNonNull(dwrfEncryption, "dwrfEncryption is null");
    requireNonNull(encryptionLibrary, "encryptionLibrary is null");
    if (nodeToIntermediateKeys.isEmpty() || fileStats.isEmpty()) {
        return fileStats;
    }

    ColumnStatistics[] decryptedFileStats = fileStats.toArray(new ColumnStatistics[0]);
    List<EncryptionGroup> encryptionGroups = dwrfEncryption.getEncryptionGroups();
    List<byte[]> stripeKeys = null;
    if (!fileStripes.isEmpty() && !fileStripes.get(0).getKeyMetadata().isEmpty()) {
        stripeKeys = fileStripes.get(0).getKeyMetadata();
        checkState(stripeKeys.size() == encryptionGroups.size(),
                "Number of keys in the first stripe must be the same as the number of encryption groups");
    }

    // each encryption group lists the root nodes of the column subtrees it covers
    for (int groupIdx = 0; groupIdx < encryptionGroups.size(); groupIdx++) {
        EncryptionGroup encryptionGroup = encryptionGroups.get(groupIdx);
        DwrfDataEncryptor decryptor = null;
        List<Integer> nodes = encryptionGroup.getNodes();
        for (int i = 0; i < nodes.size(); i++) {
            Integer nodeId = nodes.get(i);

            // decrypt only those nodes that are requested (part of the projection)
            if (!nodeToIntermediateKeys.containsKey(nodeId)) {
                continue;
            }

            if (decryptor == null) {
                // The DEK for the FileStats can be stored in the footer and/or in the first stripe.
                // The key in the footer takes priority over the key in the first stripe.
                byte[] encryptedDataKeyWithMeta = null;
                if (encryptionGroup.getKeyMetadata().isPresent()) {
                    encryptedDataKeyWithMeta = encryptionGroup.getKeyMetadata().get().byteArray();
                }
                else if (stripeKeys != null) {
                    encryptedDataKeyWithMeta = stripeKeys.get(groupIdx);
                }
                checkState(encryptedDataKeyWithMeta != null, "DEK for %s encryption group is null", groupIdx);

                // decrypt the DEK, which is itself encrypted with the IEK passed into the record reader
                byte[] intermediateKey = nodeToIntermediateKeys.get(nodeId).byteArray();
                byte[] dataKey = encryptionLibrary.decryptKey(intermediateKey, encryptedDataKeyWithMeta, 0, encryptedDataKeyWithMeta.length);
                decryptor = new DwrfDataEncryptor(dataKey, encryptionLibrary);
            }

            // decrypt the FileStats
            Slice encryptedFileStats = encryptionGroup.getStatistics().get(i);
            // memory is not accounted for, because the buffer is expected to be tiny and is discarded immediately
            try (OrcInputStream inputStream = new OrcInputStream(
                    orcDataSource.getId(),
                    new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                    new BasicSliceInput(encryptedFileStats),
                    decompressor,
                    Optional.of(decryptor),
                    NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                    encryptedFileStats.length())) {
                CodedInputStream input = CodedInputStream.newInstance(inputStream);
                DwrfProto.FileStatistics nodeStats = DwrfProto.FileStatistics.parseFrom(input);

                // FileStatistics contains ColumnStatistics for the node and all its child nodes (subtree)
                for (int statsIdx = 0; statsIdx < nodeStats.getStatisticsCount(); statsIdx++) {
                    decryptedFileStats[nodeId + statsIdx] = toColumnStatistics(hiveWriterVersion, nodeStats.getStatistics(statsIdx), false, null);
                }
            }
            catch (IOException e) {
                throw new OrcCorruptionException(e, orcDataSource.getId(), "Failed to read or decrypt FileStatistics for node %s", nodeId);
            }
        }
    }
    return ImmutableList.copyOf(decryptedFileStats);
}
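
The interesting step above is the lazy decryptor setup: the data encryption key (DEK) is stored encrypted under an intermediate encryption key (IEK) supplied by the caller, so the reader must unwrap the DEK before it can decode any statistics (envelope encryption). Below is a minimal sketch of just that step, written as a hypothetical helper that could sit next to the method above; the helper name is ours, but decryptKey and the DwrfDataEncryptor constructor are called exactly as in the original.

// Hypothetical helper isolating the envelope-decryption step: the DEK
// travels encrypted under the caller-provided IEK, so it must be
// unwrapped before a decryptor for the FileStatistics stream can be built.
private static DwrfDataEncryptor createFileStatsDecryptor(
        EncryptionLibrary encryptionLibrary,
        byte[] intermediateKey,
        byte[] encryptedDataKeyWithMeta)
{
    // unwrap the DEK with the IEK, then wrap it in a decryptor
    byte[] dataKey = encryptionLibrary.decryptKey(
            intermediateKey, encryptedDataKeyWithMeta, 0, encryptedDataKeyWithMeta.length);
    return new DwrfDataEncryptor(dataKey, encryptionLibrary);
}

Note also how the decrypted statistics are merged back: FileStatistics for a node carries ColumnStatistics for the node's entire subtree, and because ORC numbers the nodes of a subtree consecutively, decryptedFileStats[nodeId + statsIdx] is a valid overlay onto the plaintext array.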
The class ColumnWriters, method createColumnWriter:
public static ColumnWriter createColumnWriter(
        int columnIndex,
        List<OrcType> orcTypes,
        Type type,
        ColumnWriterOptions columnWriterOptions,
        OrcEncoding orcEncoding,
        DateTimeZone hiveStorageTimeZone,
        DwrfEncryptionInfo dwrfEncryptors,
        MetadataWriter metadataWriter)
{
    requireNonNull(type, "type is null");
    OrcType orcType = orcTypes.get(columnIndex);
    Optional<DwrfDataEncryptor> dwrfEncryptor = dwrfEncryptors.getEncryptorByNodeId(columnIndex);
    switch (orcType.getOrcTypeKind()) {
        case BOOLEAN:
            return new BooleanColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, metadataWriter);
        case FLOAT:
            return new FloatColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, metadataWriter);
        case DOUBLE:
            return new DoubleColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, metadataWriter);
        case BYTE:
            return new ByteColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, metadataWriter);
        case DATE:
            checkArgument(orcEncoding != DWRF, "DWRF does not support %s type", type);
            return new LongColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, DateStatisticsBuilder::new, metadataWriter);
        case SHORT:
            return new LongColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, IntegerStatisticsBuilder::new, metadataWriter);
        case INT:
        case LONG:
            if (columnWriterOptions.isIntegerDictionaryEncodingEnabled() && orcEncoding == DWRF) {
                // integer dictionary encoding is supported by DWRF, but not by ORC V1
                return new LongDictionaryColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, metadataWriter);
            }
            return new LongColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, IntegerStatisticsBuilder::new, metadataWriter);
        case DECIMAL:
            checkArgument(orcEncoding != DWRF, "DWRF does not support %s type", type);
            return new DecimalColumnWriter(columnIndex, type, columnWriterOptions, orcEncoding, metadataWriter);
        case TIMESTAMP:
            return new TimestampColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, hiveStorageTimeZone, metadataWriter);
        case BINARY:
            return new SliceDirectColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, BinaryStatisticsBuilder::new, metadataWriter);
        case CHAR:
            checkArgument(orcEncoding != DWRF, "DWRF does not support %s type", type);
            // fall through
        case VARCHAR:
        case STRING:
            if (columnWriterOptions.isStringDictionaryEncodingEnabled()) {
                return new SliceDictionaryColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, metadataWriter);
            }
            int stringStatisticsLimit = columnWriterOptions.getStringStatisticsLimit();
            return new SliceDirectColumnWriter(columnIndex, type, columnWriterOptions, dwrfEncryptor, orcEncoding, () -> new StringStatisticsBuilder(stringStatisticsLimit), metadataWriter);
        case LIST: {
            int fieldColumnIndex = orcType.getFieldTypeIndex(0);
            Type fieldType = type.getTypeParameters().get(0);
            ColumnWriter elementWriter = createColumnWriter(fieldColumnIndex, orcTypes, fieldType, columnWriterOptions, orcEncoding, hiveStorageTimeZone, dwrfEncryptors, metadataWriter);
            return new ListColumnWriter(columnIndex, columnWriterOptions, dwrfEncryptor, orcEncoding, elementWriter, metadataWriter);
        }
        case MAP: {
            ColumnWriter keyWriter = createColumnWriter(orcType.getFieldTypeIndex(0), orcTypes, type.getTypeParameters().get(0), columnWriterOptions, orcEncoding, hiveStorageTimeZone, dwrfEncryptors, metadataWriter);
            ColumnWriter valueWriter = createColumnWriter(orcType.getFieldTypeIndex(1), orcTypes, type.getTypeParameters().get(1), columnWriterOptions, orcEncoding, hiveStorageTimeZone, dwrfEncryptors, metadataWriter);
            return new MapColumnWriter(columnIndex, columnWriterOptions, dwrfEncryptor, orcEncoding, keyWriter, valueWriter, metadataWriter);
        }
        case STRUCT: {
            ImmutableList.Builder<ColumnWriter> fieldWriters = ImmutableList.builder();
            for (int fieldId = 0; fieldId < orcType.getFieldCount(); fieldId++) {
                int fieldColumnIndex = orcType.getFieldTypeIndex(fieldId);
                Type fieldType = type.getTypeParameters().get(fieldId);
                fieldWriters.add(createColumnWriter(fieldColumnIndex, orcTypes, fieldType, columnWriterOptions, orcEncoding, hiveStorageTimeZone, dwrfEncryptors, metadataWriter));
            }
            return new StructColumnWriter(columnIndex, columnWriterOptions, dwrfEncryptor, fieldWriters.build(), metadataWriter);
        }
    }
    throw new IllegalArgumentException("Unsupported type: " + type);
}
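
Callers usually start this recursion at the children of the root STRUCT rather than at node 0 itself. Here is a hypothetical companion method sketching such a driver, written so it could sit next to createColumnWriter in the same class (so it needs no new imports); the helper name and signature are our invention, but every call it makes appears verbatim in the factory above, and it assumes the standard ORC layout where node 0 is the root struct whose fields are the table's top-level columns.

// Hypothetical driver: builds one writer per top-level field of the root
// struct (node 0), reusing the factory above. rowType must be the row type
// whose type parameters line up with the root struct's fields.
public static List<ColumnWriter> createRootFieldWriters(
        List<OrcType> orcTypes,
        Type rowType,
        ColumnWriterOptions columnWriterOptions,
        OrcEncoding orcEncoding,
        DateTimeZone hiveStorageTimeZone,
        DwrfEncryptionInfo dwrfEncryptors,
        MetadataWriter metadataWriter)
{
    OrcType root = orcTypes.get(0); // node 0 is the root STRUCT in the flattened type tree
    ImmutableList.Builder<ColumnWriter> writers = ImmutableList.builder();
    for (int fieldId = 0; fieldId < root.getFieldCount(); fieldId++) {
        writers.add(createColumnWriter(
                root.getFieldTypeIndex(fieldId),
                orcTypes,
                rowType.getTypeParameters().get(fieldId),
                columnWriterOptions,
                orcEncoding,
                hiveStorageTimeZone,
                dwrfEncryptors,
                metadataWriter));
    }
    return writers.build();
}

Note the design of the factory itself: the per-column encryptor is looked up by node id (dwrfEncryptors.getEncryptorByNodeId), so encrypted and plaintext columns can be mixed freely within one file, and the nested cases (LIST, MAP, STRUCT) pass the whole DwrfEncryptionInfo down so each child resolves its own encryptor.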