Use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.
From the class TestLongDictionaryProvider, method createLongDictionaryStreamSources:
private InputStreamSources createLongDictionaryStreamSources(Map<NodeId, long[]> streams, OrcAggregatedMemoryContext aggregatedMemoryContext)
{
    SharedBuffer decompressionBuffer = new SharedBuffer(aggregatedMemoryContext.newOrcLocalMemoryContext("sharedDecompressionBuffer"));
    ImmutableMap.Builder<StreamId, InputStreamSource<?>> dictionaryStreamsBuilder = ImmutableMap.builder();
    for (Map.Entry<NodeId, long[]> entry : streams.entrySet()) {
        StreamId streamId = entry.getKey().toDictionaryDataStreamId();
        DynamicSliceOutput sliceOutput = createSliceOutput(streamId, entry.getValue());
        ValueInputStream<?> valueStream = createValueStream(sliceOutput.slice(), aggregatedMemoryContext, decompressionBuffer);
        StreamCheckpoint streamCheckpoint = getDictionaryStreamCheckpoint(streamId, LONG, ColumnEncoding.ColumnEncodingKind.DICTIONARY);
        InputStreamSource<?> streamSource = createCheckpointStreamSource(valueStream, streamCheckpoint);
        dictionaryStreamsBuilder.put(streamId, streamSource);
    }
    return new InputStreamSources(dictionaryStreamsBuilder.build());
}
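Here SharedBuffer acts as a single scratch array that every dictionary stream built in the loop reuses for decompression, instead of each stream allocating its own. A minimal, self-contained sketch of that idea (an illustration, not the real Presto class; the LongConsumer stands in for OrcLocalMemoryContext):

import java.util.function.LongConsumer;

// Simplified stand-in for SharedBuffer: one lazily grown scratch array shared by
// several call sites, with every growth reported to a memory-accounting callback.
final class ScratchBuffer
{
    private final LongConsumer memoryAccounting;  // stands in for OrcLocalMemoryContext.setBytes
    private byte[] buffer = new byte[0];

    ScratchBuffer(LongConsumer memoryAccounting)
    {
        this.memoryAccounting = memoryAccounting;
    }

    byte[] ensureCapacity(int size)
    {
        if (buffer.length < size) {
            buffer = new byte[size];          // grow; previous contents are scratch and need not survive
            memoryAccounting.accept(size);    // report the new allocation size
        }
        return buffer;
    }
}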
Use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.
From the class AbstractOrcRecordReader, method advanceToNextStripe:
private void advanceToNextStripe()
        throws IOException
{
    currentStripeSystemMemoryContext.close();
    currentStripeSystemMemoryContext = systemMemoryUsage.newOrcAggregatedMemoryContext();
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    if (currentStripe >= 0 && stripeStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation statisticsValidation = stripeStatisticsValidation.get();
        long offset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateStripeStatistics(orcDataSource.getId(), offset, statisticsValidation.build());
        statisticsValidation.reset();
    }

    currentStripe++;
    if (currentStripe >= stripes.size()) {
        return;
    }
    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }

    StripeInformation stripeInformation = stripes.get(currentStripe);
    validateWriteStripe(stripeInformation.getNumberOfRows());

    List<byte[]> stripeDecryptionKeyMetadata = getDecryptionKeyMetadata(currentStripe, stripes);
    // set dwrfEncryptionInfo when this stripe introduces encryption keys or changes the current ones
    if ((!stripeDecryptionKeyMetadata.isEmpty() && !dwrfEncryptionInfo.isPresent())
            || (dwrfEncryptionInfo.isPresent() && !stripeDecryptionKeyMetadata.equals(dwrfEncryptionInfo.get().getEncryptedKeyMetadatas()))) {
        verify(encryptionLibrary.isPresent(), "encryptionLibrary is absent");
        dwrfEncryptionInfo = Optional.of(createDwrfEncryptionInfo(encryptionLibrary.get(), stripeDecryptionKeyMetadata, intermediateKeyMetadata, dwrfEncryptionGroupMap));
    }

    SharedBuffer sharedDecompressionBuffer = new SharedBuffer(currentStripeSystemMemoryContext.newOrcLocalMemoryContext("sharedDecompressionBuffer"));
    Stripe stripe = stripeReader.readStripe(stripeInformation, currentStripeSystemMemoryContext, dwrfEncryptionInfo, sharedDecompressionBuffer);
    if (stripe != null) {
        for (StreamReader column : streamReaders) {
            if (column != null) {
                column.startStripe(stripe);
            }
        }
        rowGroups = stripe.getRowGroups().iterator();
    }
}
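Note that this method only loads the next stripe and resets rowGroups; it does not iterate rows itself. A simplified sketch (an assumption, not the actual Presto driver code) of how a caller could drive it until the file is exhausted:

// Hypothetical driving loop: pull row groups from the current stripe and load the
// next stripe whenever the iterator runs dry; returns false at end of file.
private boolean advanceToNextRowGroup()
        throws IOException
{
    while (!rowGroups.hasNext()) {
        advanceToNextStripe();
        if (currentStripe >= stripes.size()) {
            return false;  // advanceToNextStripe() ran past the last stripe
        }
    }
    RowGroup rowGroup = rowGroups.next();
    // seek each stream reader to this row group's checkpoints before reading
    return true;
}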
Use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.
From the class DwrfMetadataReader, method decryptAndCombineFileStatistics:
private List<ColumnStatistics> decryptAndCombineFileStatistics(
        HiveWriterVersion hiveWriterVersion,
        DwrfEncryption dwrfEncryption,
        EncryptionLibrary encryptionLibrary,
        List<ColumnStatistics> fileStats,
        List<StripeInformation> fileStripes,
        Map<Integer, Slice> nodeToIntermediateKeys,
        OrcDataSource orcDataSource,
        Optional<OrcDecompressor> decompressor)
{
    requireNonNull(dwrfEncryption, "dwrfEncryption is null");
    requireNonNull(encryptionLibrary, "encryptionLibrary is null");
    if (nodeToIntermediateKeys.isEmpty() || fileStats.isEmpty()) {
        return fileStats;
    }

    ColumnStatistics[] decryptedFileStats = fileStats.toArray(new ColumnStatistics[0]);
    List<EncryptionGroup> encryptionGroups = dwrfEncryption.getEncryptionGroups();

    List<byte[]> stripeKeys = null;
    if (!fileStripes.isEmpty() && !fileStripes.get(0).getKeyMetadata().isEmpty()) {
        stripeKeys = fileStripes.get(0).getKeyMetadata();
        checkState(stripeKeys.size() == encryptionGroups.size(), "Number of keys in the first stripe must be the same as the number of encryption groups");
    }

    // walk the encryption groups; a decryptor is created lazily for the first projected node in each group
    for (int groupIdx = 0; groupIdx < encryptionGroups.size(); groupIdx++) {
        EncryptionGroup encryptionGroup = encryptionGroups.get(groupIdx);
        DwrfDataEncryptor decryptor = null;
        List<Integer> nodes = encryptionGroup.getNodes();
        for (int i = 0; i < nodes.size(); i++) {
            Integer nodeId = nodes.get(i);

            // do decryption only for those nodes that are requested (part of the projection)
            if (!nodeToIntermediateKeys.containsKey(nodeId)) {
                continue;
            }

            if (decryptor == null) {
                // The DEK for the FileStats can be stored in the footer and/or in the first stripe.
                // The key in the footer takes priority over the key in the first stripe.
                byte[] encryptedDataKeyWithMeta = null;
                if (encryptionGroup.getKeyMetadata().isPresent()) {
                    encryptedDataKeyWithMeta = encryptionGroup.getKeyMetadata().get().byteArray();
                }
                else if (stripeKeys != null) {
                    encryptedDataKeyWithMeta = stripeKeys.get(groupIdx);
                }
                checkState(encryptedDataKeyWithMeta != null, "DEK for %s encryption group is null", groupIdx);

                // decrypt the DEK, which is encrypted with the IEK passed into the record reader
                byte[] intermediateKey = nodeToIntermediateKeys.get(nodeId).byteArray();
                byte[] dataKey = encryptionLibrary.decryptKey(intermediateKey, encryptedDataKeyWithMeta, 0, encryptedDataKeyWithMeta.length);
                decryptor = new DwrfDataEncryptor(dataKey, encryptionLibrary);
            }

            // decrypt the FileStats
            Slice encryptedFileStats = encryptionGroup.getStatistics().get(i);
            try (OrcInputStream inputStream = new OrcInputStream(
                    orcDataSource.getId(),
                    // memory is not accounted because the buffer is expected to be tiny and is immediately discarded
                    new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                    new BasicSliceInput(encryptedFileStats),
                    decompressor,
                    Optional.of(decryptor),
                    NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                    encryptedFileStats.length())) {
                CodedInputStream input = CodedInputStream.newInstance(inputStream);
                DwrfProto.FileStatistics nodeStats = DwrfProto.FileStatistics.parseFrom(input);

                // FileStatistics contains ColumnStatistics for the node and all its child nodes (subtree)
                for (int statsIdx = 0; statsIdx < nodeStats.getStatisticsCount(); statsIdx++) {
                    decryptedFileStats[nodeId + statsIdx] = toColumnStatistics(hiveWriterVersion, nodeStats.getStatistics(statsIdx), false, null);
                }
            }
            catch (IOException e) {
                throw new OrcCorruptionException(e, orcDataSource.getId(), "Failed to read or decrypt FileStatistics for node %s", nodeId);
            }
        }
    }
    return ImmutableList.copyOf(decryptedFileStats);
}
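The key lookup buried in the inner loop follows a simple priority rule: a DEK stored in the footer's encryption group wins over one stored in the first stripe. Pulled out as a hypothetical helper for clarity (names mirror the snippet; this is an illustration, not an actual Presto method):

// Hypothetical extraction of the DEK-resolution rule used above: prefer the key
// metadata in the footer's encryption group, fall back to the first stripe's key.
private static byte[] resolveEncryptedDek(EncryptionGroup encryptionGroup, List<byte[]> stripeKeys, int groupIdx)
{
    if (encryptionGroup.getKeyMetadata().isPresent()) {
        return encryptionGroup.getKeyMetadata().get().byteArray();  // footer key takes priority
    }
    if (stripeKeys != null) {
        return stripeKeys.get(groupIdx);                            // key stored in the first stripe
    }
    throw new IllegalStateException("DEK for encryption group " + groupIdx + " is null");
}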
Use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.
From the class OrcTester, method getFileMetadata:
public static FileMetadata getFileMetadata(File inputFile, OrcEncoding encoding)
        throws IOException
{
    boolean zstdJniDecompressionEnabled = true;
    DataSize dataSize = new DataSize(1, MEGABYTE);
    OrcDataSource orcDataSource = new FileOrcDataSource(inputFile, dataSize, dataSize, dataSize, true);
    RuntimeStats runtimeStats = new RuntimeStats();
    OrcReader reader = new OrcReader(
            orcDataSource,
            encoding,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            new OrcReaderOptions(dataSize, dataSize, dataSize, zstdJniDecompressionEnabled),
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            runtimeStats);
    Footer footer = reader.getFooter();
    Optional<OrcDecompressor> decompressor = createOrcDecompressor(orcDataSource.getId(), reader.getCompressionKind(), reader.getBufferSize(), zstdJniDecompressionEnabled);

    ImmutableList.Builder<StripeFooter> stripes = new ImmutableList.Builder<>();
    for (StripeInformation stripe : footer.getStripes()) {
        // read the stripe footer, which sits right after the stripe's index and data sections
        byte[] tailBuffer = new byte[toIntExact(stripe.getFooterLength())];
        orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), tailBuffer);
        try (InputStream inputStream = new OrcInputStream(
                orcDataSource.getId(),
                new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                Slices.wrappedBuffer(tailBuffer).getInput(),
                decompressor,
                Optional.empty(),
                new TestingHiveOrcAggregatedMemoryContext(),
                tailBuffer.length)) {
            StripeFooter stripeFooter = encoding.createMetadataReader(runtimeStats).readStripeFooter(orcDataSource.getId(), footer.getTypes(), inputStream);
            stripes.add(stripeFooter);
        }
    }
    return new FileMetadata(footer, stripes.build());
}
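A hypothetical usage sketch (the file path and the FileMetadata accessor are assumptions for illustration): load a file's metadata and print its stripe layout.

// Hypothetical caller: dump the stripe layout of a DWRF file.
File orcFile = new File("/tmp/example.orc");                  // assumed path
FileMetadata metadata = getFileMetadata(orcFile, OrcEncoding.DWRF);
Footer footer = metadata.getFooter();                         // accessor name assumed
for (StripeInformation stripe : footer.getStripes()) {
    System.out.printf("stripe offset=%d rows=%d%n", stripe.getOffset(), stripe.getNumberOfRows());
}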
Use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.
From the class AbstractTestDwrfStripeCaching, method readFileFooter:
static DwrfProto.Footer readFileFooter(File orcFile)
{
    try (RandomAccessFile file = new RandomAccessFile(orcFile, "r")) {
        // read the postscript size from the last byte of the file
        file.seek(file.length() - 1);
        int postScriptSize = file.read() & 0xff;

        // read the postscript
        long postScriptPosition = file.length() - postScriptSize - 1;
        byte[] postScriptBytes = readBytes(file, postScriptPosition, postScriptSize);
        CodedInputStream postScriptInput = CodedInputStream.newInstance(postScriptBytes, 0, postScriptSize);
        DwrfProto.PostScript postScript = DwrfProto.PostScript.parseFrom(postScriptInput);

        // read the footer, which directly precedes the postscript
        long footerPosition = postScriptPosition - postScript.getFooterLength();
        int footerLength = toIntExact(postScript.getFooterLength());
        byte[] footerBytes = readBytes(file, footerPosition, footerLength);
        int compressionBufferSize = toIntExact(postScript.getCompressionBlockSize());
        OrcDataSourceId dataSourceId = new OrcDataSourceId(orcFile.getName());
        Optional<OrcDecompressor> decompressor = OrcDecompressor.createOrcDecompressor(dataSourceId, ZLIB, compressionBufferSize);
        InputStream footerInputStream = new OrcInputStream(
                dataSourceId,
                new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                Slices.wrappedBuffer(footerBytes).slice(0, footerLength).getInput(),
                decompressor,
                Optional.empty(),
                NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                footerLength);
        return DwrfProto.Footer.parseFrom(footerInputStream);
    }
    catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
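The snippet relies on a readBytes helper that is not shown. A minimal sketch, assuming it simply seeks to the given position and fills a buffer of the requested length:

// Assumed implementation of the readBytes helper used above: seek and read exactly
// 'length' bytes, throwing EOFException if the file ends early.
private static byte[] readBytes(RandomAccessFile file, long position, long length)
        throws IOException
{
    byte[] buffer = new byte[toIntExact(length)];
    file.seek(position);
    file.readFully(buffer);
    return buffer;
}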