Search in sources :

Example 1 with SharedBuffer

use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.

the class TestLongDictionaryProvider method createLongDictionaryStreamSources.

/**
 * Wraps each per-node long array in a checkpointed dictionary-data stream source.
 *
 * <p>For every entry of {@code streams} the node id is converted to its dictionary data
 * stream id, the values are written to a slice, and a value stream over that slice is paired
 * with a LONG / DICTIONARY stream checkpoint. All value streams share a single decompression
 * buffer whose memory is tracked under {@code aggregatedMemoryContext}.
 *
 * @param streams dictionary values keyed by node id
 * @param aggregatedMemoryContext memory context used for the shared buffer and the value streams
 * @return the stream sources, keyed by dictionary data stream id
 */
private InputStreamSources createLongDictionaryStreamSources(Map<NodeId, long[]> streams, OrcAggregatedMemoryContext aggregatedMemoryContext) {
    // One buffer instance is handed to every value stream created below.
    SharedBuffer sharedBuffer = new SharedBuffer(aggregatedMemoryContext.newOrcLocalMemoryContext("sharedDecompressionBuffer"));
    ImmutableMap.Builder<StreamId, InputStreamSource<?>> sourcesById = ImmutableMap.builder();
    streams.forEach((nodeId, dictionaryValues) -> {
        StreamId dictionaryStreamId = nodeId.toDictionaryDataStreamId();
        DynamicSliceOutput encodedValues = createSliceOutput(dictionaryStreamId, dictionaryValues);
        ValueInputStream<?> values = createValueStream(encodedValues.slice(), aggregatedMemoryContext, sharedBuffer);
        StreamCheckpoint checkpoint = getDictionaryStreamCheckpoint(dictionaryStreamId, LONG, ColumnEncoding.ColumnEncodingKind.DICTIONARY);
        sourcesById.put(dictionaryStreamId, createCheckpointStreamSource(values, checkpoint));
    });
    return new InputStreamSources(sourcesById.build());
}
Also used : ImmutableMap(com.google.common.collect.ImmutableMap) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) InputStreamSource(com.facebook.presto.orc.stream.InputStreamSource) InputStreamSources(com.facebook.presto.orc.stream.InputStreamSources) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Checkpoints.getDictionaryStreamCheckpoint(com.facebook.presto.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(com.facebook.presto.orc.checkpoint.StreamCheckpoint)

Example 2 with SharedBuffer

use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.

the class AbstractOrcRecordReader method advanceToNextStripe.

/**
 * Advances the reader to the next stripe.
 *
 * <p>The per-stripe memory context is replaced, the row group iterator is reset to empty,
 * and (when stripe statistics validation is enabled) the statistics collected for the stripe
 * that was just finished are validated and reset. If no stripe remains the method returns
 * with the empty row group iterator in place. Otherwise the stripe is read and every
 * non-null stream reader is started on it.
 *
 * @throws IOException propagated from the operations invoked here
 */
private void advanceToNextStripe() throws IOException {
    // Replace the memory context of the previous stripe with a fresh one.
    currentStripeSystemMemoryContext.close();
    currentStripeSystemMemoryContext = systemMemoryUsage.newOrcAggregatedMemoryContext();
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    // A negative index means no stripe has been read yet, so there is nothing to validate.
    if (currentStripe >= 0 && stripeStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedStripeStatistics = stripeStatisticsValidation.get();
        long finishedStripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateStripeStatistics(orcDataSource.getId(), finishedStripeOffset, finishedStripeStatistics.build());
        finishedStripeStatistics.reset();
    }

    currentStripe++;
    if (currentStripe >= stripes.size()) {
        return;
    }

    // Accumulate the row count of the stripe that was just left behind.
    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }

    StripeInformation nextStripeInformation = stripes.get(currentStripe);
    validateWriteStripe(nextStripeInformation.getNumberOfRows());

    // (Re)build the encryption info when none exists yet but this stripe carries keys,
    // or when the existing info was built from different key metadata.
    List<byte[]> keyMetadata = getDecryptionKeyMetadata(currentStripe, stripes);
    boolean encryptionInfoOutdated = dwrfEncryptionInfo.isPresent()
            ? !keyMetadata.equals(dwrfEncryptionInfo.get().getEncryptedKeyMetadatas())
            : !keyMetadata.isEmpty();
    if (encryptionInfoOutdated) {
        verify(encryptionLibrary.isPresent(), "encryptionLibrary is absent");
        dwrfEncryptionInfo = Optional.of(createDwrfEncryptionInfo(encryptionLibrary.get(), keyMetadata, intermediateKeyMetadata, dwrfEncryptionGroupMap));
    }

    SharedBuffer decompressionBuffer = new SharedBuffer(currentStripeSystemMemoryContext.newOrcLocalMemoryContext("sharedDecompressionBuffer"));
    Stripe nextStripe = stripeReader.readStripe(nextStripeInformation, currentStripeSystemMemoryContext, dwrfEncryptionInfo, decompressionBuffer);
    if (nextStripe == null) {
        // Nothing to read from this stripe; rowGroups stays empty.
        return;
    }

    for (StreamReader reader : streamReaders) {
        if (reader != null) {
            reader.startStripe(nextStripe);
        }
    }
    rowGroups = nextStripe.getRowGroups().iterator();
}
Also used : SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) StreamReader(com.facebook.presto.orc.reader.StreamReader) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Example 3 with SharedBuffer

use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.

the class DwrfMetadataReader method decryptAndCombineFileStatistics.

/**
 * Returns the file-level column statistics with the entries of requested encrypted nodes
 * replaced by their decrypted counterparts.
 *
 * <p>When {@code nodeToIntermediateKeys} or {@code fileStats} is empty, {@code fileStats} is
 * returned unchanged. Otherwise, for every encryption group, each node that has an
 * intermediate key gets its encrypted statistics decrypted, parsed, and written into a copy
 * of {@code fileStats} starting at the node's index.
 *
 * @param hiveWriterVersion passed through to the statistics conversion
 * @param dwrfEncryption source of the encryption groups; must not be null
 * @param encryptionLibrary used to decrypt the data encryption key; must not be null
 * @param fileStats the file statistics to start from
 * @param fileStripes stripes of the file; the first one may carry the data keys
 * @param nodeToIntermediateKeys intermediate keys for the nodes that should be decrypted
 * @param orcDataSource used for its id in stream construction and error reporting
 * @param decompressor optional decompressor handed to the input stream
 * @return an immutable list of combined statistics, or {@code fileStats} itself on the early exit
 * @throws OrcCorruptionException when reading or decrypting a node's statistics raises an IOException
 */
private List<ColumnStatistics> decryptAndCombineFileStatistics(HiveWriterVersion hiveWriterVersion, DwrfEncryption dwrfEncryption, EncryptionLibrary encryptionLibrary, List<ColumnStatistics> fileStats, List<StripeInformation> fileStripes, Map<Integer, Slice> nodeToIntermediateKeys, OrcDataSource orcDataSource, Optional<OrcDecompressor> decompressor) {
    requireNonNull(dwrfEncryption, "dwrfEncryption is null");
    requireNonNull(encryptionLibrary, "encryptionLibrary is null");
    if (nodeToIntermediateKeys.isEmpty() || fileStats.isEmpty()) {
        return fileStats;
    }

    ColumnStatistics[] combinedStats = fileStats.toArray(new ColumnStatistics[0]);
    List<EncryptionGroup> groups = dwrfEncryption.getEncryptionGroups();

    // Keys from the first stripe serve as a fallback when a group has no key of its own.
    List<byte[]> firstStripeKeys = null;
    if (!fileStripes.isEmpty()) {
        List<byte[]> candidateKeys = fileStripes.get(0).getKeyMetadata();
        if (!candidateKeys.isEmpty()) {
            firstStripeKeys = candidateKeys;
            checkState(firstStripeKeys.size() == groups.size(), "Number of keys in the first stripe must be the same as the number of encryption groups");
        }
    }

    for (int groupIndex = 0; groupIndex < groups.size(); groupIndex++) {
        EncryptionGroup group = groups.get(groupIndex);
        List<Integer> groupNodes = group.getNodes();
        // Created lazily on the first requested node of the group and reused afterwards.
        DwrfDataEncryptor groupDecryptor = null;

        for (int nodePosition = 0; nodePosition < groupNodes.size(); nodePosition++) {
            Integer nodeId = groupNodes.get(nodePosition);
            // Only nodes with an intermediate key (i.e. requested ones) are decrypted.
            if (!nodeToIntermediateKeys.containsKey(nodeId)) {
                continue;
            }

            if (groupDecryptor == null) {
                // The DEK may live in the group (footer) and/or in the first stripe;
                // the group's own key wins when both are available.
                byte[] encryptedDek = null;
                if (group.getKeyMetadata().isPresent()) {
                    encryptedDek = group.getKeyMetadata().get().byteArray();
                }
                else if (firstStripeKeys != null) {
                    encryptedDek = firstStripeKeys.get(groupIndex);
                }
                checkState(encryptedDek != null, "DEK for %s encryption group is null", groupIndex);

                // The DEK is itself encrypted with the node's intermediate key.
                byte[] intermediateKey = nodeToIntermediateKeys.get(nodeId).byteArray();
                byte[] dek = encryptionLibrary.decryptKey(intermediateKey, encryptedDek, 0, encryptedDek.length);
                groupDecryptor = new DwrfDataEncryptor(dek, encryptionLibrary);
            }

            Slice encryptedNodeStats = group.getStatistics().get(nodePosition);
            // No-op memory contexts are used: the buffer is expected to be tiny and short-lived.
            try (OrcInputStream decryptedStream = new OrcInputStream(
                    orcDataSource.getId(),
                    new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                    new BasicSliceInput(encryptedNodeStats),
                    decompressor,
                    Optional.of(groupDecryptor),
                    NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                    encryptedNodeStats.length())) {
                DwrfProto.FileStatistics subtreeStats = DwrfProto.FileStatistics.parseFrom(CodedInputStream.newInstance(decryptedStream));
                // The parsed message holds statistics for the node followed by its subtree,
                // so entry k lands at index nodeId + k.
                for (int offset = 0; offset < subtreeStats.getStatisticsCount(); offset++) {
                    combinedStats[nodeId + offset] = toColumnStatistics(hiveWriterVersion, subtreeStats.getStatistics(offset), false, null);
                }
            }
            catch (IOException e) {
                throw new OrcCorruptionException(e, orcDataSource.getId(), "Failed to read or decrypt FileStatistics for node %s", nodeId);
            }
        }
    }
    return ImmutableList.copyOf(combinedStats);
}
Also used : ColumnStatistics.createColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.createColumnStatistics) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) DwrfDataEncryptor(com.facebook.presto.orc.DwrfDataEncryptor) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) IOException(java.io.IOException) BasicSliceInput(io.airlift.slice.BasicSliceInput) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) OrcMetadataReader.byteStringToSlice(com.facebook.presto.orc.metadata.OrcMetadataReader.byteStringToSlice) Slice(io.airlift.slice.Slice) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 4 with SharedBuffer

use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.

the class OrcTester method getFileMetadata.

/**
 * Reads the file footer and every stripe footer of an ORC/DWRF file.
 *
 * <p>The data source opened on {@code inputFile} is closed before the method returns,
 * whether it completes normally or throws.
 *
 * @param inputFile the file to read
 * @param encoding the encoding used to create the metadata reader
 * @return the footer together with the stripe footers, in footer stripe order
 * @throws IOException if reading the file or a stripe footer fails
 */
public static FileMetadata getFileMetadata(File inputFile, OrcEncoding encoding) throws IOException {
    boolean zstdJniDecompressionEnabled = true;
    DataSize dataSize = new DataSize(1, MEGABYTE);
    RuntimeStats runtimeStats = new RuntimeStats();
    // try-with-resources so the underlying file handle is released; previously it leaked.
    try (OrcDataSource orcDataSource = new FileOrcDataSource(inputFile, dataSize, dataSize, dataSize, true)) {
        OrcReader reader = new OrcReader(orcDataSource, encoding, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, new OrcReaderOptions(dataSize, dataSize, dataSize, zstdJniDecompressionEnabled), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, runtimeStats);
        Footer footer = reader.getFooter();
        Optional<OrcDecompressor> decompressor = createOrcDecompressor(orcDataSource.getId(), reader.getCompressionKind(), reader.getBufferSize(), zstdJniDecompressionEnabled);
        ImmutableList.Builder<StripeFooter> stripes = new ImmutableList.Builder<>();
        for (StripeInformation stripe : footer.getStripes()) {
            // The stripe footer follows the stripe's index and data sections.
            byte[] tailBuffer = new byte[toIntExact(stripe.getFooterLength())];
            orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), tailBuffer);
            try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT), Slices.wrappedBuffer(tailBuffer).getInput(), decompressor, Optional.empty(), new TestingHiveOrcAggregatedMemoryContext(), tailBuffer.length)) {
                StripeFooter stripeFooter = encoding.createMetadataReader(runtimeStats).readStripeFooter(orcDataSource.getId(), footer.getTypes(), inputStream);
                stripes.add(stripeFooter);
            }
        }
        return new FileMetadata(footer, stripes.build());
    }
}
Also used : OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) RuntimeStats(com.facebook.presto.common.RuntimeStats) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) InputStream(java.io.InputStream) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) OrcDecompressor.createOrcDecompressor(com.facebook.presto.orc.OrcDecompressor.createOrcDecompressor) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) DataSize(io.airlift.units.DataSize) Footer(com.facebook.presto.orc.metadata.Footer) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Example 5 with SharedBuffer

use of com.facebook.presto.orc.stream.SharedBuffer in project presto by prestodb.

the class AbstractTestDwrfStripeCaching method readFileFooter.

/**
 * Reads and parses the DWRF footer of {@code orcFile}.
 *
 * <p>The last byte of the file is taken as the postscript size, the postscript is read from
 * immediately before that byte, and the footer from immediately before the postscript. The
 * footer bytes are decoded through a ZLIB decompressor sized by the postscript's compression
 * block size.
 *
 * @param orcFile the file to read
 * @return the parsed footer
 * @throws UncheckedIOException wrapping any {@link IOException} raised while reading or parsing
 */
static DwrfProto.Footer readFileFooter(File orcFile) {
    try (RandomAccessFile file = new RandomAccessFile(orcFile, "r")) {
        // read postscript size
        file.seek(file.length() - 1);
        int postScriptSize = file.read() & 0xff;
        // read postscript
        long postScriptPosition = file.length() - postScriptSize - 1;
        byte[] postScriptBytes = readBytes(file, postScriptPosition, postScriptSize);
        CodedInputStream postScriptInput = CodedInputStream.newInstance(postScriptBytes, 0, postScriptSize);
        DwrfProto.PostScript postScript = DwrfProto.PostScript.parseFrom(postScriptInput);
        // read footer
        long footerPosition = postScriptPosition - postScript.getFooterLength();
        int footerLength = toIntExact(postScript.getFooterLength());
        byte[] footerBytes = readBytes(file, footerPosition, postScript.getFooterLength());
        int compressionBufferSize = toIntExact(postScript.getCompressionBlockSize());
        OrcDataSourceId dataSourceId = new OrcDataSourceId(orcFile.getName());
        Optional<OrcDecompressor> decompressor = OrcDecompressor.createOrcDecompressor(dataSourceId, ZLIB, compressionBufferSize);
        // Close the footer stream once parsing is done instead of leaving it open.
        try (InputStream footerInputStream = new OrcInputStream(dataSourceId, new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT), Slices.wrappedBuffer(footerBytes).slice(0, footerLength).getInput(), decompressor, Optional.empty(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, footerLength)) {
            return DwrfProto.Footer.parseFrom(footerInputStream);
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used : OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) InputStream(java.io.InputStream) UncheckedIOException(java.io.UncheckedIOException) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) RandomAccessFile(java.io.RandomAccessFile)

Aggregations

SharedBuffer (com.facebook.presto.orc.stream.SharedBuffer)6 OrcInputStream (com.facebook.presto.orc.stream.OrcInputStream)4 InputStream (java.io.InputStream)3 Checkpoints.getDictionaryStreamCheckpoint (com.facebook.presto.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint)2 StreamCheckpoint (com.facebook.presto.orc.checkpoint.StreamCheckpoint)2 StripeInformation (com.facebook.presto.orc.metadata.StripeInformation)2 DwrfProto (com.facebook.presto.orc.proto.DwrfProto)2 CodedInputStream (com.facebook.presto.orc.protobuf.CodedInputStream)2 Slice (io.airlift.slice.Slice)2 IOException (java.io.IOException)2 RuntimeStats (com.facebook.presto.common.RuntimeStats)1 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)1 DwrfDataEncryptor (com.facebook.presto.orc.DwrfDataEncryptor)1 OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException)1 OrcDecompressor.createOrcDecompressor (com.facebook.presto.orc.OrcDecompressor.createOrcDecompressor)1 StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)1 Footer (com.facebook.presto.orc.metadata.Footer)1 OrcMetadataReader.byteStringToSlice (com.facebook.presto.orc.metadata.OrcMetadataReader.byteStringToSlice)1 StripeFooter (com.facebook.presto.orc.metadata.StripeFooter)1 ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics)1