Search in sources :

Example 6 with StripeFooter

Use of com.facebook.presto.orc.metadata.StripeFooter in project presto by prestodb.

From the class OrcWriter, method bufferStripeData.

/**
 * Gathers everything that makes up the current stripe: the index streams, the
 * data streams and finally the stripe footer, in that order.  The returned
 * entries are not the bytes themselves but outputs that know how to write them.
 * <p>
 * As a side effect the stripe is appended to {@code closedStripes}, the retained
 * size counter is bumped, write validation and stats are notified, and, when a
 * stripe cache writer is configured, it receives the index streams and footer.
 *
 * @param stripeStartOffset handed to the stripe information and to write validation
 * @param flushReason handed to the stripe-written stats
 * @return the ordered outputs for this stripe, or an empty list when
 *         {@code stripeRowCount} is zero
 * @throws IOException propagated from the calls made while assembling the stripe
 */
private List<DataOutput> bufferStripeData(long stripeStartOffset, FlushReason flushReason) throws IOException {
    if (stripeRowCount == 0) {
        return ImmutableList.of();
    }

    List<DataOutput> stripeOutputs = new ArrayList<>();
    List<Stream> plainStreams = new ArrayList<>(columnWriters.size() * 3);
    Multimap<Integer, Stream> streamsByEncryptionGroup = ArrayListMultimap.create();

    // --- index streams ---
    // Stream descriptors normally carry only a length, so their order is what locates them.
    // Whenever the encryption group changes from one stream to the next, the descriptor
    // is given an explicit offset so the column order can still be recovered.
    long indexBytes = 0;
    long runningOffset = 0;
    // -1 means "the previous stream was unencrypted" (also the initial state)
    int lastGroup = -1;
    for (ColumnWriter writer : columnWriters) {
        for (StreamDataOutput indexOutput : writer.getIndexStreams()) {
            stripeOutputs.add(indexOutput);
            Optional<Integer> group = dwrfEncryptionInfo.getGroupByNodeId(indexOutput.getStream().getColumn());
            Stream descriptor = indexOutput.getStream();
            if (group.isPresent()) {
                int groupId = group.get();
                if (lastGroup != groupId) {
                    descriptor = descriptor.withOffset(runningOffset);
                }
                streamsByEncryptionGroup.put(groupId, descriptor);
                lastGroup = groupId;
            }
            else {
                if (lastGroup != -1) {
                    descriptor = descriptor.withOffset(runningOffset);
                }
                plainStreams.add(descriptor);
                lastGroup = -1;
            }
            runningOffset += indexOutput.size();
            indexBytes += indexOutput.size();
        }
    }

    // at this point stripeOutputs holds only index streams
    if (dwrfStripeCacheWriter.isPresent()) {
        dwrfStripeCacheWriter.get().addIndexStreams(ImmutableList.copyOf(stripeOutputs), indexBytes);
    }

    // --- data streams: collect from every writer, then let the layout decide the order ---
    long dataBytes = 0;
    List<StreamDataOutput> orderedDataStreams = new ArrayList<>(columnWriters.size() * 2);
    for (ColumnWriter writer : columnWriters) {
        List<StreamDataOutput> writerStreams = writer.getDataStreams();
        orderedDataStreams.addAll(writerStreams);
        for (StreamDataOutput writerStream : writerStreams) {
            dataBytes += writerStream.size();
        }
    }
    streamLayout.reorder(orderedDataStreams);

    // Same offset rule as for the index streams; the offset and the previous group
    // carry over from the index section.
    for (StreamDataOutput dataOutput : orderedDataStreams) {
        stripeOutputs.add(dataOutput);
        Optional<Integer> group = dwrfEncryptionInfo.getGroupByNodeId(dataOutput.getStream().getColumn());
        Stream descriptor = dataOutput.getStream();
        if (group.isPresent()) {
            int groupId = group.get();
            if (lastGroup != groupId) {
                descriptor = descriptor.withOffset(runningOffset);
            }
            streamsByEncryptionGroup.put(groupId, descriptor);
            lastGroup = groupId;
        }
        else {
            if (lastGroup != -1) {
                descriptor = descriptor.withOffset(runningOffset);
            }
            plainStreams.add(descriptor);
            lastGroup = -1;
        }
        runningOffset += dataOutput.size();
    }

    // --- per-column encodings and stripe statistics ---
    Map<Integer, ColumnEncoding> encodingsByColumn = new HashMap<>();
    for (ColumnWriter writer : columnWriters) {
        encodingsByColumn.putAll(writer.getColumnEncodings());
    }
    Map<Integer, ColumnStatistics> statisticsByColumn = new HashMap<>();
    for (ColumnWriter writer : columnWriters) {
        statisticsByColumn.putAll(writer.getColumnStripeStatistics());
    }
    // the 0th column is a struct column for the whole row
    encodingsByColumn.put(0, new ColumnEncoding(DIRECT, 0));
    statisticsByColumn.put(0, new ColumnStatistics((long) stripeRowCount, null));

    // split the encodings by whether the column belongs to an encryption group
    Map<Integer, ColumnEncoding> plainEncodings = encodingsByColumn.entrySet().stream()
            .filter(encoding -> !dwrfEncryptionInfo.getGroupByNodeId(encoding.getKey()).isPresent())
            .collect(toImmutableMap(Entry::getKey, Entry::getValue));
    Map<Integer, ColumnEncoding> protectedEncodings = encodingsByColumn.entrySet().stream()
            .filter(encoding -> dwrfEncryptionInfo.getGroupByNodeId(encoding.getKey()).isPresent())
            .collect(toImmutableMap(Entry::getKey, Entry::getValue));

    // --- stripe footer: appended after the streams and also offered to the stripe cache ---
    List<Slice> encryptedGroupSlices = createEncryptedGroups(streamsByEncryptionGroup, protectedEncodings);
    Slice footerSlice = metadataWriter.writeStripeFooter(new StripeFooter(plainStreams, plainEncodings, encryptedGroupSlices));
    stripeOutputs.add(createDataOutput(footerSlice));
    dwrfStripeCacheWriter.ifPresent(cacheWriter -> cacheWriter.addStripeFooter(createDataOutput(footerSlice)));

    // --- final stripe statistics and bookkeeping ---
    StripeStatistics stripeStatistics = new StripeStatistics(toDenseList(statisticsByColumn, orcTypes.size()));
    recordValidation(validation -> validation.addStripeStatistics(stripeStartOffset, stripeStatistics));

    StripeInformation stripeInfo = new StripeInformation(
            stripeRowCount,
            stripeStartOffset,
            indexBytes,
            dataBytes,
            footerSlice.length(),
            OptionalLong.of(stripeRawSize),
            dwrfEncryptionInfo.getEncryptedKeyMetadatas());
    ClosedStripe finishedStripe = new ClosedStripe(stripeInfo, stripeStatistics);
    closedStripes.add(finishedStripe);
    closedStripesRetainedBytes += finishedStripe.getRetainedSizeInBytes();
    recordValidation(validation -> validation.addStripe(stripeInfo.getNumberOfRows()));

    stats.recordStripeWritten(
            flushPolicy.getStripeMinBytes(),
            flushPolicy.getStripeMaxBytes(),
            dictionaryMaxMemoryBytes,
            flushReason,
            dictionaryCompressionOptimizer.getDictionaryMemoryBytes(),
            stripeInfo);
    return stripeOutputs;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) DwrfMetadataWriter.toFileStatistics(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toFileStatistics) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) StreamLayout(com.facebook.presto.orc.writer.StreamLayout) ColumnWriter(com.facebook.presto.orc.writer.ColumnWriter) DwrfEncryption(com.facebook.presto.orc.metadata.DwrfEncryption) DataSink(com.facebook.presto.common.io.DataSink) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DictionaryColumnWriter(com.facebook.presto.orc.writer.DictionaryColumnWriter) DIRECT(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT) DwrfStripeCacheWriter(com.facebook.presto.orc.metadata.DwrfStripeCacheWriter) Slices(io.airlift.slice.Slices) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) CompressedMetadataWriter(com.facebook.presto.orc.metadata.CompressedMetadataWriter) Footer(com.facebook.presto.orc.metadata.Footer) UNENCRYPTED(com.facebook.presto.orc.DwrfEncryptionInfo.UNENCRYPTED) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Collectors(java.util.stream.Collectors) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) DataSize(io.airlift.units.DataSize) List(java.util.List) DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) 
ClassLayout(org.openjdk.jol.info.ClassLayout) DWRF(com.facebook.presto.orc.OrcEncoding.DWRF) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Entry(java.util.Map.Entry) Optional(java.util.Optional) Metadata(com.facebook.presto.orc.metadata.Metadata) IntStream(java.util.stream.IntStream) Slice(io.airlift.slice.Slice) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) OrcWriteValidationMode(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode) HashMap(java.util.HashMap) CLOSED(com.facebook.presto.orc.FlushReason.CLOSED) Multimap(com.google.common.collect.Multimap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) OptionalLong(java.util.OptionalLong) ImmutableList(com.google.common.collect.ImmutableList) MAGIC(com.facebook.presto.orc.metadata.PostScript.MAGIC) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) LastUsedCompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool.LastUsedCompressionBufferPool) OrcType(com.facebook.presto.orc.metadata.OrcType) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) Math.toIntExact(java.lang.Math.toIntExact) Type(com.facebook.presto.common.type.Type) Nullable(javax.annotation.Nullable) Integer.min(java.lang.Integer.min) ColumnWriters.createColumnWriter(com.facebook.presto.orc.writer.ColumnWriters.createColumnWriter) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) OrcReader.validateFile(com.facebook.presto.orc.OrcReader.validateFile) OrcWriteValidationBuilder(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationBuilder) IOException(java.io.IOException) DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Stream(com.facebook.presto.orc.metadata.Stream) Consumer(java.util.function.Consumer) 
EncryptionGroup(com.facebook.presto.orc.metadata.EncryptionGroup) Collectors.toList(java.util.stream.Collectors.toList) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) CompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool) Closeable(java.io.Closeable) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DataOutput(com.facebook.presto.common.io.DataOutput) DwrfEncryptionInfo.createNodeToGroupMap(com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) DataOutput(com.facebook.presto.common.io.DataOutput) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) IntStream(java.util.stream.IntStream) Stream(com.facebook.presto.orc.metadata.Stream) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) ColumnWriter(com.facebook.presto.orc.writer.ColumnWriter) DictionaryColumnWriter(com.facebook.presto.orc.writer.DictionaryColumnWriter) ColumnWriters.createColumnWriter(com.facebook.presto.orc.writer.ColumnWriters.createColumnWriter) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Aggregations

StripeFooter (com.facebook.presto.orc.metadata.StripeFooter)6 ColumnEncoding (com.facebook.presto.orc.metadata.ColumnEncoding)4 Stream (com.facebook.presto.orc.metadata.Stream)4 ImmutableList (com.google.common.collect.ImmutableList)4 OrcInputStream (com.facebook.presto.orc.stream.OrcInputStream)3 ImmutableMap (com.google.common.collect.ImmutableMap)3 DataSize (io.airlift.units.DataSize)3 InputStream (java.io.InputStream)3 HashMap (java.util.HashMap)3 List (java.util.List)3 Page (com.facebook.presto.common.Page)2 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)2 OrcWriteValidationMode (com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode)2 InvalidCheckpointException (com.facebook.presto.orc.checkpoint.InvalidCheckpointException)2 Footer (com.facebook.presto.orc.metadata.Footer)2 StripeInformation (com.facebook.presto.orc.metadata.StripeInformation)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)2 RuntimeStats (com.facebook.presto.common.RuntimeStats)1 Block (com.facebook.presto.common.block.Block)1 DataOutput (com.facebook.presto.common.io.DataOutput)1