Search in sources :

Example 1 with Metadata

Use of com.facebook.presto.orc.metadata.Metadata in the project presto by prestodb.

The method bufferFileFooter of the class OrcWriter.

/**
 * Collects the data for the file footer. The returned elements are not the
 * actual bytes, but {@link DataOutput} objects that know how to write them.
 *
 * <p>Outputs are appended in this order: the stripe-statistics metadata, the
 * DWRF stripe cache, the footer, the postscript, and finally a single byte
 * holding the postscript length.
 *
 * <p>Side effects: assigns {@code numberOfRows} and {@code unencryptedStats},
 * clears {@code closedStripes}, resets {@code closedStripesRetainedBytes} to
 * zero, and records the file statistics and metadata version for validation.
 *
 * @return the footer-section outputs, in the order they must be written
 * @throws IOException declared for the serialization calls made by this method
 * @throws IllegalStateException if the serialized postscript is longer than
 *         255 bytes and therefore cannot be described by the trailing length byte
 */
private List<DataOutput> bufferFileFooter() throws IOException {
    List<DataOutput> outputData = new ArrayList<>();

    // Stripe-level statistics section.
    Metadata metadata = new Metadata(closedStripes.stream().map(ClosedStripe::getStatistics).collect(toList()));
    Slice metadataSlice = metadataWriter.writeMetadata(metadata);
    outputData.add(createDataOutput(metadataSlice));

    // File-level row count and statistics, derived from every closed stripe.
    numberOfRows = closedStripes.stream().mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows()).sum();
    List<ColumnStatistics> fileStats = toFileStats(closedStripes.stream().map(ClosedStripe::getStatistics).map(StripeStatistics::getColumnStatistics).collect(toList()));
    recordValidation(validation -> validation.setFileStatistics(fileStats));

    Map<String, Slice> userMetadata = this.userMetadata.entrySet().stream().collect(Collectors.toMap(Entry::getKey, entry -> utf8Slice(entry.getValue())));

    // Split the file statistics into an unencrypted list and per-group encrypted maps.
    unencryptedStats = new ArrayList<>();
    Map<Integer, Map<Integer, Slice>> encryptedStats = new HashMap<>();
    addStatsRecursive(fileStats, 0, new HashMap<>(), unencryptedStats, encryptedStats);

    Optional<DwrfEncryption> dwrfEncryption;
    if (dwrfWriterEncryption.isPresent()) {
        ImmutableList.Builder<EncryptionGroup> encryptionGroupBuilder = ImmutableList.builder();
        List<WriterEncryptionGroup> writerEncryptionGroups = dwrfWriterEncryption.get().getWriterEncryptionGroups();
        for (int i = 0; i < writerEncryptionGroups.size(); i++) {
            WriterEncryptionGroup group = writerEncryptionGroups.get(i);
            // NOTE(review): assumes addStatsRecursive registered an entry for every group index - confirm.
            Map<Integer, Slice> groupStats = encryptedStats.get(i);
            encryptionGroupBuilder.add(new EncryptionGroup(
                    group.getNodes(),
                    // reader will just use key metadata from the stripe
                    Optional.empty(),
                    group.getNodes().stream().map(groupStats::get).collect(toList())));
        }
        dwrfEncryption = Optional.of(new DwrfEncryption(dwrfWriterEncryption.get().getKeyProvider(), encryptionGroupBuilder.build()));
    } else {
        dwrfEncryption = Optional.empty();
    }

    // Optional DWRF stripe cache section and the offsets that the footer refers to.
    Optional<DwrfStripeCacheData> dwrfStripeCacheData = dwrfStripeCacheWriter.map(DwrfStripeCacheWriter::getDwrfStripeCacheData);
    Slice dwrfStripeCacheSlice = metadataWriter.writeDwrfStripeCache(dwrfStripeCacheData);
    outputData.add(createDataOutput(dwrfStripeCacheSlice));
    Optional<List<Integer>> dwrfStripeCacheOffsets = dwrfStripeCacheWriter.map(DwrfStripeCacheWriter::getOffsets);

    Footer footer = new Footer(numberOfRows, rowGroupMaxRowCount, OptionalLong.of(rawSize), closedStripes.stream().map(ClosedStripe::getStripeInformation).collect(toList()), orcTypes, ImmutableList.copyOf(unencryptedStats), userMetadata, dwrfEncryption, dwrfStripeCacheOffsets);

    // The stripe information now lives in the footer object, so the bookkeeping is reset.
    closedStripes.clear();
    closedStripesRetainedBytes = 0;

    Slice footerSlice = metadataWriter.writeFooter(footer);
    outputData.add(createDataOutput(footerSlice));
    recordValidation(validation -> validation.setVersion(metadataWriter.getOrcMetadataVersion()));

    Slice postscriptSlice = metadataWriter.writePostscript(footerSlice.length(), metadataSlice.length(), columnWriterOptions.getCompressionKind(), columnWriterOptions.getCompressionMaxBufferSize(), dwrfStripeCacheData);

    // The postscript length is stored in one trailing byte. An unchecked (byte) cast
    // would silently truncate a larger length and yield an unreadable file, so fail loudly.
    int postscriptLength = postscriptSlice.length();
    if (postscriptLength > 0xFF) {
        throw new IllegalStateException("Postscript length " + postscriptLength + " does not fit in the single trailing length byte (max 255)");
    }
    outputData.add(createDataOutput(postscriptSlice));
    outputData.add(createDataOutput(Slices.wrappedBuffer((byte) postscriptLength)));
    return outputData;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) DwrfMetadataWriter.toFileStatistics(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toFileStatistics) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) StreamLayout(com.facebook.presto.orc.writer.StreamLayout) ColumnWriter(com.facebook.presto.orc.writer.ColumnWriter) DwrfEncryption(com.facebook.presto.orc.metadata.DwrfEncryption) DataSink(com.facebook.presto.common.io.DataSink) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DictionaryColumnWriter(com.facebook.presto.orc.writer.DictionaryColumnWriter) DIRECT(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT) DwrfStripeCacheWriter(com.facebook.presto.orc.metadata.DwrfStripeCacheWriter) Slices(io.airlift.slice.Slices) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) CompressedMetadataWriter(com.facebook.presto.orc.metadata.CompressedMetadataWriter) Footer(com.facebook.presto.orc.metadata.Footer) UNENCRYPTED(com.facebook.presto.orc.DwrfEncryptionInfo.UNENCRYPTED) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Collectors(java.util.stream.Collectors) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) DataSize(io.airlift.units.DataSize) List(java.util.List) DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) 
ClassLayout(org.openjdk.jol.info.ClassLayout) DWRF(com.facebook.presto.orc.OrcEncoding.DWRF) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Entry(java.util.Map.Entry) Optional(java.util.Optional) Metadata(com.facebook.presto.orc.metadata.Metadata) IntStream(java.util.stream.IntStream) Slice(io.airlift.slice.Slice) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) OrcWriteValidationMode(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode) HashMap(java.util.HashMap) CLOSED(com.facebook.presto.orc.FlushReason.CLOSED) Multimap(com.google.common.collect.Multimap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) OptionalLong(java.util.OptionalLong) ImmutableList(com.google.common.collect.ImmutableList) MAGIC(com.facebook.presto.orc.metadata.PostScript.MAGIC) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) LastUsedCompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool.LastUsedCompressionBufferPool) OrcType(com.facebook.presto.orc.metadata.OrcType) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) Math.toIntExact(java.lang.Math.toIntExact) Type(com.facebook.presto.common.type.Type) Nullable(javax.annotation.Nullable) Integer.min(java.lang.Integer.min) ColumnWriters.createColumnWriter(com.facebook.presto.orc.writer.ColumnWriters.createColumnWriter) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) OrcReader.validateFile(com.facebook.presto.orc.OrcReader.validateFile) OrcWriteValidationBuilder(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationBuilder) IOException(java.io.IOException) DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Stream(com.facebook.presto.orc.metadata.Stream) Consumer(java.util.function.Consumer) 
EncryptionGroup(com.facebook.presto.orc.metadata.EncryptionGroup) Collectors.toList(java.util.stream.Collectors.toList) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) CompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool) Closeable(java.io.Closeable) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DataOutput(com.facebook.presto.common.io.DataOutput) DwrfEncryptionInfo.createNodeToGroupMap(com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) DataOutput(com.facebook.presto.common.io.DataOutput) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Metadata(com.facebook.presto.orc.metadata.Metadata) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) DwrfStripeCacheWriter(com.facebook.presto.orc.metadata.DwrfStripeCacheWriter) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup) EncryptionGroup(com.facebook.presto.orc.metadata.EncryptionGroup) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) Footer(com.facebook.presto.orc.metadata.Footer) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) 
DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HashMap(java.util.HashMap) DwrfEncryptionInfo.createNodeToGroupMap(com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap) DwrfEncryption(com.facebook.presto.orc.metadata.DwrfEncryption)

Aggregations

Page (com.facebook.presto.common.Page)1 DataOutput (com.facebook.presto.common.io.DataOutput)1 DataOutput.createDataOutput (com.facebook.presto.common.io.DataOutput.createDataOutput)1 DataSink (com.facebook.presto.common.io.DataSink)1 Type (com.facebook.presto.common.type.Type)1 UNENCRYPTED (com.facebook.presto.orc.DwrfEncryptionInfo.UNENCRYPTED)1 DwrfEncryptionInfo.createNodeToGroupMap (com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap)1 CLOSED (com.facebook.presto.orc.FlushReason.CLOSED)1 DWRF (com.facebook.presto.orc.OrcEncoding.DWRF)1 OrcReader.validateFile (com.facebook.presto.orc.OrcReader.validateFile)1 OrcWriteValidationBuilder (com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationBuilder)1 OrcWriteValidationMode (com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode)1 ColumnEncoding (com.facebook.presto.orc.metadata.ColumnEncoding)1 DIRECT (com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT)1 CompressedMetadataWriter (com.facebook.presto.orc.metadata.CompressedMetadataWriter)1 CompressionKind (com.facebook.presto.orc.metadata.CompressionKind)1 DwrfEncryption (com.facebook.presto.orc.metadata.DwrfEncryption)1 DwrfMetadataWriter.toFileStatistics (com.facebook.presto.orc.metadata.DwrfMetadataWriter.toFileStatistics)1 DwrfMetadataWriter.toStripeEncryptionGroup (com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup)1 DwrfStripeCacheData (com.facebook.presto.orc.metadata.DwrfStripeCacheData)1