Search in sources :

Example 1 with Metadata

Use of io.trino.orc.metadata.Metadata in the project trino by trinodb.

From the class OrcWriter, method bufferFileFooter.

/**
 * Assembles the trailing sections of the file: the stripe-level metadata, the
 * footer, the postscript, and a final single byte carrying the postscript
 * length. The returned elements are not the file bytes themselves but outputs
 * that know how to write their section.
 *
 * <p>Side effects: refreshes {@code fileStats} and {@code fileStatsRetainedBytes},
 * reports the file statistics and the ORC metadata version through
 * {@code recordValidation}, and empties {@code closedStripes} while resetting
 * {@code closedStripesRetainedBytes} to zero.
 *
 * @return the outputs in order: metadata, footer, postscript, postscript length byte
 * @throws IOException declared by this method; presumably propagated from the
 *         metadata writer (the throwing call is not visible in this block)
 */
private List<OrcDataOutput> bufferFileFooter() throws IOException {
    List<OrcDataOutput> footerOutputs = new ArrayList<>();

    // Gather the per-stripe statistics once; both the metadata section and the
    // file-level statistics below are derived from them.
    List<StripeStatistics> stripeStatistics = closedStripes.stream()
            .map(ClosedStripe::getStatistics)
            .collect(toList());

    // Metadata section: one (always present) statistics entry per closed stripe.
    Metadata stripeMetadata = new Metadata(stripeStatistics.stream()
            .map(Optional::of)
            .collect(toList()));
    Slice metadataBytes = metadataWriter.writeMetadata(stripeMetadata);
    footerOutputs.add(createDataOutput(metadataBytes));

    // File-level statistics, plus the memory they retain.
    fileStats = toFileStats(stripeStatistics.stream()
            .map(StripeStatistics::getColumnStatistics)
            .collect(toList()));
    fileStatsRetainedBytes = fileStats
            .map(columns -> columns.stream()
                    .mapToLong(ColumnStatistics::getRetainedSizeInBytes)
                    .sum())
            .orElse(0L);
    recordValidation(builder -> builder.setFileStatistics(fileStats));

    // User-supplied key/value pairs are stored as UTF-8 slices.
    Map<String, Slice> encodedUserMetadata = this.userMetadata.entrySet().stream()
            .collect(Collectors.toMap(Entry::getKey, pair -> utf8Slice(pair.getValue())));

    // A row group limit of zero is recorded as "absent".
    OptionalInt rowsInRowGroup;
    if (rowGroupMaxRowCount == 0) {
        rowsInRowGroup = OptionalInt.empty();
    }
    else {
        rowsInRowGroup = OptionalInt.of(rowGroupMaxRowCount);
    }

    List<StripeInformation> stripeInformation = closedStripes.stream()
            .map(ClosedStripe::getStripeInformation)
            .collect(toImmutableList());

    Footer fileFooter = new Footer(
            fileRowCount,
            rowsInRowGroup,
            stripeInformation,
            orcTypes,
            fileStats,
            encodedUserMetadata,
            // writer id will be set by MetadataWriter
            Optional.empty());

    // Everything needed from the closed stripes has been captured above.
    closedStripes.clear();
    closedStripesRetainedBytes = 0;

    Slice footerBytes = metadataWriter.writeFooter(fileFooter);
    footerOutputs.add(createDataOutput(footerBytes));

    recordValidation(builder -> builder.setVersion(metadataWriter.getOrcMetadataVersion()));

    // The postscript records the lengths of the footer and metadata sections.
    Slice postscriptBytes = metadataWriter.writePostscript(
            footerBytes.length(),
            metadataBytes.length(),
            compression,
            maxCompressionBufferSize);
    footerOutputs.add(createDataOutput(postscriptBytes));

    // Last output is a single byte with the postscript length; checkedCast
    // rejects a length that does not fit in an unsigned byte.
    byte postscriptLength = UnsignedBytes.checkedCast(postscriptBytes.length());
    footerOutputs.add(createDataOutput(Slices.wrappedBuffer(postscriptLength)));

    return footerOutputs;
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) ColumnEncoding(io.trino.orc.metadata.ColumnEncoding) DICTIONARY_FULL(io.trino.orc.OrcWriterStats.FlushReason.DICTIONARY_FULL) DIRECT(io.trino.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT) OrcWriteValidationBuilder(io.trino.orc.OrcWriteValidation.OrcWriteValidationBuilder) Stream(io.trino.orc.metadata.Stream) StreamDataOutput(io.trino.orc.stream.StreamDataOutput) FlushReason(io.trino.orc.OrcWriterStats.FlushReason) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) BloomFilterBuilder(io.trino.orc.metadata.statistics.BloomFilterBuilder) Slices(io.airlift.slice.Slices) Map(java.util.Map) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) MAGIC(io.trino.orc.metadata.PostScript.MAGIC) ROOT_COLUMN(io.trino.orc.metadata.OrcColumnId.ROOT_COLUMN) OrcMetadataWriter(io.trino.orc.metadata.OrcMetadataWriter) ImmutableSet(com.google.common.collect.ImmutableSet) OrcDataOutput.createDataOutput(io.trino.orc.stream.OrcDataOutput.createDataOutput) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ColumnWriters.createColumnWriter(io.trino.orc.writer.ColumnWriters.createColumnWriter) ColumnWriter(io.trino.orc.writer.ColumnWriter) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Metadata(io.trino.orc.metadata.Metadata) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) Entry(java.util.Map.Entry) Optional(java.util.Optional) ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) Slice(io.airlift.slice.Slice) OrcWriteValidationMode(io.trino.orc.OrcWriteValidation.OrcWriteValidationMode) SliceDictionaryColumnWriter(io.trino.orc.writer.SliceDictionaryColumnWriter) OrcDataOutput(io.trino.orc.stream.OrcDataOutput) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) HashMap(java.util.HashMap) 
CLOSED(io.trino.orc.OrcWriterStats.FlushReason.CLOSED) OptionalInt(java.util.OptionalInt) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Utf8BloomFilterBuilder(io.trino.orc.metadata.statistics.Utf8BloomFilterBuilder) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) StripeInformation(io.trino.orc.metadata.StripeInformation) Math.toIntExact(java.lang.Math.toIntExact) Nullable(javax.annotation.Nullable) MAX_ROWS(io.trino.orc.OrcWriterStats.FlushReason.MAX_ROWS) StripeFooter(io.trino.orc.metadata.StripeFooter) Integer.min(java.lang.Integer.min) OrcType(io.trino.orc.metadata.OrcType) UnsignedBytes(com.google.common.primitives.UnsignedBytes) MAX_BYTES(io.trino.orc.OrcWriterStats.FlushReason.MAX_BYTES) IOException(java.io.IOException) ColumnMetadata(io.trino.orc.metadata.ColumnMetadata) CompressionKind(io.trino.orc.metadata.CompressionKind) Footer(io.trino.orc.metadata.Footer) Consumer(java.util.function.Consumer) Collectors.toList(java.util.stream.Collectors.toList) StripeStatistics(io.trino.orc.metadata.statistics.StripeStatistics) Closeable(java.io.Closeable) NoOpBloomFilterBuilder(io.trino.orc.metadata.statistics.NoOpBloomFilterBuilder) OrcReader.validateFile(io.trino.orc.OrcReader.validateFile) Collections(java.util.Collections) CompressedMetadataWriter(io.trino.orc.metadata.CompressedMetadataWriter) OrcColumnId(io.trino.orc.metadata.OrcColumnId) Optional(java.util.Optional) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) Metadata(io.trino.orc.metadata.Metadata) ColumnMetadata(io.trino.orc.metadata.ColumnMetadata) StripeFooter(io.trino.orc.metadata.StripeFooter) Footer(io.trino.orc.metadata.Footer) OrcDataOutput(io.trino.orc.stream.OrcDataOutput)

Aggregations

Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 Verify.verify (com.google.common.base.Verify.verify)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 UnsignedBytes (com.google.common.primitives.UnsignedBytes)1 Slice (io.airlift.slice.Slice)1 Slices (io.airlift.slice.Slices)1 Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice)1 OrcReader.validateFile (io.trino.orc.OrcReader.validateFile)1 OrcWriteValidationBuilder (io.trino.orc.OrcWriteValidation.OrcWriteValidationBuilder)1 OrcWriteValidationMode (io.trino.orc.OrcWriteValidation.OrcWriteValidationMode)1 FlushReason (io.trino.orc.OrcWriterStats.FlushReason)1 CLOSED (io.trino.orc.OrcWriterStats.FlushReason.CLOSED)1 DICTIONARY_FULL (io.trino.orc.OrcWriterStats.FlushReason.DICTIONARY_FULL)1 MAX_BYTES (io.trino.orc.OrcWriterStats.FlushReason.MAX_BYTES)1 MAX_ROWS (io.trino.orc.OrcWriterStats.FlushReason.MAX_ROWS)1 ColumnEncoding (io.trino.orc.metadata.ColumnEncoding)1 DIRECT (io.trino.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT)1