Search in sources :

Example 76 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class OrcWriteValidation method validateColumnStatisticsEquivalent.

/**
 * Checks that two lists of column statistics have the same length and that each pair of
 * entries at the same position passes the per-column equivalence check.
 *
 * @param orcDataSourceId identifies the data source in any reported corruption error
 * @param name label for this set of statistics; used in error messages and extended with
 *         {@code " column " + position} for each per-column check
 * @param actualColumnStatistics statistics that were observed
 * @param expectedColumnStatistics statistics that were expected
 * @throws OrcCorruptionException if the two lists differ in size (per-column checks may throw as well)
 */
private static void validateColumnStatisticsEquivalent(OrcDataSourceId orcDataSourceId, String name, List<ColumnStatistics> actualColumnStatistics, List<ColumnStatistics> expectedColumnStatistics) throws OrcCorruptionException {
    requireNonNull(name, "name is null");
    requireNonNull(actualColumnStatistics, "actualColumnStatistics is null");
    requireNonNull(expectedColumnStatistics, "expectedColumnStatistics is null");

    int columnCount = actualColumnStatistics.size();
    if (columnCount != expectedColumnStatistics.size()) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected number of columns in %s statistics", name);
    }

    // Sizes match, so a single position walks both lists in lock step.
    for (int column = 0; column < columnCount; column++) {
        validateColumnStatisticsEquivalent(
                orcDataSourceId,
                name + " column " + column,
                actualColumnStatistics.get(column),
                expectedColumnStatistics.get(column));
    }
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics)

Example 77 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class OrcWriter method addStatsRecursive.

/**
 * Visits node {@code index} and then, recursively, each of its field type indexes, splitting the
 * statistics in {@code allStats} into an unencrypted list and per-encryption-group encrypted slices.
 * <p>
 * For a node that belongs to an encryption group, the real statistics are accumulated in
 * {@code nodeAndSubNodeStats} and only a placeholder carrying the value count is appended to
 * {@code unencryptedStats}. For any other node the real statistics are appended to
 * {@code unencryptedStats} directly. Does nothing when {@code allStats} is empty.
 *
 * @param allStats statistics for all nodes, looked up by node index
 * @param index node to process
 * @param nodeAndSubNodeStats accumulator (group id to statistics list) shared across the recursion
 *         inside an encrypted sub-tree; a fresh empty map is passed when recursing from an unencrypted node
 * @param unencryptedStats output list; receives one entry per visited node, parent before children
 * @param encryptedStats output map: group id, then node index, then the encrypted statistics slice
 * @throws IOException declared by this method; presumably raised by toEncryptedFileStatistics (not visible here)
 */
private void addStatsRecursive(List<ColumnStatistics> allStats, int index, Map<Integer, List<ColumnStatistics>> nodeAndSubNodeStats, List<ColumnStatistics> unencryptedStats, Map<Integer, Map<Integer, Slice>> encryptedStats) throws IOException {
    if (allStats.isEmpty()) {
        return;
    }
    ColumnStatistics columnStatistics = allStats.get(index);
    if (dwrfEncryptionInfo.getGroupByNodeId(index).isPresent()) {
        int group = dwrfEncryptionInfo.getGroupByNodeId(index).get();
        // A "root" node is one listed directly in the writer encryption group's node list.
        boolean isRootNode = dwrfWriterEncryption.get().getWriterEncryptionGroups().get(group).getNodes().contains(index);
        // && binds tighter than ||: either this is a group root and nothing has been accumulated yet,
        // or the accumulator holds exactly one entry and that entry is for this node's group.
        verify(isRootNode && nodeAndSubNodeStats.isEmpty() || nodeAndSubNodeStats.size() == 1 && nodeAndSubNodeStats.get(group) != null, "nodeAndSubNodeStats should only be present for subnodes of a group");
        nodeAndSubNodeStats.computeIfAbsent(group, x -> new ArrayList<>()).add(columnStatistics);
        // Placeholder for the unencrypted list: only the number of values is kept, the second argument is null.
        unencryptedStats.add(new ColumnStatistics(columnStatistics.getNumberOfValues(), null));
        // Children share the same accumulator, so their statistics land in this group's list.
        for (Integer fieldIndex : orcTypes.get(index).getFieldTypeIndexes()) {
            addStatsRecursive(allStats, fieldIndex, nodeAndSubNodeStats, unencryptedStats, encryptedStats);
        }
        if (isRootNode) {
            // Runs after the recursion above, so the group's list holds this node plus every sub-node visited.
            Slice encryptedFileStatistics = toEncryptedFileStatistics(nodeAndSubNodeStats.get(group), group);
            encryptedStats.computeIfAbsent(group, x -> new HashMap<>()).put(index, encryptedFileStatistics);
        }
    } else {
        unencryptedStats.add(columnStatistics);
        // Each child of an unencrypted node starts with its own empty accumulator.
        for (Integer fieldIndex : orcTypes.get(index).getFieldTypeIndexes()) {
            addStatsRecursive(allStats, fieldIndex, new HashMap<>(), unencryptedStats, encryptedStats);
        }
    }
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) DwrfMetadataWriter.toFileStatistics(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toFileStatistics) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) StreamLayout(com.facebook.presto.orc.writer.StreamLayout) ColumnWriter(com.facebook.presto.orc.writer.ColumnWriter) DwrfEncryption(com.facebook.presto.orc.metadata.DwrfEncryption) DataSink(com.facebook.presto.common.io.DataSink) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DictionaryColumnWriter(com.facebook.presto.orc.writer.DictionaryColumnWriter) DIRECT(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT) DwrfStripeCacheWriter(com.facebook.presto.orc.metadata.DwrfStripeCacheWriter) Slices(io.airlift.slice.Slices) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) CompressedMetadataWriter(com.facebook.presto.orc.metadata.CompressedMetadataWriter) Footer(com.facebook.presto.orc.metadata.Footer) UNENCRYPTED(com.facebook.presto.orc.DwrfEncryptionInfo.UNENCRYPTED) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Collectors(java.util.stream.Collectors) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) DEFAULT_SEQUENCE_ID(com.facebook.presto.orc.metadata.ColumnEncoding.DEFAULT_SEQUENCE_ID) DataSize(io.airlift.units.DataSize) List(java.util.List) 
DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLayout(org.openjdk.jol.info.ClassLayout) DWRF(com.facebook.presto.orc.OrcEncoding.DWRF) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Entry(java.util.Map.Entry) Optional(java.util.Optional) Metadata(com.facebook.presto.orc.metadata.Metadata) IntStream(java.util.stream.IntStream) Slice(io.airlift.slice.Slice) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) OrcWriteValidationMode(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode) HashMap(java.util.HashMap) CLOSED(com.facebook.presto.orc.FlushReason.CLOSED) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) OptionalLong(java.util.OptionalLong) ImmutableList(com.google.common.collect.ImmutableList) MAGIC(com.facebook.presto.orc.metadata.PostScript.MAGIC) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) LastUsedCompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool.LastUsedCompressionBufferPool) OrcType(com.facebook.presto.orc.metadata.OrcType) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) Math.toIntExact(java.lang.Math.toIntExact) Type(com.facebook.presto.common.type.Type) Nullable(javax.annotation.Nullable) Integer.min(java.lang.Integer.min) ColumnWriters.createColumnWriter(com.facebook.presto.orc.writer.ColumnWriters.createColumnWriter) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) OrcReader.validateFile(com.facebook.presto.orc.OrcReader.validateFile) OrcWriteValidationBuilder(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationBuilder) IOException(java.io.IOException) 
DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Stream(com.facebook.presto.orc.metadata.Stream) Consumer(java.util.function.Consumer) EncryptionGroup(com.facebook.presto.orc.metadata.EncryptionGroup) OrcType.mapColumnToNode(com.facebook.presto.orc.metadata.OrcType.mapColumnToNode) Collectors.toList(java.util.stream.Collectors.toList) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) CompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool) Closeable(java.io.Closeable) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DataOutput(com.facebook.presto.common.io.DataOutput) Collections(java.util.Collections) DwrfEncryptionInfo.createNodeToGroupMap(com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap) HashMap(java.util.HashMap) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList)

Example 78 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class OrcWriter method bufferStripeData.

/**
 * Collects the outputs for the current stripe. These are not the actual data, but
 * objects that know how to write the data.
 * <p>
 * The returned list holds, in order: the index streams of every column writer, the data streams
 * in the order left by {@code streamLayout.reorder}, and finally the serialized stripe footer.
 * Returns an empty list, with no side effects, when {@code stripeRowCount} is zero.
 * <p>
 * Besides building the list, this method appends a {@code ClosedStripe} to {@code closedStripes},
 * increases {@code closedStripesRetainedBytes}, feeds the stripe cache writer (when present),
 * records write-validation data, and reports the stripe to {@code stats}.
 *
 * @param stripeStartOffset start offset of the stripe; stored in the {@code StripeInformation} and passed to validation
 * @param flushReason why the stripe is being flushed; forwarded to {@code stats.recordStripeWritten}
 * @return the outputs to write for this stripe, in write order
 * @throws IOException declared by this method; presumably raised by the metadata or stream helpers it calls
 */
private List<DataOutput> bufferStripeData(long stripeStartOffset, FlushReason flushReason) throws IOException {
    if (stripeRowCount == 0) {
        return ImmutableList.of();
    }
    List<DataOutput> outputData = new ArrayList<>();
    List<Stream> unencryptedStreams = new ArrayList<>(columnWriters.size() * 3);
    Multimap<Integer, Stream> encryptedStreams = ArrayListMultimap.create();
    // get index streams
    long indexLength = 0;
    // running total of the sizes of all streams added so far; it is not reset between the index and data loops
    long offset = 0;
    // -1 stands for "unencrypted"; like offset, this value carries over from the index loop into the data loop
    int previousEncryptionGroup = -1;
    for (ColumnWriter columnWriter : columnWriters) {
        for (StreamDataOutput indexStream : columnWriter.getIndexStreams(Optional.empty())) {
            // The ordering is critical because the stream only contains a length with no offset.
            // if the previous stream was part of a different encryption group, need to specify an offset so we know the column order
            outputData.add(indexStream);
            Optional<Integer> encryptionGroup = dwrfEncryptionInfo.getGroupByNodeId(indexStream.getStream().getColumn());
            if (encryptionGroup.isPresent()) {
                Stream stream = previousEncryptionGroup == encryptionGroup.get() ? indexStream.getStream() : indexStream.getStream().withOffset(offset);
                encryptedStreams.put(encryptionGroup.get(), stream);
                previousEncryptionGroup = encryptionGroup.get();
            } else {
                Stream stream = previousEncryptionGroup == -1 ? indexStream.getStream() : indexStream.getStream().withOffset(offset);
                unencryptedStreams.add(stream);
                previousEncryptionGroup = -1;
            }
            offset += indexStream.size();
            indexLength += indexStream.size();
        }
    }
    // At this point outputData holds only index streams; an immutable copy is handed to the
    // stripe cache writer, while outputData itself keeps being appended to below.
    if (dwrfStripeCacheWriter.isPresent()) {
        dwrfStripeCacheWriter.get().addIndexStreams(ImmutableList.copyOf(outputData), indexLength);
    }
    // data streams; the final order is whatever streamLayout.reorder leaves in the list
    // NOTE(review): the original comment said "sorted by size" - that depends on the StreamLayout in use, confirm
    long dataLength = 0;
    List<StreamDataOutput> dataStreams = new ArrayList<>(columnWriters.size() * 2);
    for (ColumnWriter columnWriter : columnWriters) {
        List<StreamDataOutput> streams = columnWriter.getDataStreams();
        dataStreams.addAll(streams);
        dataLength += streams.stream().mapToLong(StreamDataOutput::size).sum();
    }
    streamLayout.reorder(dataStreams);
    // add data streams
    for (StreamDataOutput dataStream : dataStreams) {
        // The ordering is critical because the stream only contains a length with no offset.
        // if the previous stream was part of a different encryption group, need to specify an offset so we know the column order
        outputData.add(dataStream);
        Optional<Integer> encryptionGroup = dwrfEncryptionInfo.getGroupByNodeId(dataStream.getStream().getColumn());
        if (encryptionGroup.isPresent()) {
            Stream stream = previousEncryptionGroup == encryptionGroup.get() ? dataStream.getStream() : dataStream.getStream().withOffset(offset);
            encryptedStreams.put(encryptionGroup.get(), stream);
            previousEncryptionGroup = encryptionGroup.get();
        } else {
            Stream stream = previousEncryptionGroup == -1 ? dataStream.getStream() : dataStream.getStream().withOffset(offset);
            unencryptedStreams.add(stream);
            previousEncryptionGroup = -1;
        }
        offset += dataStream.size();
    }
    // gather encodings and stripe statistics from every column writer, keyed by node id
    Map<Integer, ColumnEncoding> columnEncodings = new HashMap<>();
    columnWriters.forEach(columnWriter -> columnEncodings.putAll(columnWriter.getColumnEncodings()));
    Map<Integer, ColumnStatistics> columnStatistics = new HashMap<>();
    columnWriters.forEach(columnWriter -> columnStatistics.putAll(columnWriter.getColumnStripeStatistics()));
    // the 0th column is a struct column for the whole row
    columnEncodings.put(0, new ColumnEncoding(DIRECT, 0));
    columnStatistics.put(0, new ColumnStatistics((long) stripeRowCount, null));
    // split the encodings by whether the node belongs to an encryption group
    Map<Integer, ColumnEncoding> unencryptedColumnEncodings = columnEncodings.entrySet().stream().filter(entry -> !dwrfEncryptionInfo.getGroupByNodeId(entry.getKey()).isPresent()).collect(toImmutableMap(Entry::getKey, Entry::getValue));
    Map<Integer, ColumnEncoding> encryptedColumnEncodings = columnEncodings.entrySet().stream().filter(entry -> dwrfEncryptionInfo.getGroupByNodeId(entry.getKey()).isPresent()).collect(toImmutableMap(Entry::getKey, Entry::getValue));
    List<Slice> encryptedGroups = createEncryptedGroups(encryptedStreams, encryptedColumnEncodings);
    // the footer is the last output of the stripe; the same bytes are also offered to the stripe cache writer
    StripeFooter stripeFooter = new StripeFooter(unencryptedStreams, unencryptedColumnEncodings, encryptedGroups);
    Slice footer = metadataWriter.writeStripeFooter(stripeFooter);
    outputData.add(createDataOutput(footer));
    dwrfStripeCacheWriter.ifPresent(stripeCacheWriter -> stripeCacheWriter.addStripeFooter(createDataOutput(footer)));
    // create final stripe statistics
    // toDenseList is defined elsewhere; presumably it yields one entry per node id up to orcTypes.size() - confirm
    StripeStatistics statistics = new StripeStatistics(toDenseList(columnStatistics, orcTypes.size()));
    recordValidation(validation -> validation.addStripeStatistics(stripeStartOffset, statistics));
    StripeInformation stripeInformation = new StripeInformation(stripeRowCount, stripeStartOffset, indexLength, dataLength, footer.length(), OptionalLong.of(stripeRawSize), dwrfEncryptionInfo.getEncryptedKeyMetadatas());
    // remember the closed stripe and account for the memory it retains
    ClosedStripe closedStripe = new ClosedStripe(stripeInformation, statistics);
    closedStripes.add(closedStripe);
    closedStripesRetainedBytes += closedStripe.getRetainedSizeInBytes();
    recordValidation(validation -> validation.addStripe(stripeInformation.getNumberOfRows()));
    stats.recordStripeWritten(flushPolicy.getStripeMinBytes(), flushPolicy.getStripeMaxBytes(), dictionaryMaxMemoryBytes, flushReason, dictionaryCompressionOptimizer.getDictionaryMemoryBytes(), stripeInformation);
    return outputData;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) DwrfMetadataWriter.toFileStatistics(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toFileStatistics) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) StreamLayout(com.facebook.presto.orc.writer.StreamLayout) ColumnWriter(com.facebook.presto.orc.writer.ColumnWriter) DwrfEncryption(com.facebook.presto.orc.metadata.DwrfEncryption) DataSink(com.facebook.presto.common.io.DataSink) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DictionaryColumnWriter(com.facebook.presto.orc.writer.DictionaryColumnWriter) DIRECT(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT) DwrfStripeCacheWriter(com.facebook.presto.orc.metadata.DwrfStripeCacheWriter) Slices(io.airlift.slice.Slices) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) CompressedMetadataWriter(com.facebook.presto.orc.metadata.CompressedMetadataWriter) Footer(com.facebook.presto.orc.metadata.Footer) UNENCRYPTED(com.facebook.presto.orc.DwrfEncryptionInfo.UNENCRYPTED) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Collectors(java.util.stream.Collectors) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) DEFAULT_SEQUENCE_ID(com.facebook.presto.orc.metadata.ColumnEncoding.DEFAULT_SEQUENCE_ID) DataSize(io.airlift.units.DataSize) List(java.util.List) DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup) 
ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLayout(org.openjdk.jol.info.ClassLayout) DWRF(com.facebook.presto.orc.OrcEncoding.DWRF) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Entry(java.util.Map.Entry) Optional(java.util.Optional) Metadata(com.facebook.presto.orc.metadata.Metadata) IntStream(java.util.stream.IntStream) Slice(io.airlift.slice.Slice) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) OrcWriteValidationMode(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode) HashMap(java.util.HashMap) CLOSED(com.facebook.presto.orc.FlushReason.CLOSED) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) OptionalLong(java.util.OptionalLong) ImmutableList(com.google.common.collect.ImmutableList) MAGIC(com.facebook.presto.orc.metadata.PostScript.MAGIC) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) LastUsedCompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool.LastUsedCompressionBufferPool) OrcType(com.facebook.presto.orc.metadata.OrcType) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) Math.toIntExact(java.lang.Math.toIntExact) Type(com.facebook.presto.common.type.Type) Nullable(javax.annotation.Nullable) Integer.min(java.lang.Integer.min) ColumnWriters.createColumnWriter(com.facebook.presto.orc.writer.ColumnWriters.createColumnWriter) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) OrcReader.validateFile(com.facebook.presto.orc.OrcReader.validateFile) OrcWriteValidationBuilder(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationBuilder) IOException(java.io.IOException) DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Stream(com.facebook.presto.orc.metadata.Stream) 
Consumer(java.util.function.Consumer) EncryptionGroup(com.facebook.presto.orc.metadata.EncryptionGroup) OrcType.mapColumnToNode(com.facebook.presto.orc.metadata.OrcType.mapColumnToNode) Collectors.toList(java.util.stream.Collectors.toList) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) CompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool) Closeable(java.io.Closeable) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DataOutput(com.facebook.presto.common.io.DataOutput) Collections(java.util.Collections) DwrfEncryptionInfo.createNodeToGroupMap(com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) DataOutput(com.facebook.presto.common.io.DataOutput) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) IntStream(java.util.stream.IntStream) Stream(com.facebook.presto.orc.metadata.Stream) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) ColumnWriter(com.facebook.presto.orc.writer.ColumnWriter) DictionaryColumnWriter(com.facebook.presto.orc.writer.DictionaryColumnWriter) ColumnWriters.createColumnWriter(com.facebook.presto.orc.writer.ColumnWriters.createColumnWriter) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Example 79 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class StripeReader method getRowGroupStatistics.

/**
 * Builds a map from top-level field ordinal to the column statistics recorded for one row group.
 * <p>
 * Statistics are first grouped by the column of each stream id. A field whose column has exactly
 * one statistics entry maps to that entry; a field with several entries maps to their merge.
 * Fields with no statistics for the row group are left out of the result.
 *
 * @param rootStructType root type; must be of kind {@code STRUCT}
 * @param columnIndexes row group indexes keyed by stream id
 * @param rowGroup zero-based row group position; must not be negative
 * @return immutable map of field ordinal to statistics
 */
private static Map<Integer, ColumnStatistics> getRowGroupStatistics(OrcType rootStructType, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) {
    requireNonNull(rootStructType, "rootStructType is null");
    checkArgument(rootStructType.getOrcTypeKind() == STRUCT);
    requireNonNull(columnIndexes, "columnIndexes is null");
    checkArgument(rowGroup >= 0, "rowGroup is negative");

    // Bucket this row group's statistics by column; more than one stream can map to the same column.
    Map<Integer, List<ColumnStatistics>> statisticsByColumn = new HashMap<>();
    for (Entry<StreamId, List<RowGroupIndex>> indexEntry : columnIndexes.entrySet()) {
        List<RowGroupIndex> rowGroupIndexes = indexEntry.getValue();
        if (rowGroupIndexes.isEmpty()) {
            continue;
        }
        RowGroupIndex rowGroupIndex = rowGroupIndexes.get(rowGroup);
        if (rowGroupIndex == null) {
            continue;
        }
        statisticsByColumn
                .computeIfAbsent(indexEntry.getKey().getColumn(), column -> new ArrayList<>())
                .add(rowGroupIndex.getColumnStatistics());
    }

    ImmutableMap.Builder<Integer, ColumnStatistics> result = ImmutableMap.builder();
    for (int ordinal = 0; ordinal < rootStructType.getFieldCount(); ordinal++) {
        List<ColumnStatistics> fieldStatistics = statisticsByColumn.get(rootStructType.getFieldTypeIndex(ordinal));
        if (fieldStatistics == null) {
            continue;
        }
        // Several entries means statistics came from different streams and must be merged.
        // This can happen if map is represented as struct (DWRF only)
        ColumnStatistics fieldResult = fieldStatistics.size() == 1
                ? getOnlyElement(fieldStatistics)
                : mergeColumnStatistics(fieldStatistics);
        result.put(ordinal, fieldResult);
    }
    return result.build();
}
Also used : StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) OrcTypeKind(com.facebook.presto.orc.metadata.OrcType.OrcTypeKind) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INDEX(com.facebook.presto.orc.metadata.Stream.StreamArea.INDEX) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) RuntimeStats(com.facebook.presto.common.RuntimeStats) StreamKind(com.facebook.presto.orc.metadata.Stream.StreamKind) InvalidCheckpointException(com.facebook.presto.orc.checkpoint.InvalidCheckpointException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) InputStreamSource(com.facebook.presto.orc.stream.InputStreamSource) Collection(java.util.Collection) Set(java.util.Set) CheckpointInputStreamSource.createCheckpointStreamSource(com.facebook.presto.orc.stream.CheckpointInputStreamSource.createCheckpointStreamSource) ValueInputStreamSource(com.facebook.presto.orc.stream.ValueInputStreamSource) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) List(java.util.List) Entry(java.util.Map.Entry) InputStreamSources(com.facebook.presto.orc.stream.InputStreamSources) Optional(java.util.Optional) ValueInputStream(com.facebook.presto.orc.stream.ValueInputStream) DICTIONARY(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY) DwrfSequenceEncoding(com.facebook.presto.orc.metadata.DwrfSequenceEncoding) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) SortedMap(java.util.SortedMap) DICTIONARY_DATA(com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_DATA) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) 
ROW_INDEX(com.facebook.presto.orc.metadata.Stream.StreamKind.ROW_INDEX) Slice(io.airlift.slice.Slice) LENGTH(com.facebook.presto.orc.metadata.Stream.StreamKind.LENGTH) HashMap(java.util.HashMap) Checkpoints.getDictionaryStreamCheckpoint(com.facebook.presto.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) DwrfMetadataReader.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataReader.toStripeEncryptionGroup) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) HiveWriterVersion(com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicates(com.google.common.base.Predicates) OrcType(com.facebook.presto.orc.metadata.OrcType) Math.toIntExact(java.lang.Math.toIntExact) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) ColumnStatistics.mergeColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics) BLOOM_FILTER(com.facebook.presto.orc.metadata.Stream.StreamKind.BLOOM_FILTER) NOOP_ORC_LOCAL_MEMORY_CONTEXT(com.facebook.presto.orc.NoopOrcLocalMemoryContext.NOOP_ORC_LOCAL_MEMORY_CONTEXT) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) ColumnEncodingKind(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind) DICTIONARY_V2(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY_V2) STRUCT(com.facebook.presto.orc.metadata.OrcType.OrcTypeKind.STRUCT) StreamCheckpoint(com.facebook.presto.orc.checkpoint.StreamCheckpoint) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Maps(com.google.common.collect.Maps) Stream(com.facebook.presto.orc.metadata.Stream) Math.multiplyExact(java.lang.Math.multiplyExact) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) 
Checkpoints.getStreamCheckpoints(com.facebook.presto.orc.checkpoint.Checkpoints.getStreamCheckpoints) ValueStreams(com.facebook.presto.orc.stream.ValueStreams) VisibleForTesting(com.google.common.annotations.VisibleForTesting) MetadataReader(com.facebook.presto.orc.metadata.MetadataReader) InputStream(java.io.InputStream) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) ColumnStatistics.mergeColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ImmutableMap(com.google.common.collect.ImmutableMap) Checkpoints.getDictionaryStreamCheckpoint(com.facebook.presto.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(com.facebook.presto.orc.checkpoint.StreamCheckpoint) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList)

Example 80 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class AbstractOrcRecordReader method close.

/**
 * Closes the data source and every non-null stream reader, clears {@code rowGroups}, and then runs
 * the write-validation checks that are enabled: row count, per-column hashes and stripe hash when a
 * checksum builder is present, and file statistics when file statistics validation is present.
 *
 * @throws IOException if closing a resource fails or a validation check reports a failure
 */
@Override
public void close() throws IOException {
    try (Closer resources = Closer.create()) {
        resources.register(orcDataSource);
        for (StreamReader streamReader : streamReaders) {
            if (streamReader == null) {
                continue;
            }
            resources.register(streamReader::close);
        }
    }
    rowGroups = null;

    if (writeChecksumBuilder.isPresent()) {
        OrcWriteValidation.WriteChecksum computedChecksum = writeChecksumBuilder.get().build();
        validateWrite(validation -> validation.getChecksum().getTotalRowCount() == computedChecksum.getTotalRowCount(), "Invalid row count");

        int position = 0;
        for (Long computedHash : computedChecksum.getColumnHashes()) {
            // Lambdas can only capture effectively final locals, so pin the index per iteration.
            int columnIndex = position++;
            validateWrite(validation -> validation.getChecksum().getColumnHashes().get(columnIndex).equals(computedHash), "Invalid checksum for column %s", columnIndex);
        }

        validateWrite(validation -> validation.getChecksum().getStripeHash() == computedChecksum.getStripeHash(), "Invalid stripes checksum");
    }

    if (fileStatisticsValidation.isPresent()) {
        List<ColumnStatistics> actualFileStatistics = fileStatisticsValidation.get().build();
        writeValidation.get().validateFileStatistics(orcDataSource.getId(), actualFileStatistics);
    }
}
Also used : Closer(com.google.common.io.Closer) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) StreamReader(com.facebook.presto.orc.reader.StreamReader) Comparator.comparingLong(java.util.Comparator.comparingLong)

Aggregations

ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics)99 ImmutableList (com.google.common.collect.ImmutableList)46 Slice (io.airlift.slice.Slice)46 List (java.util.List)46 Stream (com.facebook.presto.orc.metadata.Stream)38 ArrayList (java.util.ArrayList)38 RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex)32 StreamDataOutput (com.facebook.presto.orc.stream.StreamDataOutput)32 BooleanStreamCheckpoint (com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint)26 PresentOutputStream (com.facebook.presto.orc.stream.PresentOutputStream)26 ImmutableMap (com.google.common.collect.ImmutableMap)23 LongOutputStream (com.facebook.presto.orc.stream.LongOutputStream)16 OrcType (com.facebook.presto.orc.metadata.OrcType)15 LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)14 Map (java.util.Map)14 Type (com.facebook.presto.common.type.Type)13 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)12 IOException (java.io.IOException)12 HashMap (java.util.HashMap)12 Optional (java.util.Optional)12