Search in sources :

Example 1 with STRUCT

Use of com.facebook.presto.orc.metadata.OrcType.OrcTypeKind.STRUCT in the project presto by prestodb.

From the class StripeReader, the method getRowGroupStatistics.

/**
 * Collects the column statistics of a single row group, keyed by the ordinal
 * of each field of the root struct.
 *
 * <p>Statistics are first bucketed by the column id of the stream they came
 * from. Each root field is then looked up by its field type index; fields
 * with no statistics for this row group are omitted from the result.
 *
 * @param rootStructType root type of the file; its kind must be {@code STRUCT}
 * @param columnIndexes row group indexes per stream
 * @param rowGroup zero-based position of the row group within each index list
 * @return immutable map from root field ordinal to that field's statistics
 * @throws NullPointerException if {@code rootStructType} or {@code columnIndexes} is null
 * @throws IllegalArgumentException if the root type is not a struct or {@code rowGroup} is negative
 */
private static Map<Integer, ColumnStatistics> getRowGroupStatistics(OrcType rootStructType, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) {
    requireNonNull(rootStructType, "rootStructType is null");
    checkArgument(rootStructType.getOrcTypeKind() == STRUCT);
    requireNonNull(columnIndexes, "columnIndexes is null");
    checkArgument(rowGroup >= 0, "rowGroup is negative");

    // Bucket the requested row group's statistics by the column id of the owning stream.
    Map<Integer, List<ColumnStatistics>> statisticsByColumn = new HashMap<>();
    for (Entry<StreamId, List<RowGroupIndex>> indexEntry : columnIndexes.entrySet()) {
        List<RowGroupIndex> rowGroupIndexes = indexEntry.getValue();
        if (rowGroupIndexes.isEmpty()) {
            continue;
        }
        RowGroupIndex rowGroupIndex = rowGroupIndexes.get(rowGroup);
        if (rowGroupIndex == null) {
            continue;
        }
        statisticsByColumn.computeIfAbsent(indexEntry.getKey().getColumn(), column -> new ArrayList<>()).add(rowGroupIndex.getColumnStatistics());
    }

    ImmutableMap.Builder<Integer, ColumnStatistics> result = ImmutableMap.builder();
    for (int fieldOrdinal = 0; fieldOrdinal < rootStructType.getFieldCount(); fieldOrdinal++) {
        List<ColumnStatistics> fieldStatistics = statisticsByColumn.get(rootStructType.getFieldTypeIndex(fieldOrdinal));
        if (fieldStatistics == null) {
            // No stream contributed statistics for this field in this row group.
            continue;
        }
        // More than one entry means several streams share the column id, so merge them.
        // This can happen if map is represented as struct (DWRF only)
        ColumnStatistics fieldResult = fieldStatistics.size() == 1
                ? getOnlyElement(fieldStatistics)
                : mergeColumnStatistics(fieldStatistics);
        result.put(fieldOrdinal, fieldResult);
    }
    return result.build();
}
Also used : StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) OrcTypeKind(com.facebook.presto.orc.metadata.OrcType.OrcTypeKind) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INDEX(com.facebook.presto.orc.metadata.Stream.StreamArea.INDEX) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) RuntimeStats(com.facebook.presto.common.RuntimeStats) StreamKind(com.facebook.presto.orc.metadata.Stream.StreamKind) InvalidCheckpointException(com.facebook.presto.orc.checkpoint.InvalidCheckpointException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) InputStreamSource(com.facebook.presto.orc.stream.InputStreamSource) Collection(java.util.Collection) Set(java.util.Set) CheckpointInputStreamSource.createCheckpointStreamSource(com.facebook.presto.orc.stream.CheckpointInputStreamSource.createCheckpointStreamSource) ValueInputStreamSource(com.facebook.presto.orc.stream.ValueInputStreamSource) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) List(java.util.List) Entry(java.util.Map.Entry) InputStreamSources(com.facebook.presto.orc.stream.InputStreamSources) Optional(java.util.Optional) ValueInputStream(com.facebook.presto.orc.stream.ValueInputStream) DICTIONARY(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY) DwrfSequenceEncoding(com.facebook.presto.orc.metadata.DwrfSequenceEncoding) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) SortedMap(java.util.SortedMap) DICTIONARY_DATA(com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_DATA) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) 
ROW_INDEX(com.facebook.presto.orc.metadata.Stream.StreamKind.ROW_INDEX) Slice(io.airlift.slice.Slice) LENGTH(com.facebook.presto.orc.metadata.Stream.StreamKind.LENGTH) HashMap(java.util.HashMap) Checkpoints.getDictionaryStreamCheckpoint(com.facebook.presto.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) DwrfMetadataReader.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataReader.toStripeEncryptionGroup) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) HiveWriterVersion(com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicates(com.google.common.base.Predicates) OrcType(com.facebook.presto.orc.metadata.OrcType) Math.toIntExact(java.lang.Math.toIntExact) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) ColumnStatistics.mergeColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics) BLOOM_FILTER(com.facebook.presto.orc.metadata.Stream.StreamKind.BLOOM_FILTER) NOOP_ORC_LOCAL_MEMORY_CONTEXT(com.facebook.presto.orc.NoopOrcLocalMemoryContext.NOOP_ORC_LOCAL_MEMORY_CONTEXT) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) ColumnEncodingKind(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind) DICTIONARY_V2(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY_V2) STRUCT(com.facebook.presto.orc.metadata.OrcType.OrcTypeKind.STRUCT) StreamCheckpoint(com.facebook.presto.orc.checkpoint.StreamCheckpoint) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Maps(com.google.common.collect.Maps) Stream(com.facebook.presto.orc.metadata.Stream) Math.multiplyExact(java.lang.Math.multiplyExact) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) 
Checkpoints.getStreamCheckpoints(com.facebook.presto.orc.checkpoint.Checkpoints.getStreamCheckpoints) ValueStreams(com.facebook.presto.orc.stream.ValueStreams) VisibleForTesting(com.google.common.annotations.VisibleForTesting) MetadataReader(com.facebook.presto.orc.metadata.MetadataReader) InputStream(java.io.InputStream) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) ColumnStatistics.mergeColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ImmutableMap(com.google.common.collect.ImmutableMap) Checkpoints.getDictionaryStreamCheckpoint(com.facebook.presto.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(com.facebook.presto.orc.checkpoint.StreamCheckpoint) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList)

Aggregations

RuntimeStats (com.facebook.presto.common.RuntimeStats)1 NOOP_ORC_LOCAL_MEMORY_CONTEXT (com.facebook.presto.orc.NoopOrcLocalMemoryContext.NOOP_ORC_LOCAL_MEMORY_CONTEXT)1 Checkpoints.getDictionaryStreamCheckpoint (com.facebook.presto.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint)1 Checkpoints.getStreamCheckpoints (com.facebook.presto.orc.checkpoint.Checkpoints.getStreamCheckpoints)1 InvalidCheckpointException (com.facebook.presto.orc.checkpoint.InvalidCheckpointException)1 StreamCheckpoint (com.facebook.presto.orc.checkpoint.StreamCheckpoint)1 ColumnEncoding (com.facebook.presto.orc.metadata.ColumnEncoding)1 ColumnEncodingKind (com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind)1 DICTIONARY (com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY)1 DICTIONARY_V2 (com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY_V2)1 DwrfMetadataReader.toStripeEncryptionGroup (com.facebook.presto.orc.metadata.DwrfMetadataReader.toStripeEncryptionGroup)1 DwrfSequenceEncoding (com.facebook.presto.orc.metadata.DwrfSequenceEncoding)1 MetadataReader (com.facebook.presto.orc.metadata.MetadataReader)1 OrcType (com.facebook.presto.orc.metadata.OrcType)1 OrcTypeKind (com.facebook.presto.orc.metadata.OrcType.OrcTypeKind)1 STRUCT (com.facebook.presto.orc.metadata.OrcType.OrcTypeKind.STRUCT)1 HiveWriterVersion (com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion)1 RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex)1 Stream (com.facebook.presto.orc.metadata.Stream)1 INDEX (com.facebook.presto.orc.metadata.Stream.StreamArea.INDEX)1