Search in sources :

Example 1 with ValueStreams

Use of io.prestosql.orc.stream.ValueStreams in project hetu-core by openlookeng.

The method createRowGroups of the class StripeReader.

/**
 * Builds one {@link RowGroup} for every id in {@code selectedRowGroups}, in the
 * iteration order of that set.
 *
 * <p>For each selected row group this resolves its stream checkpoints, derives the
 * row offset and row count (the count is clamped to the rows remaining in the
 * stripe), and sums the per-column minimum average value size taken from the row
 * group's index statistics.
 *
 * @param rowsInStripe total number of rows in the stripe
 * @param streams streams passed through to the checkpoint lookup
 * @param valueStreams value streams handed to each created row group
 * @param columnIndexes row group indexes per stream; each list is indexed by row group id
 * @param selectedRowGroups ids of the row groups to create
 * @param encodings column encodings passed through to the checkpoint lookup
 * @return an immutable list with one row group per selected id
 * @throws InvalidCheckpointException declared by this method; presumably raised by the
 *         checkpoint lookup (confirm against {@code getStreamCheckpoints})
 */
private List<RowGroup> createRowGroups(int rowsInStripe, Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, Map<StreamId, List<RowGroupIndex>> columnIndexes, Set<Integer> selectedRowGroups, ColumnMetadata<ColumnEncoding> encodings) throws InvalidCheckpointException {
    ImmutableList.Builder<RowGroup> groups = ImmutableList.builder();
    for (int rowGroupId : selectedRowGroups) {
        Map<StreamId, StreamCheckpoint> groupCheckpoints = getStreamCheckpoints(includedOrcColumnIds, types, decompressor.isPresent(), rowGroupId, encodings, streams, columnIndexes);

        // The final row group of a stripe may hold fewer rows than rowsInRowGroup.
        int firstRow = rowGroupId * rowsInRowGroup;
        int rowCount = Math.min(rowsInStripe - firstRow, rowsInRowGroup);

        // Sum the minimum average value size over every indexed stream for this row group.
        long minAverageRowBytes = 0;
        for (List<RowGroupIndex> indexes : columnIndexes.values()) {
            minAverageRowBytes += indexes.get(rowGroupId).getColumnStatistics().getMinAverageValueSizeInBytes();
        }

        groups.add(createRowGroup(rowGroupId, firstRow, rowCount, minAverageRowBytes, valueStreams, groupCheckpoints));
    }
    return groups.build();
}
Also used : CheckpointInputStreamSource.createCheckpointStreamSource(io.prestosql.orc.stream.CheckpointInputStreamSource.createCheckpointStreamSource) OrcDataReader(io.prestosql.orc.stream.OrcDataReader) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ValueInputStreamSource(io.prestosql.orc.stream.ValueInputStreamSource) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) StripeFooter(io.prestosql.orc.metadata.StripeFooter) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) ImmutableSet(com.google.common.collect.ImmutableSet) OrcTypeKind(io.prestosql.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Set(java.util.Set) DICTIONARY_DATA(io.prestosql.orc.metadata.Stream.StreamKind.DICTIONARY_DATA) Checkpoints.getStreamCheckpoints(io.prestosql.orc.checkpoint.Checkpoints.getStreamCheckpoints) ZoneId(java.time.ZoneId) Preconditions.checkState(com.google.common.base.Preconditions.checkState) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) InputStreamSource(io.prestosql.orc.stream.InputStreamSource) DICTIONARY(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BLOOM_FILTER_UTF8(io.prestosql.orc.metadata.Stream.StreamKind.BLOOM_FILTER_UTF8) Entry(java.util.Map.Entry) Optional(java.util.Optional) InvalidCheckpointException(io.prestosql.orc.checkpoint.InvalidCheckpointException) DICTIONARY_V2(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY_V2) Slice(io.airlift.slice.Slice) 
OrcChunkLoader(io.prestosql.orc.stream.OrcChunkLoader) Logger(io.airlift.log.Logger) ColumnEncodingKind(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind) DICTIONARY_COUNT(io.prestosql.orc.metadata.Stream.StreamKind.DICTIONARY_COUNT) HashMap(java.util.HashMap) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicates(com.google.common.base.Predicates) Math.toIntExact(java.lang.Math.toIntExact) LinkedHashSet(java.util.LinkedHashSet) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) ValueInputStream(io.prestosql.orc.stream.ValueInputStream) ROW_INDEX(io.prestosql.orc.metadata.Stream.StreamKind.ROW_INDEX) ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) OrcType(io.prestosql.orc.metadata.OrcType) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Stream(io.prestosql.orc.metadata.Stream) BLOOM_FILTER(io.prestosql.orc.metadata.Stream.StreamKind.BLOOM_FILTER) ExecutionException(java.util.concurrent.ExecutionException) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) ValueStreams(io.prestosql.orc.stream.ValueStreams) OrcReader.handleCacheLoadException(io.prestosql.orc.OrcReader.handleCacheLoadException) HashableBloomFilter(io.prestosql.orc.metadata.statistics.HashableBloomFilter) InputStream(java.io.InputStream) LENGTH(io.prestosql.orc.metadata.Stream.StreamKind.LENGTH) ImmutableList(com.google.common.collect.ImmutableList) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint) 
Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint)

Aggregations

Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 Predicates (com.google.common.base.Predicates)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Maps (com.google.common.collect.Maps)1 UncheckedExecutionException (com.google.common.util.concurrent.UncheckedExecutionException)1 Logger (io.airlift.log.Logger)1 Slice (io.airlift.slice.Slice)1 AggregatedMemoryContext (io.prestosql.memory.context.AggregatedMemoryContext)1 OrcReader.handleCacheLoadException (io.prestosql.orc.OrcReader.handleCacheLoadException)1 Checkpoints.getDictionaryStreamCheckpoint (io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint)1 Checkpoints.getStreamCheckpoints (io.prestosql.orc.checkpoint.Checkpoints.getStreamCheckpoints)1 InvalidCheckpointException (io.prestosql.orc.checkpoint.InvalidCheckpointException)1 StreamCheckpoint (io.prestosql.orc.checkpoint.StreamCheckpoint)1 ColumnEncoding (io.prestosql.orc.metadata.ColumnEncoding)1 ColumnEncodingKind (io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind)1 DICTIONARY (io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY)1