Search in sources :

Example 1 with StreamCheckpoint

use of io.prestosql.orc.checkpoint.StreamCheckpoint in project hetu-core by openlookeng.

The method createDictionaryStreamSources of the class StripeReader.

/**
 * Collects an input stream source for each dictionary stream that has a value stream.
 *
 * <p>Entries of {@code streams} that {@code isDictionary} rejects, and entries with no
 * matching value stream in {@code valueStreams}, are left out of the result.
 *
 * @param streams stream descriptors keyed by stream id
 * @param valueStreams value streams keyed by stream id; a missing entry means the stream is skipped
 * @param columnEncodings per-column encodings, looked up by the stream's column id
 * @return the stream sources gathered for the accepted dictionary streams
 */
private InputStreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, ColumnMetadata<ColumnEncoding> columnEncodings) {
    ImmutableMap.Builder<StreamId, InputStreamSource<?>> sources = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        Stream candidate = streamEntry.getValue();
        ColumnEncodingKind encodingKind = columnEncodings.get(candidate.getColumnId()).getColumnEncodingKind();
        // anything that is not a dictionary stream is ignored
        if (isDictionary(candidate, encodingKind)) {
            StreamId id = streamEntry.getKey();
            ValueInputStream<?> dictionaryData = valueStreams.get(id);
            // a stream with no value stream carries no data to position
            if (dictionaryData != null) {
                OrcTypeKind typeKind = types.get(candidate.getColumnId()).getOrcTypeKind();
                StreamCheckpoint dictionaryCheckpoint = getDictionaryStreamCheckpoint(id, typeKind, encodingKind);
                sources.put(id, createCheckpointStreamSource(dictionaryData, dictionaryCheckpoint));
            }
        }
    }
    return new InputStreamSources(sources.build());
}
Also used : OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) OrcTypeKind(io.prestosql.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ValueInputStreamSource(io.prestosql.orc.stream.ValueInputStreamSource) InputStreamSource(io.prestosql.orc.stream.InputStreamSource) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) ValueInputStream(io.prestosql.orc.stream.ValueInputStream) Stream(io.prestosql.orc.metadata.Stream) InputStream(java.io.InputStream) ColumnEncodingKind(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint)

Example 2 with StreamCheckpoint

use of io.prestosql.orc.checkpoint.StreamCheckpoint in project hetu-core by openlookeng.

The method createRowGroup of the class StripeReader.

/**
 * Assembles a {@link RowGroup} from the given checkpoints.
 *
 * <p>Each checkpoint whose stream id has a value stream in {@code valueStreams} is turned
 * into a checkpoint stream source; checkpoints without a value stream are skipped.
 *
 * @param groupId passed through to the {@link RowGroup} constructor
 * @param rowOffset passed through to the {@link RowGroup} constructor
 * @param rowCount passed through to the {@link RowGroup} constructor
 * @param minAverageRowBytes passed through to the {@link RowGroup} constructor
 * @param valueStreams value streams keyed by stream id
 * @param checkpoints checkpoints keyed by stream id
 * @return the row group wrapping the collected stream sources
 */
private static RowGroup createRowGroup(int groupId, int rowOffset, int rowCount, long minAverageRowBytes, Map<StreamId, ValueInputStream<?>> valueStreams, Map<StreamId, StreamCheckpoint> checkpoints) {
    ImmutableMap.Builder<StreamId, InputStreamSource<?>> sources = ImmutableMap.builder();
    checkpoints.forEach((id, position) -> {
        ValueInputStream<?> data = valueStreams.get(id);
        // only streams that actually have data get a source
        if (data != null) {
            sources.put(id, createCheckpointStreamSource(data, position));
        }
    });
    return new RowGroup(groupId, rowOffset, rowCount, minAverageRowBytes, new InputStreamSources(sources.build()));
}
Also used : ValueInputStreamSource(io.prestosql.orc.stream.ValueInputStreamSource) InputStreamSource(io.prestosql.orc.stream.InputStreamSource) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint)

Example 3 with StreamCheckpoint

use of io.prestosql.orc.checkpoint.StreamCheckpoint in project hetu-core by openlookeng.

The method createRowGroups of the class StripeReader.

/**
 * Creates one {@link RowGroup} per id in {@code selectedRowGroups}.
 *
 * <p>For every selected id the stream checkpoints are resolved, the row offset is computed as
 * {@code id * rowsInRowGroup}, the row count is capped at the rows remaining in the stripe, and
 * the minimum average value sizes reported by all column indexes for that id are summed.
 *
 * @param rowsInStripe total number of rows in the stripe
 * @param streams stream descriptors keyed by stream id
 * @param valueStreams value streams keyed by stream id
 * @param columnIndexes row group indexes per stream id; each list is indexed by row group id
 * @param selectedRowGroups ids of the row groups to create
 * @param encodings per-column encodings forwarded to the checkpoint lookup
 * @return the created row groups, in the iteration order of {@code selectedRowGroups}
 * @throws InvalidCheckpointException propagated from the checkpoint lookup
 */
private List<RowGroup> createRowGroups(int rowsInStripe, Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, Map<StreamId, List<RowGroupIndex>> columnIndexes, Set<Integer> selectedRowGroups, ColumnMetadata<ColumnEncoding> encodings) throws InvalidCheckpointException {
    ImmutableList.Builder<RowGroup> groups = ImmutableList.builder();
    for (int groupId : selectedRowGroups) {
        Map<StreamId, StreamCheckpoint> groupCheckpoints = getStreamCheckpoints(
                includedOrcColumnIds,
                types,
                decompressor.isPresent(),
                groupId,
                encodings,
                streams,
                columnIndexes);

        int firstRow = groupId * rowsInRowGroup;
        // the last group of a stripe may hold fewer rows than a full row group
        int remainingRows = rowsInStripe - firstRow;
        int groupRowCount = Math.min(remainingRows, rowsInRowGroup);

        long minAverageRowBytes = columnIndexes.values().stream()
                .mapToLong(indexes -> indexes.get(groupId).getColumnStatistics().getMinAverageValueSizeInBytes())
                .sum();

        RowGroup group = createRowGroup(groupId, firstRow, groupRowCount, minAverageRowBytes, valueStreams, groupCheckpoints);
        groups.add(group);
    }
    return groups.build();
}
Also used : CheckpointInputStreamSource.createCheckpointStreamSource(io.prestosql.orc.stream.CheckpointInputStreamSource.createCheckpointStreamSource) OrcDataReader(io.prestosql.orc.stream.OrcDataReader) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ValueInputStreamSource(io.prestosql.orc.stream.ValueInputStreamSource) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) StripeFooter(io.prestosql.orc.metadata.StripeFooter) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) ImmutableSet(com.google.common.collect.ImmutableSet) OrcTypeKind(io.prestosql.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Set(java.util.Set) DICTIONARY_DATA(io.prestosql.orc.metadata.Stream.StreamKind.DICTIONARY_DATA) Checkpoints.getStreamCheckpoints(io.prestosql.orc.checkpoint.Checkpoints.getStreamCheckpoints) ZoneId(java.time.ZoneId) Preconditions.checkState(com.google.common.base.Preconditions.checkState) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) InputStreamSource(io.prestosql.orc.stream.InputStreamSource) DICTIONARY(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BLOOM_FILTER_UTF8(io.prestosql.orc.metadata.Stream.StreamKind.BLOOM_FILTER_UTF8) Entry(java.util.Map.Entry) Optional(java.util.Optional) InvalidCheckpointException(io.prestosql.orc.checkpoint.InvalidCheckpointException) DICTIONARY_V2(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY_V2) Slice(io.airlift.slice.Slice) 
OrcChunkLoader(io.prestosql.orc.stream.OrcChunkLoader) Logger(io.airlift.log.Logger) ColumnEncodingKind(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind) DICTIONARY_COUNT(io.prestosql.orc.metadata.Stream.StreamKind.DICTIONARY_COUNT) HashMap(java.util.HashMap) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicates(com.google.common.base.Predicates) Math.toIntExact(java.lang.Math.toIntExact) LinkedHashSet(java.util.LinkedHashSet) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) ValueInputStream(io.prestosql.orc.stream.ValueInputStream) ROW_INDEX(io.prestosql.orc.metadata.Stream.StreamKind.ROW_INDEX) ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) OrcType(io.prestosql.orc.metadata.OrcType) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Stream(io.prestosql.orc.metadata.Stream) BLOOM_FILTER(io.prestosql.orc.metadata.Stream.StreamKind.BLOOM_FILTER) ExecutionException(java.util.concurrent.ExecutionException) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) ValueStreams(io.prestosql.orc.stream.ValueStreams) OrcReader.handleCacheLoadException(io.prestosql.orc.OrcReader.handleCacheLoadException) HashableBloomFilter(io.prestosql.orc.metadata.statistics.HashableBloomFilter) InputStream(java.io.InputStream) LENGTH(io.prestosql.orc.metadata.Stream.StreamKind.LENGTH) ImmutableList(com.google.common.collect.ImmutableList) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint) 
Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint)

Aggregations

ImmutableMap (com.google.common.collect.ImmutableMap)3 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)3 Checkpoints.getDictionaryStreamCheckpoint (io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint)3 StreamCheckpoint (io.prestosql.orc.checkpoint.StreamCheckpoint)3 InputStreamSource (io.prestosql.orc.stream.InputStreamSource)3 InputStreamSources (io.prestosql.orc.stream.InputStreamSources)3 ValueInputStreamSource (io.prestosql.orc.stream.ValueInputStreamSource)3 ColumnEncodingKind (io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind)2 OrcColumnId (io.prestosql.orc.metadata.OrcColumnId)2 OrcTypeKind (io.prestosql.orc.metadata.OrcType.OrcTypeKind)2 Stream (io.prestosql.orc.metadata.Stream)2 OrcInputStream (io.prestosql.orc.stream.OrcInputStream)2 ValueInputStream (io.prestosql.orc.stream.ValueInputStream)2 InputStream (java.io.InputStream)2 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 Predicates (com.google.common.base.Predicates)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Maps (com.google.common.collect.Maps)1