Search in sources :

Example 31 with Stream

use of io.trino.orc.metadata.Stream in project trino by trinodb.

Method getDiskRanges of the class StripeReader.

/**
 * Assigns a disk range to every non-empty stream by walking the list in order
 * and accumulating stream lengths into a running offset that starts at zero.
 *
 * @param streams streams to place; each stream's offset is the sum of the lengths
 *                of all streams that precede it in this list
 * @return map from stream id to its disk range; streams whose length is not
 *         positive get no entry, but their length still advances the offset
 */
private static Map<StreamId, DiskRange> getDiskRanges(List<Stream> streams) {
    ImmutableMap.Builder<StreamId, DiskRange> ranges = ImmutableMap.builder();
    long nextOffset = 0;
    for (Stream current : streams) {
        int length = current.getLength();
        // remember where this stream starts before moving the cursor past it
        long start = nextOffset;
        nextOffset += length;
        if (length <= 0) {
            // nothing to read for an empty stream
            continue;
        }
        ranges.put(new StreamId(current), new DiskRange(start, length));
    }
    return ranges.buildOrThrow();
}
Also used : Stream(io.trino.orc.metadata.Stream) OrcInputStream(io.trino.orc.stream.OrcInputStream) ValueInputStream(io.trino.orc.stream.ValueInputStream) InputStream(java.io.InputStream) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) StreamCheckpoint(io.trino.orc.checkpoint.StreamCheckpoint) Checkpoints.getDictionaryStreamCheckpoint(io.trino.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint)

Example 32 with Stream

use of io.trino.orc.metadata.Stream in project trino by trinodb.

Method createDictionaryStreamSources of the class StripeReader.

/**
 * Builds the input stream sources for the dictionary streams among {@code streams}.
 * A stream contributes a source only when {@code isDictionary} accepts it for its
 * column's encoding kind and {@code valueStreams} holds a value stream for its id.
 *
 * @param streams         candidate streams keyed by stream id
 * @param valueStreams    value streams keyed by stream id; a missing entry means the
 *                        stream is skipped
 * @param columnEncodings encodings looked up by the stream's column id
 * @return the sources collected for the qualifying streams
 */
private InputStreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, ColumnMetadata<ColumnEncoding> columnEncodings) {
    ImmutableMap.Builder<StreamId, InputStreamSource<?>> sources = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        StreamId id = streamEntry.getKey();
        Stream candidate = streamEntry.getValue();
        ColumnEncodingKind encodingKind = columnEncodings.get(candidate.getColumnId()).getColumnEncodingKind();
        // non-dictionary streams are not handled here
        if (isDictionary(candidate, encodingKind)) {
            ValueInputStream<?> data = valueStreams.get(id);
            // a dictionary stream with no value stream has no data to expose
            if (data != null) {
                OrcTypeKind typeKind = types.get(candidate.getColumnId()).getOrcTypeKind();
                StreamCheckpoint checkpoint = getDictionaryStreamCheckpoint(id, typeKind, encodingKind);
                InputStreamSource<?> source = createCheckpointStreamSource(data, checkpoint);
                sources.put(id, source);
            }
        }
    }
    return new InputStreamSources(sources.buildOrThrow());
}
Also used : OrcColumnId(io.trino.orc.metadata.OrcColumnId) OrcTypeKind(io.trino.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ValueInputStreamSource(io.trino.orc.stream.ValueInputStreamSource) InputStreamSource(io.trino.orc.stream.InputStreamSource) InputStreamSources(io.trino.orc.stream.InputStreamSources) Stream(io.trino.orc.metadata.Stream) OrcInputStream(io.trino.orc.stream.OrcInputStream) ValueInputStream(io.trino.orc.stream.ValueInputStream) InputStream(java.io.InputStream) ColumnEncodingKind(io.trino.orc.metadata.ColumnEncoding.ColumnEncodingKind) StreamCheckpoint(io.trino.orc.checkpoint.StreamCheckpoint) Checkpoints.getDictionaryStreamCheckpoint(io.trino.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint)

Example 33 with Stream

use of io.trino.orc.metadata.Stream in project trino by trinodb.

Method readColumnIndexes of the class StripeReader.

/**
 * Reads the row group indexes of every {@code ROW_INDEX} stream and, when bloom
 * filters are available for the stream's column, attaches the i-th bloom filter to
 * the column statistics of the i-th row group index.
 *
 * @param streams            streams keyed by stream id; only ROW_INDEX streams are read
 * @param streamsData        chunk loaders keyed by stream id, used as the index input
 * @param bloomFilterIndexes bloom filters keyed by column id; a missing or empty list
 *                           leaves the row group indexes as read
 * @return row group indexes keyed by the id of the ROW_INDEX stream they came from
 * @throws IOException declared by this method; presumably raised while reading the
 *                     row indexes
 */
private Map<StreamId, List<RowGroupIndex>> readColumnIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcChunkLoader> streamsData, Map<OrcColumnId, List<BloomFilter>> bloomFilterIndexes) throws IOException {
    ImmutableMap.Builder<StreamId, List<RowGroupIndex>> indexesByStream = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        if (streamEntry.getValue().getStreamKind() != ROW_INDEX) {
            continue;
        }
        StreamId streamId = streamEntry.getKey();
        OrcInputStream indexInput = new OrcInputStream(streamsData.get(streamId));
        List<BloomFilter> columnBloomFilters = bloomFilterIndexes.get(streamId.getColumnId());
        List<RowGroupIndex> indexes = metadataReader.readRowIndexes(hiveWriterVersion, indexInput);
        boolean hasBloomFilters = columnBloomFilters != null && !columnBloomFilters.isEmpty();
        if (hasBloomFilters) {
            // rebuild each row group index with its positionally matching bloom filter
            ImmutableList.Builder<RowGroupIndex> enriched = ImmutableList.builder();
            int position = 0;
            for (RowGroupIndex index : indexes) {
                ColumnStatistics statistics = index.getColumnStatistics().withBloomFilter(columnBloomFilters.get(position));
                enriched.add(new RowGroupIndex(index.getPositions(), statistics));
                position++;
            }
            indexes = enriched.build();
        }
        indexesByStream.put(streamId, indexes);
    }
    return indexesByStream.buildOrThrow();
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) OrcInputStream(io.trino.orc.stream.OrcInputStream) ImmutableList(com.google.common.collect.ImmutableList) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BloomFilter(io.trino.orc.metadata.statistics.BloomFilter) StreamCheckpoint(io.trino.orc.checkpoint.StreamCheckpoint) Checkpoints.getDictionaryStreamCheckpoint(io.trino.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) RowGroupIndex(io.trino.orc.metadata.RowGroupIndex) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Stream(io.trino.orc.metadata.Stream) OrcInputStream(io.trino.orc.stream.OrcInputStream) ValueInputStream(io.trino.orc.stream.ValueInputStream) InputStream(java.io.InputStream)

Aggregations

Stream (io.trino.orc.metadata.Stream)33 Slice (io.airlift.slice.Slice)23 ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics)23 StreamDataOutput (io.trino.orc.stream.StreamDataOutput)20 ArrayList (java.util.ArrayList)20 List (java.util.List)20 ImmutableList (com.google.common.collect.ImmutableList)19 PresentOutputStream (io.trino.orc.stream.PresentOutputStream)18 RowGroupIndex (io.trino.orc.metadata.RowGroupIndex)16 BooleanStreamCheckpoint (io.trino.orc.checkpoint.BooleanStreamCheckpoint)14 OrcColumnId (io.trino.orc.metadata.OrcColumnId)11 BloomFilter (io.trino.orc.metadata.statistics.BloomFilter)9 OrcInputStream (io.trino.orc.stream.OrcInputStream)9 InputStream (java.io.InputStream)9 ImmutableMap (com.google.common.collect.ImmutableMap)8 LongOutputStream (io.trino.orc.stream.LongOutputStream)8 ValueInputStream (io.trino.orc.stream.ValueInputStream)8 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)7 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)7 StreamCheckpoint (io.trino.orc.checkpoint.StreamCheckpoint)6