Search in sources :

Example 11 with Stream

use of io.trino.orc.metadata.Stream in project trino by trinodb.

the class StripeReader method readBloomFilterIndexes.

/**
 * Reads the bloom filter indexes contained in the supplied streams and groups them by column.
 *
 * <p>Two passes are made over {@code streams}. The first loads every {@code BLOOM_FILTER_UTF8}
 * stream. The second loads {@code BLOOM_FILTER} streams, but only for columns that did not
 * receive an entry in the first pass, so a UTF8 bloom filter wins when a column has both kinds.
 *
 * @param streams stream descriptors keyed by stream id
 * @param streamsData chunk loaders keyed by the same stream ids; used as the data source for each bloom filter stream
 * @return an immutable map from column id to the bloom filters read for that column
 * @throws IOException declared by this method; NOTE(review): presumably raised while decoding a stream — confirm against metadataReader
 */
private Map<OrcColumnId, List<BloomFilter>> readBloomFilterIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcChunkLoader> streamsData) throws IOException {
    Map<OrcColumnId, List<BloomFilter>> filtersByColumn = new HashMap<>();

    // Pass 1: UTF8 bloom filters are loaded unconditionally.
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        Stream candidate = streamEntry.getValue();
        if (candidate.getStreamKind() != BLOOM_FILTER_UTF8) {
            continue;
        }
        OrcInputStream utf8Input = new OrcInputStream(streamsData.get(streamEntry.getKey()));
        filtersByColumn.put(candidate.getColumnId(), metadataReader.readBloomFilterIndexes(utf8Input));
    }

    // Pass 2: plain bloom filters only fill in columns that pass 1 left empty.
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        Stream candidate = streamEntry.getValue();
        if (candidate.getStreamKind() != BLOOM_FILTER || filtersByColumn.containsKey(candidate.getColumnId())) {
            continue;
        }
        OrcInputStream fallbackInput = new OrcInputStream(streamsData.get(streamEntry.getKey()));
        filtersByColumn.put(streamEntry.getKey().getColumnId(), metadataReader.readBloomFilterIndexes(fallbackInput));
    }

    return ImmutableMap.copyOf(filtersByColumn);
}
Also used : OrcColumnId(io.trino.orc.metadata.OrcColumnId) OrcInputStream(io.trino.orc.stream.OrcInputStream) HashMap(java.util.HashMap) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Stream(io.trino.orc.metadata.Stream) OrcInputStream(io.trino.orc.stream.OrcInputStream) ValueInputStream(io.trino.orc.stream.ValueInputStream) InputStream(java.io.InputStream)

Example 12 with Stream

use of io.trino.orc.metadata.Stream in project trino by trinodb.

the class StripeReader method createValueStreams.

/**
 * Builds a value input stream for every stream that carries column data.
 *
 * <p>Streams for which {@code isIndexStream} returns true and streams whose length is zero
 * are left out of the result.
 *
 * @param streams stream descriptors keyed by stream id
 * @param streamsData chunk loaders keyed by the same stream ids
 * @param columnEncodings per-column encodings, looked up by each stream's column id
 * @return an immutable map from stream id to the value stream created for it
 */
private Map<StreamId, ValueInputStream<?>> createValueStreams(Map<StreamId, Stream> streams, Map<StreamId, OrcChunkLoader> streamsData, ColumnMetadata<ColumnEncoding> columnEncodings) {
    ImmutableMap.Builder<StreamId, ValueInputStream<?>> result = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        Stream descriptor = streamEntry.getValue();
        // The encoding is resolved for every stream, including ones that end up skipped below.
        ColumnEncodingKind encodingKind = columnEncodings.get(descriptor.getColumnId()).getColumnEncodingKind();

        // only non-index streams that actually contain bytes get a value stream
        if (!isIndexStream(descriptor) && descriptor.getLength() != 0) {
            StreamId id = streamEntry.getKey();
            OrcChunkLoader loader = streamsData.get(id);
            OrcTypeKind typeKind = types.get(descriptor.getColumnId()).getOrcTypeKind();
            result.put(id, ValueStreams.createValueStreams(id, loader, typeKind, encodingKind));
        }
    }
    return result.buildOrThrow();
}
Also used : ValueInputStream(io.trino.orc.stream.ValueInputStream) Stream(io.trino.orc.metadata.Stream) OrcInputStream(io.trino.orc.stream.OrcInputStream) ValueInputStream(io.trino.orc.stream.ValueInputStream) InputStream(java.io.InputStream) OrcChunkLoader(io.trino.orc.stream.OrcChunkLoader) OrcTypeKind(io.trino.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ColumnEncodingKind(io.trino.orc.metadata.ColumnEncoding.ColumnEncodingKind)

Example 13 with Stream

use of io.trino.orc.metadata.Stream in project trino by trinodb.

the class Checkpoints method getStreamCheckpoints.

/**
 * Computes the stream checkpoints of one row group for the requested columns.
 *
 * <p>The stream kinds present for each column are gathered from {@code streams} first. Then,
 * for every entry of {@code columnIndexes} whose column is in {@code columns}, the positions
 * recorded for {@code rowGroupId} are handed to the checkpoint builder matching the column's
 * ORC type.
 *
 * @param columns ids of the columns to produce checkpoints for; other columns are skipped
 * @param columnTypes per-column ORC types, used to choose the checkpoint builder
 * @param compressed passed through unchanged to every checkpoint builder
 * @param rowGroupId index into each column's list of row group indexes
 * @param columnEncodings per-column encodings, passed to the builders that take an encoding
 * @param streams stream descriptors; only their column id and stream kind are read here
 * @param columnIndexes row group indexes keyed by stream id
 * @return an immutable map from stream id to checkpoint
 * @throws InvalidCheckpointException if raised while building the checkpoints of a column
 * @throws IllegalArgumentException if a column has a type not handled by the switch below
 */
public static Map<StreamId, StreamCheckpoint> getStreamCheckpoints(Set<OrcColumnId> columns, ColumnMetadata<OrcType> columnTypes, boolean compressed, int rowGroupId, ColumnMetadata<ColumnEncoding> columnEncodings, Map<StreamId, Stream> streams, Map<StreamId, List<RowGroupIndex>> columnIndexes) throws InvalidCheckpointException {
    // Index the available stream kinds by column.
    ImmutableSetMultimap.Builder<OrcColumnId, StreamKind> kindsByColumnBuilder = ImmutableSetMultimap.builder();
    streams.values().forEach(stream -> kindsByColumnBuilder.put(stream.getColumnId(), stream.getStreamKind()));
    SetMultimap<OrcColumnId, StreamKind> kindsByColumn = kindsByColumnBuilder.build();

    ImmutableMap.Builder<StreamId, StreamCheckpoint> result = ImmutableMap.builder();
    for (Map.Entry<StreamId, List<RowGroupIndex>> indexEntry : columnIndexes.entrySet()) {
        OrcColumnId column = indexEntry.getKey().getColumnId();
        if (!columns.contains(column)) {
            // column was not requested
            continue;
        }

        List<Integer> rawPositions = indexEntry.getValue().get(rowGroupId).getPositions();
        ColumnEncodingKind encoding = columnEncodings.get(column).getColumnEncodingKind();
        OrcTypeKind type = columnTypes.get(column).getOrcTypeKind();
        Set<StreamKind> available = kindsByColumn.get(column);
        ColumnPositionsList positions = new ColumnPositionsList(column, type, rawPositions);

        // Dispatch on the ORC type; types sharing a physical layout share a builder.
        switch (type) {
            case BOOLEAN:
                result.putAll(getBooleanColumnCheckpoints(column, compressed, available, positions));
                break;
            case BYTE:
                result.putAll(getByteColumnCheckpoints(column, compressed, available, positions));
                break;
            case SHORT:
            case INT:
            case LONG:
            case DATE:
                result.putAll(getLongColumnCheckpoints(column, encoding, compressed, available, positions));
                break;
            case FLOAT:
                result.putAll(getFloatColumnCheckpoints(column, compressed, available, positions));
                break;
            case DOUBLE:
                result.putAll(getDoubleColumnCheckpoints(column, compressed, available, positions));
                break;
            case TIMESTAMP:
            case TIMESTAMP_INSTANT:
                result.putAll(getTimestampColumnCheckpoints(column, encoding, compressed, available, positions));
                break;
            case BINARY:
            case STRING:
            case VARCHAR:
            case CHAR:
                result.putAll(getSliceColumnCheckpoints(column, encoding, compressed, available, positions));
                break;
            case LIST:
            case MAP:
                result.putAll(getListOrMapColumnCheckpoints(column, encoding, compressed, available, positions));
                break;
            case STRUCT:
                result.putAll(getStructColumnCheckpoints(column, compressed, available, positions));
                break;
            case DECIMAL:
                result.putAll(getDecimalColumnCheckpoints(column, encoding, compressed, available, positions));
                break;
            default:
                throw new IllegalArgumentException("Unsupported column type " + type);
        }
    }
    return result.buildOrThrow();
}
Also used : OrcColumnId(io.trino.orc.metadata.OrcColumnId) StreamId(io.trino.orc.StreamId) StreamKind(io.trino.orc.metadata.Stream.StreamKind) ImmutableSetMultimap(com.google.common.collect.ImmutableSetMultimap) OrcTypeKind(io.trino.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) Stream(io.trino.orc.metadata.Stream) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) InputStreamCheckpoint.createInputStreamCheckpoint(io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint) ColumnEncodingKind(io.trino.orc.metadata.ColumnEncoding.ColumnEncodingKind)

Example 14 with Stream

use of io.trino.orc.metadata.Stream in project trino by trinodb.

the class DecimalColumnWriter method getIndexStreams.

/**
 * Produces the row index stream for this decimal column.
 *
 * <p>For each entry of {@code rowGroupColumnStatistics}, the data, scale and (if available)
 * present checkpoints at the same index are combined into a position list and paired with
 * that row group's statistics. The resulting indexes are serialized with
 * {@code metadataWriter} into a single {@code ROW_INDEX} stream.
 *
 * @param metadataWriter writer used to serialize the row group indexes
 * @return a single-element list holding the serialized row index and its stream descriptor
 * @throws IOException declared by this method; NOTE(review): presumably from serializing the indexes — confirm
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    checkState(closed);

    ImmutableList.Builder<RowGroupIndex> indexes = ImmutableList.builder();
    List<DecimalStreamCheckpoint> dataPoints = dataStream.getCheckpoints();
    List<LongStreamCheckpoint> scalePoints = scaleStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentPoints = presentStream.getCheckpoints();

    for (int index = 0; index < rowGroupColumnStatistics.size(); index++) {
        // effectively-final copy so the lambda below can capture it
        final int rowGroup = index;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(rowGroup);
        DecimalStreamCheckpoint dataPoint = dataPoints.get(rowGroup);
        LongStreamCheckpoint scalePoint = scalePoints.get(rowGroup);
        Optional<BooleanStreamCheckpoint> presentPoint = presentPoints.map(points -> points.get(rowGroup));
        indexes.add(new RowGroupIndex(
                createDecimalColumnPositionList(compressed, dataPoint, scalePoint, presentPoint),
                statistics));
    }

    Slice serialized = metadataWriter.writeRowIndexes(indexes.build());
    return ImmutableList.of(new StreamDataOutput(
            serialized,
            new Stream(columnId, StreamKind.ROW_INDEX, serialized.length(), false)));
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(io.trino.orc.stream.StreamDataOutput) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint) DecimalStreamCheckpoint(io.trino.orc.checkpoint.DecimalStreamCheckpoint) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint) DecimalStreamCheckpoint(io.trino.orc.checkpoint.DecimalStreamCheckpoint) RowGroupIndex(io.trino.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(io.trino.orc.stream.PresentOutputStream) DecimalOutputStream(io.trino.orc.stream.DecimalOutputStream) Stream(io.trino.orc.metadata.Stream) LongOutputStream(io.trino.orc.stream.LongOutputStream)

Example 15 with Stream

use of io.trino.orc.metadata.Stream in project trino by trinodb.

the class DoubleColumnWriter method getIndexStreams.

/**
 * Produces the row index stream for this double column.
 *
 * <p>For each entry of {@code rowGroupColumnStatistics}, the data and (if available) present
 * checkpoints at the same index are combined into a position list and paired with that row
 * group's statistics. The resulting indexes are serialized with {@code metadataWriter} into a
 * single {@code ROW_INDEX} stream.
 *
 * @param metadataWriter writer used to serialize the row group indexes
 * @return a single-element list holding the serialized row index and its stream descriptor
 * @throws IOException declared by this method; NOTE(review): presumably from serializing the indexes — confirm
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    checkState(closed);

    ImmutableList.Builder<RowGroupIndex> indexes = ImmutableList.builder();
    List<DoubleStreamCheckpoint> dataPoints = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentPoints = presentStream.getCheckpoints();

    for (int index = 0; index < rowGroupColumnStatistics.size(); index++) {
        // effectively-final copy so the lambda below can capture it
        final int rowGroup = index;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(rowGroup);
        DoubleStreamCheckpoint dataPoint = dataPoints.get(rowGroup);
        Optional<BooleanStreamCheckpoint> presentPoint = presentPoints.map(points -> points.get(rowGroup));
        indexes.add(new RowGroupIndex(
                createDoubleColumnPositionList(compressed, dataPoint, presentPoint),
                statistics));
    }

    Slice serialized = metadataWriter.writeRowIndexes(indexes.build());
    return ImmutableList.of(new StreamDataOutput(
            serialized,
            new Stream(columnId, StreamKind.ROW_INDEX, serialized.length(), false)));
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) StreamDataOutput(io.trino.orc.stream.StreamDataOutput) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) DoubleStreamCheckpoint(io.trino.orc.checkpoint.DoubleStreamCheckpoint) RowGroupIndex(io.trino.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DoubleStreamCheckpoint(io.trino.orc.checkpoint.DoubleStreamCheckpoint) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) PresentOutputStream(io.trino.orc.stream.PresentOutputStream) DoubleOutputStream(io.trino.orc.stream.DoubleOutputStream) Stream(io.trino.orc.metadata.Stream)

Aggregations

Stream (io.trino.orc.metadata.Stream)33 Slice (io.airlift.slice.Slice)23 ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics)23 StreamDataOutput (io.trino.orc.stream.StreamDataOutput)20 ArrayList (java.util.ArrayList)20 List (java.util.List)20 ImmutableList (com.google.common.collect.ImmutableList)19 PresentOutputStream (io.trino.orc.stream.PresentOutputStream)18 RowGroupIndex (io.trino.orc.metadata.RowGroupIndex)16 BooleanStreamCheckpoint (io.trino.orc.checkpoint.BooleanStreamCheckpoint)14 OrcColumnId (io.trino.orc.metadata.OrcColumnId)11 BloomFilter (io.trino.orc.metadata.statistics.BloomFilter)9 OrcInputStream (io.trino.orc.stream.OrcInputStream)9 InputStream (java.io.InputStream)9 ImmutableMap (com.google.common.collect.ImmutableMap)8 LongOutputStream (io.trino.orc.stream.LongOutputStream)8 ValueInputStream (io.trino.orc.stream.ValueInputStream)8 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)7 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)7 StreamCheckpoint (io.trino.orc.checkpoint.StreamCheckpoint)6