use of io.trino.orc.metadata.Stream in project trino by trinodb.
the class StripeReader method readBloomFilterIndexes.
private Map<OrcColumnId, List<BloomFilter>> readBloomFilterIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcChunkLoader> streamsData) throws IOException {
HashMap<OrcColumnId, List<BloomFilter>> bloomFilters = new HashMap<>();
for (Entry<StreamId, Stream> entry : streams.entrySet()) {
Stream stream = entry.getValue();
if (stream.getStreamKind() == BLOOM_FILTER_UTF8) {
OrcInputStream inputStream = new OrcInputStream(streamsData.get(entry.getKey()));
bloomFilters.put(stream.getColumnId(), metadataReader.readBloomFilterIndexes(inputStream));
}
}
for (Entry<StreamId, Stream> entry : streams.entrySet()) {
Stream stream = entry.getValue();
if (stream.getStreamKind() == BLOOM_FILTER && !bloomFilters.containsKey(stream.getColumnId())) {
OrcInputStream inputStream = new OrcInputStream(streamsData.get(entry.getKey()));
bloomFilters.put(entry.getKey().getColumnId(), metadataReader.readBloomFilterIndexes(inputStream));
}
}
return ImmutableMap.copyOf(bloomFilters);
}
use of io.trino.orc.metadata.Stream in project trino by trinodb.
the class StripeReader method createValueStreams.
private Map<StreamId, ValueInputStream<?>> createValueStreams(Map<StreamId, Stream> streams, Map<StreamId, OrcChunkLoader> streamsData, ColumnMetadata<ColumnEncoding> columnEncodings) {
ImmutableMap.Builder<StreamId, ValueInputStream<?>> valueStreams = ImmutableMap.builder();
for (Entry<StreamId, Stream> entry : streams.entrySet()) {
StreamId streamId = entry.getKey();
Stream stream = entry.getValue();
ColumnEncodingKind columnEncoding = columnEncodings.get(stream.getColumnId()).getColumnEncodingKind();
// skip index and empty streams
if (isIndexStream(stream) || stream.getLength() == 0) {
continue;
}
OrcChunkLoader chunkLoader = streamsData.get(streamId);
OrcTypeKind columnType = types.get(stream.getColumnId()).getOrcTypeKind();
valueStreams.put(streamId, ValueStreams.createValueStreams(streamId, chunkLoader, columnType, columnEncoding));
}
return valueStreams.buildOrThrow();
}
use of io.trino.orc.metadata.Stream in project trino by trinodb.
the class Checkpoints method getStreamCheckpoints.
public static Map<StreamId, StreamCheckpoint> getStreamCheckpoints(Set<OrcColumnId> columns, ColumnMetadata<OrcType> columnTypes, boolean compressed, int rowGroupId, ColumnMetadata<ColumnEncoding> columnEncodings, Map<StreamId, Stream> streams, Map<StreamId, List<RowGroupIndex>> columnIndexes) throws InvalidCheckpointException {
ImmutableSetMultimap.Builder<OrcColumnId, StreamKind> streamKindsBuilder = ImmutableSetMultimap.builder();
for (Stream stream : streams.values()) {
streamKindsBuilder.put(stream.getColumnId(), stream.getStreamKind());
}
SetMultimap<OrcColumnId, StreamKind> streamKinds = streamKindsBuilder.build();
ImmutableMap.Builder<StreamId, StreamCheckpoint> checkpoints = ImmutableMap.builder();
for (Map.Entry<StreamId, List<RowGroupIndex>> entry : columnIndexes.entrySet()) {
OrcColumnId columnId = entry.getKey().getColumnId();
if (!columns.contains(columnId)) {
continue;
}
List<Integer> positionsList = entry.getValue().get(rowGroupId).getPositions();
ColumnEncodingKind columnEncoding = columnEncodings.get(columnId).getColumnEncodingKind();
OrcTypeKind columnType = columnTypes.get(columnId).getOrcTypeKind();
Set<StreamKind> availableStreams = streamKinds.get(columnId);
ColumnPositionsList columnPositionsList = new ColumnPositionsList(columnId, columnType, positionsList);
switch(columnType) {
case BOOLEAN:
checkpoints.putAll(getBooleanColumnCheckpoints(columnId, compressed, availableStreams, columnPositionsList));
break;
case BYTE:
checkpoints.putAll(getByteColumnCheckpoints(columnId, compressed, availableStreams, columnPositionsList));
break;
case SHORT:
case INT:
case LONG:
case DATE:
checkpoints.putAll(getLongColumnCheckpoints(columnId, columnEncoding, compressed, availableStreams, columnPositionsList));
break;
case FLOAT:
checkpoints.putAll(getFloatColumnCheckpoints(columnId, compressed, availableStreams, columnPositionsList));
break;
case DOUBLE:
checkpoints.putAll(getDoubleColumnCheckpoints(columnId, compressed, availableStreams, columnPositionsList));
break;
case TIMESTAMP:
case TIMESTAMP_INSTANT:
checkpoints.putAll(getTimestampColumnCheckpoints(columnId, columnEncoding, compressed, availableStreams, columnPositionsList));
break;
case BINARY:
case STRING:
case VARCHAR:
case CHAR:
checkpoints.putAll(getSliceColumnCheckpoints(columnId, columnEncoding, compressed, availableStreams, columnPositionsList));
break;
case LIST:
case MAP:
checkpoints.putAll(getListOrMapColumnCheckpoints(columnId, columnEncoding, compressed, availableStreams, columnPositionsList));
break;
case STRUCT:
checkpoints.putAll(getStructColumnCheckpoints(columnId, compressed, availableStreams, columnPositionsList));
break;
case DECIMAL:
checkpoints.putAll(getDecimalColumnCheckpoints(columnId, columnEncoding, compressed, availableStreams, columnPositionsList));
break;
default:
throw new IllegalArgumentException("Unsupported column type " + columnType);
}
}
return checkpoints.buildOrThrow();
}
use of io.trino.orc.metadata.Stream in project trino by trinodb.
the class DecimalColumnWriter method getIndexStreams.
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
checkState(closed);
ImmutableList.Builder<RowGroupIndex> rowGroupIndexes = ImmutableList.builder();
List<DecimalStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
List<LongStreamCheckpoint> scaleCheckpoints = scaleStream.getCheckpoints();
Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();
for (int i = 0; i < rowGroupColumnStatistics.size(); i++) {
int groupId = i;
ColumnStatistics columnStatistics = rowGroupColumnStatistics.get(groupId);
DecimalStreamCheckpoint dataCheckpoint = dataCheckpoints.get(groupId);
LongStreamCheckpoint scaleCheckpoint = scaleCheckpoints.get(groupId);
Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(checkpoints -> checkpoints.get(groupId));
List<Integer> positions = createDecimalColumnPositionList(compressed, dataCheckpoint, scaleCheckpoint, presentCheckpoint);
rowGroupIndexes.add(new RowGroupIndex(positions, columnStatistics));
}
Slice slice = metadataWriter.writeRowIndexes(rowGroupIndexes.build());
Stream stream = new Stream(columnId, StreamKind.ROW_INDEX, slice.length(), false);
return ImmutableList.of(new StreamDataOutput(slice, stream));
}
use of io.trino.orc.metadata.Stream in project trino by trinodb.
the class DoubleColumnWriter method getIndexStreams.
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
checkState(closed);
ImmutableList.Builder<RowGroupIndex> rowGroupIndexes = ImmutableList.builder();
List<DoubleStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();
for (int i = 0; i < rowGroupColumnStatistics.size(); i++) {
int groupId = i;
ColumnStatistics columnStatistics = rowGroupColumnStatistics.get(groupId);
DoubleStreamCheckpoint dataCheckpoint = dataCheckpoints.get(groupId);
Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(checkpoints -> checkpoints.get(groupId));
List<Integer> positions = createDoubleColumnPositionList(compressed, dataCheckpoint, presentCheckpoint);
rowGroupIndexes.add(new RowGroupIndex(positions, columnStatistics));
}
Slice slice = metadataWriter.writeRowIndexes(rowGroupIndexes.build());
Stream stream = new Stream(columnId, StreamKind.ROW_INDEX, slice.length(), false);
return ImmutableList.of(new StreamDataOutput(slice, stream));
}
Aggregations