use of io.trino.orc.checkpoint.StreamCheckpoint in project trino by trinodb.
the class StripeReader method createRowGroups.
private List<RowGroup> createRowGroups(int rowsInStripe, Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, Map<StreamId, List<RowGroupIndex>> columnIndexes, Set<Integer> selectedRowGroups, ColumnMetadata<ColumnEncoding> encodings) throws InvalidCheckpointException {
int rowsInRowGroup = this.rowsInRowGroup.orElseThrow(() -> new IllegalStateException("Cannot create row groups if row group info is missing"));
ImmutableList.Builder<RowGroup> rowGroupBuilder = ImmutableList.builder();
for (int rowGroupId : selectedRowGroups) {
Map<StreamId, StreamCheckpoint> checkpoints = getStreamCheckpoints(includedOrcColumnIds, types, decompressor.isPresent(), rowGroupId, encodings, streams, columnIndexes);
int rowOffset = rowGroupId * rowsInRowGroup;
int rowsInGroup = Math.min(rowsInStripe - rowOffset, rowsInRowGroup);
long minAverageRowBytes = columnIndexes.entrySet().stream().mapToLong(e -> e.getValue().get(rowGroupId).getColumnStatistics().getMinAverageValueSizeInBytes()).sum();
rowGroupBuilder.add(createRowGroup(rowGroupId, rowOffset, rowsInGroup, minAverageRowBytes, valueStreams, checkpoints));
}
return rowGroupBuilder.build();
}
use of io.trino.orc.checkpoint.StreamCheckpoint in project trino by trinodb.
the class StripeReader method createDictionaryStreamSources.
private InputStreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, ColumnMetadata<ColumnEncoding> columnEncodings) {
ImmutableMap.Builder<StreamId, InputStreamSource<?>> dictionaryStreamBuilder = ImmutableMap.builder();
for (Entry<StreamId, Stream> entry : streams.entrySet()) {
StreamId streamId = entry.getKey();
Stream stream = entry.getValue();
OrcColumnId column = stream.getColumnId();
// only process dictionary streams
ColumnEncodingKind columnEncoding = columnEncodings.get(column).getColumnEncodingKind();
if (!isDictionary(stream, columnEncoding)) {
continue;
}
// skip streams without data
ValueInputStream<?> valueStream = valueStreams.get(streamId);
if (valueStream == null) {
continue;
}
OrcTypeKind columnType = types.get(stream.getColumnId()).getOrcTypeKind();
StreamCheckpoint streamCheckpoint = getDictionaryStreamCheckpoint(streamId, columnType, columnEncoding);
InputStreamSource<?> streamSource = createCheckpointStreamSource(valueStream, streamCheckpoint);
dictionaryStreamBuilder.put(streamId, streamSource);
}
return new InputStreamSources(dictionaryStreamBuilder.buildOrThrow());
}
use of io.trino.orc.checkpoint.StreamCheckpoint in project trino by trinodb.
the class StripeReader method createRowGroup.
private static RowGroup createRowGroup(int groupId, int rowOffset, int rowCount, long minAverageRowBytes, Map<StreamId, ValueInputStream<?>> valueStreams, Map<StreamId, StreamCheckpoint> checkpoints) {
ImmutableMap.Builder<StreamId, InputStreamSource<?>> builder = ImmutableMap.builder();
for (Entry<StreamId, StreamCheckpoint> entry : checkpoints.entrySet()) {
StreamId streamId = entry.getKey();
StreamCheckpoint checkpoint = entry.getValue();
// skip streams without data
ValueInputStream<?> valueStream = valueStreams.get(streamId);
if (valueStream == null) {
continue;
}
builder.put(streamId, createCheckpointStreamSource(valueStream, checkpoint));
}
InputStreamSources rowGroupStreams = new InputStreamSources(builder.buildOrThrow());
return new RowGroup(groupId, rowOffset, rowCount, minAverageRowBytes, rowGroupStreams);
}
Aggregations