Use of io.trino.orc.metadata.ColumnEncoding in project trino by trinodb.
Class SliceDictionaryColumnWriter, method bufferOutputData:
private void bufferOutputData()
{
    checkState(closed);
    checkState(!directEncoded);

    Block dictionaryElements = dictionary.getElementBlock();

    // write dictionary in sorted order
    int[] sortedDictionaryIndexes = getSortedDictionaryNullsLast(dictionaryElements);
    for (int sortedDictionaryIndex : sortedDictionaryIndexes) {
        if (!dictionaryElements.isNull(sortedDictionaryIndex)) {
            int length = dictionaryElements.getSliceLength(sortedDictionaryIndex);
            dictionaryLengthStream.writeLong(length);
            Slice value = dictionaryElements.getSlice(sortedDictionaryIndex, 0, length);
            dictionaryDataStream.writeSlice(value);
        }
    }
    columnEncoding = new ColumnEncoding(DICTIONARY_V2, dictionaryElements.getPositionCount() - 1);

    // build index from original dictionary index to new sorted position
    int[] originalDictionaryToSortedIndex = new int[sortedDictionaryIndexes.length];
    for (int sortOrdinal = 0; sortOrdinal < sortedDictionaryIndexes.length; sortOrdinal++) {
        int dictionaryIndex = sortedDictionaryIndexes[sortOrdinal];
        originalDictionaryToSortedIndex[dictionaryIndex] = sortOrdinal;
    }

    if (!rowGroups.isEmpty()) {
        presentStream.recordCheckpoint();
        dataStream.recordCheckpoint();
    }
    for (DictionaryRowGroup rowGroup : rowGroups) {
        IntBigArray dictionaryIndexes = rowGroup.getDictionaryIndexes();
        for (int position = 0; position < rowGroup.getValueCount(); position++) {
            presentStream.writeBoolean(dictionaryIndexes.get(position) != 0);
        }
        for (int position = 0; position < rowGroup.getValueCount(); position++) {
            int originalDictionaryIndex = dictionaryIndexes.get(position);
            // index zero in original dictionary is reserved for null
            if (originalDictionaryIndex != 0) {
                int sortedIndex = originalDictionaryToSortedIndex[originalDictionaryIndex];
                if (sortedIndex < 0) {
                    throw new IllegalArgumentException();
                }
                dataStream.writeLong(sortedIndex);
            }
        }
        presentStream.recordCheckpoint();
        dataStream.recordCheckpoint();
    }

    // free the dictionary memory
    dictionary.clear();

    dictionaryDataStream.close();
    dictionaryLengthStream.close();

    dataStream.close();
    presentStream.close();
}
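The key step above is remapping each row's original dictionary index to that value's position in the sorted dictionary. Below is a minimal, self-contained sketch of the remapping idea, using plain Java arrays in place of Trino's Block and IntBigArray; the class and variable names are illustrative, not from the Trino codebase.

import java.util.Arrays;
import java.util.Comparator;

public class DictionaryRemapSketch
{
    public static void main(String[] args)
    {
        // original (insertion-order) dictionary; slot 0 is reserved for null, as in the writer above
        String[] dictionary = {null, "pear", "apple", "plum"};

        // sort the indexes by their value, nulls last (the role of getSortedDictionaryNullsLast)
        Integer[] sortedIndexes = {1, 2, 3, 0};
        Arrays.sort(sortedIndexes, Comparator.comparing(
                (Integer index) -> dictionary[index],
                Comparator.nullsLast(Comparator.<String>naturalOrder())));

        // build original index -> sorted position, like originalDictionaryToSortedIndex
        int[] originalToSorted = new int[dictionary.length];
        for (int sortOrdinal = 0; sortOrdinal < sortedIndexes.length; sortOrdinal++) {
            originalToSorted[sortedIndexes[sortOrdinal]] = sortOrdinal;
        }

        // remap row values: 0 means null (skipped), everything else is rewritten
        int[] rowIndexes = {2, 0, 3, 1};
        for (int originalIndex : rowIndexes) {
            if (originalIndex != 0) {
                System.out.println(dictionary[originalIndex] + " -> sorted position " + originalToSorted[originalIndex]);
            }
        }
    }
}

Sorting the dictionary before writing it is what makes DICTIONARY_V2 readers able to binary-search the dictionary; the remap table is only needed once, while flushing the buffered row groups.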
Use of io.trino.orc.metadata.ColumnEncoding in project trino by trinodb.
Class StripeReader, method readStripe:
public Stripe readStripe(StripeInformation stripe, AggregatedMemoryContext memoryUsage)
        throws IOException
{
    // read the stripe footer
    StripeFooter stripeFooter = readStripeFooter(stripe, memoryUsage);
    ColumnMetadata<ColumnEncoding> columnEncodings = stripeFooter.getColumnEncodings();
    if (writeValidation.isPresent()) {
        writeValidation.get().validateTimeZone(orcDataSource.getId(), stripeFooter.getTimeZone());
    }
    ZoneId fileTimeZone = stripeFooter.getTimeZone();

    // get streams for selected columns
    Map<StreamId, Stream> streams = new HashMap<>();
    for (Stream stream : stripeFooter.getStreams()) {
        if (includedOrcColumnIds.contains(stream.getColumnId()) && isSupportedStreamType(stream, types.get(stream.getColumnId()).getOrcTypeKind())) {
            streams.put(new StreamId(stream), stream);
        }
    }

    // handle stripes with more than one row group
    boolean invalidCheckPoint = false;
    if (rowsInRowGroup.isPresent() && stripe.getNumberOfRows() > rowsInRowGroup.getAsInt()) {
        // determine ranges of the stripe to read
        Map<StreamId, DiskRange> diskRanges = getDiskRanges(stripeFooter.getStreams());
        diskRanges = Maps.filterKeys(diskRanges, Predicates.in(streams.keySet()));

        // read the file regions
        Map<StreamId, OrcChunkLoader> streamsData = readDiskRanges(stripe.getOffset(), diskRanges, memoryUsage);

        // read the bloom filter for each column
        Map<OrcColumnId, List<BloomFilter>> bloomFilterIndexes = readBloomFilterIndexes(streams, streamsData);

        // read the row index for each column
        Map<StreamId, List<RowGroupIndex>> columnIndexes = readColumnIndexes(streams, streamsData, bloomFilterIndexes);
        if (writeValidation.isPresent()) {
            writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), stripe.getOffset(), columnIndexes);
        }

        // select the row groups matching the tuple domain
        Set<Integer> selectedRowGroups = selectRowGroups(stripe, columnIndexes);

        // if all row groups are skipped, return null
        if (selectedRowGroups.isEmpty()) {
            // set accounted memory usage to zero
            memoryUsage.close();
            return null;
        }

        // value streams
        Map<StreamId, ValueInputStream<?>> valueStreams = createValueStreams(streams, streamsData, columnEncodings);

        // build the dictionary streams
        InputStreamSources dictionaryStreamSources = createDictionaryStreamSources(streams, valueStreams, columnEncodings);

        // build the row groups
        try {
            List<RowGroup> rowGroups = createRowGroups(stripe.getNumberOfRows(), streams, valueStreams, columnIndexes, selectedRowGroups, columnEncodings);
            return new Stripe(stripe.getNumberOfRows(), fileTimeZone, columnEncodings, rowGroups, dictionaryStreamSources);
        }
        catch (InvalidCheckpointException e) {
            // The ORC file contains a corrupt checkpoint stream; treat the stripe as a single row group.
            invalidCheckPoint = true;
        }
    }

    // stripe only has one row group
    ImmutableMap.Builder<StreamId, DiskRange> diskRangesBuilder = ImmutableMap.builder();
    for (Entry<StreamId, DiskRange> entry : getDiskRanges(stripeFooter.getStreams()).entrySet()) {
        StreamId streamId = entry.getKey();
        if (streams.containsKey(streamId)) {
            diskRangesBuilder.put(entry);
        }
    }
    ImmutableMap<StreamId, DiskRange> diskRanges = diskRangesBuilder.buildOrThrow();

    // read the file regions
    Map<StreamId, OrcChunkLoader> streamsData = readDiskRanges(stripe.getOffset(), diskRanges, memoryUsage);

    long minAverageRowBytes = 0;
    for (Entry<StreamId, Stream> entry : streams.entrySet()) {
        if (entry.getKey().getStreamKind() == ROW_INDEX) {
            List<RowGroupIndex> rowGroupIndexes = metadataReader.readRowIndexes(hiveWriterVersion, new OrcInputStream(streamsData.get(entry.getKey())));
            checkState(rowGroupIndexes.size() == 1 || invalidCheckPoint, "expect a single row group or an invalid check point");
            long totalBytes = 0;
            long totalRows = 0;
            for (RowGroupIndex rowGroupIndex : rowGroupIndexes) {
                ColumnStatistics columnStatistics = rowGroupIndex.getColumnStatistics();
                if (columnStatistics.hasMinAverageValueSizeInBytes()) {
                    totalBytes += columnStatistics.getMinAverageValueSizeInBytes() * columnStatistics.getNumberOfValues();
                    totalRows += columnStatistics.getNumberOfValues();
                }
            }
            if (totalRows > 0) {
                minAverageRowBytes += totalBytes / totalRows;
            }
        }
    }

    // value streams
    Map<StreamId, ValueInputStream<?>> valueStreams = createValueStreams(streams, streamsData, columnEncodings);

    // build the dictionary streams
    InputStreamSources dictionaryStreamSources = createDictionaryStreamSources(streams, valueStreams, columnEncodings);

    // build the row group
    ImmutableMap.Builder<StreamId, InputStreamSource<?>> builder = ImmutableMap.builder();
    for (Entry<StreamId, ValueInputStream<?>> entry : valueStreams.entrySet()) {
        builder.put(entry.getKey(), new ValueInputStreamSource<>(entry.getValue()));
    }
    RowGroup rowGroup = new RowGroup(0, 0, stripe.getNumberOfRows(), minAverageRowBytes, new InputStreamSources(builder.buildOrThrow()));

    return new Stripe(stripe.getNumberOfRows(), fileTimeZone, columnEncodings, ImmutableList.of(rowGroup), dictionaryStreamSources);
}
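In the single-row-group path, minAverageRowBytes is a per-column weighted average summed across columns: for each ROW_INDEX stream, average value size times value count is accumulated over the row groups, divided by the total value count, and the per-column results are added together. A minimal sketch of that arithmetic, using a plain record in place of Trino's ColumnStatistics (names here are illustrative, not the real API):

import java.util.List;

public class MinAverageRowBytesSketch
{
    // stand-in for the relevant fields of a column's per-row-group statistics
    record GroupStats(long averageValueBytes, long numberOfValues) {}

    static long minAverageRowBytes(List<List<GroupStats>> statsPerColumn)
    {
        long minAverageRowBytes = 0;
        for (List<GroupStats> rowGroups : statsPerColumn) {
            long totalBytes = 0;
            long totalRows = 0;
            for (GroupStats stats : rowGroups) {
                totalBytes += stats.averageValueBytes() * stats.numberOfValues();
                totalRows += stats.numberOfValues();
            }
            if (totalRows > 0) {
                // weighted average value size for this column contributes to the row-size estimate
                minAverageRowBytes += totalBytes / totalRows;
            }
        }
        return minAverageRowBytes;
    }

    public static void main(String[] args)
    {
        // two columns, two row groups each
        List<List<GroupStats>> stats = List.of(
                List.of(new GroupStats(8, 10_000), new GroupStats(8, 10_000)),
                List.of(new GroupStats(24, 10_000), new GroupStats(32, 10_000)));
        System.out.println(minAverageRowBytes(stats)); // 8 + 28 = 36
    }
}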
Use of io.trino.orc.metadata.ColumnEncoding in project trino by trinodb.
Class StripeReader, method createRowGroups:
private List<RowGroup> createRowGroups(
        int rowsInStripe,
        Map<StreamId, Stream> streams,
        Map<StreamId, ValueInputStream<?>> valueStreams,
        Map<StreamId, List<RowGroupIndex>> columnIndexes,
        Set<Integer> selectedRowGroups,
        ColumnMetadata<ColumnEncoding> encodings)
        throws InvalidCheckpointException
{
    int rowsInRowGroup = this.rowsInRowGroup.orElseThrow(() -> new IllegalStateException("Cannot create row groups if row group info is missing"));

    ImmutableList.Builder<RowGroup> rowGroupBuilder = ImmutableList.builder();

    for (int rowGroupId : selectedRowGroups) {
        Map<StreamId, StreamCheckpoint> checkpoints = getStreamCheckpoints(includedOrcColumnIds, types, decompressor.isPresent(), rowGroupId, encodings, streams, columnIndexes);
        int rowOffset = rowGroupId * rowsInRowGroup;
        int rowsInGroup = Math.min(rowsInStripe - rowOffset, rowsInRowGroup);
        long minAverageRowBytes = columnIndexes.entrySet().stream()
                .mapToLong(e -> e.getValue().get(rowGroupId).getColumnStatistics().getMinAverageValueSizeInBytes())
                .sum();
        rowGroupBuilder.add(createRowGroup(rowGroupId, rowOffset, rowsInGroup, minAverageRowBytes, valueStreams, checkpoints));
    }

    return rowGroupBuilder.build();
}
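The row-offset arithmetic is worth spelling out: each selected row group starts at rowGroupId * rowsInRowGroup, and the final group may be shorter than rowsInRowGroup when the stripe's row count is not an exact multiple. A toy example with made-up values:

public class RowGroupBoundsSketch
{
    public static void main(String[] args)
    {
        int rowsInStripe = 25_000;
        int rowsInRowGroup = 10_000;

        int rowGroupCount = (rowsInStripe + rowsInRowGroup - 1) / rowsInRowGroup; // 3
        for (int rowGroupId = 0; rowGroupId < rowGroupCount; rowGroupId++) {
            int rowOffset = rowGroupId * rowsInRowGroup;
            int rowsInGroup = Math.min(rowsInStripe - rowOffset, rowsInRowGroup);
            // prints 0/10000, 10000/10000, 20000/5000 - the last group is partial
            System.out.println("group " + rowGroupId + ": offset=" + rowOffset + ", rows=" + rowsInGroup);
        }
    }
}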
Use of io.trino.orc.metadata.ColumnEncoding in project trino by trinodb.
Class OrcRecordReader, method advanceToNextStripe:
private void advanceToNextStripe()
        throws IOException
{
    currentStripeMemoryContext.close();
    currentStripeMemoryContext = memoryUsage.newAggregatedMemoryContext();
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    if (currentStripe >= 0) {
        if (stripeStatisticsValidation.isPresent()) {
            StatisticsValidation statisticsValidation = stripeStatisticsValidation.get();
            long offset = stripes.get(currentStripe).getOffset();
            writeValidation.get().validateStripeStatistics(orcDataSource.getId(), offset, statisticsValidation.build().get());
            statisticsValidation.reset();
        }
    }

    currentStripe++;
    if (currentStripe >= stripes.size()) {
        return;
    }

    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }

    StripeInformation stripeInformation = stripes.get(currentStripe);
    validateWriteStripe(stripeInformation.getNumberOfRows());

    Stripe stripe = stripeReader.readStripe(stripeInformation, currentStripeMemoryContext);
    if (stripe != null) {
        // Give readers access to dictionary streams
        InputStreamSources dictionaryStreamSources = stripe.getDictionaryStreamSources();
        ColumnMetadata<ColumnEncoding> columnEncodings = stripe.getColumnEncodings();
        ZoneId fileTimeZone = stripe.getFileTimeZone();
        for (ColumnReader column : columnReaders) {
            if (column != null) {
                column.startStripe(fileTimeZone, dictionaryStreamSources, columnEncodings);
            }
        }
        rowGroups = stripe.getRowGroups().iterator();
    }

    orcDataSourceMemoryUsage.setBytes(orcDataSource.getRetainedSize());
}
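One detail that is easy to miss: currentStripePosition tracks the absolute file-level row offset of the stripe being entered, accumulated by adding the row count of the stripe just left. A minimal sketch of that bookkeeping, assuming only a plain list of per-stripe row counts (the StripeInformation type is elided):

import java.util.List;

public class StripePositionSketch
{
    public static void main(String[] args)
    {
        List<Integer> stripeRowCounts = List.of(50_000, 50_000, 20_000);

        long currentStripePosition = 0;
        for (int currentStripe = 0; currentStripe < stripeRowCounts.size(); currentStripe++) {
            if (currentStripe > 0) {
                // same accumulation as advanceToNextStripe: add the rows of the stripe we just left
                currentStripePosition += stripeRowCounts.get(currentStripe - 1);
            }
            System.out.println("stripe " + currentStripe + " starts at row " + currentStripePosition);
        }
        // prints 0, 50000, 100000
    }
}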
Use of io.trino.orc.metadata.ColumnEncoding in project trino by trinodb.
Class OrcWriter, method bufferStripeData:
/**
 * Collects the data for the stripe. This is not the actual data, but
 * rather functions that know how to write the data.
 */
private List<OrcDataOutput> bufferStripeData(long stripeStartOffset, FlushReason flushReason)
        throws IOException
{
    if (stripeRowCount == 0) {
        verify(flushReason == CLOSED, "An empty stripe is not allowed");
        // column writers must be closed or the reset call will fail
        columnWriters.forEach(ColumnWriter::close);
        return ImmutableList.of();
    }

    if (rowGroupRowCount > 0) {
        finishRowGroup();
    }

    // convert any dictionary encoded column with a low compression ratio to direct
    dictionaryCompressionOptimizer.finalOptimize(bufferedBytes);

    columnWriters.forEach(ColumnWriter::close);

    List<OrcDataOutput> outputData = new ArrayList<>();
    List<Stream> allStreams = new ArrayList<>(columnWriters.size() * 3);

    // get index streams
    long indexLength = 0;
    for (ColumnWriter columnWriter : columnWriters) {
        for (StreamDataOutput indexStream : columnWriter.getIndexStreams(metadataWriter)) {
            // The ordering is critical because the streams only contain a length with no offset.
            outputData.add(indexStream);
            allStreams.add(indexStream.getStream());
            indexLength += indexStream.size();
        }
        for (StreamDataOutput bloomFilter : columnWriter.getBloomFilters(metadataWriter)) {
            outputData.add(bloomFilter);
            allStreams.add(bloomFilter.getStream());
            indexLength += bloomFilter.size();
        }
    }

    // data streams (sorted by size)
    long dataLength = 0;
    List<StreamDataOutput> dataStreams = new ArrayList<>(columnWriters.size() * 2);
    for (ColumnWriter columnWriter : columnWriters) {
        List<StreamDataOutput> streams = columnWriter.getDataStreams();
        dataStreams.addAll(streams);
        dataLength += streams.stream().mapToLong(StreamDataOutput::size).sum();
    }
    Collections.sort(dataStreams);

    // add data streams
    for (StreamDataOutput dataStream : dataStreams) {
        // The ordering is critical because the streams only contain a length with no offset.
        outputData.add(dataStream);
        allStreams.add(dataStream.getStream());
    }

    Map<OrcColumnId, ColumnEncoding> columnEncodings = new HashMap<>();
    columnWriters.forEach(columnWriter -> columnEncodings.putAll(columnWriter.getColumnEncodings()));

    Map<OrcColumnId, ColumnStatistics> columnStatistics = new HashMap<>();
    columnWriters.forEach(columnWriter -> columnStatistics.putAll(columnWriter.getColumnStripeStatistics()));

    // the 0th column is a struct column for the whole row
    columnEncodings.put(ROOT_COLUMN, new ColumnEncoding(DIRECT, 0));
    columnStatistics.put(ROOT_COLUMN, new ColumnStatistics((long) stripeRowCount, 0, null, null, null, null, null, null, null, null, null));

    // add footer
    StripeFooter stripeFooter = new StripeFooter(allStreams, toColumnMetadata(columnEncodings, orcTypes.size()), ZoneId.of("UTC"));
    Slice footer = metadataWriter.writeStripeFooter(stripeFooter);
    outputData.add(createDataOutput(footer));

    // create final stripe statistics
    StripeStatistics statistics = new StripeStatistics(toColumnMetadata(columnStatistics, orcTypes.size()));
    recordValidation(validation -> validation.addStripeStatistics(stripeStartOffset, statistics));
    StripeInformation stripeInformation = new StripeInformation(stripeRowCount, stripeStartOffset, indexLength, dataLength, footer.length());
    ClosedStripe closedStripe = new ClosedStripe(stripeInformation, statistics);
    closedStripes.add(closedStripe);
    closedStripesRetainedBytes += closedStripe.getRetainedSizeInBytes();
    recordValidation(validation -> validation.addStripe(stripeInformation.getNumberOfRows()));
    stats.recordStripeWritten(flushReason, stripeInformation.getTotalLength(), stripeInformation.getNumberOfRows(), dictionaryCompressionOptimizer.getDictionaryMemoryBytes());

    return outputData;
}
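The comments about ordering deserve a concrete illustration: the stripe footer records only each stream's kind and length, in the order the streams were written, so a reader recovers a stream's offset by accumulating the lengths of the streams before it. A minimal sketch of that reconstruction, with a made-up StreamEntry record standing in for the (kind, length) pairs in the footer:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class StreamOffsetSketch
{
    // stand-in for the (kind, length) pairs recorded in the stripe footer
    record StreamEntry(String name, long length) {}

    public static void main(String[] args)
    {
        // index streams first, then data streams, exactly the order they were buffered above
        List<StreamEntry> footerStreams = List.of(
                new StreamEntry("col1.ROW_INDEX", 120),
                new StreamEntry("col2.ROW_INDEX", 90),
                new StreamEntry("col1.DATA", 4_000),
                new StreamEntry("col2.DATA", 7_500));

        // offsets are implicit: each stream starts where the previous one ended
        Map<String, Long> offsets = new LinkedHashMap<>();
        long offset = 0;
        for (StreamEntry stream : footerStreams) {
            offsets.put(stream.name(), offset);
            offset += stream.length();
        }
        offsets.forEach((name, start) -> System.out.println(name + " starts at " + start));
        // reordering footerStreams would silently shift every subsequent offset
    }
}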