use of io.trino.parquet.DictionaryPage in project trino by trinodb.
From the class ParquetColumnChunk, the method readAllPages:
public PageReader readAllPages() throws IOException {
    LinkedList<DataPage> pages = new LinkedList<>();
    DictionaryPage dictionaryPage = null;
    long valueCount = 0;
    int dataPageCount = 0;
    while (hasMorePages(valueCount, dataPageCount)) {
        PageHeader pageHeader = readPageHeader();
        int uncompressedPageSize = pageHeader.getUncompressed_page_size();
        int compressedPageSize = pageHeader.getCompressed_page_size();
        OptionalLong firstRowIndex;
        switch (pageHeader.type) {
            case DICTIONARY_PAGE:
                if (dictionaryPage != null) {
                    throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor());
                }
                dictionaryPage = readDictionaryPage(pageHeader, uncompressedPageSize, compressedPageSize);
                break;
            case DATA_PAGE:
                firstRowIndex = PageReader.getFirstRowIndex(dataPageCount, offsetIndex);
                valueCount += readDataPageV1(pageHeader, uncompressedPageSize, compressedPageSize, pages, firstRowIndex);
                ++dataPageCount;
                break;
            case DATA_PAGE_V2:
                firstRowIndex = PageReader.getFirstRowIndex(dataPageCount, offsetIndex);
                valueCount += readDataPageV2(pageHeader, uncompressedPageSize, compressedPageSize, pages, firstRowIndex);
                ++dataPageCount;
                break;
            default:
                input.skip(compressedPageSize);
                advanceIfNecessary();
                break;
        }
    }
    return new PageReader(descriptor.getColumnChunkMetaData().getCodec(), pages, dictionaryPage, offsetIndex, valueCount);
}
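For orientation, the same page-header walk can be reproduced outside Trino using only the parquet-format Util class (the same class PredicateUtils uses below). The following is a minimal, hypothetical sketch, not Trino code: the helper name, the InputStream argument, and the per-type counting are assumptions; it only reads headers and skips page bodies, the way the default branch above does.

import java.io.IOException;
import java.io.InputStream;
import java.util.EnumMap;
import java.util.Map;

import org.apache.parquet.format.PageHeader;
import org.apache.parquet.format.PageType;
import org.apache.parquet.format.Util;

// Hypothetical helper: counts pages per type in a raw column chunk stream.
// Mirrors the readAllPages() loop above, but skips page bodies instead of decoding them.
final class PageTypeCounter {
    private PageTypeCounter() {}

    static Map<PageType, Integer> countPages(InputStream columnChunk, long totalValueCount)
            throws IOException {
        Map<PageType, Integer> counts = new EnumMap<>(PageType.class);
        long seenValues = 0;
        while (seenValues < totalValueCount) {
            PageHeader pageHeader = Util.readPageHeader(columnChunk);
            counts.merge(pageHeader.type, 1, Integer::sum);
            switch (pageHeader.type) {
                case DATA_PAGE:
                    seenValues += pageHeader.getData_page_header().getNum_values();
                    break;
                case DATA_PAGE_V2:
                    seenValues += pageHeader.getData_page_header_v2().getNum_values();
                    break;
                default:
                    break; // dictionary and unknown pages carry no row values
            }
            // Skip the (compressed) page body; only the headers are inspected here.
            long toSkip = pageHeader.getCompressed_page_size();
            while (toSkip > 0) {
                long skipped = columnChunk.skip(toSkip);
                if (skipped <= 0) {
                    throw new IOException("Unexpected end of column chunk");
                }
                toSkip -= skipped;
            }
        }
        return counts;
    }
}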
use of io.trino.parquet.DictionaryPage in project trino by trinodb.
From the class PredicateUtils, the method readDictionaryPage:
private static Optional<DictionaryPage> readDictionaryPage(Slice data, CompressionCodecName codecName) {
    try {
        SliceInput inputStream = data.getInput();
        PageHeader pageHeader = Util.readPageHeader(inputStream);
        if (pageHeader.type != PageType.DICTIONARY_PAGE) {
            return Optional.empty();
        }
        Slice compressedData = inputStream.readSlice(pageHeader.getCompressed_page_size());
        DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
        ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name()));
        int dictionarySize = dicHeader.getNum_values();
        return Optional.of(new DictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding));
    }
    catch (IOException ignored) {
        return Optional.empty();
    }
}
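A caller would typically hand this helper the leading bytes of a column chunk and inspect the result. The snippet below is only an illustrative sketch under assumptions: it is assumed to live alongside the private method above (so the call compiles), and the byte array is assumed to start at a page header. It shows the airlift Slice plumbing and what a present versus absent dictionary looks like to the caller.

import java.util.Optional;

import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.parquet.DictionaryPage;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

// Hypothetical caller: wraps raw column-chunk bytes in a Slice and probes for a dictionary page.
// Assumes it is placed in the same class as readDictionaryPage above.
static void inspectDictionary(byte[] columnChunkStart, CompressionCodecName codec) {
    Slice data = Slices.wrappedBuffer(columnChunkStart);
    Optional<DictionaryPage> dictionaryPage = readDictionaryPage(data, codec);
    if (dictionaryPage.isEmpty()) {
        // Either the chunk is not dictionary-encoded or the header could not be read.
        System.out.println("no dictionary page");
        return;
    }
    DictionaryPage page = dictionaryPage.get();
    System.out.printf("dictionary: %s entries, encoding %s%n",
            page.getDictionarySize(), page.getEncoding());
}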
use of io.trino.parquet.DictionaryPage in project trino by trinodb.
From the class TupleDomainParquetPredicate, the method getDomain:
private static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescriptor, DateTimeZone timeZone) {
    if (dictionaryDescriptor == null) {
        return Domain.all(type);
    }
    ColumnDescriptor columnDescriptor = dictionaryDescriptor.getColumnDescriptor();
    Optional<DictionaryPage> dictionaryPage = dictionaryDescriptor.getDictionaryPage();
    if (dictionaryPage.isEmpty()) {
        return Domain.all(type);
    }
    Dictionary dictionary;
    try {
        dictionary = dictionaryPage.get().getEncoding().initDictionary(columnDescriptor, dictionaryPage.get());
    }
    catch (Exception e) {
        // OK to ignore exception when reading dictionaries
        return Domain.all(type);
    }
    int dictionarySize = dictionaryPage.get().getDictionarySize();
    DictionaryValueConverter converter = new DictionaryValueConverter(dictionary);
    Function<Integer, Object> convertFunction = converter.getConverter(columnDescriptor.getPrimitiveType());
    List<Object> values = new ArrayList<>();
    for (int i = 0; i < dictionarySize; i++) {
        values.add(convertFunction.apply(i));
    }
    // TODO: when min == max (i.e., singleton ranges), the construction of Domains can be done more efficiently
    return getDomain(columnDescriptor, type, values, values, true, timeZone);
}
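Once the dictionary has been converted into a Domain, pruning reduces to an overlap test against the query's predicate domain. The sketch below is a simplified illustration, not the Trino implementation: it builds the dictionary-side Domain directly with Domain.multipleValues (the real code goes through the min/max based getDomain overload above) and assumes a BIGINT column; the class and method names are hypothetical.

import java.util.List;

import io.trino.spi.predicate.Domain;
import static io.trino.spi.type.BigintType.BIGINT;

// Simplified illustration (not the Trino implementation) of dictionary-based pruning
// for a BIGINT column: if the predicate domain and the domain built from the dictionary
// values do not overlap, no row in the column chunk can match and the chunk can be skipped.
final class DictionaryPruningSketch {
    private DictionaryPruningSketch() {}

    static boolean columnChunkCanMatch(List<Object> dictionaryValues, Domain predicateDomain) {
        if (dictionaryValues.isEmpty()) {
            return true; // nothing to reason about; do not prune
        }
        Domain dictionaryDomain = Domain.multipleValues(BIGINT, dictionaryValues);
        return predicateDomain.overlaps(dictionaryDomain);
    }
}

For example, a predicate domain built with Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 100L)), false) does not overlap a dictionary containing only 1L, 2L, and 3L, so columnChunkCanMatch returns false and the chunk can be skipped.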
use of io.trino.parquet.DictionaryPage in project trino by trinodb.
From the class PrimitiveColumnReader, the method setPageReader:
public void setPageReader(PageReader pageReader, RowRanges rowRanges) {
    this.pageReader = requireNonNull(pageReader, "pageReader");
    DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
    if (dictionaryPage != null) {
        try {
            dictionary = dictionaryPage.getEncoding().initDictionary(columnDescriptor, dictionaryPage);
        }
        catch (IOException e) {
            throw new ParquetDecodingException("could not decode the dictionary for " + columnDescriptor, e);
        }
    }
    else {
        dictionary = null;
    }
    checkArgument(pageReader.getTotalValueCount() > 0, "page is empty");
    totalValueCount = pageReader.getTotalValueCount();
    if (rowRanges != null) {
        indexIterator = rowRanges.iterator();
        // If rowRanges is empty for a row-group, then no page needs to be read, and we should not reach here
        checkArgument(indexIterator.hasNext(), "rowRanges is empty");
        targetRow = indexIterator.next();
    }
}
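The rowRanges handling above only primes the iterator; the actual skip-or-read decision happens later, while values are being consumed. The following is a simplified, hypothetical sketch of that pattern (the class, field, and method names are not Trino's): it advances a target row index from the RowRanges iterator and reports whether the row at the current position falls inside a selected range.

import java.util.PrimitiveIterator;

import org.apache.parquet.internal.filter2.columnindex.RowRanges;

// Hypothetical sketch of row-range filtering driven by RowRanges.iterator().
// targetRow always holds the next row index that must be read; rows before it are skipped.
final class RowRangeCursor {
    private final PrimitiveIterator.OfLong indexIterator;
    private long targetRow;

    RowRangeCursor(RowRanges rowRanges) {
        this.indexIterator = rowRanges.iterator();
        this.targetRow = indexIterator.hasNext() ? indexIterator.nextLong() : Long.MAX_VALUE;
    }

    // Returns true if the row at currentRow is inside a selected range and must be read.
    boolean shouldRead(long currentRow) {
        // Advance past target rows the reader has already moved beyond.
        while (targetRow < currentRow && indexIterator.hasNext()) {
            targetRow = indexIterator.nextLong();
        }
        return currentRow == targetRow;
    }
}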