Use of parquet.format.PageHeader in project presto by prestodb.
Class ParquetColumnChunk, method readAllPages:
public ParquetPageReader readAllPages() throws IOException {
    List<ParquetDataPage> pages = new ArrayList<>();
    ParquetDictionaryPage dictionaryPage = null;
    long valueCount = 0;
    // Keep reading pages until every value declared in the column chunk metadata is accounted for
    while (valueCount < descriptor.getColumnChunkMetaData().getValueCount()) {
        PageHeader pageHeader = readPageHeader();
        int uncompressedPageSize = pageHeader.getUncompressed_page_size();
        int compressedPageSize = pageHeader.getCompressed_page_size();
        switch (pageHeader.type) {
            case DICTIONARY_PAGE:
                // A column chunk may contain at most one dictionary page
                if (dictionaryPage != null) {
                    throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor());
                }
                dictionaryPage = readDictionaryPage(pageHeader, uncompressedPageSize, compressedPageSize);
                break;
            case DATA_PAGE:
                valueCount += readDataPageV1(pageHeader, uncompressedPageSize, compressedPageSize, pages);
                break;
            case DATA_PAGE_V2:
                valueCount += readDataPageV2(pageHeader, uncompressedPageSize, compressedPageSize, pages);
                break;
            default:
                // Unrecognized page types are skipped by advancing past their compressed payload
                skip(compressedPageSize);
                break;
        }
    }
    return new ParquetPageReader(descriptor.getColumnChunkMetaData().getCodec(), pages, dictionaryPage);
}
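For context, PageHeader is the Thrift struct that precedes every page in a Parquet column chunk, and parquet.format.Util is the helper that decodes it from a stream (the second example below calls it directly). The following standalone sketch shows just that decoding step; the class and method names (PageHeaderProbe, describeFirstPage) are illustrative and not part of Presto.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import parquet.format.PageHeader;
import parquet.format.Util;

public class PageHeaderProbe {
    // Decodes the Thrift-encoded PageHeader at the start of the buffer and prints
    // the page type and sizes; a real reader would then consume
    // getCompressed_page_size() bytes of page payload from the same stream.
    public static void describeFirstPage(byte[] columnChunkBytes) throws IOException {
        InputStream in = new ByteArrayInputStream(columnChunkBytes);
        PageHeader header = Util.readPageHeader(in);
        System.out.printf("page type=%s, compressed=%d bytes, uncompressed=%d bytes%n",
                header.getType(),
                header.getCompressed_page_size(),
                header.getUncompressed_page_size());
    }
}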
Use of parquet.format.PageHeader in project presto by prestodb.
Class ParquetPredicateUtils, method readDictionaryPage:
private static Optional<ParquetDictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) {
    try {
        ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
        PageHeader pageHeader = Util.readPageHeader(inputStream);
        if (pageHeader.type != PageType.DICTIONARY_PAGE) {
            return Optional.empty();
        }
        // The page payload starts where the Thrift header ended; its length comes from the header itself
        Slice compressedData = wrappedBuffer(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size());
        DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
        ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name()));
        int dictionarySize = dicHeader.getNum_values();
        return Optional.of(new ParquetDictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding));
    }
    catch (IOException ignored) {
        return Optional.empty();
    }
}
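readDictionaryPage operates on a byte buffer that already holds the start of a column chunk, so a caller has to fetch those bytes itself. A minimal sketch of such a caller, assuming a Hadoop FSDataInputStream and the chunk's ColumnChunkMetaData are available; the helper name readDictionaryAt and the choice to read the whole chunk into memory are illustrative, not Presto's actual buffering strategy.

// Hypothetical caller: read the raw bytes of the column chunk and hand them,
// together with the chunk's codec, to readDictionaryPage above.
private static Optional<ParquetDictionaryPage> readDictionaryAt(FSDataInputStream inputStream, ColumnChunkMetaData columnMetaData) throws IOException {
    // getStartingPos() points at the first page of the chunk; per the Parquet format,
    // the dictionary page, when present, is that first page, which is why
    // readDictionaryPage only inspects the first PageHeader it decodes.
    byte[] buffer = new byte[Math.toIntExact(columnMetaData.getTotalSize())];
    inputStream.readFully(columnMetaData.getStartingPos(), buffer);
    return readDictionaryPage(buffer, columnMetaData.getCodec());
}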