Use of com.facebook.presto.hive.parquet.ParquetDataPage in project presto by prestodb.
From the class ParquetColumnChunk, method readAllPages.
/**
 * Walks the page headers of this column chunk and gathers its pages into a reader.
 *
 * <p>Headers are read until the running total returned by the data-page readers reaches
 * the value count recorded in the column chunk metadata. A dictionary page is remembered
 * (only one is tolerated), V1 and V2 data pages are handed to their respective readers
 * together with the shared page list, and a page of any other type is skipped using its
 * compressed size.
 *
 * @return a {@link ParquetPageReader} built from the chunk's codec, the collected data
 *         pages and the dictionary page ({@code null} when no dictionary page was seen)
 * @throws IOException declared for the underlying reads; a second dictionary page is
 *         reported with a {@code ParquetCorruptionException}
 */
public ParquetPageReader readAllPages() throws IOException {
    ParquetDictionaryPage dictionary = null;
    List<ParquetDataPage> dataPages = new ArrayList<>();
    long valuesSeen = 0;

    while (valuesSeen < descriptor.getColumnChunkMetaData().getValueCount()) {
        PageHeader header = readPageHeader();
        int uncompressedSize = header.getUncompressed_page_size();
        int compressedSize = header.getCompressed_page_size();

        switch (header.type) {
            case DATA_PAGE:
                valuesSeen += readDataPageV1(header, uncompressedSize, compressedSize, dataPages);
                break;
            case DATA_PAGE_V2:
                valuesSeen += readDataPageV2(header, uncompressedSize, compressedSize, dataPages);
                break;
            case DICTIONARY_PAGE:
                // A column chunk may carry at most one dictionary page.
                if (dictionary != null) {
                    throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor());
                }
                dictionary = readDictionaryPage(header, uncompressedSize, compressedSize);
                break;
            default:
                // Unrecognised page type: step over its payload without counting any values.
                skip(compressedSize);
                break;
        }
    }

    return new ParquetPageReader(
            descriptor.getColumnChunkMetaData().getCodec(),
            dataPages,
            dictionary);
}
Use of com.facebook.presto.hive.parquet.ParquetDataPage in project presto by prestodb.
From the class ParquetPageReader, method readPage.
/**
 * Removes the first pending page from {@code compressedPages} and returns it in
 * decompressed form.
 *
 * <p>A V1 page always has its slice decompressed to the page's uncompressed size. A V2
 * page that reports itself as not compressed is returned unchanged; otherwise only its
 * data slice is decompressed, to the page's uncompressed size minus the lengths of the
 * definition and repetition levels, and the rebuilt page is flagged as not compressed.
 *
 * @return the next page, or {@code null} when no pages remain
 * @throws RuntimeException wrapping the {@link IOException} if decompression fails
 */
public ParquetDataPage readPage() {
    if (compressedPages.isEmpty()) {
        return null;
    }

    ParquetDataPage next = compressedPages.remove(0);
    try {
        if (!(next instanceof ParquetDataPageV1)) {
            ParquetDataPageV2 v2 = (ParquetDataPageV2) next;
            if (!v2.isCompressed()) {
                // Nothing to inflate; hand the page back as it is.
                return v2;
            }
            // The levels are passed through unchanged, so their lengths are excluded
            // from the size given to decompress().
            int dataSize = toIntExact(v2.getUncompressedSize() - v2.getDefinitionLevels().length() - v2.getRepetitionLevels().length());
            return new ParquetDataPageV2(
                    v2.getRowCount(),
                    v2.getNullCount(),
                    v2.getValueCount(),
                    v2.getRepetitionLevels(),
                    v2.getDefinitionLevels(),
                    v2.getDataEncoding(),
                    decompress(codec, v2.getSlice(), dataSize),
                    v2.getUncompressedSize(),
                    v2.getStatistics(),
                    false);
        }

        ParquetDataPageV1 v1 = (ParquetDataPageV1) next;
        return new ParquetDataPageV1(
                decompress(codec, v1.getSlice(), v1.getUncompressedSize()),
                v1.getValueCount(),
                v1.getUncompressedSize(),
                v1.getStatistics(),
                v1.getRepetitionLevelEncoding(),
                v1.getDefinitionLevelEncoding(),
                v1.getValueEncoding());
    } catch (IOException e) {
        throw new RuntimeException("Could not decompress page", e);
    }
}
Aggregations