Search in sources :

Example 1 with ParquetDataPage

use of com.facebook.presto.hive.parquet.ParquetDataPage in project presto by prestodb.

From the class ParquetColumnChunk, method readAllPages:

/**
 * Reads the pages of this column chunk and wraps them in a {@code ParquetPageReader}.
 *
 * <p>Page headers are consumed one at a time until the number of values reported by the
 * data-page readers (V1 and V2) reaches the value count stored in the column chunk
 * metadata. A dictionary page may appear at most once; any page of another type is
 * skipped using its compressed size.
 *
 * @return a page reader built from the chunk's codec, the collected data pages and the
 *         dictionary page ({@code null} when no dictionary page was encountered)
 * @throws ParquetCorruptionException if a second dictionary page is found in the chunk
 * @throws IOException as declared; presumably raised by the page-reading helpers
 */
public ParquetPageReader readAllPages() throws IOException {
    ParquetDictionaryPage dictionary = null;
    List<ParquetDataPage> dataPages = new ArrayList<>();
    long valuesRead = 0;
    // The metadata value count is the stop condition: only data pages advance valuesRead.
    while (valuesRead < descriptor.getColumnChunkMetaData().getValueCount()) {
        PageHeader header = readPageHeader();
        int rawSize = header.getUncompressed_page_size();
        int packedSize = header.getCompressed_page_size();
        switch (header.type) {
            case DATA_PAGE:
                valuesRead += readDataPageV1(header, rawSize, packedSize, dataPages);
                break;
            case DATA_PAGE_V2:
                valuesRead += readDataPageV2(header, rawSize, packedSize, dataPages);
                break;
            case DICTIONARY_PAGE:
                // A column chunk may carry only a single dictionary page.
                if (dictionary != null) {
                    throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor());
                }
                dictionary = readDictionaryPage(header, rawSize, packedSize);
                break;
            default:
                // Unhandled page type: step over its compressed payload.
                skip(packedSize);
                break;
        }
    }
    return new ParquetPageReader(
            descriptor.getColumnChunkMetaData().getCodec(),
            dataPages,
            dictionary);
}
Also used : ParquetCorruptionException(com.facebook.presto.hive.parquet.ParquetCorruptionException) ParquetDictionaryPage(com.facebook.presto.hive.parquet.ParquetDictionaryPage) PageHeader(parquet.format.PageHeader) DictionaryPageHeader(parquet.format.DictionaryPageHeader) DataPageHeader(parquet.format.DataPageHeader) ParquetDataPage(com.facebook.presto.hive.parquet.ParquetDataPage) ArrayList(java.util.ArrayList)

Example 2 with ParquetDataPage

use of com.facebook.presto.hive.parquet.ParquetDataPage in project presto by prestodb.

From the class ParquetPageReader, method readPage:

/**
 * Removes the first pending page and returns it with its data decompressed.
 *
 * <p>V1 pages are always run through {@code decompress}. A V2 page that is not flagged as
 * compressed is handed back unchanged; otherwise only its data slice is decompressed,
 * using the page's uncompressed size minus the lengths of its definition and repetition
 * levels as the target size, and the resulting page is marked as not compressed.
 *
 * @return the next page, or {@code null} when no pages remain
 * @throws RuntimeException wrapping the {@link IOException} if decompression fails
 */
public ParquetDataPage readPage() {
    if (compressedPages.isEmpty()) {
        return null;
    }
    ParquetDataPage next = compressedPages.remove(0);
    try {
        if (next instanceof ParquetDataPageV1) {
            ParquetDataPageV1 v1 = (ParquetDataPageV1) next;
            return new ParquetDataPageV1(
                    decompress(codec, v1.getSlice(), v1.getUncompressedSize()),
                    v1.getValueCount(),
                    v1.getUncompressedSize(),
                    v1.getStatistics(),
                    v1.getRepetitionLevelEncoding(),
                    v1.getDefinitionLevelEncoding(),
                    v1.getValueEncoding());
        }

        // Anything that is not a V1 page is treated as a V2 page.
        ParquetDataPageV2 v2 = (ParquetDataPageV2) next;
        if (v2.isCompressed()) {
            // The level lengths are subtracted so the target size covers only the data slice.
            int dataSize = toIntExact(
                    v2.getUncompressedSize()
                            - v2.getDefinitionLevels().length()
                            - v2.getRepetitionLevels().length());
            return new ParquetDataPageV2(
                    v2.getRowCount(),
                    v2.getNullCount(),
                    v2.getValueCount(),
                    v2.getRepetitionLevels(),
                    v2.getDefinitionLevels(),
                    v2.getDataEncoding(),
                    decompress(codec, v2.getSlice(), dataSize),
                    v2.getUncompressedSize(),
                    v2.getStatistics(),
                    false);
        }
        return v2;
    } catch (IOException e) {
        throw new RuntimeException("Could not decompress page", e);
    }
}
Also used : ParquetDataPageV1(com.facebook.presto.hive.parquet.ParquetDataPageV1) ParquetDataPage(com.facebook.presto.hive.parquet.ParquetDataPage) ParquetDataPageV2(com.facebook.presto.hive.parquet.ParquetDataPageV2) IOException(java.io.IOException)

Aggregations

ParquetDataPage (com.facebook.presto.hive.parquet.ParquetDataPage): 2; ParquetCorruptionException (com.facebook.presto.hive.parquet.ParquetCorruptionException): 1; ParquetDataPageV1 (com.facebook.presto.hive.parquet.ParquetDataPageV1): 1; ParquetDataPageV2 (com.facebook.presto.hive.parquet.ParquetDataPageV2): 1; ParquetDictionaryPage (com.facebook.presto.hive.parquet.ParquetDictionaryPage): 1; IOException (java.io.IOException): 1; ArrayList (java.util.ArrayList): 1; DataPageHeader (parquet.format.DataPageHeader): 1; DictionaryPageHeader (parquet.format.DictionaryPageHeader): 1; PageHeader (parquet.format.PageHeader): 1