Search in sources:

Example 1 with DataPage

use of com.facebook.presto.parquet.DataPage in project presto by prestodb.

the class BinaryFlatBatchReader method readNextPage.

/**
 * Moves this reader on to the next data page supplied by {@code pageReader}.
 *
 * <p>The per-page state (definition-level decoder, values decoder and the
 * remaining value count) is cleared first, so it stays cleared when no
 * further page is available.
 *
 * @return {@code true} if a page was read and its decoders were installed,
 *         {@code false} if {@code pageReader.readPage()} returned {@code null}
 */
protected boolean readNextPage() {
    // Drop everything that belonged to the previous page.
    definitionLevelDecoder = null;
    valuesDecoder = null;
    remainingCountInPage = 0;

    final DataPage nextPage = pageReader.readPage();
    if (nextPage != null) {
        final FlatDecoders decoders = readFlatPage(nextPage, columnDescriptor, dictionary);
        definitionLevelDecoder = decoders.getDefinitionLevelDecoder();
        // This reader only handles binary values, hence the narrowing cast.
        valuesDecoder = (BinaryValuesDecoder) decoders.getValuesDecoder();
        remainingCountInPage = nextPage.getValueCount();
        return true;
    }
    return false;
}
Also used : DataPage(com.facebook.presto.parquet.DataPage) FlatDecoders(com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders)

Example 2 with DataPage

use of com.facebook.presto.parquet.DataPage in project presto by prestodb.

the class ParquetColumnChunk method readAllPages.

/**
 * Reads page headers for as long as {@code hasMorePages} reports more input,
 * collecting the data pages (V1 and V2) and at most one dictionary page, and
 * wraps the result in a {@link PageReader}.
 *
 * <p>Pages of any other type are skipped by advancing the stream past their
 * compressed payload.
 *
 * @return a {@code PageReader} built from the chunk's codec, the collected
 *         data pages, the dictionary page (or {@code null} if none was seen)
 *         and {@code offsetIndex}
 * @throws ParquetCorruptionException if a second dictionary page is encountered
 * @throws IOException if reading from the underlying stream fails
 */
public PageReader readAllPages() throws IOException {
    LinkedList<DataPage> dataPages = new LinkedList<>();
    DictionaryPage dictionary = null;
    // Running total of the values returned by the data-page readers
    // (presumably the per-page value count; confirm against readDataPageV1/V2).
    long valuesSeen = 0;
    int dataPagesSeen = 0;

    while (hasMorePages(valuesSeen, dataPagesSeen)) {
        PageHeader header = readPageHeader();
        int uncompressedSize = header.getUncompressed_page_size();
        int compressedSize = header.getCompressed_page_size();

        switch (header.type) {
            case DICTIONARY_PAGE:
                if (dictionary != null) {
                    throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor());
                }
                dictionary = readDictionaryPage(header, uncompressedSize, compressedSize);
                break;
            case DATA_PAGE:
                valuesSeen += readDataPageV1(
                        header,
                        uncompressedSize,
                        compressedSize,
                        PageReader.getFirstRowIndex(dataPagesSeen, offsetIndex),
                        dataPages);
                dataPagesSeen++;
                break;
            case DATA_PAGE_V2:
                valuesSeen += readDataPageV2(
                        header,
                        uncompressedSize,
                        compressedSize,
                        PageReader.getFirstRowIndex(dataPagesSeen, offsetIndex),
                        dataPages);
                dataPagesSeen++;
                break;
            default:
                // Unhandled page type: step over its payload and keep going.
                stream.skipFully(compressedSize);
                break;
        }
    }

    return new PageReader(descriptor.getColumnChunkMetaData().getCodec(), dataPages, dictionary, offsetIndex);
}
Also used : ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) DataPage(com.facebook.presto.parquet.DataPage) DictionaryPageHeader(org.apache.parquet.format.DictionaryPageHeader) DataPageHeader(org.apache.parquet.format.DataPageHeader) PageHeader(org.apache.parquet.format.PageHeader) LinkedList(java.util.LinkedList) DictionaryPage(com.facebook.presto.parquet.DictionaryPage)

Example 3 with DataPage

use of com.facebook.presto.parquet.DataPage in project presto by prestodb.

the class PageReader method readPage.

/**
 * Removes the next page from {@code compressedPages} and returns it in
 * decompressed form.
 *
 * <p>A V1 page is always passed through {@code decompress}. A V2 page is
 * returned unchanged when it reports that it is not compressed; otherwise its
 * data slice is decompressed and a new V2 page flagged as uncompressed is built.
 *
 * @return the next data page, or {@code null} when no pages remain
 * @throws RuntimeException wrapping the {@link IOException} if decompression fails
 */
public DataPage readPage() {
    if (compressedPages.isEmpty()) {
        return null;
    }

    final DataPage next = compressedPages.removeFirst();
    try {
        final long firstRow = getFirstRowIndex(pageIndex, offsetIndex);
        pageIndex++;

        if (!(next instanceof DataPageV1)) {
            final DataPageV2 v2 = (DataPageV2) next;
            if (!v2.isCompressed()) {
                // Nothing to decompress; hand the stored page back as is.
                return v2;
            }
            // The level sections are subtracted from the page's uncompressed
            // size to obtain the size passed to decompress for the data slice.
            final int dataSize = toIntExact(v2.getUncompressedSize() - v2.getDefinitionLevels().length() - v2.getRepetitionLevels().length());
            final Slice data = decompress(codec, v2.getSlice(), dataSize);
            return new DataPageV2(
                    v2.getRowCount(),
                    v2.getNullCount(),
                    v2.getValueCount(),
                    firstRow,
                    v2.getRepetitionLevels(),
                    v2.getDefinitionLevels(),
                    v2.getDataEncoding(),
                    data,
                    v2.getUncompressedSize(),
                    v2.getStatistics(),
                    false);
        }

        final DataPageV1 v1 = (DataPageV1) next;
        final Slice data = decompress(codec, v1.getSlice(), v1.getUncompressedSize());
        return new DataPageV1(
                data,
                v1.getValueCount(),
                v1.getUncompressedSize(),
                firstRow,
                v1.getStatistics(),
                v1.getRepetitionLevelEncoding(),
                v1.getDefinitionLevelEncoding(),
                v1.getValueEncoding());
    } catch (IOException e) {
        throw new RuntimeException("Could not decompress page", e);
    }
}
Also used : DataPage(com.facebook.presto.parquet.DataPage) Slice(io.airlift.slice.Slice) DataPageV2(com.facebook.presto.parquet.DataPageV2) DataPageV1(com.facebook.presto.parquet.DataPageV1) IOException(java.io.IOException)

Example 4 with DataPage

use of com.facebook.presto.parquet.DataPage in project presto by prestodb.

the class Int32FlatBatchReader method readNextPage.

/**
 * Loads the next data page from {@code pageReader} and prepares its decoders.
 *
 * <p>The definition-level decoder, the values decoder and the remaining value
 * count are reset before the read, so they are left cleared when the page
 * reader has nothing more to return.
 *
 * @return {@code false} if {@code pageReader.readPage()} returned {@code null};
 *         {@code true} once the decoders and remaining count reflect the new page
 */
protected boolean readNextPage() {
    // Reset per-page state up front.
    remainingCountInPage = 0;
    valuesDecoder = null;
    definitionLevelDecoder = null;

    final DataPage current = pageReader.readPage();
    if (current == null) {
        return false;
    }

    final FlatDecoders pageDecoders = readFlatPage(current, columnDescriptor, dictionary);
    definitionLevelDecoder = pageDecoders.getDefinitionLevelDecoder();
    // This reader only handles 32-bit integer values, hence the narrowing cast.
    valuesDecoder = (Int32ValuesDecoder) pageDecoders.getValuesDecoder();
    remainingCountInPage = current.getValueCount();
    return true;
}
Also used : DataPage(com.facebook.presto.parquet.DataPage) FlatDecoders(com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders)

Example 5 with DataPage

use of com.facebook.presto.parquet.DataPage in project presto by prestodb.

the class AbstractNestedBatchReader method readNextPage.

/**
 * Reads the next data page from {@code pageReader} and installs the
 * repetition-level, definition-level and values decoders for it.
 *
 * <p>{@code remainingCountInPage} is zeroed first. When no page is available
 * the method returns with the count still at zero and the previously
 * installed decoders untouched.
 */
protected void readNextPage() {
    remainingCountInPage = 0;

    final DataPage nextPage = pageReader.readPage();
    if (nextPage != null) {
        final NestedDecoders decoders = readNestedPage(nextPage, columnDescriptor, dictionary);
        repetitionLevelDecoder = decoders.getRepetitionLevelDecoder();
        definitionLevelDecoder = decoders.getDefinitionLevelDecoder();
        valuesDecoder = decoders.getValuesDecoder();
        remainingCountInPage = nextPage.getValueCount();
    }
}
Also used : DataPage(com.facebook.presto.parquet.DataPage) NestedDecoders(com.facebook.presto.parquet.batchreader.decoders.Decoders.NestedDecoders)

Aggregations

- DataPage (com.facebook.presto.parquet.DataPage): 5
- FlatDecoders (com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders): 2
- DataPageV1 (com.facebook.presto.parquet.DataPageV1): 1
- DataPageV2 (com.facebook.presto.parquet.DataPageV2): 1
- DictionaryPage (com.facebook.presto.parquet.DictionaryPage): 1
- ParquetCorruptionException (com.facebook.presto.parquet.ParquetCorruptionException): 1
- NestedDecoders (com.facebook.presto.parquet.batchreader.decoders.Decoders.NestedDecoders): 1
- Slice (io.airlift.slice.Slice): 1
- IOException (java.io.IOException): 1
- LinkedList (java.util.LinkedList): 1
- DataPageHeader (org.apache.parquet.format.DataPageHeader): 1
- DictionaryPageHeader (org.apache.parquet.format.DictionaryPageHeader): 1
- PageHeader (org.apache.parquet.format.PageHeader): 1