Search in sources :

Example 1 with PageHeader

use of parquet.format.PageHeader in project presto by prestodb.

In the class ParquetColumnChunk, the method readAllPages:

/**
 * Reads the pages of this column chunk and wraps them in a {@link ParquetPageReader}.
 *
 * <p>Page headers are consumed one at a time until the number of values gathered from
 * data pages reaches the value count reported by the column chunk metadata. V1 and V2
 * data pages are handed to their respective readers together with the shared page list,
 * a single dictionary page is retained, and a page of any other type is skipped using
 * its compressed size.
 *
 * @return a page reader built from the chunk's codec, the collected data pages and the
 *         dictionary page ({@code null} when no dictionary page was encountered)
 * @throws ParquetCorruptionException if a second dictionary page is found in the chunk
 * @throws IOException declared by this method; presumably raised by the underlying
 *         page reads (confirm against the helper methods)
 */
public ParquetPageReader readAllPages() throws IOException {
    ParquetDictionaryPage dictionary = null;
    List<ParquetDataPage> dataPages = new ArrayList<>();
    long valuesRead = 0;
    // The target count is re-read from the metadata on every iteration.
    while (valuesRead < descriptor.getColumnChunkMetaData().getValueCount()) {
        PageHeader header = readPageHeader();
        int rawSize = header.getUncompressed_page_size();
        int packedSize = header.getCompressed_page_size();
        switch (header.type) {
            case DATA_PAGE_V2:
                valuesRead += readDataPageV2(header, rawSize, packedSize, dataPages);
                break;
            case DATA_PAGE:
                valuesRead += readDataPageV1(header, rawSize, packedSize, dataPages);
                break;
            case DICTIONARY_PAGE: {
                // Only one dictionary page is accepted per column chunk.
                boolean alreadySeen = dictionary != null;
                if (alreadySeen) {
                    throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor());
                }
                dictionary = readDictionaryPage(header, rawSize, packedSize);
                break;
            }
            default:
                // Any other page type contributes no values; step over its payload.
                skip(packedSize);
                break;
        }
    }
    return new ParquetPageReader(descriptor.getColumnChunkMetaData().getCodec(), dataPages, dictionary);
}
Also used : ParquetCorruptionException(com.facebook.presto.hive.parquet.ParquetCorruptionException) ParquetDictionaryPage(com.facebook.presto.hive.parquet.ParquetDictionaryPage) PageHeader(parquet.format.PageHeader) DictionaryPageHeader(parquet.format.DictionaryPageHeader) DataPageHeader(parquet.format.DataPageHeader) ParquetDataPage(com.facebook.presto.hive.parquet.ParquetDataPage) ArrayList(java.util.ArrayList)

Example 2 with PageHeader

use of parquet.format.PageHeader in project presto by prestodb.

In the class ParquetPredicateUtils, the method readDictionaryPage:

/**
 * Attempts to decode a dictionary page from the start of {@code data}.
 *
 * <p>A page header is read from the beginning of the array. If that header does not
 * describe a dictionary page, or if an {@link IOException} is raised while reading or
 * decompressing, an empty {@link Optional} is returned instead of propagating the failure.
 *
 * @param data bytes beginning with a page header, followed by the page payload
 * @param codecName compression codec passed to {@code decompress} for the payload
 * @return the decoded dictionary page, or {@link Optional#empty()} when the first page is
 *         not a dictionary page or an {@link IOException} occurs
 */
private static Optional<ParquetDictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) {
    try {
        ByteArrayInputStream headerStream = new ByteArrayInputStream(data);
        PageHeader header = Util.readPageHeader(headerStream);
        boolean isDictionary = header.type == PageType.DICTIONARY_PAGE;
        if (!isDictionary) {
            return Optional.empty();
        }

        // Bytes consumed so far are the header; the payload begins right after them.
        int payloadOffset = data.length - headerStream.available();
        int payloadLength = header.getCompressed_page_size();
        Slice payload = wrappedBuffer(data, payloadOffset, payloadLength);

        DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
        String encodingName = dictionaryHeader.getEncoding().name();
        ParquetEncoding pageEncoding = getParquetEncoding(Encoding.valueOf(encodingName));
        int entryCount = dictionaryHeader.getNum_values();

        Slice uncompressed = decompress(codecName, payload, header.getUncompressed_page_size());
        ParquetDictionaryPage page = new ParquetDictionaryPage(uncompressed, entryCount, pageEncoding);
        return Optional.of(page);
    } catch (IOException ignored) {
        // Best effort: an unreadable page is reported as "no dictionary available".
        return Optional.empty();
    }
}
Also used : ParquetDictionaryPage(com.facebook.presto.hive.parquet.ParquetDictionaryPage) ByteArrayInputStream(java.io.ByteArrayInputStream) PageHeader(parquet.format.PageHeader) DictionaryPageHeader(parquet.format.DictionaryPageHeader) Slice(io.airlift.slice.Slice) DictionaryPageHeader(parquet.format.DictionaryPageHeader) IOException(java.io.IOException) ParquetEncoding(com.facebook.presto.hive.parquet.ParquetEncoding) ParquetTypeUtils.getParquetEncoding(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetEncoding)

Aggregations

ParquetDictionaryPage (com.facebook.presto.hive.parquet.ParquetDictionaryPage): 2, DictionaryPageHeader (parquet.format.DictionaryPageHeader): 2, PageHeader (parquet.format.PageHeader): 2, ParquetCorruptionException (com.facebook.presto.hive.parquet.ParquetCorruptionException): 1, ParquetDataPage (com.facebook.presto.hive.parquet.ParquetDataPage): 1, ParquetEncoding (com.facebook.presto.hive.parquet.ParquetEncoding): 1, ParquetTypeUtils.getParquetEncoding (com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetEncoding): 1, Slice (io.airlift.slice.Slice): 1, ByteArrayInputStream (java.io.ByteArrayInputStream): 1, IOException (java.io.IOException): 1, ArrayList (java.util.ArrayList): 1, DataPageHeader (parquet.format.DataPageHeader): 1