Search in sources :

Example 1 with PageHeader

use of org.apache.parquet.format.PageHeader in project drill by apache.

the class PageReader method loadDictionaryIfExists.

/**
 * Reads the dictionary page of a column chunk when the chunk metadata reports one.
 *
 * <p>Nothing is read unless {@code columnChunkMetaData.getDictionaryPageOffset()} is positive.
 * Otherwise {@code dataReader} is advanced to that offset, a page header is read from {@code f},
 * the read is reported through {@code updateStats} under the label "Page Header", and the header
 * is handed to {@code readDictionaryPage}.
 *
 * @param parentStatus column reader forwarded to {@code readDictionaryPage}
 * @param columnChunkMetaData metadata supplying the dictionary page offset
 * @param f stream the page header is read from
 * @throws IOException declared for the stream operations performed here
 */
protected void loadDictionaryIfExists(final org.apache.drill.exec.store.parquet.columnreaders.ColumnReader<?> parentStatus, final ColumnChunkMetaData columnChunkMetaData, final DirectBufInputStream f) throws IOException {
    final Stopwatch headerTimer = Stopwatch.createUnstarted();
    final long dictionaryOffset = columnChunkMetaData.getDictionaryPageOffset();
    if (dictionaryOffset <= 0) {
        // A non-positive offset is treated as "no dictionary page".
        return;
    }

    // Move forward from the current position to where the dictionary page begins.
    dataReader.skip(dictionaryOffset - dataReader.getPos());
    final long headerStart = dataReader.getPos();

    // Only the header read itself is timed.
    headerTimer.start();
    final PageHeader dictionaryHeader = Util.readPageHeader(f);
    final long readNanos = headerTimer.elapsed(TimeUnit.NANOSECONDS);

    // Header size is measured as the distance dataReader advanced during the read.
    final long headerLength = dataReader.getPos() - headerStart;
    updateStats(dictionaryHeader, "Page Header", headerStart, readNanos, headerLength, headerLength);

    assert dictionaryHeader.type == PageType.DICTIONARY_PAGE;
    readDictionaryPage(dictionaryHeader, parentStatus);
}
Also used : PageHeader(org.apache.parquet.format.PageHeader) Stopwatch(com.google.common.base.Stopwatch)

Example 2 with PageHeader

use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.

the class ParquetFileReader method readDictionary.

/**
 * Reads the dictionary page of the given column chunk and returns it decompressed.
 *
 * <p>Returns {@code null} when the chunk's encodings contain neither
 * {@code PLAIN_DICTIONARY} nor {@code RLE_DICTIONARY}, or when the page header found at the
 * chunk's starting position has no dictionary page header set.
 *
 * @param meta the column chunk metadata to read the dictionary for
 * @return the uncompressed {@link DictionaryPage}, or {@code null} if the chunk has none
 * @throws IOException declared for the seek, read and decompress operations performed here
 */
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
    final boolean dictionaryEncoded = meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY) || meta.getEncodings().contains(Encoding.RLE_DICTIONARY);
    if (!dictionaryEncoded) {
        return null;
    }

    // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
    final long chunkStart = meta.getStartingPos();
    if (f.getPos() != chunkStart) {
        f.seek(chunkStart);
    }

    final PageHeader firstHeader = Util.readPageHeader(f);
    if (!firstHeader.isSetDictionary_page_header()) {
        // TODO: should this complain?
        return null;
    }

    final DictionaryPage compressed = readCompressedDictionary(firstHeader, f);
    final BytesInputDecompressor codec = options.getCodecFactory().getDecompressor(meta.getCodec());
    // Only the payload is decompressed; size and encoding are carried over unchanged.
    final BytesInput payload = codec.decompress(compressed.getBytes(), compressed.getUncompressedSize());
    return new DictionaryPage(payload, compressed.getDictionarySize(), compressed.getEncoding());
}
Also used : PageHeader(org.apache.parquet.format.PageHeader) DictionaryPageHeader(org.apache.parquet.format.DictionaryPageHeader) DataPageHeader(org.apache.parquet.format.DataPageHeader) BytesInputDecompressor(org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor) DictionaryPage(org.apache.parquet.column.page.DictionaryPage)

Example 3 with PageHeader

use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.

the class ParquetMetadataConverter method writeDictionaryPageHeader.

/**
 * Builds a {@code DICTIONARY_PAGE} page header and writes it to the given stream.
 *
 * @param uncompressedSize uncompressed size passed to the {@link PageHeader} constructor
 * @param compressedSize compressed size passed to the {@link PageHeader} constructor
 * @param valueCount value count passed to the {@link DictionaryPageHeader} constructor
 * @param valuesEncoding encoding of the dictionary values, converted via {@code getEncoding}
 * @param to stream the header is written to
 * @throws IOException declared for the write performed by {@code writePageHeader}
 */
public void writeDictionaryPageHeader(int uncompressedSize, int compressedSize, int valueCount, org.apache.parquet.column.Encoding valuesEncoding, OutputStream to) throws IOException {
    final PageHeader header = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
    final DictionaryPageHeader dictionaryHeader = new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding));
    header.setDictionary_page_header(dictionaryHeader);
    writePageHeader(header, to);
}
Also used : PageHeader(org.apache.parquet.format.PageHeader) Util.writePageHeader(org.apache.parquet.format.Util.writePageHeader) DictionaryPageHeader(org.apache.parquet.format.DictionaryPageHeader) DataPageHeader(org.apache.parquet.format.DataPageHeader)

Example 4 with PageHeader

use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.

the class ParquetMetadataConverter method newDataPageHeader.

/**
 * Creates a {@code DATA_PAGE} page header carrying a {@link DataPageHeader}.
 *
 * <p>Statistics are attached to the data page header only when {@code statistics.isEmpty()}
 * is false.
 *
 * @param uncompressedSize uncompressed size passed to the {@link PageHeader} constructor
 * @param compressedSize compressed size passed to the {@link PageHeader} constructor
 * @param valueCount value count passed to the {@link DataPageHeader} constructor
 * @param statistics page statistics, converted via {@code toParquetStatistics} when non-empty
 * @param rlEncoding repetition-level encoding, converted via {@code getEncoding}
 * @param dlEncoding definition-level encoding, converted via {@code getEncoding}
 * @param valuesEncoding values encoding, converted via {@code getEncoding}
 * @return the populated page header
 */
private PageHeader newDataPageHeader(int uncompressedSize, int compressedSize, int valueCount, org.apache.parquet.column.statistics.Statistics statistics, org.apache.parquet.column.Encoding rlEncoding, org.apache.parquet.column.Encoding dlEncoding, org.apache.parquet.column.Encoding valuesEncoding) {
    PageHeader header = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
    // TODO: pageHeader.crc = ...;
    // Constructor argument order: values encoding, then definition level, then repetition level.
    DataPageHeader dataHeader = new DataPageHeader(valueCount, getEncoding(valuesEncoding), getEncoding(dlEncoding), getEncoding(rlEncoding));
    if (!statistics.isEmpty()) {
        dataHeader.setStatistics(toParquetStatistics(statistics));
    }
    header.setData_page_header(dataHeader);
    return header;
}
Also used : PageHeader(org.apache.parquet.format.PageHeader) Util.writePageHeader(org.apache.parquet.format.Util.writePageHeader) DictionaryPageHeader(org.apache.parquet.format.DictionaryPageHeader) DataPageHeader(org.apache.parquet.format.DataPageHeader)

Example 5 with PageHeader

use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.

the class ParquetMetadataConverter method newDataPageV2Header.

/**
 * Creates a {@code DATA_PAGE_V2} page header carrying a {@link DataPageHeaderV2}.
 *
 * <p>Statistics are attached to the V2 header only when {@code statistics.isEmpty()} is false.
 *
 * @param uncompressedSize uncompressed size passed to the {@link PageHeader} constructor
 * @param compressedSize compressed size passed to the {@link PageHeader} constructor
 * @param valueCount value count passed to the {@link DataPageHeaderV2} constructor
 * @param nullCount null count passed to the {@link DataPageHeaderV2} constructor
 * @param rowCount row count passed to the {@link DataPageHeaderV2} constructor
 * @param statistics page statistics, converted via {@code toParquetStatistics} when non-empty
 * @param dataEncoding data encoding, converted via {@code getEncoding}
 * @param rlByteLength byte length of the repetition levels
 * @param dlByteLength byte length of the definition levels
 * @return the populated page header
 */
private PageHeader newDataPageV2Header(int uncompressedSize, int compressedSize, int valueCount, int nullCount, int rowCount, org.apache.parquet.column.statistics.Statistics<?> statistics, org.apache.parquet.column.Encoding dataEncoding, int rlByteLength, int dlByteLength) {
    // TODO: pageHeader.crc = ...;
    PageHeader header = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize);
    // Note the constructor takes the definition-level length before the repetition-level length.
    DataPageHeaderV2 v2Header = new DataPageHeaderV2(valueCount, nullCount, rowCount, getEncoding(dataEncoding), dlByteLength, rlByteLength);
    if (!statistics.isEmpty()) {
        v2Header.setStatistics(toParquetStatistics(statistics));
    }
    header.setData_page_header_v2(v2Header);
    return header;
}
Also used : PageHeader(org.apache.parquet.format.PageHeader) Util.writePageHeader(org.apache.parquet.format.Util.writePageHeader) DictionaryPageHeader(org.apache.parquet.format.DictionaryPageHeader) DataPageHeader(org.apache.parquet.format.DataPageHeader) DataPageHeaderV2(org.apache.parquet.format.DataPageHeaderV2)

Aggregations

PageHeader (org.apache.parquet.format.PageHeader): 7, DataPageHeader (org.apache.parquet.format.DataPageHeader): 4, DictionaryPageHeader (org.apache.parquet.format.DictionaryPageHeader): 4, Util.writePageHeader (org.apache.parquet.format.Util.writePageHeader): 4, Stopwatch (com.google.common.base.Stopwatch): 2, DrillBuf (io.netty.buffer.DrillBuf): 1, ByteArrayInputStream (java.io.ByteArrayInputStream): 1, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1, EOFException (java.io.EOFException): 1, DictionaryPage (org.apache.parquet.column.page.DictionaryPage): 1, BytesInputDecompressor (org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor): 1, DataPageHeaderV2 (org.apache.parquet.format.DataPageHeaderV2): 1, PageType (org.apache.parquet.format.PageType): 1, Util.readPageHeader (org.apache.parquet.format.Util.readPageHeader): 1, ParquetMetadataConverter.filterFileMetaDataByMidpoint (org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByMidpoint): 1, Test (org.junit.Test): 1