use of org.apache.parquet.format.PageHeader in project drill by apache.
the class PageReader method loadDictionaryIfExists.
protected void loadDictionaryIfExists(final org.apache.drill.exec.store.parquet.columnreaders.ColumnReader<?> parentStatus, final ColumnChunkMetaData columnChunkMetaData, final DirectBufInputStream f) throws IOException {
Stopwatch timer = Stopwatch.createUnstarted();
if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
dataReader.skip(columnChunkMetaData.getDictionaryPageOffset() - dataReader.getPos());
long start = dataReader.getPos();
timer.start();
final PageHeader pageHeader = Util.readPageHeader(f);
long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
long pageHeaderBytes = dataReader.getPos() - start;
this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes);
assert pageHeader.type == PageType.DICTIONARY_PAGE;
readDictionaryPage(pageHeader, parentStatus);
}
}
use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.
the class ParquetFileReader method readDictionary.
/**
* Reads and decompresses a dictionary page for the given column chunk.
*
* Returns null if the given column chunk has no dictionary page.
*
* @param meta a column's ColumnChunkMetaData to read the dictionary from
* @return an uncompressed DictionaryPage or null
* @throws IOException
*/
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
if (!meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY) && !meta.getEncodings().contains(Encoding.RLE_DICTIONARY)) {
return null;
}
// TODO: this should use getDictionaryPageOffset() but it isn't reliable.
if (f.getPos() != meta.getStartingPos()) {
f.seek(meta.getStartingPos());
}
PageHeader pageHeader = Util.readPageHeader(f);
if (!pageHeader.isSetDictionary_page_header()) {
// TODO: should this complain?
return null;
}
DictionaryPage compressedPage = readCompressedDictionary(pageHeader, f);
BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec());
return new DictionaryPage(decompressor.decompress(compressedPage.getBytes(), compressedPage.getUncompressedSize()), compressedPage.getDictionarySize(), compressedPage.getEncoding());
}
use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.
the class ParquetMetadataConverter method writeDictionaryPageHeader.
public void writeDictionaryPageHeader(int uncompressedSize, int compressedSize, int valueCount, org.apache.parquet.column.Encoding valuesEncoding, OutputStream to) throws IOException {
PageHeader pageHeader = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
pageHeader.setDictionary_page_header(new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding)));
writePageHeader(pageHeader, to);
}
use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.
the class ParquetMetadataConverter method newDataPageHeader.
private PageHeader newDataPageHeader(int uncompressedSize, int compressedSize, int valueCount, org.apache.parquet.column.statistics.Statistics statistics, org.apache.parquet.column.Encoding rlEncoding, org.apache.parquet.column.Encoding dlEncoding, org.apache.parquet.column.Encoding valuesEncoding) {
PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
// TODO: pageHeader.crc = ...;
pageHeader.setData_page_header(new DataPageHeader(valueCount, getEncoding(valuesEncoding), getEncoding(dlEncoding), getEncoding(rlEncoding)));
if (!statistics.isEmpty()) {
pageHeader.getData_page_header().setStatistics(toParquetStatistics(statistics));
}
return pageHeader;
}
use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.
the class ParquetMetadataConverter method newDataPageV2Header.
private PageHeader newDataPageV2Header(int uncompressedSize, int compressedSize, int valueCount, int nullCount, int rowCount, org.apache.parquet.column.statistics.Statistics<?> statistics, org.apache.parquet.column.Encoding dataEncoding, int rlByteLength, int dlByteLength) {
// TODO: pageHeader.crc = ...;
DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2(valueCount, nullCount, rowCount, getEncoding(dataEncoding), dlByteLength, rlByteLength);
if (!statistics.isEmpty()) {
dataPageHeaderV2.setStatistics(toParquetStatistics(statistics));
}
PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize);
pageHeader.setData_page_header_v2(dataPageHeaderV2);
return pageHeader;
}
Aggregations