Use of org.apache.parquet.format.PageHeader in project parquet-mr by apache.
From the class TestParquetMetadataConverter, method testPageHeader.
/**
 * Round-trips a data-page header through {@code writePageHeader} and {@code readPageHeader}
 * using in-memory streams, and checks that the header read back equals the one written.
 */
@Test
public void testPageHeader() throws IOException {
  final int uncompressedSize = 20;
  final int compressedSize = 10;
  PageHeader expected = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);

  // Serialize the header into an in-memory buffer.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  writePageHeader(expected, sink);

  // Deserialize from the exact bytes that were written.
  byte[] serialized = sink.toByteArray();
  PageHeader actual = readPageHeader(new ByteArrayInputStream(serialized));

  assertEquals(expected, actual);
}
Use of org.apache.parquet.format.PageHeader in project drill by axbaretto.
From the class PageReader, method loadDictionaryIfExists.
/**
 * Loads the dictionary page of a column chunk when the chunk metadata reports one.
 *
 * <p>If {@code columnChunkMetaData.getDictionaryPageOffset()} is not positive, nothing is read.
 * Otherwise {@code dataReader} is advanced to that offset, the page header is read from
 * {@code f}, read statistics are recorded via {@code updateStats}, and the page is handed to
 * {@code readDictionaryPage}.
 *
 * @param parentStatus column reader passed through to {@code readDictionaryPage}
 * @param columnChunkMetaData metadata supplying the dictionary page offset
 * @param f stream the page header is read from
 * @throws EOFException if the stream ends before the dictionary page offset is reached
 * @throws IOException if skipping or reading fails
 */
protected void loadDictionaryIfExists(final org.apache.drill.exec.store.parquet.columnreaders.ColumnReader<?> parentStatus, final ColumnChunkMetaData columnChunkMetaData, final DirectBufInputStream f) throws IOException {
  Stopwatch timer = Stopwatch.createUnstarted();
  final long dictionaryPageOffset = columnChunkMetaData.getDictionaryPageOffset();
  if (dictionaryPageOffset > 0) {
    // NOTE(review): skipping is done on dataReader while the header is read from f;
    // presumably both refer to the same underlying stream - confirm against the caller.
    long bytesToSkip = dictionaryPageOffset - dataReader.getPos();
    while (bytesToSkip > 0) {
      long skipped = dataReader.skip(bytesToSkip);
      if (skipped > 0) {
        bytesToSkip -= skipped;
      } else {
        // no good way to handle this. Guava uses InputStream.available to check
        // if EOF is reached and because available is not reliable,
        // tries to read the rest of the data.
        final long posBeforeRead = dataReader.getPos();
        // Clamp the request so a distance larger than Integer.MAX_VALUE cannot wrap
        // around in the narrowing cast; the loop picks up whatever remains.
        final int chunk = (int) Math.min(bytesToSkip, (long) Integer.MAX_VALUE);
        DrillBuf skipBuf = dataReader.getNext(chunk);
        if (skipBuf == null) {
          throw new EOFException("End of File reached.");
        }
        skipBuf.release();
        // Account for the bytes consumed by the read; without this the loop would
        // keep skipping past the dictionary page offset. Relies on getPos() reflecting
        // bytes consumed by getNext(), as the statistics computation below also does.
        final long consumed = dataReader.getPos() - posBeforeRead;
        if (consumed <= 0) {
          // Neither skip() nor getNext() advanced the stream: treat as end of file
          // rather than looping forever.
          throw new EOFException("End of File reached.");
        }
        bytesToSkip -= consumed;
      }
    }
    long start = dataReader.getPos();
    timer.start();
    final PageHeader pageHeader = Util.readPageHeader(f);
    long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
    long pageHeaderBytes = dataReader.getPos() - start;
    this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes);
    assert pageHeader.type == PageType.DICTIONARY_PAGE;
    readDictionaryPage(pageHeader, parentStatus);
  }
}
Aggregations