use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class TestColumnReaderImpl method test.
@Test
public void test() throws Exception {
MessageType schema = MessageTypeParser.parseMessageType("message test { required binary foo; }");
ColumnDescriptor col = schema.getColumns().get(0);
MemPageWriter pageWriter = new MemPageWriter();
ColumnWriterV2 columnWriterV2 = new ColumnWriterV2(col, pageWriter, ParquetProperties.builder().withDictionaryPageSize(1024).withWriterVersion(PARQUET_2_0).withPageSize(2048).build());
for (int i = 0; i < rows; i++) {
columnWriterV2.write(Binary.fromString("bar" + i % 10), 0, 0);
if ((i + 1) % 1000 == 0) {
columnWriterV2.writePage(i);
}
}
columnWriterV2.writePage(rows);
columnWriterV2.finalizeColumnChunk();
List<DataPage> pages = pageWriter.getPages();
int valueCount = 0;
int rowCount = 0;
for (DataPage dataPage : pages) {
valueCount += dataPage.getValueCount();
rowCount += ((DataPageV2) dataPage).getRowCount();
}
assertEquals(rows, rowCount);
assertEquals(rows, valueCount);
MemPageReader pageReader = new MemPageReader((long) rows, pages.iterator(), pageWriter.getDictionaryPage());
ValidatingConverter converter = new ValidatingConverter();
ColumnReader columnReader = new ColumnReaderImpl(col, pageReader, converter, VersionParser.parse(Version.FULL_VERSION));
for (int i = 0; i < rows; i++) {
assertEquals(0, columnReader.getCurrentRepetitionLevel());
assertEquals(0, columnReader.getCurrentDefinitionLevel());
columnReader.writeCurrentValueToConverter();
columnReader.consume();
}
assertEquals(rows, converter.count);
}
use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class TestMemPageStore method test.
@Test
public void test() throws IOException {
MemPageStore memPageStore = new MemPageStore(10);
ColumnDescriptor col = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2);
LongStatistics stats = new LongStatistics();
PageWriter pageWriter = memPageStore.getPageWriter(col);
pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
PageReader pageReader = memPageStore.getPageReader(col);
long totalValueCount = pageReader.getTotalValueCount();
System.out.println(totalValueCount);
int total = 0;
do {
DataPage readPage = pageReader.readPage();
total += readPage.getValueCount();
System.out.println(readPage);
// TODO: assert
} while (total < totalValueCount);
}
use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class TestMemColumn method testMemColumnBinary.
@Test
public void testMemColumnBinary() throws Exception {
MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
String[] col = new String[] { "foo", "bar" };
MemPageStore memPageStore = new MemPageStore(10);
ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
ColumnDescriptor path1 = mt.getColumnDescription(col);
ColumnDescriptor path = path1;
ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
columnWriter.write(Binary.fromString("42"), 0, 0);
memColumnsStore.flush();
ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
assertEquals(columnReader.getCurrentRepetitionLevel(), 0);
assertEquals(columnReader.getCurrentDefinitionLevel(), 0);
assertEquals(columnReader.getBinary().toStringUsingUTF8(), "42");
columnReader.consume();
}
}
use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class TestDictionary method initDicReader.
private DictionaryValuesReader initDicReader(ValuesWriter cw, PrimitiveTypeName type) throws IOException {
final DictionaryPage dictionaryPage = cw.toDictPageAndClose().copy();
final ColumnDescriptor descriptor = new ColumnDescriptor(new String[] { "foo" }, type, 0, 0);
final Dictionary dictionary = PLAIN.initDictionary(descriptor, dictionaryPage);
final DictionaryValuesReader cr = new DictionaryValuesReader(dictionary);
return cr;
}
use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class PrintFooter method add.
private static void add(ParquetMetadata footer) {
for (BlockMetaData blockMetaData : footer.getBlocks()) {
++blockCount;
MessageType schema = footer.getFileMetaData().getSchema();
recordCount += blockMetaData.getRowCount();
List<ColumnChunkMetaData> columns = blockMetaData.getColumns();
for (ColumnChunkMetaData columnMetaData : columns) {
ColumnDescriptor desc = schema.getColumnDescription(columnMetaData.getPath().toArray());
add(desc, columnMetaData.getValueCount(), columnMetaData.getTotalSize(), columnMetaData.getTotalUncompressedSize(), columnMetaData.getEncodings(), columnMetaData.getStatistics());
}
}
}
Aggregations