use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class DumpCommand method dump.
/**
 * Dumps row-group metadata and/or column data of a Parquet file to {@code out}.
 *
 * <p>When {@code showmd} is set, each row group gets a {@code "row group N"} heading, its
 * column-chunk details (via {@code MetadataUtils.showDetails}) and then one
 * {@code dump(out, store, column)} call per selected column for every page store read from
 * that row group.
 *
 * <p>When {@code showdt} is set, each selected column gets a heading with its type and
 * dot-joined path, then every row group of the file is read for that single column and handed
 * to {@code dump(out, crstore, column, page, total, offset)}; {@code offset} starts at 1 and
 * advances by the row count of each row group.
 *
 * <p>Every {@link ParquetFileReader} opened here is closed before the next one is created, so
 * a file with many row groups no longer leaves all but the last reader open.
 *
 * @param out         destination writer
 * @param meta        footer metadata of the file; supplies the row groups and file metadata
 * @param schema      file schema; supplies the column descriptors
 * @param inpath      path of the Parquet file to read
 * @param showmd      whether to print the per-row-group section
 * @param showdt      whether to print the per-column section
 * @param showColumns dot-joined column paths to restrict the output to, or {@code null} to
 *                    include every column
 * @throws IOException if reading the file or closing a reader fails
 */
public static void dump(PrettyPrintWriter out, ParquetMetadata meta, MessageType schema, Path inpath, boolean showmd, boolean showdt, Set<String> showColumns) throws IOException {
  Configuration conf = new Configuration();
  List<BlockMetaData> blocks = meta.getBlocks();

  // Select the columns to show; a null filter means "all columns of the schema".
  List<ColumnDescriptor> columns = schema.getColumns();
  if (showColumns != null) {
    columns = new ArrayList<ColumnDescriptor>();
    for (ColumnDescriptor column : schema.getColumns()) {
      String path = Joiner.on('.').skipNulls().join(column.getPath());
      if (showColumns.contains(path)) {
        columns.add(column);
      }
    }
  }

  if (showmd) {
    long group = 0;
    for (BlockMetaData block : blocks) {
      // Blank line between consecutive row groups, but not before the first one.
      if (group != 0) {
        out.println();
      }
      out.format("row group %d%n", group++);
      out.rule('-');

      // Apply the same column filter to this row group's column chunks.
      List<ColumnChunkMetaData> ccmds = block.getColumns();
      if (showColumns != null) {
        ccmds = new ArrayList<ColumnChunkMetaData>();
        for (ColumnChunkMetaData ccmd : block.getColumns()) {
          String path = Joiner.on('.').skipNulls().join(ccmd.getPath().toArray());
          if (showColumns.contains(path)) {
            ccmds.add(ccmd);
          }
        }
      }
      MetadataUtils.showDetails(out, ccmds);

      // One reader per row group; it is closed before moving on so readers do not pile up.
      List<BlockMetaData> rblocks = Collections.singletonList(block);
      ParquetFileReader reader = new ParquetFileReader(conf, meta.getFileMetaData(), inpath, rblocks, columns);
      try {
        PageReadStore store = reader.readNextRowGroup();
        while (store != null) {
          out.incrementTabLevel();
          for (ColumnDescriptor column : columns) {
            out.println();
            dump(out, store, column);
          }
          out.decrementTabLevel();
          store = reader.readNextRowGroup();
        }
        out.flushColumns();
      } finally {
        reader.close();
      }
    }
  }

  if (showdt) {
    boolean first = true;
    for (ColumnDescriptor column : columns) {
      // Separate this column from the previous one, or from the metadata section above.
      if (!first || showmd) {
        out.println();
      }
      first = false;
      out.format("%s %s%n", column.getType(), Joiner.on('.').skipNulls().join(column.getPath()));
      out.rule('-');

      // Reader is local to this column so a failed construction never re-closes an older reader.
      ParquetFileReader reader = null;
      try {
        long page = 1;
        long total = blocks.size();
        long offset = 1;
        reader = new ParquetFileReader(conf, meta.getFileMetaData(), inpath, blocks, Collections.singletonList(column));
        PageReadStore store = reader.readNextRowGroup();
        while (store != null) {
          ColumnReadStoreImpl crstore = new ColumnReadStoreImpl(store, new DumpGroupConverter(), schema, meta.getFileMetaData().getCreatedBy());
          dump(out, crstore, column, page++, total, offset);
          offset += store.getRowCount();
          store = reader.readNextRowGroup();
        }
        out.flushColumns();
      } finally {
        // Flush whatever was buffered even when reading failed part-way through.
        out.flushColumns();
        if (reader != null) {
          reader.close();
        }
      }
    }
  }
}
use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class MetadataUtils method showDetails.
/**
 * Prints a one-line description of a primitive field.
 *
 * <p>The line consists of {@code depth} dots followed by the field name, then its repetition
 * and primitive type name, an optional {@code O:<original type>} part when the field has an
 * original type, and, when {@code container} is given, an {@code R:<max repetition level>
 * D:<max definition level>} part looked up from the container schema.
 *
 * @param out       destination writer
 * @param type      primitive field to describe
 * @param depth     number of leading dots printed before the field name
 * @param container schema used to resolve the column descriptor, or {@code null} to omit levels
 * @param cpath     path of the enclosing fields; the field name is appended temporarily to
 *                  build the full column path and removed again before returning
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  final String fieldName = type.getName();
  final String label = Strings.repeat(".", depth) + fieldName;
  final OriginalType annotation = type.getOriginalType();

  out.format("%s: %s %s", label, type.getRepetition(), type.getPrimitiveTypeName());
  if (annotation != null) {
    out.format(" O:%s", annotation);
  }

  if (container != null) {
    // Build parent path + this field, then restore cpath to its previous contents.
    cpath.add(fieldName);
    final String[] fullPath = cpath.toArray(new String[cpath.size()]);
    cpath.remove(cpath.size() - 1);

    final ColumnDescriptor descriptor = container.getColumnDescription(fullPath);
    final int maxDefinition = descriptor.getMaxDefinitionLevel();
    final int maxRepetition = descriptor.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }

  out.println();
}
use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class DefaultValuesWriterFactoryTest method doTestValueWriter.
/**
 * Asks the default factory for a values writer for a mocked column of the given primitive type
 * and checks the writer's type against {@code expectedValueWriterClass}.
 *
 * @param typeName                 primitive type reported by the mocked column
 * @param version                  writer version passed to {@code getDefaultFactory}
 * @param enableDictionary         dictionary flag passed to {@code getDefaultFactory}
 * @param expectedValueWriterClass writer class handed to {@code validateWriterType}
 */
private void doTestValueWriter(PrimitiveTypeName typeName, WriterVersion version, boolean enableDictionary, Class<? extends ValuesWriter> expectedValueWriterClass) {
  final ColumnDescriptor column = getMockColumn(typeName);
  final ValuesWriter produced = getDefaultFactory(version, enableDictionary).newValuesWriter(column);
  validateWriterType(produced, expectedValueWriterClass);
}
use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class DefaultValuesWriterFactoryTest method getMockColumn.
/**
 * Creates a mocked {@link ColumnDescriptor} whose {@code getType()} returns {@code typeName}.
 *
 * @param typeName primitive type the mock should report
 * @return the stubbed column descriptor
 */
private ColumnDescriptor getMockColumn(PrimitiveTypeName typeName) {
  final ColumnDescriptor descriptor = mock(ColumnDescriptor.class);
  when(descriptor.getType()).thenReturn(typeName);
  return descriptor;
}
use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.
the class TestColumnReaderImpl method testOptional.
/**
 * Writes {@code rows} nulls to an optional binary column with {@code ColumnWriterV2}, checks
 * that the produced pages account for {@code rows} values and {@code rows} rows, then reads
 * the column back and verifies every entry has repetition level 0 and definition level 0 and
 * that the converter's {@code count} stays at 0.
 */
@Test
public void testOptional() throws Exception {
  final MessageType messageType = MessageTypeParser.parseMessageType("message test { optional binary foo; }");
  final ColumnDescriptor descriptor = messageType.getColumns().get(0);
  final MemPageWriter memWriter = new MemPageWriter();
  final ParquetProperties props = ParquetProperties.builder()
      .withDictionaryPageSize(1024)
      .withWriterVersion(PARQUET_2_0)
      .withPageSize(2048)
      .build();
  final ColumnWriterV2 writer = new ColumnWriterV2(descriptor, memWriter, props);

  // Write phase: one null per row, with a page written after every 1000th value.
  for (int row = 0; row < rows; row++) {
    writer.writeNull(0, 0);
    final boolean pageBoundary = (row + 1) % 1000 == 0;
    if (pageBoundary) {
      writer.writePage(row);
    }
  }
  writer.writePage(rows);
  writer.finalizeColumnChunk();

  // The pages together must cover exactly `rows` rows and `rows` values.
  final List<DataPage> writtenPages = memWriter.getPages();
  int totalValues = 0;
  int totalRows = 0;
  for (DataPage page : writtenPages) {
    totalValues += page.getValueCount();
    totalRows += ((DataPageV2) page).getRowCount();
  }
  assertEquals(rows, totalRows);
  assertEquals(rows, totalValues);

  // Read phase: every entry is expected at repetition level 0 and definition level 0.
  final MemPageReader memReader = new MemPageReader((long) rows, writtenPages.iterator(), memWriter.getDictionaryPage());
  final ValidatingConverter validating = new ValidatingConverter();
  final ColumnReader reader = new ColumnReaderImpl(descriptor, memReader, validating, VersionParser.parse(Version.FULL_VERSION));
  for (int row = 0; row < rows; row++) {
    assertEquals(0, reader.getCurrentRepetitionLevel());
    assertEquals(0, reader.getCurrentDefinitionLevel());
    reader.consume();
  }
  assertEquals(0, validating.count);
}
Aggregations