Use of org.apache.parquet.schema.ColumnOrder in the project parquet-mr by Apache.
From the class TestParquetMetadataConverter, method testColumnOrders:
// Round-trip check for column orders: the writer must emit TYPE_ORDER for every
// leaf column, and the reader must fall back to "undefined" both for an
// unrecognized thrift union member and for INT96 columns.
@Test
public void testColumnOrders() throws IOException {
  // Three leaf columns:
  //  - binary_col: plain column -> typeDefined order after the round trip
  //  - key: cleared below to simulate an unknown column order -> undefined
  //  - array_element: INT96, whose order is always undefined on read
  MessageType messageType = parseMessageType("message test {"
      + " optional binary binary_col;"
      + " optional group map_col (MAP) {"
      + " repeated group map (MAP_KEY_VALUE) {"
      + " required binary key (UTF8);"
      + " optional group list_col (LIST) {"
      + " repeated group list {"
      + " optional int96 array_element;"
      + " }"
      + " }"
      + " }"
      + " }"
      + "}");
  org.apache.parquet.hadoop.metadata.FileMetaData hadoopMetaData =
      new org.apache.parquet.hadoop.metadata.FileMetaData(messageType, new HashMap<String, String>(), null);
  ParquetMetadata parquetMetadata = new ParquetMetadata(hadoopMetaData, new ArrayList<BlockMetaData>());
  ParquetMetadataConverter metadataConverter = new ParquetMetadataConverter();
  FileMetaData thriftMetaData = metadataConverter.toParquetMetadata(1, parquetMetadata);

  // Every leaf column gets the TYPE_ORDER member of the thrift union on write.
  List<org.apache.parquet.format.ColumnOrder> orders = thriftMetaData.getColumn_orders();
  assertEquals(3, orders.size());
  for (int i = 0; i < orders.size(); ++i) {
    assertTrue(orders.get(i).isSetTYPE_ORDER());
  }

  // Clearing the union mimics thrift decoding a member that is not present in
  // the generated code (when the file contains a not-yet-supported column order).
  orders.get(1).clear();

  MessageType readBackSchema =
      metadataConverter.fromParquetMetadata(thriftMetaData).getFileMetaData().getSchema();
  List<ColumnDescriptor> descriptors = readBackSchema.getColumns();
  assertEquals(3, descriptors.size());
  assertEquals(ColumnOrder.typeDefined(), descriptors.get(0).getPrimitiveType().columnOrder());
  assertEquals(ColumnOrder.undefined(), descriptors.get(1).getPrimitiveType().columnOrder());
  assertEquals(ColumnOrder.undefined(), descriptors.get(2).getPrimitiveType().columnOrder());
}
Aggregations: see other usages of org.apache.parquet.schema.ColumnOrder in parquet-mr.