use of org.apache.parquet.schema.Type in project hive by apache.
the class DataWritableReadSupport method getProjectedGroupFields.
/**
 * Resolves the requested column names against a Parquet group schema and returns the
 * Parquet type to project for each of them, in the order of {@code colNames}.
 *
 * Names are compared after lower-casing both the schema field name and the requested
 * column name. When no schema field matches, an optional BINARY type carrying the
 * requested column name is returned for that column instead.
 *
 * @param schema Group schema in which the column names are looked up.
 * @param colNames List of column names.
 * @param colTypes List of column types; the entry at index i is used for colNames.get(i).
 * @return List of Parquet types, one per requested column.
 */
private static List<Type> getProjectedGroupFields(GroupType schema, List<String> colNames, List<TypeInfo> colTypes) {
  List<Type> projectedTypes = new ArrayList<Type>();

  // Index the schema fields by lower-cased name; a later field replaces an earlier
  // one that lower-cases to the same key.
  Map<String, Type> fieldsByLowerName = new HashMap<>();
  for (Type field : schema.getFields()) {
    fieldsByLowerName.put(field.getName().toLowerCase(), field);
  }

  for (int index = 0; index < colNames.size(); index++) {
    // The type is fetched before the name, as in the iterator-based form.
    TypeInfo requestedType = colTypes.get(index);
    String requestedName = colNames.get(index);

    Type matchingField = fieldsByLowerName.get(requestedName.toLowerCase());
    if (matchingField != null) {
      projectedTypes.add(getProjectedType(requestedType, matchingField));
    } else {
      // Column is absent from the schema: stand in with an optional BINARY field.
      projectedTypes.add(Types.optional(PrimitiveTypeName.BINARY).named(requestedName));
    }
  }
  return projectedTypes;
}
use of org.apache.parquet.schema.Type in project hive by apache.
the class HiveParquetSchemaTestUtils method testConversion.
/**
 * Converts the given Hive column names and types to a Parquet message type and asserts
 * that it equals the message type parsed from {@code actualSchema}, then compares the
 * logical type annotation of every top-level field pairwise.
 *
 * @param columnNamesStr Hive column names, in the format accepted by createHiveColumnsFrom.
 * @param columnsTypeStr Hive column types, in the format accepted by createHiveTypeInfoFrom.
 * @param actualSchema Textual Parquet schema the conversion result is compared against.
 * @param conf Configuration handed to HiveSchemaConverter.convert.
 * @throws Exception if any of the helpers or the conversion fails.
 */
public static void testConversion(final String columnNamesStr, final String columnsTypeStr, final String actualSchema, final Configuration conf) throws Exception {
  final List<String> hiveNames = createHiveColumnsFrom(columnNamesStr);
  final List<TypeInfo> hiveTypes = createHiveTypeInfoFrom(columnsTypeStr);
  final MessageType converted = HiveSchemaConverter.convert(hiveNames, hiveTypes, conf);
  final MessageType parsedExpectation = MessageTypeParser.parseMessageType(actualSchema);

  assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + actualSchema, parsedExpectation, converted);

  // The annotations are compared field by field because, per the original note,
  // PrimitiveType.equals does not take them into account.
  final List<Type> expectedFields = parsedExpectation.getFields();
  final List<Type> convertedFields = converted.getFields();
  final int fieldTotal = expectedFields.size();
  for (int pos = 0; pos < fieldTotal; pos++) {
    final Type expectedField = expectedFields.get(pos);
    final LogicalTypeAnnotation wanted = expectedField.getLogicalTypeAnnotation();
    final Type convertedField = convertedFields.get(pos);
    final LogicalTypeAnnotation found = convertedField.getLogicalTypeAnnotation();
    assertEquals("Logical type annotations of the field do not match", wanted, found);
  }
}
use of org.apache.parquet.schema.Type in project hive by apache.
the class HiveParquetSchemaTestUtils method testLogicalTypeAnnotations.
/**
 * Converts the given Hive columns to a Parquet message type and checks the logical type
 * annotation of each top-level field against {@code expectedLogicalTypes}.
 *
 * A field whose name maps to a non-null expected annotation must carry an equal,
 * non-null annotation; any other field must have no annotation at all.
 *
 * @param hiveColumnNames Hive column names, in the format accepted by createHiveColumnsFrom.
 * @param hiveColumnTypes Hive column types, in the format accepted by createHiveTypeInfoFrom.
 * @param expectedLogicalTypes Expected annotation keyed by Parquet field name.
 * @param conf Configuration handed to HiveSchemaConverter.convert.
 * @throws Exception if any of the helpers or the conversion fails.
 */
public static void testLogicalTypeAnnotations(final String hiveColumnNames, final String hiveColumnTypes, final Map<String, LogicalTypeAnnotation> expectedLogicalTypes, Configuration conf) throws Exception {
  final List<String> names = createHiveColumnsFrom(hiveColumnNames);
  final List<TypeInfo> types = createHiveTypeInfoFrom(hiveColumnTypes);
  final MessageType converted = HiveSchemaConverter.convert(names, types, conf);

  for (Type field : converted.getFields()) {
    LogicalTypeAnnotation wanted = expectedLogicalTypes.get(field.getName());
    LogicalTypeAnnotation found = field.getLogicalTypeAnnotation();
    if (wanted == null) {
      // Nothing expected for this field, so nothing may be present.
      assertNull("The logical type annotation must be null.", found);
      continue;
    }
    assertNotNull("The logical type annotation cannot be null.", found);
    assertEquals("Logical type annotations of the field do not match", wanted, found);
  }
}
use of org.apache.parquet.schema.Type in project hive by apache.
the class TestHiveSchemaConverter method testListOriginalType.
/**
 * Verifies the three-level structure produced for a Hive {@code array<tinyint>} column:
 * an optional LIST-annotated field named "arrayCol", containing a repeated "bag" field
 * without annotation, containing an optional "array_element" field annotated as a
 * signed 8-bit integer.
 */
@Test
public void testListOriginalType() throws Exception {
  final MessageType converted = createSchema("array<tinyint>", "arrayCol");

  // Level 1: the column itself.
  assertEquals(1, converted.getFieldCount());
  Type listField = converted.getFields().get(0);
  checkField(listField, "arrayCol", Repetition.OPTIONAL, LogicalTypeAnnotation.listType());

  // Level 2: the repeated wrapper inside the list.
  assertEquals(1, listField.asGroupType().getFieldCount());
  Type repeatedField = listField.asGroupType().getFields().get(0);
  checkField(repeatedField, "bag", Repetition.REPEATED, null);

  // Level 3: the element carried by the wrapper.
  assertEquals(1, repeatedField.asGroupType().getFieldCount());
  Type elementField = repeatedField.asGroupType().getFields().get(0);
  checkField(elementField, "array_element", Repetition.OPTIONAL, LogicalTypeAnnotation.intType(8, true));
}
use of org.apache.parquet.schema.Type in project Gaffer by gchq.
the class GafferElementConverter method buildFieldToConverter.
/**
 * Builds a map from the position of each top-level field in {@code schema} to the
 * converter created for that field.
 *
 * Primitive fields get a PrimitiveConverter constructed from the simple name of the
 * primitive's Java type, a single-element array holding the field name, and the field's
 * original type. All other fields get a BypassGroupConverter constructed from the field
 * as a group type and the same kind of single-element name array.
 *
 * @param schema Message type whose fields are enumerated in declaration order.
 * @return Map keyed by zero-based field position.
 */
private Map<Integer, Converter> buildFieldToConverter(final MessageType schema) {
  final Map<Integer, Converter> convertersByPosition = new HashMap<>(fieldCount);
  int position = 0;
  for (final Type field : schema.getFields()) {
    final Converter converter;
    if (!field.isPrimitive()) {
      converter = new BypassGroupConverter(parquetColumnToObject, field.asGroupType(), new String[] { field.getName() });
    } else {
      converter = new PrimitiveConverter(
          parquetColumnToObject,
          field.asPrimitiveType().getPrimitiveTypeName().javaType.getSimpleName(),
          new String[] { field.getName() },
          field.getOriginalType());
    }
    convertersByPosition.put(position++, converter);
  }
  return convertersByPosition;
}
Aggregations