use of org.apache.parquet.schema.GroupType in project hive by apache.
the class DataWritableReadSupport method getProjectedGroupFields.
/**
* Searchs column names by name on a given Parquet schema, and returns its corresponded
* Parquet schema types.
*
* @param schema Group schema where to search for column names.
* @param colNames List of column names.
* @param colTypes List of column types.
* @return List of GroupType objects of projected columns.
*/
private static List<Type> getProjectedGroupFields(GroupType schema, List<String> colNames, List<TypeInfo> colTypes) {
List<Type> schemaTypes = new ArrayList<Type>();
ListIterator<String> columnIterator = colNames.listIterator();
while (columnIterator.hasNext()) {
TypeInfo colType = colTypes.get(columnIterator.nextIndex());
String colName = columnIterator.next();
Type fieldType = getFieldTypeIgnoreCase(schema, colName);
if (fieldType == null) {
schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named(colName));
} else {
schemaTypes.add(getProjectedType(colType, fieldType));
}
}
return schemaTypes;
}
use of org.apache.parquet.schema.GroupType in project drill by apache.
the class Metadata method getColTypeInfo.
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
if (type.isPrimitive()) {
PrimitiveType primitiveType = (PrimitiveType) type;
int precision = 0;
int scale = 0;
if (primitiveType.getDecimalMetadata() != null) {
precision = primitiveType.getDecimalMetadata().getPrecision();
scale = primitiveType.getDecimalMetadata().getScale();
}
int repetitionLevel = schema.getMaxRepetitionLevel(path);
int definitionLevel = schema.getMaxDefinitionLevel(path);
return new ColTypeInfo(type.getOriginalType(), precision, scale, repetitionLevel, definitionLevel);
}
Type t = ((GroupType) type).getType(path[depth]);
return getColTypeInfo(schema, t, path, depth + 1);
}
use of org.apache.parquet.schema.GroupType in project drill by apache.
the class ParquetSchemaMerge method main.
public static void main(String[] args) {
MessageType message1;
MessageType message2;
PrimitiveType c = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "c");
GroupType b = new GroupType(Repetition.REQUIRED, "b");
GroupType a = new GroupType(Repetition.OPTIONAL, "a", b);
message1 = new MessageType("root", a);
PrimitiveType c2 = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "d");
GroupType b2 = new GroupType(Repetition.OPTIONAL, "b", c2);
GroupType a2 = new GroupType(Repetition.OPTIONAL, "a", b2);
message2 = new MessageType("root", a2);
MessageType message3 = message1.union(message2);
StringBuilder builder = new StringBuilder();
message3.writeToStringBuilder(builder, "");
System.out.println(builder);
}
use of org.apache.parquet.schema.GroupType in project hive by apache.
the class DataWritableWriter method createWriter.
/**
* Creates a writer for the specific object inspector. The returned writer will be used
* to call Parquet API for the specific data type.
* @param inspector The object inspector used to get the correct value type.
* @param type Type that contains information about the type schema.
* @return A ParquetWriter object used to call the Parquet API fo the specific data type.
*/
private DataWriter createWriter(ObjectInspector inspector, Type type) {
if (type.isPrimitive()) {
checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) inspector;
switch(primitiveInspector.getPrimitiveCategory()) {
case BOOLEAN:
return new BooleanDataWriter((BooleanObjectInspector) inspector);
case BYTE:
return new ByteDataWriter((ByteObjectInspector) inspector);
case SHORT:
return new ShortDataWriter((ShortObjectInspector) inspector);
case INT:
return new IntDataWriter((IntObjectInspector) inspector);
case LONG:
return new LongDataWriter((LongObjectInspector) inspector);
case FLOAT:
return new FloatDataWriter((FloatObjectInspector) inspector);
case DOUBLE:
return new DoubleDataWriter((DoubleObjectInspector) inspector);
case STRING:
return new StringDataWriter((StringObjectInspector) inspector);
case CHAR:
return new CharDataWriter((HiveCharObjectInspector) inspector);
case VARCHAR:
return new VarcharDataWriter((HiveVarcharObjectInspector) inspector);
case BINARY:
return new BinaryDataWriter((BinaryObjectInspector) inspector);
case TIMESTAMP:
return new TimestampDataWriter((TimestampObjectInspector) inspector);
case DECIMAL:
return new DecimalDataWriter((HiveDecimalObjectInspector) inspector);
case DATE:
return new DateDataWriter((DateObjectInspector) inspector);
default:
throw new IllegalArgumentException("Unsupported primitive data type: " + primitiveInspector.getPrimitiveCategory());
}
} else {
GroupType groupType = type.asGroupType();
OriginalType originalType = type.getOriginalType();
if (originalType != null && originalType.equals(OriginalType.LIST)) {
checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
return new ListDataWriter((ListObjectInspector) inspector, groupType);
} else if (originalType != null && originalType.equals(OriginalType.MAP)) {
checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
return new MapDataWriter((MapObjectInspector) inspector, groupType);
} else {
checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
return new StructDataWriter((StructObjectInspector) inspector, groupType);
}
}
}
use of org.apache.parquet.schema.GroupType in project hive by apache.
the class HiveSchemaConverter method convertMapType.
// An optional group containing a repeated anonymous group "map", containing
// 2 elements: "key", "value"
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) {
final Type keyType = convertType(ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
final Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo());
return ConversionPatterns.mapType(Repetition.OPTIONAL, name, keyType, valueType);
}
Aggregations