Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project incubator-gobblin by apache.
The class HiveAvroORCQueryGenerator, method escapeHiveType.
/**
 * Escape the Hive nested field names.
 * @param type Primitive or nested Hive type.
 * @return Escaped Hive nested field.
 */
public static String escapeHiveType(String type) {
  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
  // Primitive
  if (ObjectInspector.Category.PRIMITIVE.equals(typeInfo.getCategory())) {
    return type;
  }
  // List
  else if (ObjectInspector.Category.LIST.equals(typeInfo.getCategory())) {
    ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
    return org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME + "<"
        + escapeHiveType(listTypeInfo.getListElementTypeInfo().getTypeName()) + ">";
  }
  // Map
  else if (ObjectInspector.Category.MAP.equals(typeInfo.getCategory())) {
    MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
    return org.apache.hadoop.hive.serde.serdeConstants.MAP_TYPE_NAME + "<"
        + escapeHiveType(mapTypeInfo.getMapKeyTypeInfo().getTypeName()) + ","
        + escapeHiveType(mapTypeInfo.getMapValueTypeInfo().getTypeName()) + ">";
  }
  // Struct
  else if (ObjectInspector.Category.STRUCT.equals(typeInfo.getCategory())) {
    StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
    List<String> allStructFieldNames = structTypeInfo.getAllStructFieldNames();
    List<TypeInfo> allStructFieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    StringBuilder sb = new StringBuilder();
    sb.append(serdeConstants.STRUCT_TYPE_NAME + "<");
    for (int i = 0; i < allStructFieldNames.size(); i++) {
      if (i > 0) {
        sb.append(",");
      }
      sb.append("`");
      sb.append(allStructFieldNames.get(i));
      sb.append("`");
      sb.append(":");
      sb.append(escapeHiveType(allStructFieldTypeInfos.get(i).getTypeName()));
    }
    sb.append(">");
    return sb.toString();
  }
  // Union
  else if (ObjectInspector.Category.UNION.equals(typeInfo.getCategory())) {
    UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
    List<TypeInfo> allUnionObjectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
    StringBuilder sb = new StringBuilder();
    sb.append(serdeConstants.UNION_TYPE_NAME + "<");
    for (int i = 0; i < allUnionObjectTypeInfos.size(); i++) {
      if (i > 0) {
        sb.append(",");
      }
      sb.append(escapeHiveType(allUnionObjectTypeInfos.get(i).getTypeName()));
    }
    sb.append(">");
    return sb.toString();
  } else {
    throw new RuntimeException("Unknown type encountered: " + type);
  }
}
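For context, here is a minimal standalone driver; it is not part of Gobblin, and the sample type string and expected output are illustrative assumptions. It uses only the Hive serde2 classes to show what escapeHiveType sees when it reaches the LIST branch: the type string parses into a ListTypeInfo, and the recursion descends into the element type, where struct field names pick up back-tick escaping.

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Illustrative driver (not part of Gobblin): shows how a nested type string
// is parsed into a ListTypeInfo before escapeHiveType recurses into it.
public class EscapeHiveTypeDemo {
  public static void main(String[] args) {
    String type = "array<struct<user_id:int,user_name:string>>";
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
    ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
    // The element type is what escapeHiveType recurses on; the struct field
    // names inside it are the parts that end up back-tick escaped.
    System.out.println(listTypeInfo.getListElementTypeInfo().getTypeName());
    // Escaped form produced by escapeHiveType for this input (illustrative):
    // array<struct<`user_id`:int,`user_name`:string>>
  }
}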
Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project incubator-gobblin by apache.
The class TypeInfoToSchemaParser, method parseSchemaFromTypeInfo.
Schema parseSchemaFromTypeInfo(TypeInfo typeInfo, String recordNamespace, String recordName) {
  Category c = typeInfo.getCategory();
  Schema schema;
  switch (c) {
    case STRUCT:
      schema = this.parseSchemaFromStruct((StructTypeInfo) typeInfo, recordNamespace, recordName);
      break;
    case LIST:
      schema = this.parseSchemaFromList((ListTypeInfo) typeInfo, recordNamespace, recordName);
      break;
    case MAP:
      schema = this.parseSchemaFromMap((MapTypeInfo) typeInfo, recordNamespace, recordName);
      break;
    case PRIMITIVE:
      schema = this.parseSchemaFromPrimitive((PrimitiveTypeInfo) typeInfo);
      break;
    case UNION:
      schema = this.parseSchemaFromUnion((UnionTypeInfo) typeInfo, recordNamespace, recordName);
      break;
    default:
      throw new UnsupportedOperationException("Conversion from " + c + " not supported");
  }
  return this._mkFieldsOptional ? wrapInNullableUnion(schema) : schema;
}
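The LIST case delegates to a private parseSchemaFromList that is not shown on this page. The sketch below is an assumed, simplified stand-in that illustrates the mapping involved: a Hive LIST type becomes an Avro array schema whose items schema is derived from the element TypeInfo. Only a string element is handled here to keep it short; the real parser recurses for arbitrary element types.

import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Standalone sketch (assumed, not the project's parseSchemaFromList): a Hive
// LIST type maps to an Avro array whose element schema comes from the element TypeInfo.
public class ListToAvroSketch {
  static Schema arraySchemaFor(ListTypeInfo listTypeInfo) {
    TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
    // Only a string element is handled in this sketch; the real parser recurses.
    if (!"string".equals(elementTypeInfo.getTypeName())) {
      throw new UnsupportedOperationException("Sketch only handles array<string>");
    }
    return Schema.createArray(Schema.create(Schema.Type.STRING));
  }

  public static void main(String[] args) {
    ListTypeInfo listTypeInfo =
        (ListTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("array<string>");
    System.out.println(arraySchemaFor(listTypeInfo)); // {"type":"array","items":"string"}
  }
}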
Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project phoenix by apache.
The class PhoenixObjectInspectorFactory, method createObjectInspector.
public static ObjectInspector createObjectInspector(TypeInfo type, LazySerDeParameters serdeParams) {
  ObjectInspector oi = null;
  if (LOG.isDebugEnabled()) {
    LOG.debug("Type : " + type);
  }
  switch (type.getCategory()) {
    case PRIMITIVE:
      switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
        case BOOLEAN:
          oi = new PhoenixBooleanObjectInspector();
          break;
        case BYTE:
          oi = new PhoenixByteObjectInspector();
          break;
        case SHORT:
          oi = new PhoenixShortObjectInspector();
          break;
        case INT:
          oi = new PhoenixIntObjectInspector();
          break;
        case LONG:
          oi = new PhoenixLongObjectInspector();
          break;
        case FLOAT:
          oi = new PhoenixFloatObjectInspector();
          break;
        case DOUBLE:
          oi = new PhoenixDoubleObjectInspector();
          break;
        case VARCHAR:
          // handled the same as STRING
        case STRING:
          oi = new PhoenixStringObjectInspector(serdeParams.isEscaped(), serdeParams.getEscapeChar());
          break;
        case CHAR:
          oi = new PhoenixCharObjectInspector((PrimitiveTypeInfo) type);
          break;
        case DATE:
          oi = new PhoenixDateObjectInspector();
          break;
        case TIMESTAMP:
          oi = new PhoenixTimestampObjectInspector();
          break;
        case DECIMAL:
          oi = new PhoenixDecimalObjectInspector((PrimitiveTypeInfo) type);
          break;
        case BINARY:
          oi = new PhoenixBinaryObjectInspector();
          break;
        default:
          throw new RuntimeException("Hive internal error. not supported data type " + ": " + type);
      }
      break;
    case LIST:
      if (LOG.isDebugEnabled()) {
        LOG.debug("List type started");
      }
      ObjectInspector listElementObjectInspector =
          createObjectInspector(((ListTypeInfo) type).getListElementTypeInfo(), serdeParams);
      if (LOG.isDebugEnabled()) {
        LOG.debug("List type ended");
      }
      oi = new PhoenixListObjectInspector(listElementObjectInspector, serdeParams.getSeparators()[0], serdeParams);
      break;
    default:
      throw new RuntimeException("Hive internal error. not supported data type : " + type);
  }
  return oi;
}
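The snippet below is an assumed caller, not taken from the Phoenix test suite, and the import path for PhoenixObjectInspectorFactory is an assumption about the phoenix-hive module layout. It shows the two inputs the factory needs for an array<int> column: the parsed TypeInfo and a LazySerDeParameters built from the standard columns/columns.types serde properties.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.phoenix.hive.objectinspector.PhoenixObjectInspectorFactory; // package path assumed

// Illustrative caller (assumed, not from the Phoenix tests): builds the two
// inputs the factory needs for an array<int> column and asks for its inspector.
public class PhoenixListInspectorDemo {
  public static void main(String[] args) throws Exception {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "scores");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "array<int>");
    LazySerDeParameters serdeParams =
        new LazySerDeParameters(new Configuration(), tbl, "PhoenixListInspectorDemo");

    TypeInfo listType = TypeInfoUtils.getTypeInfoFromTypeString("array<int>");
    ObjectInspector oi =
        PhoenixObjectInspectorFactory.createObjectInspector(listType, serdeParams);
    System.out.println(oi.getCategory()); // LIST
  }
}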
Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project hive by apache.
The class VectorizedParquetRecordReader, method buildVectorizedParquetReader.
// Build VectorizedParquetColumnReader via Hive typeInfo and Parquet schema
private VectorizedColumnReader buildVectorizedParquetReader(TypeInfo typeInfo, Type type, PageReadStore pages,
    List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, int depth) throws IOException {
  List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors);
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      if (fileSchema.getColumns().contains(descriptors.get(0))) {
        return new VectorizedPrimitiveColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)),
            skipTimestampConversion, type, typeInfo);
      } else {
        // Support for schema evolution
        return new VectorizedDummyColumnReader();
      }
    case STRUCT:
      StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
      List<VectorizedColumnReader> fieldReaders = new ArrayList<>();
      List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
      List<Type> types = type.asGroupType().getFields();
      for (int i = 0; i < fieldTypes.size(); i++) {
        VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors,
            skipTimestampConversion, depth + 1);
        if (r != null) {
          fieldReaders.add(r);
        } else {
          throw new RuntimeException("Fail to build Parquet vectorized reader based on Hive type "
              + fieldTypes.get(i).getTypeName() + " and Parquet type" + types.get(i).toString());
        }
      }
      return new VectorizedStructColumnReader(fieldReaders);
    case LIST:
      checkListColumnSupport(((ListTypeInfo) typeInfo).getListElementTypeInfo());
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      return new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)),
          skipTimestampConversion, getElementType(type), typeInfo);
    case MAP:
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      // Handle the different Map definitions in Parquet, e.g.:
      // definition with 1 group:
      //   repeated group map (MAP_KEY_VALUE)
      //     {required binary key (UTF8); optional binary value (UTF8);}
      // definition with 2 groups:
      //   optional group m1 (MAP) {
      //     repeated group map (MAP_KEY_VALUE)
      //       {required binary key (UTF8); optional binary value (UTF8);}
      //   }
      int nestGroup = 0;
      GroupType groupType = type.asGroupType();
      // Unwrap the group type until the key/value pair is reached, up to MAP_DEFINITION_LEVEL_MAX levels.
      while (groupType.getFieldCount() < 2) {
        if (nestGroup > MAP_DEFINITION_LEVEL_MAX) {
          throw new RuntimeException("More than " + MAP_DEFINITION_LEVEL_MAX + " level is found in Map definition, "
              + "Failed to get the field types for Map with type " + type);
        }
        groupType = groupType.getFields().get(0).asGroupType();
        nestGroup++;
      }
      List<Type> kvTypes = groupType.getFields();
      VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader(descriptors.get(0),
          pages.getPageReader(descriptors.get(0)), skipTimestampConversion, kvTypes.get(0), typeInfo);
      VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader(descriptors.get(1),
          pages.getPageReader(descriptors.get(1)), skipTimestampConversion, kvTypes.get(1), typeInfo);
      return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader);
    case UNION:
    default:
      throw new RuntimeException("Unsupported category " + typeInfo.getCategory().name());
  }
}
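The LIST branch first calls checkListColumnSupport on the element type before wiring up a VectorizedListColumnReader. The guard sketched below is an assumed, standalone stand-in, not the reader's actual check; it only illustrates the kind of element-type validation involved by rejecting non-primitive elements.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Standalone sketch (assumed, not the reader's actual checkListColumnSupport):
// a guard that only lets primitive list element types through.
public class ListElementGuardSketch {
  static void requirePrimitiveElement(TypeInfo elementType) {
    if (elementType.getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new UnsupportedOperationException(
          "Vectorized list read of element type " + elementType.getTypeName()
              + " is not supported in this sketch");
    }
  }

  public static void main(String[] args) {
    ListTypeInfo ok =
        (ListTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("array<bigint>");
    requirePrimitiveElement(ok.getListElementTypeInfo()); // passes

    ListTypeInfo nested =
        (ListTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("array<array<int>>");
    requirePrimitiveElement(nested.getListElementTypeInfo()); // throws
  }
}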
Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project hive by apache.
The class VectorSerializeRow, method serializeListWrite.
private void serializeListWrite(ListColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
  final ListTypeInfo typeInfo = (ListTypeInfo) field.typeInfo;
  final ListObjectInspector objectInspector = (ListObjectInspector) field.objectInspector;
  final ColumnVector childColumnVector = colVector.child;
  final Field elementField = field.children[0];
  final int offset = (int) colVector.offsets[adjustedBatchIndex];
  final int size = (int) colVector.lengths[adjustedBatchIndex];
  final ObjectInspector elementObjectInspector = objectInspector.getListElementObjectInspector();
  final List list = (List) vectorExtractRow.extractRowColumn(colVector, typeInfo, objectInspector, adjustedBatchIndex);
  serializeWrite.beginList(list);
  for (int i = 0; i < size; i++) {
    if (i > 0) {
      serializeWrite.separateList();
    }
    serializeWrite(childColumnVector, elementField, offset + i);
  }
  serializeWrite.finishList();
}
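To make the offsets/lengths bookkeeping concrete, the self-contained sketch below (not part of VectorSerializeRow) builds a tiny ListColumnVector by hand and walks one row's elements the same way the loop above does: read offset and size for the row, then index the child vector at offset + i.

import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

// Minimal sketch (not from VectorSerializeRow): shows the offsets/lengths
// bookkeeping that serializeListWrite relies on when it walks one row's list.
public class ListColumnVectorWalk {
  public static void main(String[] args) {
    LongColumnVector child = new LongColumnVector(6);
    ListColumnVector list = new ListColumnVector(2, child);

    // Row 0 holds [10, 20, 30]; row 1 holds [40, 50].
    child.vector[0] = 10; child.vector[1] = 20; child.vector[2] = 30;
    child.vector[3] = 40; child.vector[4] = 50;
    list.offsets[0] = 0; list.lengths[0] = 3;
    list.offsets[1] = 3; list.lengths[1] = 2;
    list.childCount = 5;

    int row = 1; // plays the role of adjustedBatchIndex in serializeListWrite
    int offset = (int) list.offsets[row];
    int size = (int) list.lengths[row];
    for (int i = 0; i < size; i++) {
      // Each element is read from the child vector at offset + i.
      System.out.println(child.vector[offset + i]); // prints 40 then 50
    }
  }
}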