use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project cdap by caskdata.
the class ObjectDeserializer method deserializeField.
/**
 * Translate a field that fits a {@link Schema} field into a type that Hive understands.
 * For example, a ByteBuffer is allowed by the schema, but Hive only understands byte arrays, so all ByteBuffers must
 * be changed into byte arrays. Reflection is used to examine Java objects if the expected Hive type is a struct.
 *
 * @param field value of the field to deserialize.
 * @param typeInfo type of the field as expected by Hive.
 * @param schema schema of the field.
 * @return the translated field.
 * @throws NoSuchFieldException if a struct field was expected but not found in the object.
 * @throws IllegalAccessException if a struct field was not accessible.
 */
private Object deserializeField(Object field, TypeInfo typeInfo, Schema schema) throws NoSuchFieldException, IllegalAccessException {
  boolean isNullable = schema.isNullable();
  if (field == null) {
    if (isNullable) {
      return null;
    } else {
      throw new UnexpectedFormatException("Non-nullable field was null.");
    }
  }
  if (isNullable) {
    schema = schema.getNonNullable();
  }
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      return deserializePrimitive(field, (PrimitiveTypeInfo) typeInfo);
    case LIST:
      // HIVE!! some versions will turn bytes into array<tinyint> instead of binary... so special case it.
      // TODO: remove once CDAP-1556 is done
      ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
      if (isByteArray(listTypeInfo) && !(field instanceof Collection)) {
        return deserializeByteArray(field);
      }
      return deserializeList(field, (ListTypeInfo) typeInfo, schema.getComponentSchema());
    case MAP:
      return deserializeMap(field, (MapTypeInfo) typeInfo, schema.getMapSchema());
    case STRUCT:
      StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
      ArrayList<String> innerFieldNames = structTypeInfo.getAllStructFieldNames();
      ArrayList<TypeInfo> innerFieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
      return flattenRecord(field, innerFieldNames, innerFieldTypes, schema);
    case UNION:
      // TODO: decide what to do here
      return field;
  }
  return null;
}
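To illustrate the ByteBuffer special case handled above, here is a small standalone sketch (hypothetical class and method names, not part of the CDAP ObjectDeserializer) of how a ByteBuffer-backed field can be copied into the byte[] form that Hive expects for binary/array<tinyint> columns.
import java.nio.ByteBuffer;

public class ByteBufferToBytesSketch {

  // Copy the remaining bytes out of a ByteBuffer without consuming the caller's buffer.
  static byte[] toBytes(Object field) {
    if (field instanceof ByteBuffer) {
      ByteBuffer buffer = ((ByteBuffer) field).duplicate(); // duplicate so the original position is untouched
      byte[] bytes = new byte[buffer.remaining()];
      buffer.get(bytes);
      return bytes;
    }
    if (field instanceof byte[]) {
      return (byte[]) field; // already in the form Hive understands
    }
    throw new IllegalArgumentException("Expected ByteBuffer or byte[], got " + field.getClass());
  }

  public static void main(String[] args) {
    byte[] out = toBytes(ByteBuffer.wrap(new byte[] { 1, 2, 3 }));
    System.out.println(out.length); // prints 3
  }
}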
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class ExprNodeConverter method visitFieldAccess.
/**
 * TODO: Handle 1) cast, 2) Windowing Agg Call.
 *
 * Handles expressions like struct(key,value).key.
 * Follows the same rules as TypeCheckProcFactory::getXpathOrFuncExprNodeDesc(),
 * which is the equivalent of parsing such an expression from the AST.
 */
@Override
public ExprNodeDesc visitFieldAccess(RexFieldAccess fieldAccess) {
  ExprNodeDesc parent = fieldAccess.getReferenceExpr().accept(this);
  String child = fieldAccess.getField().getName();
  TypeInfo parentType = parent.getTypeInfo();
  // Allow accessing a field of list element structs directly from a list
  boolean isList = (parentType.getCategory() == ObjectInspector.Category.LIST);
  if (isList) {
    parentType = ((ListTypeInfo) parentType).getListElementTypeInfo();
  }
  TypeInfo t = ((StructTypeInfo) parentType).getStructFieldTypeInfo(child);
  return new ExprNodeFieldDesc(t, parent, child, isList);
}
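The field access above ultimately reduces to a name lookup on a StructTypeInfo. A minimal, self-contained sketch of that lookup using only the public typeinfo API (the struct layout and class name here are illustrative, not taken from the Hive sources):
import java.util.Arrays;

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class StructFieldLookupSketch {

  public static void main(String[] args) {
    // Build struct<key:string, value:int> the same way Hive composes struct types internally
    StructTypeInfo struct = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
        Arrays.asList("key", "value"),
        Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo));

    // Resolving struct(key,value).key is a field-name lookup, as in visitFieldAccess above
    TypeInfo keyType = struct.getStructFieldTypeInfo("key");
    System.out.println(keyType.getTypeName()); // prints string
  }
}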
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class VectorizedParquetRecordReader method buildVectorizedParquetReader.
// Build VectorizedParquetColumnReader via Hive typeInfo and Parquet schema
private VectorizedColumnReader buildVectorizedParquetReader(TypeInfo typeInfo, Type type, PageReadStore pages, List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, int depth) throws IOException {
  List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors);
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      if (fileSchema.getColumns().contains(descriptors.get(0))) {
        return new VectorizedPrimitiveColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type, typeInfo);
      } else {
        // Support for schema evolution
        return new VectorizedDummyColumnReader();
      }
    case STRUCT:
      StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
      List<VectorizedColumnReader> fieldReaders = new ArrayList<>();
      List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
      List<Type> types = type.asGroupType().getFields();
      for (int i = 0; i < fieldTypes.size(); i++) {
        VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, skipTimestampConversion, depth + 1);
        if (r != null) {
          fieldReaders.add(r);
        } else {
throw new RuntimeException("Fail to build Parquet vectorized reader based on Hive type " + fieldTypes.get(i).getTypeName() + " and Parquet type" + types.get(i).toString());
        }
      }
      return new VectorizedStructColumnReader(fieldReaders);
    case LIST:
      checkListColumnSupport(((ListTypeInfo) typeInfo).getListElementTypeInfo());
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      return new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, getElementType(type), typeInfo);
    case MAP:
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      // Handle the different Map definitions in Parquet, e.g.:
      // definition with 1 group:
      //   repeated group map (MAP_KEY_VALUE)
      //     {required binary key (UTF8); optional binary value (UTF8);}
      // definition with 2 groups:
      //   optional group m1 (MAP) {
      //     repeated group map (MAP_KEY_VALUE)
      //       {required binary key (UTF8); optional binary value (UTF8);}
      //   }
      int nestGroup = 0;
      GroupType groupType = type.asGroupType();
      // Descend into nested group types until the key/value fields are reached, up to MAP_DEFINITION_LEVEL_MAX levels.
      while (groupType.getFieldCount() < 2) {
        if (nestGroup > MAP_DEFINITION_LEVEL_MAX) {
throw new RuntimeException("More than " + MAP_DEFINITION_LEVEL_MAX + " level is found in Map definition, " + "Failed to get the field types for Map with type " + type);
        }
        groupType = groupType.getFields().get(0).asGroupType();
        nestGroup++;
      }
      List<Type> kvTypes = groupType.getFields();
      VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, kvTypes.get(0), typeInfo);
      VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader(descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion, kvTypes.get(1), typeInfo);
      return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader);
    case UNION:
    default:
      throw new RuntimeException("Unsupported category " + typeInfo.getCategory().name());
  }
}
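To make the nested-group descent in the MAP branch concrete, the following standalone sketch (assuming the org.apache.parquet.schema API and an illustrative schema; it is not part of the Hive reader) parses the two-group MAP layout from the comment above and walks down until the key/value pair is visible.
import java.util.List;

import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.Type;

public class MapGroupDescentSketch {

  public static void main(String[] args) {
    // A MAP declared with the "2 groups" layout described in the reader's comment
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example {\n"
      + "  optional group m1 (MAP) {\n"
      + "    repeated group map (MAP_KEY_VALUE) {\n"
      + "      required binary key (UTF8);\n"
      + "      optional binary value (UTF8);\n"
      + "    }\n"
      + "  }\n"
      + "}");

    // Descend through single-field wrapper groups, mirroring the
    // while (groupType.getFieldCount() < 2) loop in buildVectorizedParquetReader
    GroupType groupType = schema.getType("m1").asGroupType();
    while (groupType.getFieldCount() < 2) {
      groupType = groupType.getFields().get(0).asGroupType();
    }
    List<Type> kvTypes = groupType.getFields();
    System.out.println(kvTypes.get(0).getName() + ", " + kvTypes.get(1).getName()); // prints key, value
  }
}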
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class SampleHBasePredicateDecomposer method getScanRange.
@Override
public HBaseScanRange getScanRange(List<IndexSearchCondition> searchConditions) throws Exception {
  Map<String, List<IndexSearchCondition>> fieldConds = new HashMap<String, List<IndexSearchCondition>>();
  for (IndexSearchCondition condition : searchConditions) {
    String fieldName = condition.getFields()[0];
    List<IndexSearchCondition> fieldCond = fieldConds.get(fieldName);
    if (fieldCond == null) {
      fieldConds.put(fieldName, fieldCond = new ArrayList<IndexSearchCondition>());
    }
    fieldCond.add(condition);
  }
  List<Filter> filters = new ArrayList<Filter>();
  HBaseScanRange range = new HBaseScanRange();
  StructTypeInfo type = (StructTypeInfo) keyMapping.columnType;
  for (String name : type.getAllStructFieldNames()) {
    List<IndexSearchCondition> fieldCond = fieldConds.get(name);
    if (fieldCond == null || fieldCond.size() > 2) {
      continue;
    }
    for (IndexSearchCondition condition : fieldCond) {
      if (condition.getConstantDesc().getValue() == null) {
        continue;
      }
      String comparisonOp = condition.getComparisonOp();
      String constantVal = String.valueOf(condition.getConstantDesc().getValue());
      byte[] valueAsBytes = toBinary(constantVal, FIXED_LENGTH, false, false);
      if (comparisonOp.endsWith("UDFOPEqualOrGreaterThan")) {
        filters.add(new RowFilter(CompareOp.GREATER_OR_EQUAL, new BinaryComparator(valueAsBytes)));
      } else if (comparisonOp.endsWith("UDFOPGreaterThan")) {
        filters.add(new RowFilter(CompareOp.GREATER, new BinaryComparator(valueAsBytes)));
      } else if (comparisonOp.endsWith("UDFOPEqualOrLessThan")) {
        filters.add(new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(valueAsBytes)));
      } else if (comparisonOp.endsWith("UDFOPLessThan")) {
        filters.add(new RowFilter(CompareOp.LESS, new BinaryComparator(valueAsBytes)));
      } else {
        throw new IOException(comparisonOp + " is not a supported comparison operator");
      }
    }
  }
  if (!filters.isEmpty()) {
    range.addFilter(new FilterList(Operator.MUST_PASS_ALL, filters));
  }
  return range;
}
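As a rough illustration of what the loop above builds, here is a self-contained sketch using only the plain HBase client filter API (the 'aaa'/'zzz' bounds and class name are made up) of the filter stack produced for a key >= 'aaa' AND key < 'zzz' predicate.
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class RowRangeFilterSketch {

  public static void main(String[] args) {
    List<Filter> filters = new ArrayList<Filter>();
    // key >= 'aaa' maps to GREATER_OR_EQUAL and key < 'zzz' maps to LESS,
    // just like the UDFOPEqualOrGreaterThan / UDFOPLessThan branches above
    filters.add(new RowFilter(CompareOp.GREATER_OR_EQUAL, new BinaryComparator(Bytes.toBytes("aaa"))));
    filters.add(new RowFilter(CompareOp.LESS, new BinaryComparator(Bytes.toBytes("zzz"))));

    // Both row-key conditions must hold, matching Operator.MUST_PASS_ALL in getScanRange
    FilterList rowKeyRange = new FilterList(Operator.MUST_PASS_ALL, filters);
    System.out.println(rowKeyRange.getFilters().size()); // prints 2
  }
}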
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class HCatRecordObjectInspectorFactory method getHCatRecordObjectInspector.
/**
 * Returns an HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into.
 * @param typeInfo Type definition for the record to look into
 * @return appropriate HCatRecordObjectInspector
 * @throws SerDeException if the type definition is not a struct type
 */
public static HCatRecordObjectInspector getHCatRecordObjectInspector(StructTypeInfo typeInfo) throws SerDeException {
  HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo);
  if (oi == null) {
    LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName());
    switch (typeInfo.getCategory()) {
      case STRUCT:
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
        List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
        List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
        for (int i = 0; i < fieldTypeInfos.size(); i++) {
          fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
        }
        oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors);
        break;
      default:
        // anything else is an error: HCatRecord expects a struct type, so throw rather than return an inspector.
        throw new SerDeException("TypeInfo [" + typeInfo.getTypeName() + "] was not of struct type - HCatRecord expected struct type, got [" + typeInfo.getCategory().toString() + "]");
    }
    cachedHCatRecordObjectInspectors.put(typeInfo, oi);
  }
  return oi;
}
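For comparison with the HCatRecord-specific inspector above, a minimal sketch of the general TypeInfo-to-ObjectInspector flow using only the stock serde2 utilities (the struct<name:string,age:int> type string is illustrative):
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructInspectorSketch {

  public static void main(String[] args) {
    // Parse a struct type definition from a Hive type string
    StructTypeInfo typeInfo =
        (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,age:int>");

    // Build a standard Java object inspector for that struct (not the HCatRecord-specific one)
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    StructObjectInspector soi = (StructObjectInspector) oi;

    System.out.println(typeInfo.getAllStructFieldNames()); // prints [name, age]
    System.out.println(soi.getAllStructFieldRefs().size()); // prints 2
  }
}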