Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project hive by apache.
The class FunctionRegistry, method matchCost.
/**
 * Returns -1 if the passed type does not match the accepted type; otherwise returns
 * the cost (usually 0 for no conversion and 1 for a conversion).
 */
public static int matchCost(TypeInfo argumentPassed, TypeInfo argumentAccepted, boolean exact) {
  if (argumentAccepted.equals(argumentPassed)
      || TypeInfoUtils.doPrimitiveCategoriesMatch(argumentPassed, argumentAccepted)) {
    // matches
    return 0;
  }
  if (argumentPassed.equals(TypeInfoFactory.voidTypeInfo)) {
    // passing null matches everything
    return 0;
  }
  if (argumentPassed.getCategory().equals(Category.LIST)
      && argumentAccepted.getCategory().equals(Category.LIST)) {
    // lists are compatible if and only if their elements are compatible
    TypeInfo argumentPassedElement = ((ListTypeInfo) argumentPassed).getListElementTypeInfo();
    TypeInfo argumentAcceptedElement = ((ListTypeInfo) argumentAccepted).getListElementTypeInfo();
    return matchCost(argumentPassedElement, argumentAcceptedElement, exact);
  }
  if (argumentPassed.getCategory().equals(Category.MAP)
      && argumentAccepted.getCategory().equals(Category.MAP)) {
    // maps are compatible if and only if their keys and values are compatible
    TypeInfo argumentPassedKey = ((MapTypeInfo) argumentPassed).getMapKeyTypeInfo();
    TypeInfo argumentAcceptedKey = ((MapTypeInfo) argumentAccepted).getMapKeyTypeInfo();
    TypeInfo argumentPassedValue = ((MapTypeInfo) argumentPassed).getMapValueTypeInfo();
    TypeInfo argumentAcceptedValue = ((MapTypeInfo) argumentAccepted).getMapValueTypeInfo();
    int cost1 = matchCost(argumentPassedKey, argumentAcceptedKey, exact);
    int cost2 = matchCost(argumentPassedValue, argumentAcceptedValue, exact);
    if (cost1 < 0 || cost2 < 0) {
      return -1;
    }
    return Math.max(cost1, cost2);
  }
  if (argumentAccepted.equals(TypeInfoFactory.unknownTypeInfo)) {
    // accepting unknown matches everything, but there is a conversion cost
    return 1;
  }
  if (!exact && TypeInfoUtils.implicitConvertible(argumentPassed, argumentAccepted)) {
    return 1;
  }
  return -1;
}
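To make the cost semantics concrete, here is a minimal usage sketch calling matchCost on list types, assuming hive-exec is on the classpath; the expected results rely on Hive treating int as implicitly convertible to bigint.

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class MatchCostDemo {
    public static void main(String[] args) {
        TypeInfo listOfInt = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo);
        TypeInfo listOfLong = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.longTypeInfo);
        // Identical types match with no conversion: cost 0.
        System.out.println(FunctionRegistry.matchCost(listOfInt, listOfInt, false)); // 0
        // The LIST branch recurses into the elements; int is implicitly
        // convertible to bigint, so the match carries a conversion cost of 1.
        System.out.println(FunctionRegistry.matchCost(listOfInt, listOfLong, false)); // 1
        // With exact = true, implicit conversions are rejected: -1.
        System.out.println(FunctionRegistry.matchCost(listOfInt, listOfLong, true)); // -1
    }
}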
Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project nifi by apache.
The class NiFiOrcUtils, method convertToORCObject.
public static Object convertToORCObject(TypeInfo typeInfo, Object o) {
    if (o != null) {
        if (typeInfo instanceof UnionTypeInfo) {
            OrcUnion union = new OrcUnion();
            // Need to find which of the union types corresponds to the primitive object
            TypeInfo objectTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(
                    ObjectInspectorFactory.getReflectionObjectInspector(o.getClass(), ObjectInspectorFactory.ObjectInspectorOptions.JAVA));
            List<TypeInfo> unionTypeInfos = ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos();
            int index = 0;
            while (index < unionTypeInfos.size() && !unionTypeInfos.get(index).equals(objectTypeInfo)) {
                index++;
            }
            if (index < unionTypeInfos.size()) {
                union.set((byte) index, convertToORCObject(objectTypeInfo, o));
            } else {
                throw new IllegalArgumentException("Object Type for class " + o.getClass().getName() + " not in Union declaration");
            }
            return union;
        }
        if (o instanceof Integer) {
            return new IntWritable((int) o);
        }
        if (o instanceof Boolean) {
            return new BooleanWritable((boolean) o);
        }
        if (o instanceof Long) {
            return new LongWritable((long) o);
        }
        if (o instanceof Float) {
            return new FloatWritable((float) o);
        }
        if (o instanceof Double) {
            return new DoubleWritable((double) o);
        }
        if (o instanceof String || o instanceof Utf8 || o instanceof GenericData.EnumSymbol) {
            return new Text(o.toString());
        }
        if (o instanceof ByteBuffer) {
            return new BytesWritable(((ByteBuffer) o).array());
        }
        if (o instanceof int[]) {
            int[] intArray = (int[]) o;
            return Arrays.stream(intArray)
                    .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("int"), element))
                    .collect(Collectors.toList());
        }
        if (o instanceof long[]) {
            long[] longArray = (long[]) o;
            return Arrays.stream(longArray)
                    .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("bigint"), element))
                    .collect(Collectors.toList());
        }
        if (o instanceof float[]) {
            float[] floatArray = (float[]) o;
            return IntStream.range(0, floatArray.length)
                    .mapToDouble(i -> floatArray[i])
                    .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("float"), (float) element))
                    .collect(Collectors.toList());
        }
        if (o instanceof double[]) {
            double[] doubleArray = (double[]) o;
            return Arrays.stream(doubleArray)
                    .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("double"), element))
                    .collect(Collectors.toList());
        }
        if (o instanceof boolean[]) {
            boolean[] booleanArray = (boolean[]) o;
            return IntStream.range(0, booleanArray.length)
                    .map(i -> booleanArray[i] ? 1 : 0)
                    .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("boolean"), element == 1))
                    .collect(Collectors.toList());
        }
        if (o instanceof GenericData.Array) {
            GenericData.Array array = ((GenericData.Array) o);
            // The type information in this case is interpreted as a List
            TypeInfo listTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
            return array.stream().map((element) -> convertToORCObject(listTypeInfo, element)).collect(Collectors.toList());
        }
        if (o instanceof List) {
            return o;
        }
        if (o instanceof Map) {
            Map map = new HashMap();
            TypeInfo keyInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
            TypeInfo valueInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
            // Unions are not allowed as key/value types, so if we convert the key and value objects,
            // they should return Writable objects
            ((Map) o).forEach((key, value) -> {
                Object keyObject = convertToORCObject(keyInfo, key);
                Object valueObject = convertToORCObject(valueInfo, value);
                if (keyObject == null) {
                    throw new IllegalArgumentException("Map keys cannot be null");
                }
                map.put(keyObject, valueObject);
            });
            return map;
        }
        if (o instanceof GenericData.Record) {
            GenericData.Record record = (GenericData.Record) o;
            TypeInfo recordSchema = NiFiOrcUtils.getOrcField(record.getSchema());
            List<Schema.Field> recordFields = record.getSchema().getFields();
            if (recordFields != null) {
                Object[] fieldObjects = new Object[recordFields.size()];
                for (int i = 0; i < recordFields.size(); i++) {
                    Schema.Field field = recordFields.get(i);
                    Schema fieldSchema = field.schema();
                    Object fieldObject = record.get(field.name());
                    fieldObjects[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), fieldObject);
                }
                return NiFiOrcUtils.createOrcStruct(recordSchema, fieldObjects);
            }
        }
        throw new IllegalArgumentException("Error converting object of type " + o.getClass().getName() + " to ORC type " + typeInfo.getTypeName());
    } else {
        return null;
    }
}
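As a usage illustration, here is a minimal sketch converting a plain Java map into ORC-ready writables. The TypeInfo is parsed from a Hive type string; the NiFiOrcUtils import path is an assumption (it sits in org.apache.hadoop.hive.ql.io.orc in some NiFi versions) and may differ in yours.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils; // package path is an assumption; varies by NiFi version
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class OrcConversionDemo {
    public static void main(String[] args) {
        // Parse the Hive type of a map<string,int> column.
        TypeInfo mapType = TypeInfoUtils.getTypeInfoFromTypeString("map<string,int>");
        Map<String, Integer> counts = new HashMap<>();
        counts.put("alpha", 1);
        counts.put("beta", 2);
        // Per the branches above, String keys become Text and Integer values
        // become IntWritable, collected into a new HashMap.
        Object orcMap = NiFiOrcUtils.convertToORCObject(mapType, counts);
        System.out.println(orcMap);
    }
}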
Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project parquet-mr by apache.
The class ArrayWritableObjectInspector, method getObjectInspector.
private ObjectInspector getObjectInspector(final TypeInfo typeInfo) {
    if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
        return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
        return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
        return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
        return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
        return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
        return ParquetPrimitiveInspectorFactory.parquetStringInspector;
    } else if (typeInfo.getCategory().equals(Category.STRUCT)) {
        return new ArrayWritableObjectInspector((StructTypeInfo) typeInfo);
    } else if (typeInfo.getCategory().equals(Category.LIST)) {
        final TypeInfo subTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
        return new ParquetHiveArrayInspector(getObjectInspector(subTypeInfo));
    } else if (typeInfo.getCategory().equals(Category.MAP)) {
        final TypeInfo keyTypeInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
        final TypeInfo valueTypeInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
        if (keyTypeInfo.equals(TypeInfoFactory.stringTypeInfo) || keyTypeInfo.equals(TypeInfoFactory.byteTypeInfo)
                || keyTypeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
            return new DeepParquetHiveMapInspector(getObjectInspector(keyTypeInfo), getObjectInspector(valueTypeInfo));
        } else {
            return new StandardParquetHiveMapInspector(getObjectInspector(keyTypeInfo), getObjectInspector(valueTypeInfo));
        }
    } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
        throw new UnsupportedOperationException("timestamp not implemented yet");
    } else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
        return ParquetPrimitiveInspectorFactory.parquetByteInspector;
    } else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
        return ParquetPrimitiveInspectorFactory.parquetShortInspector;
    } else {
        throw new IllegalArgumentException("Unknown field info: " + typeInfo);
    }
}
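A minimal sketch of driving this inspector from a Hive type string follows; the ArrayWritableObjectInspector import path is an assumption and differs across parquet-hive versions.

import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; // path is an assumption; differs across parquet-hive versions
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class InspectorDemo {
    public static void main(String[] args) {
        // Struct type for a two-column row: a string and an array<double>.
        StructTypeInfo rowType = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(
                "struct<name:string,scores:array<double>>");
        // getObjectInspector is applied recursively: the scores column gets a
        // ParquetHiveArrayInspector wrapping writableDoubleObjectInspector.
        ArrayWritableObjectInspector inspector = new ArrayWritableObjectInspector(rowType);
        System.out.println(inspector.getTypeName());
    }
}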
Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project cdap by caskdata.
The class ObjectSerializer, method fromLazyObject.
private Object fromLazyObject(TypeInfo type, Object data) {
    if (data == null) {
        return null;
    }
    switch (type.getCategory()) {
        case PRIMITIVE:
            Writable writable = ((LazyPrimitive) data).getWritableObject();
            return fromWritable(writable);
        case LIST:
            ListTypeInfo listType = (ListTypeInfo) type;
            TypeInfo listElementType = listType.getListElementTypeInfo();
            List<Object> list = ((LazyArray) data).getList();
            if (list.isEmpty()) {
                return ImmutableList.of();
            }
            Object[] arrayContent = new Object[list.size()];
            for (int i = 0; i < arrayContent.length; i++) {
                arrayContent[i] = fromLazyObject(listElementType, list.get(i));
            }
            return arrayContent;
        case MAP:
            MapTypeInfo mapType = (MapTypeInfo) type;
            Map<Object, Object> mapContent = Maps.newConcurrentMap();
            Map<Object, Object> map = ((LazyMap) data).getMap();
            for (Map.Entry<Object, Object> entry : map.entrySet()) {
                mapContent.put(fromLazyObject(mapType.getMapKeyTypeInfo(), entry.getKey()),
                               fromLazyObject(mapType.getMapValueTypeInfo(), entry.getValue()));
            }
            return mapContent;
        case STRUCT:
            StructTypeInfo structType = (StructTypeInfo) type;
            List<TypeInfo> info = structType.getAllStructFieldTypeInfos();
            List<String> names = structType.getAllStructFieldNames();
            Map<String, Object> structMap = Maps.newConcurrentMap();
            List<Object> struct = ((LazyStruct) data).getFieldsAsList();
            for (int structIndex = 0; structIndex < info.size(); structIndex++) {
                structMap.put(names.get(structIndex), fromLazyObject(info.get(structIndex), struct.get(structIndex)));
            }
            return structMap;
        case UNION:
            throw new UnsupportedOperationException("union not yet supported");
        default:
            return data.toString();
    }
}
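The method's structure is the standard category dispatch over TypeInfo. The following standalone analogue (a hypothetical helper, not CDAP code) walks the same categories and prints a description of the type, which shows how the recursion mirrors the nesting of the Hive type itself.

import org.apache.hadoop.hive.serde2.typeinfo.*;

public class TypeWalkDemo {
    static String describe(TypeInfo type) {
        switch (type.getCategory()) {
            case PRIMITIVE:
                return type.getTypeName();
            case LIST:
                // Recurse into the element type, as the LIST case above does.
                return "list of " + describe(((ListTypeInfo) type).getListElementTypeInfo());
            case MAP:
                // Recurse into key and value types, as the MAP case above does.
                MapTypeInfo m = (MapTypeInfo) type;
                return "map from " + describe(m.getMapKeyTypeInfo())
                        + " to " + describe(m.getMapValueTypeInfo());
            case STRUCT:
                return "struct with fields " + ((StructTypeInfo) type).getAllStructFieldNames();
            default:
                return "unsupported: " + type.getCategory();
        }
    }

    public static void main(String[] args) {
        TypeInfo t = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<int>>");
        System.out.println(describe(t)); // map from string to list of int
    }
}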
Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project cdap by caskdata.
The class ObjectDeserializer, method deserializeField.
/**
 * Translate a field that fits a {@link Schema} field into a type that Hive understands.
 * For example, a ByteBuffer is allowed by schema but Hive only understands byte arrays, so all ByteBuffers must
 * be changed into byte arrays. Reflection is used to examine Java objects if the expected Hive type is a struct.
 *
 * @param field value of the field to deserialize.
 * @param typeInfo type of the field as expected by Hive.
 * @param schema schema of the field.
 * @return the translated field.
 * @throws NoSuchFieldException if a struct field was expected but not found in the object.
 * @throws IllegalAccessException if a struct field was not accessible.
 */
private Object deserializeField(Object field, TypeInfo typeInfo, Schema schema)
        throws NoSuchFieldException, IllegalAccessException {
    boolean isNullable = schema.isNullable();
    if (field == null) {
        if (isNullable) {
            return null;
        } else {
            throw new UnexpectedFormatException("Non-nullable field was null.");
        }
    }
    if (isNullable) {
        schema = schema.getNonNullable();
    }
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return deserializePrimitive(field, (PrimitiveTypeInfo) typeInfo);
        case LIST:
            // HIVE!! some versions will turn bytes into array<tinyint> instead of binary... so special case it.
            // TODO: remove once CDAP-1556 is done
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            if (isByteArray(listTypeInfo) && !(field instanceof Collection)) {
                return deserializeByteArray(field);
            }
            return deserializeList(field, listTypeInfo, schema.getComponentSchema());
        case MAP:
            return deserializeMap(field, (MapTypeInfo) typeInfo, schema.getMapSchema());
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            ArrayList<String> innerFieldNames = structTypeInfo.getAllStructFieldNames();
            ArrayList<TypeInfo> innerFieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
            return flattenRecord(field, innerFieldNames, innerFieldTypes, schema);
        case UNION:
            // TODO: decide what to do here
            return field;
    }
    return null;
}
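The isByteArray helper is not shown in this excerpt. A plausible reconstruction (hypothetical; the actual CDAP helper may differ) checks that the list element type is tinyint, since Hive maps Java bytes to tinyint and some versions report a byte[] field as array<tinyint>.

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ByteArrayCheckDemo {
    // Hypothetical sketch of the check: a list<tinyint> is treated as binary.
    static boolean isByteArray(ListTypeInfo listTypeInfo) {
        return TypeInfoFactory.byteTypeInfo.equals(listTypeInfo.getListElementTypeInfo());
    }

    public static void main(String[] args) {
        ListTypeInfo tinyintList = (ListTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("array<tinyint>");
        System.out.println(isByteArray(tinyintList)); // true
    }
}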