Use of org.apache.hadoop.hive.serde2.thrift.test.Complex in project hive by apache.
The class StatsUtils, method getSizeOfComplexTypes.
/**
* Get the size of complex data types
* @param conf
* - hive conf
* @param oi
* - object inspector
* @return raw data size
*/
public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) {
  long result = 0;
  int length = 0;
  int listEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_LIST_NUM_ENTRIES);
  int mapEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAP_NUM_ENTRIES);
  switch (oi.getCategory()) {
    case PRIMITIVE:
      String colTypeLowerCase = oi.getTypeName().toLowerCase();
      if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
          || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
          || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
        int avgColLen = (int) getAvgColLenOf(conf, oi, colTypeLowerCase);
        result += JavaDataModel.get().lengthForStringOfLength(avgColLen);
      } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
        int avgColLen = (int) getAvgColLenOf(conf, oi, colTypeLowerCase);
        result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen);
      } else {
        result += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
      }
      break;
    case LIST:
      if (oi instanceof StandardConstantListObjectInspector) {
        // constant list projection of known length
        StandardConstantListObjectInspector scloi = (StandardConstantListObjectInspector) oi;
        length = scloi.getWritableConstantValue().size();
        // check if list elements are primitive or Objects
        ObjectInspector leoi = scloi.getListElementObjectInspector();
        if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
          result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length);
        } else {
          result += JavaDataModel.get().lengthForObjectArrayOfSize(length);
        }
      } else {
        StandardListObjectInspector sloi = (StandardListObjectInspector) oi;
        // list overhead + (configured number of elements in list * size of element)
        long elemSize = getSizeOfComplexTypes(conf, sloi.getListElementObjectInspector());
        result += JavaDataModel.get().arrayList() + (listEntries * elemSize);
      }
      break;
    case MAP:
      if (oi instanceof StandardConstantMapObjectInspector) {
        // constant map projection of known length
        StandardConstantMapObjectInspector scmoi = (StandardConstantMapObjectInspector) oi;
        result += getSizeOfMap(scmoi);
      } else {
        StandardMapObjectInspector smoi = (StandardMapObjectInspector) oi;
        result += getSizeOfComplexTypes(conf, smoi.getMapKeyObjectInspector());
        result += getSizeOfComplexTypes(conf, smoi.getMapValueObjectInspector());
        // hash map overhead
        result += JavaDataModel.get().hashMap(mapEntries);
      }
      break;
    case STRUCT:
      if (oi instanceof StandardConstantStructObjectInspector) {
        // constant struct projection of known length
        StandardConstantStructObjectInspector scsoi = (StandardConstantStructObjectInspector) oi;
        result += getSizeOfStruct(scsoi);
      } else {
        StructObjectInspector soi = (StructObjectInspector) oi;
        // add constant object overhead for struct
        result += JavaDataModel.get().object();
        // add constant overhead for the struct field name references
        result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref();
        for (StructField field : soi.getAllStructFieldRefs()) {
          result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector());
        }
      }
      break;
    case UNION:
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      // add constant object overhead for union
      result += JavaDataModel.get().object();
      // add constant size for the union tags
      result += uoi.getObjectInspectors().size() * JavaDataModel.get().primitive1();
      for (ObjectInspector foi : uoi.getObjectInspectors()) {
        result += getSizeOfComplexTypes(conf, foi);
      }
      break;
    default:
      break;
  }
  return result;
}
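As a rough illustration of how this estimator could be driven, here is a hedged sketch; the inspector shape and the use of HiveConf defaults are assumptions for the example, not taken from the snippet above:

import java.util.Arrays;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class SizeEstimateSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // A non-constant struct<id:int, tags:array<string>> inspector; the LIST branch above
    // falls into the "list overhead + listEntries * elemSize" estimate because this is
    // not a StandardConstantListObjectInspector.
    ObjectInspector tags = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector row = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "tags"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaIntObjectInspector, tags));
    System.out.println("estimated raw size: " + StatsUtils.getSizeOfComplexTypes(conf, row));
  }
}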
Use of org.apache.hadoop.hive.serde2.thrift.test.Complex in project hive by apache.
The class MapJoinBytesTableContainer, method getComplexFieldsAsList.
/*
* For primitive types, use LazyBinary's object.
* For complex types, make a standard (Java) object from LazyBinary's object.
*/
public static List<Object> getComplexFieldsAsList(LazyBinaryStruct lazyBinaryStruct,
    ArrayList<Object> objectArrayBuffer, LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {
  List<? extends StructField> fields = lazyBinaryStructObjectInspector.getAllStructFieldRefs();
  for (int i = 0; i < fields.size(); i++) {
    StructField field = fields.get(i);
    ObjectInspector objectInspector = field.getFieldObjectInspector();
    Category category = objectInspector.getCategory();
    Object object = lazyBinaryStruct.getField(i);
    if (category == Category.PRIMITIVE) {
      objectArrayBuffer.set(i, object);
    } else {
      objectArrayBuffer.set(i, ObjectInspectorUtils.copyToStandardObject(
          object, objectInspector, ObjectInspectorCopyOption.WRITABLE));
    }
  }
  return objectArrayBuffer;
}
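One detail worth noting: the buffer is filled with set(i, ...), so the caller must pass an ArrayList already sized to the field count. A hedged calling sketch, assuming java.util is imported and that lazyStruct and lazyOI come from an earlier LazyBinary deserialization step elsewhere:

// Assumed context: lazyStruct (LazyBinaryStruct) and lazyOI (LazyBinaryStructObjectInspector)
// were produced by an existing LazyBinarySerDe deserialization.
int fieldCount = lazyOI.getAllStructFieldRefs().size();
// Pre-size the buffer with nulls so that set(i, ...) is legal for every field index.
ArrayList<Object> buffer = new ArrayList<Object>(Collections.nCopies(fieldCount, (Object) null));
List<Object> row = MapJoinBytesTableContainer.getComplexFieldsAsList(lazyStruct, buffer, lazyOI);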
Use of org.apache.hadoop.hive.serde2.thrift.test.Complex in project hive by apache.
The class VectorAssignRow, method initConversion.
/**
 * Initialize for conversion from the provided (source) data types to the target data types
 * desired in the VectorizedRowBatch.
 *
 * No projection -- the column range is 0 .. count-1,
 * where count is the minimum of the target data type array size, the included array size,
 * and the source data type array size.
 *
 * @param sourceTypeInfos
 * @param targetTypeInfos
 * @param columnsToIncludeTruncated
 *          Flag array indicating which columns are to be included.
 *          "Truncated" because all false entries at the end of the array have been
 *          eliminated.
 * @return the minimum count described above; that is, the number of columns
 *         that will be processed by assign.
 */
public int initConversion(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos,
    boolean[] columnsToIncludeTruncated) {
  int targetColumnCount;
  if (columnsToIncludeTruncated == null) {
    targetColumnCount = targetTypeInfos.length;
  } else {
    targetColumnCount = Math.min(targetTypeInfos.length, columnsToIncludeTruncated.length);
  }
  int sourceColumnCount = Math.min(sourceTypeInfos.length, targetColumnCount);
  allocateArrays(sourceColumnCount);
  allocateConvertArrays(sourceColumnCount);
  for (int i = 0; i < sourceColumnCount; i++) {
    if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) {
      // Field not included in query.
    } else {
      TypeInfo targetTypeInfo = targetTypeInfos[i];
      if (targetTypeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) {
        // For now, we don't have an assigner for complex types...
      } else {
        TypeInfo sourceTypeInfo = sourceTypeInfos[i];
        if (!sourceTypeInfo.equals(targetTypeInfo)) {
          if (VectorPartitionConversion.isImplicitVectorColumnConversion(sourceTypeInfo, targetTypeInfo)) {
            // Do implicit conversion, accepting the source type and putting it in the
            // same target ColumnVector type.
            initTargetEntry(i, i, sourceTypeInfo);
          } else {
            // Do formal conversion...
            initTargetEntry(i, i, targetTypeInfo);
            initConvertSourceEntry(i, sourceTypeInfo);
          }
        } else {
          // No conversion.
          initTargetEntry(i, i, targetTypeInfo);
        }
      }
    }
  }
  return sourceColumnCount;
}
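A hedged sketch of wiring this up; the type pairs are illustrative, and whether int-to-bigint takes the implicit path depends on VectorPartitionConversion's conversion table:

import org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Inside some setup method:
TypeInfo[] source = { TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo };
TypeInfo[] target = { TypeInfoFactory.longTypeInfo, TypeInfoFactory.getVarcharTypeInfo(20) };
VectorAssignRow assignRow = new VectorAssignRow();
// Null include mask: all columns are included, so the returned count is min(2, 2) = 2.
int processed = assignRow.initConversion(source, target, null);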
Use of org.apache.hadoop.hive.serde2.thrift.test.Complex in project hive by apache.
The class ConstantPropagateProcFactory, method evaluateFunction.
/**
 * Evaluate a UDF over constant arguments.
 *
 * @param udf UDF object
 * @param exprs
 * @param oldExprs
 * @return the evaluated ExprNodeConstantDesc if possible, or null if the expression cannot
 *         be evaluated (not all parameters are constants).
 */
private static ExprNodeDesc evaluateFunction(GenericUDF udf, List<ExprNodeDesc> exprs,
    List<ExprNodeDesc> oldExprs) {
  DeferredJavaObject[] arguments = new DeferredJavaObject[exprs.size()];
  ObjectInspector[] argois = new ObjectInspector[exprs.size()];
  for (int i = 0; i < exprs.size(); i++) {
    ExprNodeDesc desc = exprs.get(i);
    if (desc instanceof ExprNodeConstantDesc) {
      ExprNodeConstantDesc constant = (ExprNodeConstantDesc) exprs.get(i);
      if (!constant.getTypeInfo().equals(oldExprs.get(i).getTypeInfo())) {
        constant = typeCast(constant, oldExprs.get(i).getTypeInfo());
        if (constant == null) {
          return null;
        }
      }
      if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
        // nested complex types cannot be folded cleanly
        return null;
      }
      Object value = constant.getValue();
      PrimitiveTypeInfo pti = (PrimitiveTypeInfo) constant.getTypeInfo();
      Object writableValue = null == value ? value
          : PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti).getPrimitiveWritableObject(value);
      arguments[i] = new DeferredJavaObject(writableValue);
      argois[i] = ObjectInspectorUtils.getConstantObjectInspector(constant.getWritableObjectInspector(), writableValue);
    } else if (desc instanceof ExprNodeGenericFuncDesc) {
      ExprNodeDesc evaluatedFn = foldExpr((ExprNodeGenericFuncDesc) desc);
      if (null == evaluatedFn || !(evaluatedFn instanceof ExprNodeConstantDesc)) {
        return null;
      }
      ExprNodeConstantDesc constant = (ExprNodeConstantDesc) evaluatedFn;
      if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
        // nested complex types cannot be folded cleanly
        return null;
      }
      Object writableValue = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
          (PrimitiveTypeInfo) constant.getTypeInfo()).getPrimitiveWritableObject(constant.getValue());
      arguments[i] = new DeferredJavaObject(writableValue);
      argois[i] = ObjectInspectorUtils.getConstantObjectInspector(constant.getWritableObjectInspector(), writableValue);
    } else {
      return null;
    }
  }
  try {
    ObjectInspector oi = udf.initialize(argois);
    Object o = udf.evaluate(arguments);
    if (LOG.isDebugEnabled()) {
      LOG.debug(udf.getClass().getName() + "(" + exprs + ")=" + o);
    }
    if (o == null) {
      return new ExprNodeConstantDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o);
    }
    Class<?> clz = o.getClass();
    if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(clz)) {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      TypeInfo typeInfo = poi.getTypeInfo();
      o = poi.getPrimitiveJavaObject(o);
      if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME)
          || typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME)
          || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) {
        return new ExprNodeConstantDesc(typeInfo, o);
      }
    } else if (udf instanceof GenericUDFStruct && oi instanceof StandardConstantStructObjectInspector) {
      // do not fold named_struct, only struct()
      ConstantObjectInspector coi = (ConstantObjectInspector) oi;
      TypeInfo structType = TypeInfoUtils.getTypeInfoFromObjectInspector(coi);
      return new ExprNodeConstantDesc(structType, ObjectInspectorUtils.copyToStandardJavaObject(o, coi));
    } else if (!PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) {
      if (LOG.isErrorEnabled()) {
        LOG.error("Unable to evaluate " + udf + ". Return value unrecognizable.");
      }
      return null;
    } else {
      // fall through
    }
    String constStr = null;
    if (arguments.length == 1 && FunctionRegistry.isOpCast(udf)) {
      // remember the original string representation of the constant.
      constStr = arguments[0].get().toString();
    }
    return new ExprNodeConstantDesc(o).setFoldedFromVal(constStr);
  } catch (HiveException e) {
    LOG.error("Evaluation of function " + udf.getClass() + " failed in Constant Propagation Optimizer.");
    throw new RuntimeException(e);
  }
}
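The core of the folding step is: build constant object inspectors for the arguments, initialize the UDF against them, and evaluate with deferred Java objects. A minimal hedged sketch of that same pattern, folding upper('abc') at compile time; GenericUDFUpper is a stand-in chosen for the example, and the real optimizer wires this up from the expression tree instead:

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class FoldSketch {
  public static void main(String[] args) throws HiveException {
    GenericUDF udf = new GenericUDFUpper();
    Text value = new Text("abc");
    // A constant OI tells initialize() the argument is a compile-time constant.
    ObjectInspector constOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.writableStringObjectInspector, value);
    ObjectInspector resultOI = udf.initialize(new ObjectInspector[] { constOI });
    Object o = udf.evaluate(new GenericUDF.DeferredObject[] {
        new GenericUDF.DeferredJavaObject(value) });
    // Prints "string: ABC"; evaluateFunction would wrap o in an ExprNodeConstantDesc.
    System.out.println(resultOI.getTypeName() + ": " + o);
  }
}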
Use of org.apache.hadoop.hive.serde2.thrift.test.Complex in project hive by apache.
The class TestSerDe, method serialize.
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  if (objInspector.getCategory() != Category.STRUCT) {
    throw new SerDeException(getClass().toString()
        + " can only serialize struct types, but we got: " + objInspector.getTypeName());
  }
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < fields.size(); i++) {
    if (i > 0) {
      sb.append(separator);
    }
    Object column = soi.getStructFieldData(obj, fields.get(i));
    if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
      // For primitive object, serialize to plain string
      sb.append(column == null ? nullString : column.toString());
    } else {
      // For complex object, serialize to JSON format
      sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
    }
  }
  serializeCache.set(sb.toString());
  return serializeCache;
}
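A hedged sketch of driving this serializer; TestSerDe lives in Hive's test sources, and the initialize signature and the default separator/nullString values are assumptions that vary across Hive versions. The fragment is assumed to sit in a method that throws SerDeException, with java.util, org.apache.hadoop.conf.Configuration, org.apache.hadoop.io.Writable, and the inspector factories imported:

// Assumed: initializing with empty properties leaves the default separator and null string.
TestSerDe serde = new TestSerDe();
serde.initialize(new Configuration(), new Properties());
ObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("name", "scores"),
    Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector)));
Object row = Arrays.asList("alice", Arrays.asList(1, 2, 3));
Writable out = serde.serialize(row, rowOI);
// out holds "alice", the separator, then the list rendered as JSON ([1,2,3]):
// the primitive field as plain text, the complex field via SerDeUtils.getJSONString.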