Use of org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc in project hive by apache.
From the class Vectorizer, method fixDecimalDataTypePhysicalVariations. The method walks a VectorExpression tree and, wherever a child produces DECIMAL_64 output while the parent expects plain decimal, wraps that child in a Decimal64-to-Decimal conversion. If the parent is a VectorUDFAdaptor, its VectorUDFArgDesc argument descriptors are re-pointed at the new child output columns; otherwise the parent expression is re-instantiated with the updated argument columns.
private static VectorExpression fixDecimalDataTypePhysicalVariations(final VectorExpression parent,
    final VectorExpression[] children, final VectorizationContext vContext) throws HiveException {
  if (children == null || children.length == 0) {
    return parent;
  }
  for (int i = 0; i < children.length; i++) {
    VectorExpression child = children[i];
    VectorExpression newChild =
        fixDecimalDataTypePhysicalVariations(child, child.getChildExpressions(), vContext);
    if (child.getClass() == newChild.getClass() && child != newChild) {
      children[i] = newChild;
    }
  }
  if (parent.getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.NONE
      && !(parent instanceof ConvertDecimal64ToDecimal)) {
    boolean inputArgsChanged = false;
    DataTypePhysicalVariation[] dataTypePhysicalVariations = parent.getInputDataTypePhysicalVariations();
    for (int i = 0; i < children.length; i++) {
      // we found at least one child with a mismatch
      if (children[i].getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.DECIMAL_64) {
        children[i] = vContext.wrapWithDecimal64ToDecimalConversion(children[i]);
        inputArgsChanged = true;
        dataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
      }
    }
    // fix up the input column numbers and output column numbers
    if (inputArgsChanged) {
      if (parent instanceof VectorUDFAdaptor) {
        VectorUDFAdaptor parentAdaptor = (VectorUDFAdaptor) parent;
        VectorUDFArgDesc[] argDescs = parentAdaptor.getArgDescs();
        for (int i = 0; i < argDescs.length; ++i) {
          if (argDescs[i].getColumnNum() != children[i].getOutputColumnNum()) {
            argDescs[i].setColumnNum(children[i].getOutputColumnNum());
            break;
          }
        }
      } else {
        Object[] arguments;
        int argumentCount = children.length + (parent.getOutputColumnNum() == -1 ? 0 : 1);
        // Need to handle it as a special case to avoid instantiation failure.
        if (parent instanceof VectorCoalesce) {
          arguments = new Object[2];
          arguments[0] = new int[children.length];
          for (int i = 0; i < children.length; i++) {
            VectorExpression vce = children[i];
            ((int[]) arguments[0])[i] = vce.getOutputColumnNum();
          }
          arguments[1] = parent.getOutputColumnNum();
        } else {
          if (parent instanceof DecimalColDivideDecimalScalar) {
            arguments = new Object[argumentCount + 1];
            arguments[children.length] = ((DecimalColDivideDecimalScalar) parent).getValue();
          } else {
            arguments = new Object[argumentCount];
          }
          for (int i = 0; i < children.length; i++) {
            VectorExpression vce = children[i];
            arguments[i] = vce.getOutputColumnNum();
          }
        }
        // retain output column number from parent
        if (parent.getOutputColumnNum() != -1) {
          arguments[arguments.length - 1] = parent.getOutputColumnNum();
        }
        // re-instantiate the parent expression with new arguments
        VectorExpression newParent = vContext.instantiateExpression(parent.getClass(),
            parent.getOutputTypeInfo(), parent.getOutputDataTypePhysicalVariation(), arguments);
        newParent.setOutputTypeInfo(parent.getOutputTypeInfo());
        newParent.setOutputDataTypePhysicalVariation(parent.getOutputDataTypePhysicalVariation());
        newParent.setInputTypeInfos(parent.getInputTypeInfos());
        newParent.setInputDataTypePhysicalVariations(dataTypePhysicalVariations);
        newParent.setChildExpressions(parent.getChildExpressions());
        return newParent;
      }
    }
  }
  return parent;
}
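
For context, here is a minimal sketch of the descriptor re-pointing idea used in the VectorUDFAdaptor branch above. It is not Hive code: the class and method names are made up, and unlike the Hive method it updates every mismatched descriptor instead of stopping at the first one. It only uses the VectorUDFArgDesc and VectorUDFAdaptor calls already shown above (getArgDescs, getColumnNum, setColumnNum, getOutputColumnNum).

import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;

// Hypothetical helper, not part of Hive: re-point every argument descriptor of a
// VectorUDFAdaptor at the current output column of the matching child expression.
public final class ArgDescSync {

  private ArgDescSync() {
  }

  static void syncArgColumns(VectorUDFAdaptor adaptor, VectorExpression[] children) {
    VectorUDFArgDesc[] argDescs = adaptor.getArgDescs();
    for (int i = 0; i < argDescs.length && i < children.length; i++) {
      int childOutputColumn = children[i].getOutputColumnNum();
      if (argDescs[i].getColumnNum() != childOutputColumn) {
        // The child was rewritten (e.g. wrapped with ConvertDecimal64ToDecimal) and now
        // writes to a different scratch column, so the descriptor must follow it.
        argDescs[i].setColumnNum(childOutputColumn);
      }
    }
  }
}
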
Use of org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc in project hive by apache.
From the class VectorizationContext, method getCustomUDFExpression. For each argument of a custom (non-built-in) UDF call, the method fills in a VectorUDFArgDesc: setVariable for columns and other non-constant expressions, setConstant for literals. The descriptors are then handed to a VectorUDFAdaptor, which evaluates the wrapped UDF against the vectorized batch.
/*
 * Return vector expression for a custom (i.e. not built-in) UDF.
 */
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr,
    VectorExpressionDescriptor.Mode mode) throws HiveException {
  // Assume.
  boolean isFilter = false;
  if (mode == VectorExpressionDescriptor.Mode.FILTER) {
    // Is output type a BOOLEAN?
    TypeInfo resultTypeInfo = expr.getTypeInfo();
    if (resultTypeInfo.getCategory() == Category.PRIMITIVE &&
        ((PrimitiveTypeInfo) resultTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
      isFilter = true;
    } else {
      return null;
    }
  }
  // GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
  List<ExprNodeDesc> childExprList = expr.getChildren();
  final int childrenCount = childExprList.size();
  // argument descriptors
  VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childrenCount];
  for (int i = 0; i < argDescs.length; i++) {
    argDescs[i] = new VectorUDFArgDesc();
  }
  // positions of variable arguments (columns or non-constant expressions)
  List<Integer> variableArgPositions = new ArrayList<>();
  // Column numbers of batch corresponding to expression result arguments
  List<Integer> exprResultColumnNums = new ArrayList<>();
  // Prepare children
  List<VectorExpression> vectorExprs = new ArrayList<>();
  TypeInfo[] inputTypeInfos = new TypeInfo[childrenCount];
  DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[childrenCount];
  for (int i = 0; i < childrenCount; i++) {
    ExprNodeDesc child = childExprList.get(i);
    inputTypeInfos[i] = child.getTypeInfo();
    inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
    if (child instanceof ExprNodeGenericFuncDesc) {
      VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
      vectorExprs.add(e);
      variableArgPositions.add(i);
      exprResultColumnNums.add(e.getOutputColumnNum());
      argDescs[i].setVariable(e.getOutputColumnNum());
    } else if (child instanceof ExprNodeColumnDesc) {
      variableArgPositions.add(i);
      argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
    } else if (child instanceof ExprNodeConstantDesc) {
      // this is a constant (or null)
      if (child.getTypeInfo().getCategory() != Category.PRIMITIVE &&
          child.getTypeInfo().getCategory() != Category.STRUCT) {
        // Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant.
        throw new HiveException(
            "Unable to vectorize custom UDF. LIST, MAP, and UNION type constants not supported: " + child);
      }
      argDescs[i].setConstant((ExprNodeConstantDesc) child);
    } else if (child instanceof ExprNodeDynamicValueDesc) {
      VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
      vectorExprs.add(e);
      variableArgPositions.add(i);
      exprResultColumnNums.add(e.getOutputColumnNum());
      argDescs[i].setVariable(e.getOutputColumnNum());
    } else if (child instanceof ExprNodeFieldDesc) {
      // Get the GenericUDFStructField to process the field of Struct type
      VectorExpression e = getGenericUDFStructField((ExprNodeFieldDesc) child,
          VectorExpressionDescriptor.Mode.PROJECTION, child.getTypeInfo());
      vectorExprs.add(e);
      variableArgPositions.add(i);
      exprResultColumnNums.add(e.getOutputColumnNum());
      argDescs[i].setVariable(e.getOutputColumnNum());
    } else {
      throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + child);
    }
  }
  // Allocate output column and get column number
  TypeInfo resultTypeInfo = expr.getTypeInfo();
  String resultTypeName = resultTypeInfo.getTypeName();
  final int outputColumnNum = ocm.allocateOutputColumn(expr.getTypeInfo());
  // Make vectorized operator
  VectorUDFAdaptor ve = new VectorUDFAdaptor(expr, outputColumnNum, resultTypeName, argDescs);
  ve.setSuppressEvaluateExceptions(adaptorSuppressEvaluateExceptions);
  // Set child expressions
  VectorExpression[] childVEs = null;
  if (exprResultColumnNums.size() != 0) {
    childVEs = new VectorExpression[exprResultColumnNums.size()];
    for (int i = 0; i < childVEs.length; i++) {
      childVEs[i] = vectorExprs.get(i);
    }
  }
  ve.setChildExpressions(childVEs);
  ve.setInputTypeInfos(inputTypeInfos);
  ve.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
  ve.setOutputTypeInfo(resultTypeInfo);
  ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
  // Free output columns if inputs have non-leaf expression trees.
  for (Integer i : exprResultColumnNums) {
    ocm.freeOutputColumn(i);
  }
  if (isFilter) {
    SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputColumnNum);
    filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });
    filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
    filterVectorExpr.setInputDataTypePhysicalVariations(ve.getOutputDataTypePhysicalVariation());
    return filterVectorExpr;
  } else {
    return ve;
  }
}
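
As a rough illustration of how the argument descriptors above end up populated, the sketch below builds the VectorUDFArgDesc array for a hypothetical two-argument call my_udf(some_column, 'some literal'). The class name, the column index, and the literal are made up for the example; only the setVariable and setConstant calls mirror the branches of getCustomUDFExpression.

import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;

// Hypothetical illustration, not Hive code: argument descriptors for a custom UDF
// invoked as my_udf(some_column, 'some literal').
public final class CustomUdfArgDescExample {

  private CustomUdfArgDescExample() {
  }

  static VectorUDFArgDesc[] describeArgs(int someColumnBatchIndex) {
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[2];
    for (int i = 0; i < argDescs.length; i++) {
      argDescs[i] = new VectorUDFArgDesc();
    }
    // First argument is a column (or any non-constant expression): record the batch
    // column its values are read from at evaluation time.
    argDescs[0].setVariable(someColumnBatchIndex);
    // Second argument is a literal: keep the constant descriptor so the adaptor can
    // prepare a constant value for the wrapped UDF (see VectorUDFArgDesc.prepareConstant,
    // referenced in the comment inside getCustomUDFExpression above).
    argDescs[1].setConstant(new ExprNodeConstantDesc("some literal"));
    return argDescs;
  }
}
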