use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorizationContext method getCustomUDFExpression.
/**
 * Return vector expression for a custom (i.e. not built-in) UDF.
 *
 * <p>The UDF call is wrapped in a {@link VectorUDFAdaptor}. Each argument is described by a
 * {@link VectorUDFArgDesc} that is either bound to a batch column (an input column, or the
 * output column of a child vector expression) or to a constant.
 *
 * @param expr the UDF call to vectorize
 * @param mode PROJECTION to obtain the adaptor itself; FILTER to obtain a
 *             {@link SelectColumnIsTrue} applied to the adaptor's output column
 * @return the vector expression, or {@code null} when {@code mode} is FILTER and the UDF's
 *         result type is not BOOLEAN
 * @throws HiveException if an argument is a constant whose category is neither PRIMITIVE nor
 *         STRUCT, or if an argument is an unsupported kind of expression descriptor
 */
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) throws HiveException {
  // A filter can only be built on top of a BOOLEAN-valued UDF.
  final boolean isFilter = (mode == VectorExpressionDescriptor.Mode.FILTER);
  if (isFilter) {
    final TypeInfo filterTypeInfo = expr.getTypeInfo();
    final boolean yieldsBoolean = filterTypeInfo.getCategory() == Category.PRIMITIVE
        && ((PrimitiveTypeInfo) filterTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;
    if (!yieldsBoolean) {
      return null;
    }
  }

  final List<ExprNodeDesc> udfArgs = expr.getChildren();
  final int argCount = udfArgs.size();

  // One argument descriptor per UDF argument.
  final VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[argCount];
  for (int slot = 0; slot < argCount; slot++) {
    argDescs[slot] = new VectorUDFArgDesc();
  }

  // Positions of the arguments that vary per row (columns or non-constant expressions).
  final List<Integer> variableArgPositions = new ArrayList<>();
  // Batch columns that hold the results of child vector expressions.
  final List<Integer> exprResultColumnNums = new ArrayList<>();
  // Child vector expressions, in argument order.
  final List<VectorExpression> vectorExprs = new ArrayList<>();

  final TypeInfo[] inputTypeInfos = new TypeInfo[argCount];
  final DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[argCount];

  for (int pos = 0; pos < argCount; pos++) {
    final ExprNodeDesc arg = udfArgs.get(pos);
    inputTypeInfos[pos] = arg.getTypeInfo();
    inputDataTypePhysicalVariations[pos] = DataTypePhysicalVariation.NONE;

    // Arguments computed by a child vector expression fall through to the shared tail below;
    // plain columns and constants are fully handled inside their own branch.
    final VectorExpression argExpr;
    if (arg instanceof ExprNodeGenericFuncDesc) {
      argExpr = getVectorExpression(arg, VectorExpressionDescriptor.Mode.PROJECTION);
    } else if (arg instanceof ExprNodeColumnDesc) {
      variableArgPositions.add(pos);
      argDescs[pos].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) arg).getColumn()));
      continue;
    } else if (arg instanceof ExprNodeConstantDesc) {
      // A constant (or null).
      final Category constantCategory = arg.getTypeInfo().getCategory();
      if (constantCategory != Category.PRIMITIVE && constantCategory != Category.STRUCT) {
        // Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant.
        throw new HiveException("Unable to vectorize custom UDF. LIST, MAP, and UNION type constants not supported: " + arg);
      }
      argDescs[pos].setConstant((ExprNodeConstantDesc) arg);
      continue;
    } else if (arg instanceof ExprNodeDynamicValueDesc) {
      argExpr = getVectorExpression(arg, VectorExpressionDescriptor.Mode.PROJECTION);
    } else if (arg instanceof ExprNodeFieldDesc) {
      // Struct field access goes through the GenericUDFStructField path.
      argExpr = getGenericUDFStructField((ExprNodeFieldDesc) arg, VectorExpressionDescriptor.Mode.PROJECTION, arg.getTypeInfo());
    } else {
      throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + arg);
    }

    // Shared tail: bind the argument to the child expression's output column.
    vectorExprs.add(argExpr);
    variableArgPositions.add(pos);
    final int argOutputColumn = argExpr.getOutputColumnNum();
    exprResultColumnNums.add(argOutputColumn);
    argDescs[pos].setVariable(argOutputColumn);
  }

  // Allocate the adaptor's own output column.
  final TypeInfo resultTypeInfo = expr.getTypeInfo();
  final String resultTypeName = resultTypeInfo.getTypeName();
  final int outputColumnNum = ocm.allocateOutputColumn(resultTypeInfo);

  // Build the adaptor that evaluates the UDF over a batch.
  final VectorUDFAdaptor adaptor = new VectorUDFAdaptor(expr, outputColumnNum, resultTypeName, argDescs);
  adaptor.setSuppressEvaluateExceptions(adaptorSuppressEvaluateExceptions);

  // Child expressions stay null when no argument needed one.
  final VectorExpression[] childVEs =
      exprResultColumnNums.isEmpty() ? null : vectorExprs.toArray(new VectorExpression[0]);
  adaptor.setChildExpressions(childVEs);
  adaptor.setInputTypeInfos(inputTypeInfos);
  adaptor.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
  adaptor.setOutputTypeInfo(resultTypeInfo);
  adaptor.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);

  // Release the columns that were allocated for the child expression results.
  for (Integer childResultColumn : exprResultColumnNums) {
    ocm.freeOutputColumn(childResultColumn);
  }

  if (!isFilter) {
    return adaptor;
  }

  // Filter mode: select the rows for which the adaptor's BOOLEAN output is true.
  final SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputColumnNum);
  filterVectorExpr.setChildExpressions(new VectorExpression[] { adaptor });
  filterVectorExpr.setInputTypeInfos(adaptor.getOutputTypeInfo());
  filterVectorExpr.setInputDataTypePhysicalVariations(adaptor.getOutputDataTypePhysicalVariation());
  return filterVectorExpr;
}
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorizationContext method getVectorExpressions.
/**
 * Converts each expression descriptor into a vector expression, preserving list order.
 *
 * @param exprNodes descriptors to convert; may be {@code null}
 * @param mode the mode passed to {@code getVectorExpression} for every descriptor
 * @return one vector expression per descriptor, or an empty array when {@code exprNodes}
 *         is {@code null}
 * @throws HiveException if converting any descriptor fails
 */
public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes, VectorExpressionDescriptor.Mode mode) throws HiveException {
  if (exprNodes == null) {
    return new VectorExpression[0];
  }
  final VectorExpression[] converted = new VectorExpression[exprNodes.size()];
  int slot = 0;
  for (ExprNodeDesc node : exprNodes) {
    converted[slot] = getVectorExpression(node, mode);
    slot++;
  }
  return converted;
}
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorizationContext method doGetIfExpression.
/**
 * Builds the vector expression for {@code IF(condition, then, else)}.
 *
 * <p>Special forms are chosen, in this order:
 * <ol>
 *   <li>both branches are NULL constants: {@link IfExprNullNull};</li>
 *   <li>exactly one branch is a NULL constant: a Null/Column or Null/CondExpr variant;</li>
 *   <li>at least one branch is a conditional expression and the mode is not GOOD: a
 *       CondExpr variant, provided both branches produce the same column vector type.</li>
 * </ol>
 * Anything else is delegated to {@code getVectorExpressionForUdf}.
 *
 * @param genericUDFIf the IF UDF instance
 * @param childExpr the three children: condition, THEN, ELSE
 * @param returnType result type of the IF expression
 * @return the vector expression, or {@code null} when the IF statement mode is ADAPTOR
 * @throws HiveException if a child expression cannot be vectorized
 */
private VectorExpression doGetIfExpression(GenericUDFIf genericUDFIf, List<ExprNodeDesc> childExpr, TypeInfo returnType) throws HiveException {
  if (hiveVectorIfStmtMode == HiveVectorIfStmtMode.ADAPTOR) {
    return null;
  }

  // Insert implicit casts so that THEN and ELSE agree on a type.
  final List<ExprNodeDesc> alignedChildren = getChildExpressionsWithImplicitCast(genericUDFIf, childExpr, returnType);
  final ExprNodeDesc conditionDesc = Objects.requireNonNull(alignedChildren).get(0);
  final ExprNodeDesc thenDesc = alignedChildren.get(1);
  final ExprNodeDesc elseDesc = alignedChildren.get(2);

  final boolean thenIsNull = isNullConst(thenDesc);
  final boolean elseIsNull = isNullConst(elseDesc);

  if (thenIsNull && elseIsNull) {
    // THEN NULL ELSE NULL: An unusual "case", but possible.
    final int nullOutputColumn = ocm.allocateOutputColumn(returnType);
    final VectorExpression nullNullExpr = new IfExprNullNull(nullOutputColumn);
    nullNullExpr.setOutputTypeInfo(returnType);
    nullNullExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
    return nullNullExpr;
  }

  final boolean thenIsCond = isCondExpr(thenDesc);
  final boolean elseIsCond = isCondExpr(elseDesc);
  final boolean goodOnly = (hiveVectorIfStmtMode == HiveVectorIfStmtMode.GOOD);

  if (thenIsNull) {
    // IF(cond, NULL, else)
    final VectorExpression condVE = getVectorExpression(conditionDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    final VectorExpression elseVE = getVectorExpression(elseDesc, VectorExpressionDescriptor.Mode.PROJECTION);

    // The output takes the ELSE branch's physical variation, defaulting to NONE.
    final DataTypePhysicalVariation elseVariation = elseVE.getOutputDataTypePhysicalVariation();
    final DataTypePhysicalVariation outVariation =
        (elseVariation != null) ? elseVariation : DataTypePhysicalVariation.NONE;
    final int outColumn = ocm.allocateOutputColumn(returnType, outVariation);

    final VectorExpression ifVE;
    if (elseIsCond && !goodOnly) {
      ifVE = new IfExprNullCondExpr(condVE.getOutputColumnNum(), elseVE.getOutputColumnNum(), outColumn);
    } else {
      ifVE = new IfExprNullColumn(condVE.getOutputColumnNum(), elseVE.getOutputColumnNum(), outColumn);
    }
    ifVE.setChildExpressions(new VectorExpression[] { condVE, elseVE });
    ifVE.setInputTypeInfos(condVE.getOutputTypeInfo(), TypeInfoFactory.voidTypeInfo, elseVE.getOutputTypeInfo());
    ifVE.setInputDataTypePhysicalVariations(condVE.getOutputDataTypePhysicalVariation(), outVariation, elseVariation);
    ifVE.setOutputTypeInfo(returnType);
    ifVE.setOutputDataTypePhysicalVariation(outVariation);
    return ifVE;
  }

  if (elseIsNull) {
    // IF(cond, then, NULL)
    final VectorExpression condVE = getVectorExpression(conditionDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    final VectorExpression thenVE = getVectorExpression(thenDesc, VectorExpressionDescriptor.Mode.PROJECTION);

    // The output takes the THEN branch's physical variation, defaulting to NONE.
    final DataTypePhysicalVariation thenVariation = thenVE.getOutputDataTypePhysicalVariation();
    final DataTypePhysicalVariation outVariation =
        (thenVariation != null) ? thenVariation : DataTypePhysicalVariation.NONE;
    final int outColumn = ocm.allocateOutputColumn(returnType, outVariation);

    final VectorExpression ifVE;
    if (thenIsCond && !goodOnly) {
      ifVE = new IfExprCondExprNull(condVE.getOutputColumnNum(), thenVE.getOutputColumnNum(), outColumn);
    } else {
      ifVE = new IfExprColumnNull(condVE.getOutputColumnNum(), thenVE.getOutputColumnNum(), outColumn);
    }
    ifVE.setChildExpressions(new VectorExpression[] { condVE, thenVE });
    ifVE.setInputTypeInfos(condVE.getOutputTypeInfo(), thenVE.getOutputTypeInfo(), TypeInfoFactory.voidTypeInfo);
    ifVE.setInputDataTypePhysicalVariations(condVE.getOutputDataTypePhysicalVariation(), thenVariation, outVariation);
    ifVE.setOutputTypeInfo(returnType);
    ifVE.setOutputDataTypePhysicalVariation(outVariation);
    return ifVE;
  }

  if (!goodOnly && (thenIsCond || elseIsCond)) {
    final VectorExpression condVE = getVectorExpression(conditionDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    final VectorExpression thenVE = getVectorExpression(thenDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    final VectorExpression elseVE = getVectorExpression(elseDesc, VectorExpressionDescriptor.Mode.PROJECTION);

    // Only proceed if the THEN/ELSE types were aligned; otherwise fall through to the UDF path.
    if (thenVE.getOutputColumnVectorType() == elseVE.getOutputColumnVectorType()) {
      final DataTypePhysicalVariation thenVariation = thenVE.getOutputDataTypePhysicalVariation();
      final DataTypePhysicalVariation elseVariation = elseVE.getOutputDataTypePhysicalVariation();
      // Keep a physical variation only when both branches share the same non-null one.
      final DataTypePhysicalVariation outVariation =
          (thenVariation == elseVariation && thenVariation != null)
              ? thenVariation
              : DataTypePhysicalVariation.NONE;
      final int outColumn = ocm.allocateOutputColumn(returnType, outVariation);

      final VectorExpression ifVE;
      if (!thenIsCond) {
        // This block is only reached when at least one branch is conditional, so ELSE is.
        ifVE = new IfExprColumnCondExpr(condVE.getOutputColumnNum(), thenVE.getOutputColumnNum(), elseVE.getOutputColumnNum(), outColumn);
      } else if (elseIsCond) {
        ifVE = new IfExprCondExprCondExpr(condVE.getOutputColumnNum(), thenVE.getOutputColumnNum(), elseVE.getOutputColumnNum(), outColumn);
      } else {
        ifVE = new IfExprCondExprColumn(condVE.getOutputColumnNum(), thenVE.getOutputColumnNum(), elseVE.getOutputColumnNum(), outColumn);
      }
      ifVE.setChildExpressions(new VectorExpression[] { condVE, thenVE, elseVE });
      ifVE.setInputTypeInfos(condVE.getOutputTypeInfo(), thenVE.getOutputTypeInfo(), elseVE.getOutputTypeInfo());
      ifVE.setInputDataTypePhysicalVariations(condVE.getOutputDataTypePhysicalVariation(), thenVariation, elseVariation);
      ifVE.setOutputTypeInfo(returnType);
      ifVE.setOutputDataTypePhysicalVariation(outVariation);
      return ifVE;
    }
  }

  // No special form applies: resolve through the generic UDF lookup.
  final Class<?> udfClass = genericUDFIf.getClass();
  return getVectorExpressionForUdf(genericUDFIf, udfClass, alignedChildren, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
}
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorizationContext method createVectorExpression.
/**
 * Instantiates {@code vectorClass} with arguments derived from the child expression
 * descriptors.
 *
 * <p>Each child contributes one constructor argument:
 * <ul>
 *   <li>function and field expressions: the output column number of their vector expression;</li>
 *   <li>columns: the input column index. A decimal column whose physical variation is
 *       DECIMAL_64 is first converted to a regular decimal and the conversion's output column
 *       is used instead. In FILTER mode a {@link SelectColumnIsTrue} child is added for the
 *       column;</li>
 *   <li>constants: the scalar value, or a constant vector expression when the scalar is
 *       {@code null};</li>
 *   <li>dynamic values: the dynamic value object.</li>
 * </ul>
 *
 * @param vectorClass the vector expression class to instantiate
 * @param childExpr child descriptors; {@code null} is treated as no children
 * @param childrenMode mode used when vectorizing the children
 * @param returnType result type of the expression
 * @param returnDataTypePhysicalVariation physical variation of the result
 * @return the instantiated vector expression with input types and children set
 * @throws HiveException if a child is of an unsupported descriptor kind, or if a child cannot
 *         be vectorized
 */
private VectorExpression createVectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType, DataTypePhysicalVariation returnDataTypePhysicalVariation) throws HiveException {
  int numChildren = childExpr == null ? 0 : childExpr.size();
  TypeInfo[] inputTypeInfos = new TypeInfo[numChildren];
  DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[numChildren];
  List<VectorExpression> children = new ArrayList<>();
  Object[] arguments = new Object[numChildren];
  for (int i = 0; i < numChildren; i++) {
    ExprNodeDesc child = childExpr.get(i);
    TypeInfo childTypeInfo = child.getTypeInfo();
    inputTypeInfos[i] = childTypeInfo;
    // Assume NONE; overwritten below when a child vector expression says otherwise.
    inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
    if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeFieldDesc)) {
      VectorExpression vChild = getVectorExpression(child, childrenMode);
      children.add(vChild);
      arguments[i] = vChild.getOutputColumnNum();
      inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
    } else if (child instanceof ExprNodeColumnDesc) {
      int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
      if (childTypeInfo instanceof DecimalTypeInfo) {
        // In this method, we must only process non-Decimal64 column vectors.
        // Convert Decimal64 columns to regular decimal.
        // (An enum == comparison is already false for null, so no separate null check.)
        if (getDataTypePhysicalVariation(colIndex) == DataTypePhysicalVariation.DECIMAL_64) {
          // FUTURE: Can we reuse this conversion?
          VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo);
          children.add(vChild);
          arguments[i] = vChild.getOutputColumnNum();
          inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
          continue;
        }
      }
      if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
        // In filter mode, the column must be a boolean
        SelectColumnIsTrue selectColumnIsTrue = new SelectColumnIsTrue(colIndex);
        selectColumnIsTrue.setInputTypeInfos(childTypeInfo);
        selectColumnIsTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
        children.add(selectColumnIsTrue);
      }
      arguments[i] = colIndex;
    } else if (child instanceof ExprNodeConstantDesc) {
      Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
      arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue;
    } else if (child instanceof ExprNodeDynamicValueDesc) {
      arguments[i] = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
    } else {
      throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
    }
  }
  VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, returnDataTypePhysicalVariation, arguments);
  if (vectorExpression == null) {
    // Pass the physical variation that was actually requested above, not a hard-coded NONE,
    // so the failure handler sees the same request that instantiateExpression received.
    // NOTE(review): the handler presumably throws; the requireNonNull below guards if it does not.
    handleCouldNotInstantiateVectorExpression(vectorClass, returnType, returnDataTypePhysicalVariation, arguments);
  }
  Objects.requireNonNull(vectorExpression).setInputTypeInfos(inputTypeInfos);
  vectorExpression.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
  if (!children.isEmpty()) {
    vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
  }
  freeNonColumns(children.toArray(new VectorExpression[0]));
  return vectorExpression;
}
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorizationContext method createDecimal64VectorExpression.
/**
 * Tries to instantiate {@code vectorClass} as a Decimal64 vector expression.
 *
 * <p>While the children are being vectorized, {@code tryDecimal64Cast} is forced to
 * {@code true}; the previous value is restored in a {@code finally} block on every exit path.
 *
 * <p>The method returns {@code null} when the expression cannot be built in Decimal64 form:
 * <ul>
 *   <li>{@code genericUdf} is a {@link GenericUDFBaseBinary} and a function child's output
 *       physical variation is NONE;</li>
 *   <li>in FILTER mode, no filter expression could be obtained for a column child;</li>
 *   <li>a decimal constant is encountered while no Decimal64 scale is established, the
 *       constant is NULL, or its scale exceeds {@code decimal64ColumnScale};</li>
 *   <li>a child is neither a function, a column, nor a constant.</li>
 * </ul>
 *
 * @param vectorClass the vector expression class to instantiate
 * @param childExprs child descriptors
 * @param childrenMode mode used when vectorizing the children
 * @param isDecimal64ScaleEstablished whether {@code decimal64ColumnScale} is meaningful
 * @param decimal64ColumnScale scale that decimal constants are serialized to
 * @param returnTypeInfo result type of the expression
 * @param returnDataTypePhysicalVariation physical variation of the result
 * @param dontRescaleArguments when {@code true}, decimal constants are serialized at their
 *        own scale instead of {@code decimal64ColumnScale}
 * @param genericUdf the UDF being vectorized
 * @return the vector expression, or {@code null} if a Decimal64 form is not possible
 * @throws HiveException if a child cannot be vectorized
 */
@SuppressWarnings("null")
private VectorExpression createDecimal64VectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExprs, VectorExpressionDescriptor.Mode childrenMode, boolean isDecimal64ScaleEstablished, int decimal64ColumnScale, TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation, boolean dontRescaleArguments, GenericUDF genericUdf) throws HiveException {
  final int numChildren = childExprs.size();
  VectorExpression vectorExpression = null;
  boolean oldTryDecimal64Cast = this.tryDecimal64Cast;
  tryDecimal64Cast = true;
  try {
    List<VectorExpression> children = new ArrayList<>();
    Object[] arguments = new Object[numChildren];
    TypeInfo[] typeInfos = new TypeInfo[numChildren];
    DataTypePhysicalVariation[] dataTypePhysicalVariations = new DataTypePhysicalVariation[numChildren];
    for (int i = 0; i < numChildren; i++) {
      ExprNodeDesc childExpr = childExprs.get(i);
      TypeInfo typeInfo = childExpr.getTypeInfo();
      typeInfos[i] = typeInfo;
      dataTypePhysicalVariations[i] = (checkTypeInfoForDecimal64(typeInfo) ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
      if (childExpr instanceof ExprNodeGenericFuncDesc) {
        VectorExpression vChild = getVectorExpression(childExpr, childrenMode);
        if (genericUdf instanceof GenericUDFBaseBinary && vChild.getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.NONE) {
          return null;
        }
        children.add(vChild);
        arguments[i] = vChild.getOutputColumnNum();
      } else if (childExpr instanceof ExprNodeColumnDesc) {
        int colIndex = getInputColumnIndex((ExprNodeColumnDesc) childExpr);
        if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
          VectorExpression filterExpr = getFilterOnBooleanColumnExpression((ExprNodeColumnDesc) childExpr, colIndex);
          if (filterExpr == null) {
            return null;
          }
          children.add(filterExpr);
        }
        arguments[i] = colIndex;
      } else if (childExpr instanceof ExprNodeConstantDesc) {
        ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childExpr;
        if (typeInfo instanceof DecimalTypeInfo) {
          if (!isDecimal64ScaleEstablished) {
            return null;
          }
          HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue();
          if (hiveDecimal == null) {
            // A NULL decimal constant has no value to serialize as a Decimal64 long.
            // Bail out the same way the other unsupported cases in this method do,
            // instead of failing with a NullPointerException on scale().
            return null;
          }
          if (hiveDecimal.scale() > decimal64ColumnScale) {
            // For now, bail out on decimal constants with larger scale than column scale.
            return null;
          }
          if (dontRescaleArguments) {
            arguments[i] = new HiveDecimalWritable(hiveDecimal).serialize64(hiveDecimal.scale());
          } else {
            arguments[i] = new HiveDecimalWritable(hiveDecimal).serialize64(decimal64ColumnScale);
          }
        } else {
          Object scalarValue = getVectorTypeScalarValue(constDesc);
          arguments[i] = (scalarValue == null) ? getConstantVectorExpression(null, typeInfo, childrenMode) : scalarValue;
        }
      } else {
        return null;
      }
    }
    /*
     * Instantiate Decimal64 vector expression.
     *
     * The instantiateExpression method sets the output column and type information.
     */
    vectorExpression = instantiateExpression(vectorClass, returnTypeInfo, returnDataTypePhysicalVariation, arguments);
    if (vectorExpression == null) {
      handleCouldNotInstantiateVectorExpression(vectorClass, returnTypeInfo, returnDataTypePhysicalVariation, arguments);
    }
    Objects.requireNonNull(vectorExpression).setInputTypeInfos(typeInfos);
    vectorExpression.setInputDataTypePhysicalVariations(dataTypePhysicalVariations);
    if (!children.isEmpty()) {
      vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
    }
  } finally {
    // Restore the caller's setting regardless of how the block above exits.
    tryDecimal64Cast = oldTryDecimal64Cast;
  }
  return vectorExpression;
}
Aggregations