Search in sources :

Example 6 with SelectColumnIsTrue

use of org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue in project hive by apache.

the class VectorizationContext method getCustomUDFExpression.

/*
   * Return vector expression for a custom (i.e. not built-in) UDF.
   */
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) throws HiveException {
    // Assume.
    boolean isFilter = false;
    if (mode == VectorExpressionDescriptor.Mode.FILTER) {
        // Is output type a BOOLEAN?
        TypeInfo resultTypeInfo = expr.getTypeInfo();
        if (resultTypeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) resultTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
            isFilter = true;
        } else {
            return null;
        }
    }
    // GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
    List<ExprNodeDesc> childExprList = expr.getChildren();
    final int childrenCount = childExprList.size();
    // argument descriptors
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childrenCount];
    for (int i = 0; i < argDescs.length; i++) {
        argDescs[i] = new VectorUDFArgDesc();
    }
    // positions of variable arguments (columns or non-constant expressions)
    List<Integer> variableArgPositions = new ArrayList<>();
    // Column numbers of batch corresponding to expression result arguments
    List<Integer> exprResultColumnNums = new ArrayList<>();
    // Prepare children
    List<VectorExpression> vectorExprs = new ArrayList<>();
    TypeInfo[] inputTypeInfos = new TypeInfo[childrenCount];
    DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[childrenCount];
    for (int i = 0; i < childrenCount; i++) {
        ExprNodeDesc child = childExprList.get(i);
        inputTypeInfos[i] = child.getTypeInfo();
        inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
        if (child instanceof ExprNodeGenericFuncDesc) {
            VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
            vectorExprs.add(e);
            variableArgPositions.add(i);
            exprResultColumnNums.add(e.getOutputColumnNum());
            argDescs[i].setVariable(e.getOutputColumnNum());
        } else if (child instanceof ExprNodeColumnDesc) {
            variableArgPositions.add(i);
            argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
        } else if (child instanceof ExprNodeConstantDesc) {
            // this is a constant (or null)
            if (child.getTypeInfo().getCategory() != Category.PRIMITIVE && child.getTypeInfo().getCategory() != Category.STRUCT) {
                // Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant.
                throw new HiveException("Unable to vectorize custom UDF. LIST, MAP, and UNION type constants not supported: " + child);
            }
            argDescs[i].setConstant((ExprNodeConstantDesc) child);
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
            vectorExprs.add(e);
            variableArgPositions.add(i);
            exprResultColumnNums.add(e.getOutputColumnNum());
            argDescs[i].setVariable(e.getOutputColumnNum());
        } else if (child instanceof ExprNodeFieldDesc) {
            // Get the GenericUDFStructField to process the field of Struct type
            VectorExpression e = getGenericUDFStructField((ExprNodeFieldDesc) child, VectorExpressionDescriptor.Mode.PROJECTION, child.getTypeInfo());
            vectorExprs.add(e);
            variableArgPositions.add(i);
            exprResultColumnNums.add(e.getOutputColumnNum());
            argDescs[i].setVariable(e.getOutputColumnNum());
        } else {
            throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + child);
        }
    }
    // Allocate output column and get column number;
    TypeInfo resultTypeInfo = expr.getTypeInfo();
    String resultTypeName = resultTypeInfo.getTypeName();
    final int outputColumnNum = ocm.allocateOutputColumn(expr.getTypeInfo());
    // Make vectorized operator
    VectorUDFAdaptor ve = new VectorUDFAdaptor(expr, outputColumnNum, resultTypeName, argDescs);
    ve.setSuppressEvaluateExceptions(adaptorSuppressEvaluateExceptions);
    // Set child expressions
    VectorExpression[] childVEs = null;
    if (exprResultColumnNums.size() != 0) {
        childVEs = new VectorExpression[exprResultColumnNums.size()];
        for (int i = 0; i < childVEs.length; i++) {
            childVEs[i] = vectorExprs.get(i);
        }
    }
    ve.setChildExpressions(childVEs);
    ve.setInputTypeInfos(inputTypeInfos);
    ve.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
    ve.setOutputTypeInfo(resultTypeInfo);
    ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
    // Free output columns if inputs have non-leaf expression trees.
    for (Integer i : exprResultColumnNums) {
        ocm.freeOutputColumn(i);
    }
    if (isFilter) {
        SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputColumnNum);
        filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });
        filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
        filterVectorExpr.setInputDataTypePhysicalVariations(ve.getOutputDataTypePhysicalVariation());
        return filterVectorExpr;
    } else {
        return ve;
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorUDFArgDesc(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)

Aggregations

ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)6 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)6 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)6 SelectColumnIsTrue (org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)6 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)6 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)6 BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)5 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)5 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)5 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)5 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)4 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)4 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)3 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 ArrayList (java.util.ArrayList)2 CastBooleanToStringViaLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString)2 CastDateToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString)2 CastDecimalToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString)2 CastDoubleToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString)2 CastFloatToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString)2