Search in sources :

Example 1 with VectorUDFAdaptor

Use of org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor in the Apache Hive project.

Class VectorizationContext, method getCustomUDFExpression.

/**
 * Returns the vector expression for a custom (i.e. not built-in) UDF by wrapping the call in a
 * {@link VectorUDFAdaptor}.
 *
 * <p>Every child of {@code expr} is described to the adaptor as either a variable (a batch
 * column number) or a constant. Children that are function calls or dynamic values are
 * vectorized first in PROJECTION mode and attached to the adaptor as child expressions; the
 * columns holding their results are freed once the adaptor's own output column has been
 * allocated.
 *
 * @param expr the UDF call to vectorize
 * @param mode the requested mode; FILTER is only supported when the UDF's result type is a
 *     primitive BOOLEAN
 * @return the adaptor expression; in FILTER mode, a {@link SelectColumnIsTrue} over the
 *     adaptor's output column with the adaptor as its child; {@code null} when FILTER mode is
 *     requested for a result type that is not a primitive BOOLEAN
 * @throws HiveException if a child is not a function call, column, constant or dynamic value;
 *     nothing is caught here, so failures while vectorizing a child propagate unchanged
 */
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) throws HiveException {
    final boolean isFilter = mode == VectorExpressionDescriptor.Mode.FILTER;
    if (isFilter) {
        // A filter can only be layered on top of a BOOLEAN result.
        TypeInfo filterTypeInfo = expr.getTypeInfo();
        boolean isBoolean = filterTypeInfo.getCategory() == Category.PRIMITIVE
            && ((PrimitiveTypeInfo) filterTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;
        if (!isBoolean) {
            return null;
        }
    }

    List<ExprNodeDesc> childExprList = expr.getChildren();

    // One argument descriptor per child, in child order.
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childExprList.size()];
    for (int i = 0; i < argDescs.length; i++) {
        argDescs[i] = new VectorUDFArgDesc();
    }

    // Vectorized non-leaf children and, in the same order, the batch columns holding their results.
    List<VectorExpression> childExprs = new ArrayList<VectorExpression>();
    List<Integer> childResultColumns = new ArrayList<Integer>();

    for (int i = 0; i < childExprList.size(); i++) {
        ExprNodeDesc child = childExprList.get(i);
        if (child instanceof ExprNodeGenericFuncDesc) {
            argDescs[i].setVariable(vectorizeCustomUDFChild(child, childExprs, childResultColumns));
        } else if (child instanceof ExprNodeColumnDesc) {
            // Plain column reference: read straight from the input column, no child expression needed.
            argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
        } else if (child instanceof ExprNodeConstantDesc) {
            // this is a constant (or null)
            argDescs[i].setConstant((ExprNodeConstantDesc) child);
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            argDescs[i].setVariable(vectorizeCustomUDFChild(child, childExprs, childResultColumns));
        } else {
            throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + child);
        }
    }

    // Allocate the adaptor's output column before freeing the children's result columns below.
    TypeInfo resultTypeInfo = expr.getTypeInfo();
    String resultTypeName = resultTypeInfo.getTypeName();
    int outputCol = ocm.allocateOutputColumn(resultTypeInfo);

    // Make vectorized operator
    VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, resultTypeName, argDescs);

    // Child expressions stay null (not an empty array) when every argument is a column or constant.
    VectorExpression[] childVEs = null;
    if (!childExprs.isEmpty()) {
        childVEs = childExprs.toArray(new VectorExpression[childExprs.size()]);
    }
    ve.setChildExpressions(childVEs);

    // Free output columns if inputs have non-leaf expression trees.
    for (int column : childResultColumns) {
        ocm.freeOutputColumn(column);
    }

    if (!isFilter) {
        return ve;
    }
    SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputCol);
    filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });
    return filterVectorExpr;
}

/**
 * Vectorizes one non-leaf UDF argument in PROJECTION mode, appends the resulting expression to
 * {@code childExprs} and its output column to {@code childResultColumns}, and returns that
 * output column.
 */
private int vectorizeCustomUDFChild(ExprNodeDesc child, List<VectorExpression> childExprs, List<Integer> childResultColumns) throws HiveException {
    VectorExpression childExpr = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
    childExprs.add(childExpr);
    int resultColumn = childExpr.getOutputColumn();
    childResultColumns.add(resultColumn);
    return resultColumn;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) VectorUDAFMaxString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString) VectorUDAFMinString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString) VectorUDFArgDesc(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

ArrayList (java.util.ArrayList)1 VectorUDAFMaxString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString)1 VectorUDAFMinString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString)1 VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor)1 VectorUDFArgDesc (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)1 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 ExprNodeDynamicValueDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc)1 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)1 BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)1 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)1 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)1 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)1 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)1