
Example 1 with VectorUDFArgDesc

Use of org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc in project hive by apache.

In the class Vectorizer, method fixDecimalDataTypePhysicalVariations:

private static VectorExpression fixDecimalDataTypePhysicalVariations(final VectorExpression parent, final VectorExpression[] children, final VectorizationContext vContext) throws HiveException {
    if (children == null || children.length == 0) {
        return parent;
    }
    for (int i = 0; i < children.length; i++) {
        VectorExpression child = children[i];
        VectorExpression newChild = fixDecimalDataTypePhysicalVariations(child, child.getChildExpressions(), vContext);
        if (child.getClass() == newChild.getClass() && child != newChild) {
            children[i] = newChild;
        }
    }
    if (parent.getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.NONE && !(parent instanceof ConvertDecimal64ToDecimal)) {
        boolean inputArgsChanged = false;
        DataTypePhysicalVariation[] dataTypePhysicalVariations = parent.getInputDataTypePhysicalVariations();
        for (int i = 0; i < children.length; i++) {
            // we found at least one child with a mismatch
            if (children[i].getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.DECIMAL_64) {
                children[i] = vContext.wrapWithDecimal64ToDecimalConversion(children[i]);
                inputArgsChanged = true;
                dataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
            }
        }
        // fix up the input column numbers and output column numbers
        if (inputArgsChanged) {
            if (parent instanceof VectorUDFAdaptor) {
                VectorUDFAdaptor parentAdaptor = (VectorUDFAdaptor) parent;
                VectorUDFArgDesc[] argDescs = parentAdaptor.getArgDescs();
                for (int i = 0; i < argDescs.length; ++i) {
                    if (argDescs[i].getColumnNum() != children[i].getOutputColumnNum()) {
                        argDescs[i].setColumnNum(children[i].getOutputColumnNum());
                        break;
                    }
                }
            } else {
                Object[] arguments;
                int argumentCount = children.length + (parent.getOutputColumnNum() == -1 ? 0 : 1);
                // Need to handle it as a special case to avoid instantiation failure.
                if (parent instanceof VectorCoalesce) {
                    arguments = new Object[2];
                    arguments[0] = new int[children.length];
                    for (int i = 0; i < children.length; i++) {
                        VectorExpression vce = children[i];
                        ((int[]) arguments[0])[i] = vce.getOutputColumnNum();
                    }
                    arguments[1] = parent.getOutputColumnNum();
                } else {
                    if (parent instanceof DecimalColDivideDecimalScalar) {
                        arguments = new Object[argumentCount + 1];
                        arguments[children.length] = ((DecimalColDivideDecimalScalar) parent).getValue();
                    } else {
                        arguments = new Object[argumentCount];
                    }
                    for (int i = 0; i < children.length; i++) {
                        VectorExpression vce = children[i];
                        arguments[i] = vce.getOutputColumnNum();
                    }
                }
                // retain output column number from parent
                if (parent.getOutputColumnNum() != -1) {
                    arguments[arguments.length - 1] = parent.getOutputColumnNum();
                }
                // re-instantiate the parent expression with new arguments
                VectorExpression newParent = vContext.instantiateExpression(parent.getClass(), parent.getOutputTypeInfo(), parent.getOutputDataTypePhysicalVariation(), arguments);
                newParent.setOutputTypeInfo(parent.getOutputTypeInfo());
                newParent.setOutputDataTypePhysicalVariation(parent.getOutputDataTypePhysicalVariation());
                newParent.setInputTypeInfos(parent.getInputTypeInfos());
                newParent.setInputDataTypePhysicalVariations(dataTypePhysicalVariations);
                newParent.setChildExpressions(parent.getChildExpressions());
                return newParent;
            }
        }
    }
    return parent;
}
Also used: ConvertDecimal64ToDecimal (org.apache.hadoop.hive.ql.exec.vector.expressions.ConvertDecimal64ToDecimal), DecimalColDivideDecimalScalar (org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColDivideDecimalScalar), VectorCoalesce (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce), VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor), VectorUDFArgDesc (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc), DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
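
The fix-up above re-points a VectorUDFAdaptor's argument descriptors whenever a DECIMAL_64 child gets wrapped with a conversion and therefore writes to a new scratch column. Below is a minimal sketch of that remapping step in isolation, using only the VectorUDFAdaptor and VectorUDFArgDesc calls that appear in the excerpt. The class ArgDescRemapSketch and the helper remapAdaptorArgColumns are hypothetical names, and unlike the excerpt (which stops at the first mismatch) this version re-points every argument whose column changed.

import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;

// Hypothetical helper class, not part of Hive.
final class ArgDescRemapSketch {

    // Re-point each argument descriptor at the column its (possibly wrapped) child now writes to.
    static void remapAdaptorArgColumns(VectorUDFAdaptor adaptor, VectorExpression[] children) {
        VectorUDFArgDesc[] argDescs = adaptor.getArgDescs();
        for (int i = 0; i < argDescs.length && i < children.length; i++) {
            int newColumn = children[i].getOutputColumnNum();
            if (argDescs[i].getColumnNum() != newColumn) {
                // the wrapping conversion gave the child a new output column; follow it
                argDescs[i].setColumnNum(newColumn);
            }
        }
    }
}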

Example 2 with VectorUDFArgDesc

Use of org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc in project hive by apache.

In the class VectorizationContext, method getCustomUDFExpression:

/*
   * Return vector expression for a custom (i.e. not built-in) UDF.
   */
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) throws HiveException {
    // Assume this is not a filter until the output type proves otherwise.
    boolean isFilter = false;
    if (mode == VectorExpressionDescriptor.Mode.FILTER) {
        // Is output type a BOOLEAN?
        TypeInfo resultTypeInfo = expr.getTypeInfo();
        if (resultTypeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) resultTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
            isFilter = true;
        } else {
            return null;
        }
    }
    // GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
    List<ExprNodeDesc> childExprList = expr.getChildren();
    final int childrenCount = childExprList.size();
    // argument descriptors
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childrenCount];
    for (int i = 0; i < argDescs.length; i++) {
        argDescs[i] = new VectorUDFArgDesc();
    }
    // positions of variable arguments (columns or non-constant expressions)
    List<Integer> variableArgPositions = new ArrayList<>();
    // Column numbers of batch corresponding to expression result arguments
    List<Integer> exprResultColumnNums = new ArrayList<>();
    // Prepare children
    List<VectorExpression> vectorExprs = new ArrayList<>();
    TypeInfo[] inputTypeInfos = new TypeInfo[childrenCount];
    DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[childrenCount];
    for (int i = 0; i < childrenCount; i++) {
        ExprNodeDesc child = childExprList.get(i);
        inputTypeInfos[i] = child.getTypeInfo();
        inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
        if (child instanceof ExprNodeGenericFuncDesc) {
            VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
            vectorExprs.add(e);
            variableArgPositions.add(i);
            exprResultColumnNums.add(e.getOutputColumnNum());
            argDescs[i].setVariable(e.getOutputColumnNum());
        } else if (child instanceof ExprNodeColumnDesc) {
            variableArgPositions.add(i);
            argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
        } else if (child instanceof ExprNodeConstantDesc) {
            // this is a constant (or null)
            if (child.getTypeInfo().getCategory() != Category.PRIMITIVE && child.getTypeInfo().getCategory() != Category.STRUCT) {
                // Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant.
                throw new HiveException("Unable to vectorize custom UDF. LIST, MAP, and UNION type constants not supported: " + child);
            }
            argDescs[i].setConstant((ExprNodeConstantDesc) child);
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
            vectorExprs.add(e);
            variableArgPositions.add(i);
            exprResultColumnNums.add(e.getOutputColumnNum());
            argDescs[i].setVariable(e.getOutputColumnNum());
        } else if (child instanceof ExprNodeFieldDesc) {
            // Get the GenericUDFStructField to process the field of Struct type
            VectorExpression e = getGenericUDFStructField((ExprNodeFieldDesc) child, VectorExpressionDescriptor.Mode.PROJECTION, child.getTypeInfo());
            vectorExprs.add(e);
            variableArgPositions.add(i);
            exprResultColumnNums.add(e.getOutputColumnNum());
            argDescs[i].setVariable(e.getOutputColumnNum());
        } else {
            throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + child);
        }
    }
    // Allocate the output column and get its column number.
    TypeInfo resultTypeInfo = expr.getTypeInfo();
    String resultTypeName = resultTypeInfo.getTypeName();
    final int outputColumnNum = ocm.allocateOutputColumn(expr.getTypeInfo());
    // Make vectorized operator
    VectorUDFAdaptor ve = new VectorUDFAdaptor(expr, outputColumnNum, resultTypeName, argDescs);
    ve.setSuppressEvaluateExceptions(adaptorSuppressEvaluateExceptions);
    // Set child expressions
    VectorExpression[] childVEs = null;
    if (exprResultColumnNums.size() != 0) {
        childVEs = new VectorExpression[exprResultColumnNums.size()];
        for (int i = 0; i < childVEs.length; i++) {
            childVEs[i] = vectorExprs.get(i);
        }
    }
    ve.setChildExpressions(childVEs);
    ve.setInputTypeInfos(inputTypeInfos);
    ve.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
    ve.setOutputTypeInfo(resultTypeInfo);
    ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
    // Free output columns if inputs have non-leaf expression trees.
    for (Integer i : exprResultColumnNums) {
        ocm.freeOutputColumn(i);
    }
    if (isFilter) {
        SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputColumnNum);
        filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });
        filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
        filterVectorExpr.setInputDataTypePhysicalVariations(ve.getOutputDataTypePhysicalVariation());
        return filterVectorExpr;
    } else {
        return ve;
    }
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), ExprNodeDynamicValueDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc), ArrayList (java.util.ArrayList), VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor), CastDecimalToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString), CastLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString), CastFloatToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString), CastDateToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString), CastTimestampToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString), CastDoubleToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString), CastBooleanToStringViaLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectColumnIsTrue (org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), VectorUDFArgDesc (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo), BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc), FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression), ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)
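
The core of getCustomUDFExpression is the per-argument decision: a constant child is stored directly in its VectorUDFArgDesc via setConstant, while any other child becomes a variable argument bound to a batch column via setVariable. Below is a condensed sketch of just that decision, assuming only the VectorUDFArgDesc calls shown in the excerpt; the class ArgDescBuildSketch and the ColumnResolver callback are hypothetical stand-ins for the vectorization context's column lookup.

import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

// Hypothetical helper class, not part of Hive.
final class ArgDescBuildSketch {

    // Assumed callback standing in for the context's column resolution.
    interface ColumnResolver {
        int columnOf(ExprNodeDesc expr);
    }

    // Build one descriptor per UDF argument: constants are captured in the descriptor itself,
    // everything else is bound to the batch column that holds the argument's value.
    static VectorUDFArgDesc[] buildArgDescs(List<ExprNodeDesc> childExprs, ColumnResolver resolver) {
        VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childExprs.size()];
        for (int i = 0; i < argDescs.length; i++) {
            argDescs[i] = new VectorUDFArgDesc();
            ExprNodeDesc child = childExprs.get(i);
            if (child instanceof ExprNodeConstantDesc) {
                argDescs[i].setConstant((ExprNodeConstantDesc) child);
            } else {
                argDescs[i].setVariable(resolver.columnOf(child));
            }
        }
        return argDescs;
    }
}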

Aggregations

DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 2
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 2
VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor): 2
VectorUDFArgDesc (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc): 2
ArrayList (java.util.ArrayList): 1
CastBooleanToStringViaLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString): 1
CastDateToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString): 1
CastDecimalToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString): 1
CastDoubleToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString): 1
CastFloatToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString): 1
CastLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString): 1
CastTimestampToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString): 1
ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression): 1
ConvertDecimal64ToDecimal (org.apache.hadoop.hive.ql.exec.vector.expressions.ConvertDecimal64ToDecimal): 1
DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression): 1
FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression): 1
SelectColumnIsTrue (org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue): 1
VectorCoalesce (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce): 1
DecimalColDivideDecimalScalar (org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColDivideDecimalScalar): 1
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 1