Example 6 with GenericUDFBaseCompare

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare in the Apache Hive project, from the collect method of the RedundantDynamicPruningConditionsRemoval class.

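// Walks a filter predicate top-down: records IN clauses whose second operand is a
// dynamic pruning list (together with their parent), records columns compared against
// constants or constant-foldable expressions, and recurses only through AND nodes.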
private static void collect(ExprNodeDesc parent, ExprNodeDesc child, CollectContext listContext) {
    if (child instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) child).getGenericUDF() instanceof GenericUDFIn) {
        if (child.getChildren().get(1) instanceof ExprNodeDynamicListDesc) {
            listContext.dynamicListNodes.add(new Pair<ExprNodeDesc, ExprNodeDesc>(child, parent));
        }
        return;
    }
    if (child instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) child).getGenericUDF() instanceof GenericUDFBaseCompare && child.getChildren().size() == 2) {
        ExprNodeDesc leftCol = child.getChildren().get(0);
        ExprNodeDesc rightCol = child.getChildren().get(1);
        ExprNodeColumnDesc leftColDesc = ExprNodeDescUtils.getColumnExpr(leftCol);
        if (leftColDesc != null) {
            boolean rightConstant = false;
            if (rightCol instanceof ExprNodeConstantDesc) {
                rightConstant = true;
            } else if (rightCol instanceof ExprNodeGenericFuncDesc) {
                ExprNodeDesc foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc) rightCol);
                rightConstant = foldedExpr != null;
            }
            if (rightConstant) {
                listContext.comparatorNodes.add(leftColDesc);
            }
        } else {
            ExprNodeColumnDesc rightColDesc = ExprNodeDescUtils.getColumnExpr(rightCol);
            if (rightColDesc != null) {
                boolean leftConstant = false;
                if (leftCol instanceof ExprNodeConstantDesc) {
                    leftConstant = true;
                } else if (leftCol instanceof ExprNodeGenericFuncDesc) {
                    ExprNodeDesc foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc) leftCol);
                    leftConstant = foldedExpr != null;
                }
                if (leftConstant) {
                    listContext.comparatorNodes.add(rightColDesc);
                }
            }
        }
        return;
    }
    if (FunctionRegistry.isOpAnd(child)) {
        for (ExprNodeDesc newChild : child.getChildren()) {
            collect(child, newChild, listContext);
        }
    }
}
Also used:
org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc
org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc
org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare
org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn
org.apache.hadoop.hive.ql.plan.ExprNodeDesc
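
The collect method above is a recursive instanceof dispatch over Hive's ExprNodeDesc tree: IN clauses whose second operand is a dynamic pruning list are recorded with their parent, column-versus-constant comparisons register the column, and only AND nodes are recursed into. Below is a minimal, self-contained sketch of that same pattern over a toy expression model; the Expr, Call, Column, Constant and DynamicList types are hypothetical stand-ins rather than Hive classes, and the constant-folding step (ConstantPropagateProcFactory.foldExpr) is omitted.

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-ins for Hive's ExprNodeDesc hierarchy; only the shape matters here.
interface Expr {}
record Column(String name) implements Expr {}
record Constant(Object value) implements Expr {}
record DynamicList() implements Expr {}
record Call(String op, List<Expr> args) implements Expr {}

public class CollectSketch {

    // Same traversal pattern as RedundantDynamicPruningConditionsRemoval.collect:
    // remember dynamic-list INs with their parent, remember columns compared to
    // constants, and recurse only through AND so the predicate context is preserved.
    static void collect(Expr parent, Expr child,
                        List<Expr[]> dynamicListNodes, List<Column> comparatorNodes) {
        if (!(child instanceof Call call)) {
            return;
        }
        switch (call.op()) {
            case "in" -> {
                if (call.args().size() > 1 && call.args().get(1) instanceof DynamicList) {
                    dynamicListNodes.add(new Expr[] { child, parent });
                }
            }
            case "=" -> {
                if (call.args().size() == 2) {
                    Expr left = call.args().get(0);
                    Expr right = call.args().get(1);
                    if (left instanceof Column lcol && right instanceof Constant) {
                        comparatorNodes.add(lcol);
                    } else if (right instanceof Column rcol && left instanceof Constant) {
                        comparatorNodes.add(rcol);
                    }
                }
            }
            case "and" -> {
                for (Expr grandChild : call.args()) {
                    collect(child, grandChild, dynamicListNodes, comparatorNodes);
                }
            }
            default -> { }
        }
    }

    public static void main(String[] args) {
        // WHERE part_col IN (<dynamic list>) AND part_col = 10
        Expr in = new Call("in", List.of(new Column("part_col"), new DynamicList()));
        Expr eq = new Call("=", List.of(new Column("part_col"), new Constant(10)));
        Expr and = new Call("and", List.of(in, eq));

        List<Expr[]> dynamicListNodes = new ArrayList<>();
        List<Column> comparatorNodes = new ArrayList<>();
        collect(null, and, dynamicListNodes, comparatorNodes);

        // The dynamic pruning IN is a candidate for removal: the same column is
        // already constrained by a constant comparison.
        System.out.println("dynamic pruning INs: " + dynamicListNodes.size()); // 1
        System.out.println("columns vs constants: " + comparatorNodes);        // [Column[name=part_col]]
    }
}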

Example 7 with GenericUDFBaseCompare

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare in the Apache Hive project, from the convert method of the RexNodeConverter class.

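// Converts a Hive ExprNodeGenericFuncDesc into a Calcite RexNode: derives a common
// comparison type for compare/BETWEEN/IN and casts children where required, maps the
// Hive UDF to a Calcite operator, rewrites CASE, EXTRACT, FLOOR and single-item IN
// calls, and finally flattens nested calls of the same operator (except casts).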
private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
    ExprNodeDesc tmpExprNode;
    RexNode tmpRN;
    List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
    Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder();
    // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
    TypeInfo tgtDT = null;
    GenericUDF tgtUdf = func.getGenericUDF();
    boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary && func.getTypeInfo().getCategory() == Category.PRIMITIVE && (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
    boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
    boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
    boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && func.getChildren().size() != 0;
    boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween;
    boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn;
    boolean isAllPrimitive = true;
    if (isNumeric) {
        tgtDT = func.getTypeInfo();
        assert func.getChildren().size() == 2;
    // TODO: checking 2 children is useless, compare already does that.
    } else if (isCompare && (func.getChildren().size() == 2)) {
        tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
    } else if (isWhenCase) {
        // CASE/WHEN children must not contain stateful functions, as they are not allowed
        if (checkForStatefulFunctions(func.getChildren())) {
            throw new SemanticException("Stateful expressions cannot be used inside of CASE");
        }
    } else if (isTransformableTimeStamp) {
        // unix_timestamp(args) -> to_unix_timestamp(args)
        func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
    } else if (isBetween) {
        assert func.getChildren().size() == 4;
        // Skip the first child; it is not involved in the comparison (it is the invert boolean)
        // The target type needs to account for all 3 operands
        tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(1).getTypeInfo(), FunctionRegistry.getCommonClassForComparison(func.getChildren().get(2).getTypeInfo(), func.getChildren().get(3).getTypeInfo()));
    } else if (isIN) {
        // We're only considering the first element of the IN list for the type
        assert func.getChildren().size() > 1;
        tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
    }
    for (int i = 0; i < func.getChildren().size(); ++i) {
        ExprNodeDesc childExpr = func.getChildren().get(i);
        tmpExprNode = childExpr;
        if (tgtDT != null && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
            if (isCompare || isBetween || isIN) {
                // For BETWEEN, skip the first child (the invert boolean)
                if (!isBetween || i > 0) {
                    tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
                }
            } else if (isNumeric) {
                // For numeric ops, do the minimum necessary cast; casting the child to the
                // type of the whole expression would produce incorrect results.
                PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
                tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType);
            } else {
                throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
            }
        }
        isAllPrimitive = isAllPrimitive && tmpExprNode.getTypeInfo().getCategory() == Category.PRIMITIVE;
        argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
        tmpRN = convert(tmpExprNode);
        childRexNodeLst.add(tmpRN);
    }
    // See if this is an explicit cast.
    RexNode expr = null;
    RelDataType retType = null;
    expr = handleExplicitCast(func, childRexNodeLst);
    if (expr == null) {
        // This is not a cast; process the function.
        retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
        SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType);
        if (calciteOp.getKind() == SqlKind.CASE) {
            // If it is a case operator, we need to rewrite it
            childRexNodeLst = rewriteCaseChildren(func, childRexNodeLst);
        } else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) {
            // If it is a extract operator, we need to rewrite it
            childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst);
        } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) {
            // If it is a floor <date> operator, we need to rewrite it
            childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst);
        } else if (calciteOp.getKind() == SqlKind.IN && childRexNodeLst.size() == 2 && isAllPrimitive) {
            // if it is a single item in an IN clause, transform A IN (B) to A = B
            // from IN [A,B] => EQUALS [A,B]
            // except complex types
            calciteOp = SqlFunctionConverter.getCalciteOperator("=", FunctionRegistry.getFunctionInfo("=").getGenericUDF(), argTypeBldr.build(), retType);
        }
        expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst);
    } else {
        retType = expr.getType();
    }
    // Flatten nested calls of the same operator; cast calls are excluded because
    // flattening a cast can throw an exception.
    if (flattenExpr && (expr instanceof RexCall) && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
        RexCall call = (RexCall) expr;
        expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
    }
    return expr;
}
Also used:
org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween
org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase
org.apache.calcite.sql.fun.SqlCastFunction
org.apache.calcite.sql.SqlOperator
org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary
org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen
java.util.ArrayList
org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp
org.apache.calcite.rel.type.RelDataType
org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp
org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo
org.apache.hadoop.hive.serde2.typeinfo.TypeInfo
org.apache.calcite.rex.RexCall
org.apache.hadoop.hive.ql.udf.generic.GenericUDF
org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare
org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn
org.apache.hadoop.hive.ql.plan.ExprNodeDesc
org.apache.calcite.rex.RexNode
org.apache.hadoop.hive.ql.parse.SemanticException
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException
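
One detail worth calling out in convert() is how operands are coerced before the Calcite call is built: for comparisons, BETWEEN and IN, the method first derives a common comparison type via FunctionRegistry.getCommonClassForComparison, then wraps every operand that needs conversion in a cast, skipping BETWEEN's first operand because it is the invert flag rather than a value. The sketch below models that step with a toy type lattice; ToyType, commonForComparison and planCasts are hypothetical simplifications, not Hive or Calcite APIs, and the real code delegates cast creation to ParseUtils.createConversionCast.

import java.util.List;

public class CastSketch {

    // Toy type lattice standing in for Hive's TypeInfo; not a Hive API.
    enum ToyType { INT, BIGINT, DOUBLE, STRING }

    // Stand-in for FunctionRegistry.getCommonClassForComparison: widen numerics,
    // and fall back to DOUBLE when a string meets a number (roughly what Hive does).
    static ToyType commonForComparison(ToyType a, ToyType b) {
        if (a == b) return a;
        if (a == ToyType.STRING || b == ToyType.STRING) return ToyType.DOUBLE;
        return a.ordinal() > b.ordinal() ? a : b;   // INT < BIGINT < DOUBLE
    }

    // Mirrors the cast-planning step of convert() for comparisons and BETWEEN:
    // derive one common comparison type over the value operands, then cast every
    // operand whose type differs from it. BETWEEN's first operand is the invert
    // flag and is never cast.
    static void planCasts(String op, List<ToyType> children) {
        boolean isBetween = op.equals("between");
        int firstValue = isBetween ? 1 : 0;
        ToyType target = children.get(firstValue);
        for (int i = firstValue + 1; i < children.size(); i++) {
            target = commonForComparison(target, children.get(i));
        }
        for (int i = firstValue; i < children.size(); i++) {
            ToyType childType = children.get(i);
            if (childType != target) {
                System.out.printf("cast operand %d from %s to %s%n", i, childType, target);
            }
        }
        System.out.println(op + " evaluated as " + target);
    }

    public static void main(String[] args) {
        // int = string: both sides are compared as DOUBLE
        planCasts("=", List.of(ToyType.INT, ToyType.STRING));
        // BETWEEN children are [invert flag, value, low, high]; the INT in
        // position 0 merely stands in for the boolean flag and is left alone.
        planCasts("between", List.of(ToyType.INT, ToyType.INT, ToyType.BIGINT, ToyType.DOUBLE));
    }
}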

Aggregations

GenericUDFBaseCompare (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare): 7 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 6 usages
ArrayList (java.util.ArrayList): 5 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 5 usages
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 5 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 4 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 4 usages
GenericUDFIn (org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn): 3 usages
GenericUDFOPNot (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot): 3 usages
GenericUDFOPNotNull (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull): 3 usages
List (java.util.List): 2 usages
ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc): 2 usages
GenericUDFBetween (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween): 2 usages
GenericUDFCase (org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase): 2 usages
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 2 usages
GenericUDFOPNull (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull): 2 usages
GenericUDFOPOr (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr): 2 usages
GenericUDFToUnixTimeStamp (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp): 2 usages
GenericUDFUnixTimeStamp (org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp): 2 usages
GenericUDFWhen (org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen): 2 usages