Search in sources :

Example 1 with GenericUDFWhen

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen in project hive by apache.

the class ConstantPropagateProcFactory method shortcutFunction.

private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc> newExprs, Operator<? extends Serializable> op) throws UDFArgumentException {
    if (udf instanceof GenericUDFOPEqual) {
        assert newExprs.size() == 2;
        boolean foundUDFInFirst = false;
        ExprNodeGenericFuncDesc caseOrWhenexpr = null;
        if (newExprs.get(0) instanceof ExprNodeGenericFuncDesc) {
            caseOrWhenexpr = (ExprNodeGenericFuncDesc) newExprs.get(0);
            if (caseOrWhenexpr.getGenericUDF() instanceof GenericUDFWhen || caseOrWhenexpr.getGenericUDF() instanceof GenericUDFCase) {
                foundUDFInFirst = true;
            }
        }
        if (!foundUDFInFirst && newExprs.get(1) instanceof ExprNodeGenericFuncDesc) {
            caseOrWhenexpr = (ExprNodeGenericFuncDesc) newExprs.get(1);
            if (!(caseOrWhenexpr.getGenericUDF() instanceof GenericUDFWhen || caseOrWhenexpr.getGenericUDF() instanceof GenericUDFCase)) {
                return null;
            }
        }
        if (null == caseOrWhenexpr) {
            // we didn't find case or when udf
            return null;
        }
        GenericUDF childUDF = caseOrWhenexpr.getGenericUDF();
        List<ExprNodeDesc> children = caseOrWhenexpr.getChildren();
        int i;
        if (childUDF instanceof GenericUDFWhen) {
            for (i = 1; i < children.size(); i += 2) {
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            if (children.size() % 2 == 1) {
                i = children.size() - 1;
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            // after constant folding of child expression the return type of UDFWhen might have changed,
            // so recreate the expression
            ExprNodeGenericFuncDesc newCaseOrWhenExpr = ExprNodeGenericFuncDesc.newInstance(childUDF, caseOrWhenexpr.getFuncText(), children);
            return newCaseOrWhenExpr;
        } else if (childUDF instanceof GenericUDFCase) {
            for (i = 2; i < children.size(); i += 2) {
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            if (children.size() % 2 == 0) {
                i = children.size() - 1;
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            // after constant folding of child expression the return type of UDFCase might have changed,
            // so recreate the expression
            ExprNodeGenericFuncDesc newCaseOrWhenExpr = ExprNodeGenericFuncDesc.newInstance(childUDF, caseOrWhenexpr.getFuncText(), children);
            return newCaseOrWhenExpr;
        } else {
            // cant happen
            return null;
        }
    }
    if (udf instanceof GenericUDFOPAnd) {
        final BitSet positionsToRemove = new BitSet();
        final List<ExprNodeDesc> notNullExprs = new ArrayList<ExprNodeDesc>();
        final List<Integer> notNullExprsPositions = new ArrayList<Integer>();
        final List<ExprNodeDesc> compareExprs = new ArrayList<ExprNodeDesc>();
        for (int i = 0; i < newExprs.size(); i++) {
            ExprNodeDesc childExpr = newExprs.get(i);
            if (childExpr instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr;
                if (Boolean.TRUE.equals(c.getValue())) {
                    // if true, prune it
                    positionsToRemove.set(i);
                } else {
                    // if false, return false
                    return childExpr;
                }
            } else if (childExpr instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFOPNotNull && childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc) {
                notNullExprs.add(childExpr.getChildren().get(0));
                notNullExprsPositions.add(i);
            } else if (childExpr instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFBaseCompare && !(((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFOPNotEqual) && childExpr.getChildren().size() == 2) {
                // Try to fold (key <op> 86) and (key is not null) to (key <op> 86)
                // where <op> can be "=", ">=", "<=", ">", "<".
                // Note: (key <> 86) and (key is not null) cannot be folded
                ExprNodeColumnDesc colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(0));
                if (null == colDesc) {
                    colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(1));
                }
                if (colDesc != null) {
                    compareExprs.add(colDesc);
                }
            }
        }
        // Try to fold (key = 86) and (key is not null) to (key = 86)
        for (int i = 0; i < notNullExprs.size(); i++) {
            for (ExprNodeDesc other : compareExprs) {
                if (notNullExprs.get(i).isSame(other)) {
                    positionsToRemove.set(notNullExprsPositions.get(i));
                    break;
                }
            }
        }
        // Remove unnecessary expressions
        int pos = 0;
        int removed = 0;
        while ((pos = positionsToRemove.nextSetBit(pos)) != -1) {
            newExprs.remove(pos - removed);
            pos++;
            removed++;
        }
        if (newExprs.size() == 0) {
            return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE);
        }
        if (newExprs.size() == 1) {
            return newExprs.get(0);
        }
    }
    if (udf instanceof GenericUDFOPOr) {
        final BitSet positionsToRemove = new BitSet();
        for (int i = 0; i < newExprs.size(); i++) {
            ExprNodeDesc childExpr = newExprs.get(i);
            if (childExpr instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr;
                if (Boolean.FALSE.equals(c.getValue())) {
                    // if false, prune it
                    positionsToRemove.set(i);
                } else if (Boolean.TRUE.equals(c.getValue())) {
                    // if true return true
                    return childExpr;
                }
            }
        }
        int pos = 0;
        int removed = 0;
        while ((pos = positionsToRemove.nextSetBit(pos)) != -1) {
            newExprs.remove(pos - removed);
            pos++;
            removed++;
        }
        if (newExprs.size() == 0) {
            return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.FALSE);
        }
        if (newExprs.size() == 1) {
            return newExprs.get(0);
        }
    }
    if (udf instanceof GenericUDFWhen) {
        if (!(newExprs.size() == 2 || newExprs.size() == 3)) {
            // we currently only handle either 1 or 2 branch.
            return null;
        }
        ExprNodeDesc thenExpr = newExprs.get(1);
        ExprNodeDesc elseExpr = newExprs.size() == 3 ? newExprs.get(2) : new ExprNodeConstantDesc(newExprs.get(1).getTypeInfo(), null);
        ExprNodeDesc whenExpr = newExprs.get(0);
        if (whenExpr instanceof ExprNodeConstantDesc) {
            Boolean whenVal = (Boolean) ((ExprNodeConstantDesc) whenExpr).getValue();
            return (whenVal == null || Boolean.FALSE.equals(whenVal)) ? elseExpr : thenExpr;
        }
        if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) {
            ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) thenExpr;
            ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) elseExpr;
            Object thenVal = constThen.getValue();
            Object elseVal = constElse.getValue();
            if (thenVal == null) {
                if (elseVal == null) {
                    // both branches are null.
                    return thenExpr;
                } else if (op instanceof FilterOperator) {
                    // we can still fold, since here null is equivalent to false.
                    return Boolean.TRUE.equals(elseVal) ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), newExprs.subList(0, 1)) : Boolean.FALSE.equals(elseVal) ? elseExpr : null;
                } else {
                    // can't do much, expression is not in context of filter, so we can't treat null as equivalent to false here.
                    return null;
                }
            } else if (elseVal == null && op instanceof FilterOperator) {
                return Boolean.TRUE.equals(thenVal) ? whenExpr : Boolean.FALSE.equals(thenVal) ? thenExpr : null;
            } else if (thenVal.equals(elseVal)) {
                return thenExpr;
            } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
                List<ExprNodeDesc> children = new ArrayList<>();
                children.add(whenExpr);
                children.add(new ExprNodeConstantDesc(false));
                ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(), children);
                if (Boolean.TRUE.equals(thenVal)) {
                    return func;
                } else {
                    List<ExprNodeDesc> exprs = new ArrayList<>();
                    exprs.add(func);
                    return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs);
                }
            } else {
                return null;
            }
        }
    }
    if (udf instanceof GenericUDFCase) {
        // where ss_sold_date= '1998-01-01' ;
        if (!(newExprs.size() == 3 || newExprs.size() == 4)) {
            // we currently only handle either 1 or 2 branch.
            return null;
        }
        ExprNodeDesc thenExpr = newExprs.get(2);
        ExprNodeDesc elseExpr = newExprs.size() == 4 ? newExprs.get(3) : new ExprNodeConstantDesc(newExprs.get(2).getTypeInfo(), null);
        if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) {
            ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) thenExpr;
            ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) elseExpr;
            Object thenVal = constThen.getValue();
            Object elseVal = constElse.getValue();
            if (thenVal == null) {
                if (null == elseVal) {
                    return thenExpr;
                } else if (op instanceof FilterOperator) {
                    return Boolean.TRUE.equals(elseVal) ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNotEqual(), newExprs.subList(0, 2)) : Boolean.FALSE.equals(elseVal) ? elseExpr : null;
                } else {
                    return null;
                }
            } else if (null == elseVal && op instanceof FilterOperator) {
                return Boolean.TRUE.equals(thenVal) ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), newExprs.subList(0, 2)) : Boolean.FALSE.equals(thenVal) ? thenExpr : null;
            } else if (thenVal.equals(elseVal)) {
                return thenExpr;
            } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
                ExprNodeGenericFuncDesc equal = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), newExprs.subList(0, 2));
                List<ExprNodeDesc> children = new ArrayList<>();
                children.add(equal);
                children.add(new ExprNodeConstantDesc(false));
                ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(), children);
                if (Boolean.TRUE.equals(thenVal)) {
                    return func;
                } else {
                    List<ExprNodeDesc> exprs = new ArrayList<>();
                    exprs.add(func);
                    return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs);
                }
            } else {
                return null;
            }
        }
    }
    if (udf instanceof GenericUDFUnixTimeStamp) {
        if (newExprs.size() >= 1) {
            // unix_timestamp(args) -> to_unix_timestamp(args)
            return ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), newExprs);
        }
    }
    return null;
}
Also used : GenericUDFCase(org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase) GenericUDFWhen(org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen) ArrayList(java.util.ArrayList) GenericUDFUnixTimeStamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp) GenericUDFOPNotNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull) GenericUDFOPNotEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) BitSet(java.util.BitSet) GenericUDFToUnixTimeStamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFNvl(org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) GenericUDFOPNot(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)

Example 2 with GenericUDFWhen

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen in project hive by apache.

the class RexNodeConverter method convert.

private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
    ExprNodeDesc tmpExprNode;
    RexNode tmpRN;
    List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
    Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder();
    // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
    TypeInfo tgtDT = null;
    GenericUDF tgtUdf = func.getGenericUDF();
    boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary && func.getTypeInfo().getCategory() == Category.PRIMITIVE && (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
    boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
    boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
    boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && func.getChildren().size() != 0;
    if (isNumeric) {
        tgtDT = func.getTypeInfo();
        assert func.getChildren().size() == 2;
    // TODO: checking 2 children is useless, compare already does that.
    } else if (isCompare && (func.getChildren().size() == 2)) {
        tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
    } else if (isWhenCase) {
        // as they are not allowed
        if (checkForStatefulFunctions(func.getChildren())) {
            throw new SemanticException("Stateful expressions cannot be used inside of CASE");
        }
    } else if (isTransformableTimeStamp) {
        // unix_timestamp(args) -> to_unix_timestamp(args)
        func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
    }
    for (ExprNodeDesc childExpr : func.getChildren()) {
        tmpExprNode = childExpr;
        if (tgtDT != null && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
            if (isCompare) {
                // For compare, we will convert requisite children
                tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
            } else if (isNumeric) {
                // For numeric, we'll do minimum necessary cast - if we cast to the type
                // of expression, bad things will happen.
                PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
                tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType);
            } else {
                throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
            }
        }
        argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
        tmpRN = convert(tmpExprNode);
        childRexNodeLst.add(tmpRN);
    }
    // See if this is an explicit cast.
    RexNode expr = null;
    RelDataType retType = null;
    expr = handleExplicitCast(func, childRexNodeLst);
    if (expr == null) {
        // This is not a cast; process the function.
        retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
        SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType);
        if (calciteOp.getKind() == SqlKind.CASE) {
            // If it is a case operator, we need to rewrite it
            childRexNodeLst = rewriteCaseChildren(func, childRexNodeLst);
        } else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) {
            // If it is a extract operator, we need to rewrite it
            childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst);
        } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) {
            // If it is a floor <date> operator, we need to rewrite it
            childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst);
        }
        expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
    } else {
        retType = expr.getType();
    }
    // an exception
    if (flattenExpr && (expr instanceof RexCall) && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
        RexCall call = (RexCall) expr;
        expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
    }
    return expr;
}
Also used : GenericUDFCase(org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase) SqlCastFunction(org.apache.calcite.sql.fun.SqlCastFunction) SqlOperator(org.apache.calcite.sql.SqlOperator) GenericUDFBaseBinary(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary) GenericUDFWhen(org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen) ArrayList(java.util.ArrayList) GenericUDFToUnixTimeStamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp) RelDataType(org.apache.calcite.rel.type.RelDataType) GenericUDFUnixTimeStamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) RexCall(org.apache.calcite.rex.RexCall) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RexNode(org.apache.calcite.rex.RexNode) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) CalciteSubquerySemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)

Aggregations

ArrayList (java.util.ArrayList)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)2 GenericUDFBaseCompare (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare)2 GenericUDFCase (org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase)2 GenericUDFToUnixTimeStamp (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp)2 GenericUDFUnixTimeStamp (org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp)2 GenericUDFWhen (org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen)2 BitSet (java.util.BitSet)1 List (java.util.List)1 RelDataType (org.apache.calcite.rel.type.RelDataType)1 RexCall (org.apache.calcite.rex.RexCall)1 RexNode (org.apache.calcite.rex.RexNode)1 SqlOperator (org.apache.calcite.sql.SqlOperator)1 SqlCastFunction (org.apache.calcite.sql.fun.SqlCastFunction)1 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)1 CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)1 CalciteSubquerySemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException)1 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)1 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)1