Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDF in project hive by apache.
Class TestColumnPrunerProcCtx, method testGetSelectNestedColPathsFromChildren5.
// Test select named_struct from named_struct:struct<a:boolean,b:double>.
// Every argument handed to the named_struct call is a constant, so no nested
// column path is expected to come back from the pruner context.
@Test
public void testGetSelectNestedColPathsFromChildren5() {
  // Arguments of the named_struct call: two constants and no column reference.
  List<ExprNodeDesc> structArgs = new ArrayList<>();
  structArgs.add(new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, "a"));
  structArgs.add(new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, "b"));

  // named_struct(...) backed by a mocked UDF, then a field access on its result.
  GenericUDF mockedUdf = mock(GenericUDF.class);
  ExprNodeDesc namedStruct = new ExprNodeGenericFuncDesc(col1Type, mockedUdf, "named_struct", structArgs);
  ExprNodeDesc fieldAccess = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, namedStruct, "foo", false);

  final List<FieldNode> childPaths = Arrays.asList(new FieldNode("_col0"));
  SelectOperator select = buildSelectOperator(Arrays.asList(fieldAccess), childPaths);

  ColumnPrunerProcCtx prunerCtx = new ColumnPrunerProcCtx(null);
  List<FieldNode> selected = prunerCtx.getSelectColsFromChildren(select, childPaths);

  // Return empty result since only constant Desc exists
  assertEquals(0, selected.size());
}
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDF in project hive by apache.
Class ConstantPropagateProcFactory, method foldExprFull.
/**
 * Folds the given expression as far as it can be folded at compile time.
 *
 * The expression is first passed through {@code foldNegative}. For a function
 * expression every child is folded recursively; if the UDF is deterministic the
 * function is then evaluated to a constant or replaced by a shortcut when possible,
 * otherwise it simply receives the folded children. A column expression is replaced
 * by the value obtained from the parent operator selected by {@code tag}, when one
 * is available. Anything else is returned unchanged.
 * e.g.:
 * concat(year, month) => 200112 for year=2001, month=12 since concat is deterministic UDF
 * unix_timestamp(time) => unix_timestamp(123) for time=123 since unix_timestamp is nondeterministic UDF
 *
 * @param desc folding expression
 * @param constants current propagated constant map
 * @param cppCtx context handed on to the recursive fold and to the column evaluation
 * @param op processing operator
 * @param tag index into the parent operators of {@code op}, used when evaluating a column
 * @param propagate if true, assignment expressions will be added to constants.
 * @return fold expression
 * @throws UDFArgumentException
 */
private static ExprNodeDesc foldExprFull(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants, ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag, boolean propagate) throws UDFArgumentException {
  // Merge a NOT operator into its child before folding bottom-up. Otherwise an
  // expression such as not(x > 3 and x is not null) could wrongly be reduced to
  // not(x > 3) instead of (x <= 3 or x is null).
  desc = foldNegative(desc);

  if (desc instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) desc;
    GenericUDF udf = function.getGenericUDF();
    // Children only keep propagating when this UDF is one of the propagatable ones.
    boolean propagateToChildren = propagate && propagatableUdfs.contains(udf.getClass());

    List<ExprNodeDesc> foldedChildren = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc child : function.getChildren()) {
      foldedChildren.add(foldExpr(child, constants, cppCtx, op, tag, propagateToChildren));
    }

    // A nondeterministic function can only be computed at runtime: keep it, but
    // with its folded children.
    if (!isDeterministicUdf(udf, foldedChildren)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evaluate immediately.");
      }
      function.setChildren(foldedChildren);
      return desc;
    }

    // Deterministic function: try to evaluate it right away.
    ExprNodeDesc constant = evaluateFunction(udf, foldedChildren, function.getChildren());
    if (constant != null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Folding expression:" + desc + " -> " + constant);
      }
      return constant;
    }

    // Not a constant; check whether the function can be short cut instead.
    ExprNodeDesc shortcut = shortcutFunction(udf, foldedChildren, op);
    if (shortcut != null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Folding expression:" + desc + " -> " + shortcut);
      }
      return shortcut;
    }

    function.setChildren(foldedChildren);
    // When propagation is requested, hand the function and its folded children to
    // propagate(...) together with the operator schema and the constants map.
    if (propagate) {
      propagate(udf, foldedChildren, op.getSchema(), constants);
    }
    return desc;
  }

  if (desc instanceof ExprNodeColumnDesc) {
    boolean hasParents = op.getParentOperators() != null && !op.getParentOperators().isEmpty();
    if (!hasParents) {
      return desc;
    }
    Operator<? extends Serializable> parent = op.getParentOperators().get(tag);
    ExprNodeDesc resolved = evaluateColumn((ExprNodeColumnDesc) desc, cppCtx, parent);
    if (resolved != null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Folding expression:" + desc + " -> " + resolved);
      }
      return resolved;
    }
  }

  return desc;
}
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDF in project hive by apache.
Class ConstantPropagateProcFactory, method foldNegative.
/**
 * Combines the logical not() operator with the child operator if possible.
 *
 * Only an expression of the form not(f(...)) is rewritten; every other expression
 * is returned as is.
 *
 * @param desc the expression to be evaluated
 * @return the new expression to be replaced
 * @throws UDFArgumentException
 */
private static ExprNodeDesc foldNegative(ExprNodeDesc desc) throws UDFArgumentException {
  if (!(desc instanceof ExprNodeGenericFuncDesc)) {
    return desc;
  }
  ExprNodeGenericFuncDesc notDesc = (ExprNodeGenericFuncDesc) desc;
  if (!(notDesc.getGenericUDF() instanceof GenericUDFOPNot)) {
    return desc;
  }
  ExprNodeDesc operand = notDesc.getChildren().get(0);
  if (!(operand instanceof ExprNodeGenericFuncDesc)) {
    return desc;
  }

  ExprNodeGenericFuncDesc inner = (ExprNodeGenericFuncDesc) operand;
  GenericUDF innerUdf = inner.getGenericUDF();
  List<ExprNodeDesc> innerArgs = operand.getChildren();

  // Comparison and null checks: drop the not() and use the UDF's negative()
  // on the (recursively processed) arguments.
  boolean comparisonOrNullCheck = innerUdf instanceof GenericUDFBaseCompare
      || innerUdf instanceof GenericUDFOPNull
      || innerUdf instanceof GenericUDFOPNotNull;
  if (comparisonOrNullCheck) {
    List<ExprNodeDesc> foldedArgs = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc arg : innerArgs) {
      foldedArgs.add(foldNegative(arg));
    }
    return ExprNodeGenericFuncDesc.newInstance(innerUdf.negative(), foldedArgs);
  }

  // and / or: wrap each argument in its own not(), process that, and combine
  // the results under the UDF's negative().
  if (innerUdf instanceof GenericUDFOPAnd || innerUdf instanceof GenericUDFOPOr) {
    List<ExprNodeDesc> negatedArgs = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc arg : innerArgs) {
      ExprNodeDesc negatedArg = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), Arrays.asList(arg));
      negatedArgs.add(foldNegative(negatedArg));
    }
    return ExprNodeGenericFuncDesc.newInstance(innerUdf.negative(), negatedArgs);
  }

  // not(not(x)): both not() operators go away.
  if (innerUdf instanceof GenericUDFOPNot) {
    return foldNegative(innerArgs.get(0));
  }

  // For operator like if() that cannot be handled, leave not() as it
  // is and continue processing the children
  List<ExprNodeDesc> foldedArgs = new ArrayList<ExprNodeDesc>();
  for (ExprNodeDesc arg : innerArgs) {
    foldedArgs.add(foldNegative(arg));
  }
  inner.setChildren(foldedArgs);
  return notDesc;
}
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDF in project hive by apache.
Class PartitionPruner, method compactExpr.
/**
 * Taking a partition pruning expression, remove the null operands and non-partition columns.
 * The reason why there are null operands is ExprProcFactory classes, for example
 * PPRColumnExprProcessor.
 *
 * A {@code null} result stands for an operand that could not be kept. AND drops such
 * operands, whereas a single one makes a whole OR {@code null}. Function expressions
 * other than AND / OR are returned untouched.
 *
 * @param expr original partition pruning expression.
 * @return partition pruning expression that only contains partition columns.
 * @throws IllegalStateException for a non-null, non-boolean constant, or for an
 *         expression that is neither a constant nor a function
 */
@VisibleForTesting
static ExprNodeDesc compactExpr(ExprNodeDesc expr) {
  if (expr == null) {
    return null;
  }

  // If this is a constant boolean expression, return the value.
  if (expr instanceof ExprNodeConstantDesc) {
    if (((ExprNodeConstantDesc) expr).getValue() == null) {
      return null;
    }
    if (!isBooleanExpr(expr)) {
      throw new IllegalStateException("Unexpected non-boolean ExprNodeConstantDesc: " + expr.getExprString());
    }
    return expr;
  }

  if (!(expr instanceof ExprNodeGenericFuncDesc)) {
    throw new IllegalStateException("Unexpected type of ExprNodeDesc: " + expr.getExprString());
  }

  ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) expr;
  GenericUDF udf = funcExpr.getGenericUDF();
  List<ExprNodeDesc> operands = expr.getChildren();

  if (udf instanceof GenericUDFOPAnd) {
    // Non-partition expressions are converted to nulls.
    List<ExprNodeDesc> kept = new ArrayList<ExprNodeDesc>();
    boolean everyOperandTrue = true;
    for (ExprNodeDesc operand : operands) {
      ExprNodeDesc compacted = compactExpr(operand);
      if (compacted == null) {
        everyOperandTrue = false;
        continue;
      }
      if (!isTrueExpr(compacted)) {
        kept.add(compacted);
        everyOperandTrue = false;
      }
      // A single false operand decides the whole conjunction.
      if (isFalseExpr(compacted)) {
        return new ExprNodeConstantDesc(Boolean.FALSE);
      }
    }
    if (everyOperandTrue) {
      return new ExprNodeConstantDesc(Boolean.TRUE);
    }
    if (kept.isEmpty()) {
      return null;
    }
    if (kept.size() == 1) {
      return kept.get(0);
    }
    // Nothing to compact, update expr with compacted children.
    funcExpr.setChildren(kept);
    return expr;
  }

  if (udf instanceof GenericUDFOPOr) {
    // Non-partition expressions are converted to nulls.
    List<ExprNodeDesc> kept = new ArrayList<ExprNodeDesc>();
    boolean everyOperandFalse = true;
    boolean sawNull = false;
    for (ExprNodeDesc operand : operands) {
      ExprNodeDesc compacted = compactExpr(operand);
      if (compacted == null) {
        sawNull = true;
        continue;
      }
      // A single true operand decides the whole disjunction.
      if (isTrueExpr(compacted)) {
        return new ExprNodeConstantDesc(Boolean.TRUE);
      }
      if (!(sawNull || isFalseExpr(compacted))) {
        kept.add(compacted);
        everyOperandFalse = false;
      }
    }
    if (sawNull) {
      return null;
    }
    if (everyOperandFalse) {
      return new ExprNodeConstantDesc(Boolean.FALSE);
    }
    if (kept.size() == 1) {
      return kept.get(0);
    }
    // Nothing to compact, update expr with compacted children.
    funcExpr.setChildren(kept);
    return expr;
  }

  // Any other function is passed through unchanged.
  return expr;
}
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDF in project phoenix by apache.
Class IndexPredicateAnalyzer, method getColumnExpr.
// Unwraps a column reference that is wrapped in a single-argument cast-like
// function. A bare column, a non-function expression, or a function that does
// not match this shape is returned as is.
private ExprNodeDesc getColumnExpr(ExprNodeDesc expr) {
  if (expr instanceof ExprNodeColumnDesc) {
    return expr;
  }
  if (!(expr instanceof ExprNodeGenericFuncDesc)) {
    return expr;
  }

  ExprNodeGenericFuncDesc wrapper = (ExprNodeGenericFuncDesc) expr;
  GenericUDF udf = wrapper.getGenericUDF();

  // UDF types treated here as a simple cast expression.
  boolean simpleCast = udf instanceof GenericUDFBridge
      || udf instanceof GenericUDFToBinary
      || udf instanceof GenericUDFToChar
      || udf instanceof GenericUDFToVarchar
      || udf instanceof GenericUDFToDecimal
      || udf instanceof GenericUDFToDate
      || udf instanceof GenericUDFToUnixTimeStamp
      || udf instanceof GenericUDFToUtcTimestamp;
  if (!simpleCast) {
    return expr;
  }

  // Peel off only when the cast has exactly one argument and it is a column.
  if (wrapper.getChildren().size() == 1 && wrapper.getChildren().get(0) instanceof ExprNodeColumnDesc) {
    return expr.getChildren().get(0);
  }
  return expr;
}
Aggregations