use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn in project hive by apache.
the class HiveFunctionHelper method convertInputs.
/**
 * {@inheritDoc}
 */
@Override
public List<RexNode> convertInputs(FunctionInfo fi, List<RexNode> inputs, RelDataType returnType) throws SemanticException {
  final GenericUDF udf = fi.getGenericUDF();
  final TypeInfo returnTypeInfo = TypeConverter.convert(returnType);

  // Classify the function. The three comparison-like flags are only set when
  // the function is not a numeric binary operation.
  final boolean isNumeric = udf instanceof GenericUDFBaseBinary
      && returnTypeInfo.getCategory() == Category.PRIMITIVE
      && PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
          ((PrimitiveTypeInfo) returnTypeInfo).getPrimitiveCategory()) == PrimitiveGrouping.NUMERIC_GROUP;
  final boolean isCompare = !isNumeric && udf instanceof GenericUDFBaseCompare;
  final boolean isBetween = !isNumeric && udf instanceof GenericUDFBetween;
  final boolean isIN = !isNumeric && udf instanceof GenericUDFIn;

  // Work out the type the operands should be brought to, if any.
  final TypeInfo targetType;
  if (isNumeric) {
    targetType = returnTypeInfo;
  } else if (isCompare) {
    final TypeInfo leftType = TypeConverter.convert(inputs.get(0).getType());
    final TypeInfo rightType = TypeConverter.convert(inputs.get(1).getType());
    targetType = FunctionRegistry.getCommonClassForComparison(leftType, rightType);
  } else if (isBetween) {
    assert inputs.size() == 4;
    // Operand 0 is the revert boolean and takes no part in the comparison;
    // the target type has to account for the remaining three operands.
    final TypeInfo valueType = TypeConverter.convert(inputs.get(1).getType());
    final TypeInfo lowerType = TypeConverter.convert(inputs.get(2).getType());
    final TypeInfo upperType = TypeConverter.convert(inputs.get(3).getType());
    final TypeInfo boundsType = FunctionRegistry.getCommonClassForComparison(lowerType, upperType);
    targetType = FunctionRegistry.getCommonClassForComparison(valueType, boundsType);
  } else if (isIN) {
    // Only the first element of the IN list is considered for the type.
    assert inputs.size() > 1;
    final TypeInfo valueType = TypeConverter.convert(inputs.get(0).getType());
    final TypeInfo firstItemType = TypeConverter.convert(inputs.get(1).getType());
    targetType = FunctionRegistry.getCommonClassForComparison(valueType, firstItemType);
  } else {
    targetType = null;
  }

  // No target type means there is nothing to convert.
  if (targetType == null) {
    return inputs;
  }

  final List<RexNode> converted = new ArrayList<>();
  for (int idx = 0; idx < inputs.size(); ++idx) {
    final RexNode operand = inputs.get(idx);
    final TypeInfo operandType = TypeConverter.convert(operand.getType());
    if (!TypeInfoUtils.isConversionRequiredForComparison(targetType, operandType)) {
      // Operand can be used as is.
      converted.add(operand);
      continue;
    }
    if (isIN || isCompare) {
      // IN and compare: convert every operand that requires it.
      converted.add(convert(targetType, operand));
    } else if (isBetween) {
      // BETWEEN: leave the first operand (the revert boolean) untouched.
      converted.add(idx > 0 ? convert(targetType, operand) : operand);
    } else if (isNumeric) {
      // Numeric: apply only the minimum necessary cast rather than casting
      // straight to the type of the expression.
      final PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(operandType, targetType);
      converted.add(convert(minArgType, operand));
    } else {
      throw new AssertionError("Unexpected " + targetType + " - not a numeric op or compare");
    }
  }
  return converted;
}
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn in project hive by apache.
the class TestVectorizationContext method testInFiltersAndExprs.
// Test translation of both IN filters and boolean-valued IN expressions (non-filters).
@Test
public void testInFiltersAndExprs() throws HiveException {
  // Operand list handed to the IN expression. The test replaces its entries
  // further down and re-vectorizes the same expression object, so it relies
  // on the expression observing those replacements.
  List<ExprNodeDesc> operands = new ArrayList<ExprNodeDesc>();
  operands.add(new ExprNodeColumnDesc(String.class, "col1", "table", false));
  operands.add(new ExprNodeConstantDesc("Alpha"));
  operands.add(new ExprNodeConstantDesc("Bravo"));
  ExprNodeGenericFuncDesc inExpr =
      new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFIn(), operands);

  List<String> columnNames = new ArrayList<String>();
  columnNames.add("col0");
  columnNames.add("col1");
  columnNames.add("col2");
  VectorizationContext context = new VectorizationContext("name", columnNames);

  // string IN
  VectorExpression asFilter = context.getVectorExpression(inExpr, VectorExpressionDescriptor.Mode.FILTER);
  assertTrue(asFilter instanceof FilterStringColumnInList);
  VectorExpression asProjection = context.getVectorExpression(inExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  assertTrue(asProjection instanceof StringColumnInList);

  // long IN
  operands.set(0, new ExprNodeColumnDesc(Long.class, "col1", "table", false));
  operands.set(1, new ExprNodeConstantDesc(10));
  operands.set(2, new ExprNodeConstantDesc(20));
  asFilter = context.getVectorExpression(inExpr, VectorExpressionDescriptor.Mode.FILTER);
  assertTrue(asFilter instanceof FilterLongColumnInList);
  asProjection = context.getVectorExpression(inExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  assertTrue(asProjection instanceof LongColumnInList);

  // double IN
  operands.set(0, new ExprNodeColumnDesc(Double.class, "col1", "table", false));
  operands.set(1, new ExprNodeConstantDesc(10d));
  operands.set(2, new ExprNodeConstantDesc(20d));
  asFilter = context.getVectorExpression(inExpr, VectorExpressionDescriptor.Mode.FILTER);
  assertTrue(asFilter instanceof FilterDoubleColumnInList);
  asProjection = context.getVectorExpression(inExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  assertTrue(asProjection instanceof DoubleColumnInList);
}
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn in project hive by apache.
the class TestKuduPredicateHandler method testInPredicates.
// Checks how a string IN predicate, and its negation, are handled on the Kudu side.
@Test
public void testInPredicates() throws Exception {
  PrimitiveTypeInfo stringType = toHiveType(Type.STRING, null);
  ExprNodeDesc column = new ExprNodeColumnDesc(stringType, "string", null, false);
  ExprNodeConstantDesc alpha = new ExprNodeConstantDesc("Alpha");
  ExprNodeConstantDesc bravo = new ExprNodeConstantDesc("Bravo");
  List<ExprNodeDesc> inOperands = Lists.<ExprNodeDesc>newArrayList(column, alpha, bravo);
  ExprNodeGenericFuncDesc inExpr = new ExprNodeGenericFuncDesc(stringType, new GenericUDFIn(), inOperands);

  // Verify KuduPredicateHandler.decompose
  // See note in KuduPredicateHandler.newAnalyzer.
  assertNull(KuduPredicateHandler.decompose(inExpr, SCHEMA));

  // The IN expression yields exactly one predicate, which is then used in a scan.
  List<KuduPredicate> inPredicates = expressionToPredicates(inExpr);
  assertEquals(1, inPredicates.size());
  scanWithPredicates(inPredicates);

  // Also test NOT IN.
  List<ExprNodeDesc> notOperands = Lists.<ExprNodeDesc>newArrayList(inExpr);
  ExprNodeGenericFuncDesc notInExpr = new ExprNodeGenericFuncDesc(stringType, new GenericUDFOPNot(), notOperands);

  // Verify KuduPredicateHandler.decompose
  // See note in KuduPredicateHandler.newAnalyzer.
  assertNull(KuduPredicateHandler.decompose(notInExpr, SCHEMA));

  // The negated expression yields no predicates.
  List<KuduPredicate> notInPredicates = expressionToPredicates(notInExpr);
  assertEquals(0, notInPredicates.size());
}
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn in project hive by apache.
the class RexNodeConverter method convert.
/**
 * Converts a Hive generic function call into a Calcite {@link RexNode}.
 *
 * <p>The conversion proceeds in these steps:
 * <ol>
 *   <li>Pick a target type for the children of numeric binary operations,
 *       two-operand comparisons, BETWEEN and IN.</li>
 *   <li>Wrap each child that requires it in a conversion cast, then convert the
 *       child with {@code convert(...)}.</li>
 *   <li>Try {@code handleExplicitCast}; if it returns {@code null}, look up the
 *       Calcite operator and rewrite the children for CASE, extract/floor date,
 *       IN, COALESCE, to_date and BETWEEN.</li>
 *   <li>Rebuild any resulting non-cast call with flattened operands.</li>
 * </ol>
 *
 * @param func the function expression to convert
 * @return the resulting Calcite expression
 * @throws SemanticException if a CASE/WHEN expression contains a stateful function
 */
private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
  ExprNodeDesc tmpExprNode;
  RexNode tmpRN;
  List<RexNode> childRexNodeLst = new ArrayList<>();
  Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder();
  // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
  TypeInfo tgtDT = null;
  GenericUDF tgtUdf = func.getGenericUDF();
  boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary && func.getTypeInfo().getCategory() == Category.PRIMITIVE && (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
  boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
  boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
  boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && !func.getChildren().isEmpty();
  boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween;
  boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn;
  // Stays true only while every (possibly cast) child has a primitive type;
  // it gates the IN rewrite further down.
  boolean isAllPrimitive = true;
  if (isNumeric) {
    tgtDT = func.getTypeInfo();
    assert func.getChildren().size() == 2;
    // TODO: checking 2 children is useless, compare already does that.
  } else if (isCompare && (func.getChildren().size() == 2)) {
    tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
  } else if (isWhenCase) {
    // For CASE/WHEN, check that the children do not contain stateful
    // functions, as they are not allowed
    if (checkForStatefulFunctions(func.getChildren())) {
      throw new SemanticException("Stateful expressions cannot be used inside of CASE");
    }
  } else if (isTransformableTimeStamp) {
    // unix_timestamp(args) -> to_unix_timestamp(args)
    func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
  } else if (isBetween) {
    assert func.getChildren().size() == 4;
    // We skip first child as is not involved (is the revert boolean)
    // The target type needs to account for all 3 operands
    tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(1).getTypeInfo(), FunctionRegistry.getCommonClassForComparison(func.getChildren().get(2).getTypeInfo(), func.getChildren().get(3).getTypeInfo()));
  } else if (isIN) {
    // We're only considering the first element of the IN list for the type
    assert func.getChildren().size() > 1;
    tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
  }
  for (int i = 0; i < func.getChildren().size(); ++i) {
    ExprNodeDesc childExpr = func.getChildren().get(i);
    tmpExprNode = childExpr;
    if (tgtDT != null && tgtDT.getCategory() == Category.PRIMITIVE && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
      if (isCompare || isBetween || isIN) {
        // For BETWEEN skip the first child (the revert boolean)
        if (!isBetween || i > 0) {
          tmpExprNode = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
        }
      } else if (isNumeric) {
        // For numeric, we'll do minimum necessary cast - if we cast to the type
        // of expression, bad things will happen.
        PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
        tmpExprNode = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(childExpr, minArgType);
      } else {
        throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
      }
    }
    isAllPrimitive = isAllPrimitive && tmpExprNode.getTypeInfo().getCategory() == Category.PRIMITIVE;
    // The argument type is recorded after any cast has been applied.
    argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), typeFactory));
    tmpRN = convert(tmpExprNode);
    childRexNodeLst.add(tmpRN);
  }
  // See if this is an explicit cast.
  RelDataType retType = TypeConverter.convert(func.getTypeInfo(), typeFactory);
  RexNode expr = handleExplicitCast(func.getGenericUDF(), retType, childRexNodeLst, rexBuilder);
  if (expr == null) {
    // This is not a cast; process the function.
    SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType);
    if (calciteOp.getKind() == SqlKind.CASE) {
      // If it is a case operator, we need to rewrite it
      childRexNodeLst = rewriteCaseChildren(func.getFuncText(), childRexNodeLst, rexBuilder);
      // Adjust branch types by inserting explicit casts if the actual is ambiguous
      childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder);
    } else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) {
      // If it is a extract operator, we need to rewrite it
      childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst, rexBuilder);
    } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) {
      // If it is a floor <date> operator, we need to rewrite it
      childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst, rexBuilder);
    } else if (calciteOp.getKind() == SqlKind.IN && isAllPrimitive) {
      if (childRexNodeLst.size() == 2) {
        // if it is a single item in an IN clause, transform A IN (B) to A = B
        // from IN [A,B] => EQUALS [A,B]
        // except complex types
        calciteOp = SqlStdOperatorTable.EQUALS;
      } else if (RexUtil.isReferenceOrAccess(childRexNodeLst.get(0), true)) {
        // if it is more than an single item in an IN clause,
        // transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]]
        // except complex types
        // Rewrite to OR is done only if number of operands are less than
        // the threshold configured
        childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst, rexBuilder);
        calciteOp = SqlStdOperatorTable.OR;
      }
    } else if (calciteOp.getKind() == SqlKind.COALESCE && childRexNodeLst.size() > 1) {
      // Rewrite COALESCE as a CASE
      // This allows to be further reduced to OR, if possible
      calciteOp = SqlStdOperatorTable.CASE;
      childRexNodeLst = rewriteCoalesceChildren(childRexNodeLst, rexBuilder);
      // Adjust branch types by inserting explicit casts if the actual is ambiguous
      childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder);
    } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) {
      childRexNodeLst = rewriteToDateChildren(childRexNodeLst, rexBuilder);
    } else if (calciteOp.getKind() == SqlKind.BETWEEN) {
      assert childRexNodeLst.get(0).isAlwaysTrue() || childRexNodeLst.get(0).isAlwaysFalse();
      // Read the revert flag BEFORE the children are rewritten. The rewrite
      // replaces the list, so afterwards position 0 is no longer guaranteed
      // to hold the boolean flag that the assert above checked.
      final boolean invert = childRexNodeLst.get(0).isAlwaysTrue();
      childRexNodeLst = rewriteBetweenChildren(childRexNodeLst, rexBuilder);
      // An always-true flag selects OR, otherwise AND.
      calciteOp = invert ? SqlStdOperatorTable.OR : SqlStdOperatorTable.AND;
    }
    expr = rexBuilder.makeCall(retType, calciteOp, childRexNodeLst);
  } else {
    // An explicit cast was produced; adopt its type as the return type.
    retType = expr.getType();
  }
  // Rebuild non-cast calls with their operands flattened via RexUtil.flatten.
  // Cast calls are left untouched.
  // NOTE(review): the original comment here was truncated to "an exception";
  // presumably rebuilding a cast call can throw - confirm.
  if (expr instanceof RexCall && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
    RexCall call = (RexCall) expr;
    expr = rexBuilder.makeCall(retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
  }
  return expr;
}
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn in project hive by apache.
the class DynamicPartitionPruningOptimization method isNonEquiJoin.
/**
 * Reports whether the given predicate is anything other than an IN function call.
 *
 * @param predicate the predicate to inspect; must be an {@code ExprNodeGenericFuncDesc}
 * @return {@code false} when the predicate's UDF is a {@code GenericUDFIn}, {@code true} otherwise
 */
private boolean isNonEquiJoin(ExprNodeDesc predicate) {
  Preconditions.checkArgument(predicate instanceof ExprNodeGenericFuncDesc);
  ExprNodeGenericFuncDesc call = (ExprNodeGenericFuncDesc) predicate;
  return !(call.getGenericUDF() instanceof GenericUDFIn);
}
Aggregations