Example usage from the Apache Hive project.
Class RexNodeConverter, method convert.
/**
 * Converts a Hive generic function call descriptor into a Calcite {@link RexNode}.
 *
 * <p>Steps visible in this listing: classify the UDF (numeric binary op, comparison,
 * CASE/WHEN, unix_timestamp with arguments, BETWEEN, IN); choose a common target type for
 * the kinds that need one; walk the children, inserting conversion casts where the target
 * type requires it; then either handle the call as an explicit cast or resolve a Calcite
 * operator and apply operator-specific rewrites to the converted children.
 *
 * <p>NOTE(review): this listing appears to have lost lines. Closing braces are absent, the
 * third argument of the getCalciteOperator call is empty, and nothing visible adds
 * {@code tmpRN} to {@code childRexNodeLst}. Restore from the upstream file before compiling.
 *
 * @param func the function descriptor to convert; reassigned locally when unix_timestamp(args)
 *             is swapped for to_unix_timestamp(args)
 * @return the resulting Calcite expression
 * @throws SemanticException if checkForStatefulFunctions reports true for the children of a
 *                           CASE/WHEN call
 */
private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
ExprNodeDesc tmpExprNode;
RexNode tmpRN;
List<RexNode> childRexNodeLst = new ArrayList<>();
Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder();
// TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
TypeInfo tgtDT = null;
GenericUDF tgtUdf = func.getGenericUDF();
// Numeric: a binary UDF whose result type is a primitive in the NUMERIC group.
boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary && func.getTypeInfo().getCategory() == Category.PRIMITIVE && (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
// The compare / BETWEEN / IN flags are only set when the call is not numeric.
boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && !func.getChildren().isEmpty();
boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween;
boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn;
// Tracks whether every (possibly cast) child has a PRIMITIVE category; gates the IN rewrite below.
boolean isAllPrimitive = true;
if (isNumeric) {
// For numeric ops the target type is the function's own result type.
tgtDT = func.getTypeInfo();
assert func.getChildren().size() == 2;
// TODO: checking 2 children is useless, compare already does that.
} else if (isCompare && (func.getChildren().size() == 2)) {
// For a two-operand comparison, use the common comparison type of both operands.
tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
} else if (isWhenCase) {
// Stateful functions are rejected inside CASE/WHEN
// as they are not allowed
if (checkForStatefulFunctions(func.getChildren())) {
throw new SemanticException("Stateful expressions cannot be used inside of CASE");
} else if (isTransformableTimeStamp) {
// unix_timestamp(args) -> to_unix_timestamp(args)
func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
} else if (isBetween) {
assert func.getChildren().size() == 4;
// We skip first child as is not involved (is the revert boolean)
// The target type needs to account for all 3 operands
tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(1).getTypeInfo(), FunctionRegistry.getCommonClassForComparison(func.getChildren().get(2).getTypeInfo(), func.getChildren().get(3).getTypeInfo()));
} else if (isIN) {
// We're only considering the first element of the IN list for the type
assert func.getChildren().size() > 1;
tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
// Convert each child, first wrapping it in a conversion cast when the target type demands one.
for (int i = 0; i < func.getChildren().size(); ++i) {
ExprNodeDesc childExpr = func.getChildren().get(i);
tmpExprNode = childExpr;
// A cast is only considered when a primitive target type was chosen above.
if (tgtDT != null && tgtDT.getCategory() == Category.PRIMITIVE && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
if (isCompare || isBetween || isIN) {
// For BETWEEN skip the first child (the revert boolean)
if (!isBetween || i > 0) {
tmpExprNode = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
} else if (isNumeric) {
// For numeric, we'll do minimum necessary cast - if we cast to the type
// of expression, bad things will happen.
PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
tmpExprNode = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(childExpr, minArgType);
} else {
throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
isAllPrimitive = isAllPrimitive && tmpExprNode.getTypeInfo().getCategory() == Category.PRIMITIVE;
// Record the Calcite type of the (possibly cast) argument.
argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), typeFactory));
// NOTE(review): tmpRN is assigned here but never read in this listing; presumably it is
// appended to childRexNodeLst on a line that was dropped - confirm against upstream.
tmpRN = convert(tmpExprNode);
// See if this is an explicit cast.
RelDataType retType = TypeConverter.convert(func.getTypeInfo(), typeFactory);
RexNode expr = handleExplicitCast(func.getGenericUDF(), retType, childRexNodeLst, rexBuilder);
if (expr == null) {
// This is not a cast; process the function.
// NOTE(review): the third argument is empty (",,"). argTypeBldr is filled above and never
// consumed in this listing, so it is presumably argTypeBldr.build() - confirm.
SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(),, retType);
if (calciteOp.getKind() == SqlKind.CASE) {
// If it is a case operator, we need to rewrite it
childRexNodeLst = rewriteCaseChildren(func.getFuncText(), childRexNodeLst, rexBuilder);
// Adjust branch types by inserting explicit casts if the actual is ambiguous
childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder);
} else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) {
// If it is a extract operator, we need to rewrite it
childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst, rexBuilder);
} else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) {
// If it is a floor <date> operator, we need to rewrite it
childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst, rexBuilder);
} else if (calciteOp.getKind() == SqlKind.IN && isAllPrimitive) {
if (childRexNodeLst.size() == 2) {
// if it is a single item in an IN clause, transform A IN (B) to A = B
// from IN [A,B] => EQUALS [A,B]
// except complex types
calciteOp = SqlStdOperatorTable.EQUALS;
} else if (RexUtil.isReferenceOrAccess(childRexNodeLst.get(0), true)) {
// if it is more than an single item in an IN clause,
// transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]]
// except complex types
// Rewrite to OR is done only if number of operands are less than
// the threshold configured
childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst, rexBuilder);
calciteOp = SqlStdOperatorTable.OR;
} else if (calciteOp.getKind() == SqlKind.COALESCE && childRexNodeLst.size() > 1) {
// Rewrite COALESCE as a CASE
// This allows to be further reduced to OR, if possible
calciteOp = SqlStdOperatorTable.CASE;
childRexNodeLst = rewriteCoalesceChildren(childRexNodeLst, rexBuilder);
// Adjust branch types by inserting explicit casts if the actual is ambiguous
childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder);
} else if (calciteOp == HiveToDateSqlOperator.INSTANCE) {
childRexNodeLst = rewriteToDateChildren(childRexNodeLst, rexBuilder);
} else if (calciteOp.getKind() == SqlKind.BETWEEN) {
// The first BETWEEN operand must be a constant boolean flag.
assert childRexNodeLst.get(0).isAlwaysTrue() || childRexNodeLst.get(0).isAlwaysFalse();
childRexNodeLst = rewriteBetweenChildren(childRexNodeLst, rexBuilder);
// NOTE(review): childRexNodeLst was reassigned on the previous line, so get(0) below reads
// the rewritten list, not the flag the assert checked. Confirm rewriteBetweenChildren keeps
// that flag at index 0; otherwise capture it before the rewrite.
if (childRexNodeLst.get(0).isAlwaysTrue()) {
calciteOp = SqlStdOperatorTable.OR;
} else {
calciteOp = SqlStdOperatorTable.AND;
// Build the call with the resolved operator and the (possibly rewritten) children.
expr = rexBuilder.makeCall(retType, calciteOp, childRexNodeLst);
} else {
// Explicit cast: adopt the type of the expression that handleExplicitCast produced.
retType = expr.getType();
// an exception
// Rebuild any non-cast call with its operands flattened and the resolved return type.
if (expr instanceof RexCall && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
RexCall call = (RexCall) expr;
expr = rexBuilder.makeCall(retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
return expr;
Example usage from the Apache Hive project.
Class FilterSelectivityEstimator, method getOp.
private SqlKind getOp(RexCall call) {
SqlKind op = call.getKind();
if (call.getKind().equals(SqlKind.OTHER_FUNCTION) && SqlTypeUtil.inBooleanFamily(call.getType())) {
SqlOperator sqlOp = call.getOperator();
String opName = (sqlOp != null) ? sqlOp.getName() : "";
if (opName.equalsIgnoreCase("in")) {
op = SqlKind.IN;
return op;
Example usage from the Apache Hive project.
Class HiveFunctionHelper, method getExpression.
* {@inheritDoc}
/**
 * Builds a Calcite expression for a Hive function applied to already-converted inputs.
 *
 * <p>If the function is an explicit cast, the result of
 * {@code RexNodeConverter.handleExplicitCast} is used. Otherwise a Calcite operator is
 * resolved and the inputs are rewritten for CASE, extract/floor date functions, IN, COALESCE,
 * to_date, BETWEEN and unix_timestamp before the call is created.
 *
 * <p>NOTE(review): this listing appears to have lost lines. Closing braces are absent, the
 * for-loop over {@code inputs} has no visible body, and the third argument of
 * getCalciteOperator is empty. Restore from the upstream file before compiling.
 *
 * @param functionText the function name as written in the query
 * @param fi           function info supplying the GenericUDF
 * @param inputs       converted operands; reassigned locally by the rewrites
 * @param returnType   type used for the created call
 * @return the resulting expression
 * @throws SemanticException declared by the signature; no throw site is visible in this listing
 */
public RexNode getExpression(String functionText, FunctionInfo fi, List<RexNode> inputs, RelDataType returnType) throws SemanticException {
// See if this is an explicit cast.
RexNode expr = RexNodeConverter.handleExplicitCast(fi.getGenericUDF(), returnType, inputs, rexBuilder);
if (expr == null) {
// This is not a cast; process the function.
ImmutableList.Builder<RelDataType> argsTypes = ImmutableList.builder();
// NOTE(review): the loop body is missing in this listing; presumably it adds each input's
// type to argsTypes - confirm against upstream.
for (RexNode input : inputs) {
// NOTE(review): the third argument is empty (",,"); presumably argsTypes.build() - confirm.
SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(functionText, fi.getGenericUDF(),, returnType);
if (calciteOp.getKind() == SqlKind.CASE) {
// If it is a case operator, we need to rewrite it
inputs = RexNodeConverter.rewriteCaseChildren(functionText, inputs, rexBuilder);
// Adjust branch types by inserting explicit casts if the actual is ambiguous
inputs = RexNodeConverter.adjustCaseBranchTypes(inputs, returnType, rexBuilder);
} else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) {
// If it is a extract operator, we need to rewrite it
inputs = RexNodeConverter.rewriteExtractDateChildren(calciteOp, inputs, rexBuilder);
} else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) {
// If it is a floor <date> operator, we need to rewrite it
inputs = RexNodeConverter.rewriteFloorDateChildren(calciteOp, inputs, rexBuilder);
} else if (calciteOp.getKind() == SqlKind.IN) {
// if it is a single item in an IN clause, transform A IN (B) to A = B
// from IN [A,B] => EQUALS [A,B]
// if it is more than an single item in an IN clause,
// transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]]
// Rewrite to OR is done only if number of operands are less than
// the threshold configured
boolean rewriteToOr = true;
// A threshold of 0 leaves the rewrite enabled regardless of the number of inputs.
if (maxNodesForInToOrTransformation != 0) {
if (inputs.size() > maxNodesForInToOrTransformation) {
rewriteToOr = false;
if (rewriteToOr) {
// If there are non-deterministic functions, we cannot perform this rewriting
List<RexNode> newInputs = RexNodeConverter.transformInToOrOperands(inputs, rexBuilder);
// A null result leaves inputs (and the IN operator) untouched.
if (newInputs != null) {
inputs = newInputs;
// NOTE(review): whatever handled the single-operand case appears to be missing here;
// only the switch to OR is visible - confirm against upstream.
if (inputs.size() == 1) {
calciteOp = SqlStdOperatorTable.OR;
} else if (calciteOp.getKind() == SqlKind.COALESCE && inputs.size() > 1) {
// Rewrite COALESCE as a CASE
// This allows to be further reduced to OR, if possible
calciteOp = SqlStdOperatorTable.CASE;
inputs = RexNodeConverter.rewriteCoalesceChildren(inputs, rexBuilder);
// Adjust branch types by inserting explicit casts if the actual is ambiguous
inputs = RexNodeConverter.adjustCaseBranchTypes(inputs, returnType, rexBuilder);
} else if (calciteOp == HiveToDateSqlOperator.INSTANCE) {
inputs = RexNodeConverter.rewriteToDateChildren(inputs, rexBuilder);
} else if (calciteOp.getKind() == SqlKind.BETWEEN) {
// Operand 0 is a constant boolean; true selects the inverted (NOT BETWEEN) form.
assert inputs.get(0).isAlwaysTrue() || inputs.get(0).isAlwaysFalse();
boolean invert = inputs.get(0).isAlwaysTrue();
SqlBinaryOperator cmpOp;
if (invert) {
// Inverted: (low > x) OR (x > high)
calciteOp = SqlStdOperatorTable.OR;
cmpOp = SqlStdOperatorTable.GREATER_THAN;
} else {
// Regular: (low <= x) AND (x <= high)
calciteOp = SqlStdOperatorTable.AND;
cmpOp = SqlStdOperatorTable.LESS_THAN_OR_EQUAL;
RexNode op = inputs.get(1);
RexNode rangeL = inputs.get(2);
RexNode rangeH = inputs.get(3);
// Replace the four BETWEEN operands with the two comparison calls.
inputs = new ArrayList<>();
inputs.add(rexBuilder.makeCall(cmpOp, rangeL, op));
inputs.add(rexBuilder.makeCall(cmpOp, op, rangeH));
} else if (calciteOp == HiveUnixTimestampSqlOperator.INSTANCE && inputs.size() > 0) {
// unix_timestamp(args) -> to_unix_timestamp(args)
calciteOp = HiveToUnixTimestampSqlOperator.INSTANCE;
expr = rexBuilder.makeCall(returnType, calciteOp, inputs);
// Rebuild any non-CAST call with its operands flattened.
if (expr instanceof RexCall && !expr.isA(SqlKind.CAST)) {
RexCall call = (RexCall) expr;
expr = rexBuilder.makeCall(returnType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
return expr;
Example usage from the Apache Hive project.
Class HiveRelDecorrelator, method decorrelateRel.
* Rewrite Correlator into a left outer join.
* @param rel Correlator
/**
 * Rewrites a {@link LogicalCorrelate} into a join of its decorrelated inputs.
 *
 * <p>Both inputs are decorrelated first. Join conditions are then built from the
 * correlation definitions propagated up by the right input, and the result is a Hive
 * semi/anti join (for SEMI/ANTI correlates) or a relBuilder join with the original join type.
 *
 * <p>NOTE(review): this listing appears to have lost lines. Closing braces are absent and
 * several comments describe steps whose code is not visible. Restore from the upstream file
 * before compiling.
 *
 * @param rel the correlate to rewrite
 * @return the registered frame for the new join, or {@code null} when either input was not
 *         rewritten or the right input exposes no correlation definitions
 */
public Frame decorrelateRel(LogicalCorrelate rel) {
// Rewrite logic:
// The original left input will be joined with the new right input that
// has generated correlated variables propagated up. For any generated
// cor vars that are not used in the join key, pass them along to be
// joined later with the CorrelatorRels that produce them.
// the right input to Correlator should produce correlated variables
final RelNode oldLeft = rel.getInput(0);
final RelNode oldRight = rel.getInput(1);
// NOTE(review): mightRequireValueGen is computed but never read in this listing.
boolean mightRequireValueGen = new findIfValueGenRequired().traverse(oldRight);
final Frame leftFrame = getInvoke(oldLeft, rel);
final Frame rightFrame = getInvoke(oldRight, rel);
if (leftFrame == null || rightFrame == null) {
// If any input has not been rewritten, do not rewrite this rel.
return null;
// Without correlation definitions from the right input there is nothing to join on.
if (rightFrame.corDefOutputs.isEmpty()) {
return null;
assert rel.getRequiredColumns().cardinality() <= rightFrame.corDefOutputs.keySet().size();
// Change correlator rel into a join.
// Join all the correlated variables produced by this correlator rel
// with the values generated and propagated from the right input
final SortedMap<CorDef, Integer> corDefOutputs = new TreeMap<>(rightFrame.corDefOutputs);
final List<RexNode> conditions = new ArrayList<>();
final List<RelDataTypeField> newLeftOutput = leftFrame.r.getRowType().getFieldList();
int newLeftFieldCount = newLeftOutput.size();
final List<RelDataTypeField> newRightOutput = rightFrame.r.getRowType().getFieldList();
// Iterates over a copy of the entry set rather than the live map.
for (Map.Entry<CorDef, Integer> rightOutput : new ArrayList<>(corDefOutputs.entrySet())) {
final CorDef corDef = rightOutput.getKey();
// NOTE(review): no statement is visible inside this check; presumably entries belonging to
// another correlation id are skipped here - confirm against upstream.
if (!corDef.corr.equals(rel.getCorrelationId())) {
final int newLeftPos = leftFrame.oldToNewOutputs.get(corDef.field);
final int newRightPos = rightOutput.getValue();
// Fall back to EQUALS when no predicate kind was recorded on the definition.
SqlOperator callOp = corDef.getPredicateKind() == null ? SqlStdOperatorTable.EQUALS : corDef.getPredicateKind();
// isLeft decides which side of the predicate the left-input reference goes on.
if (corDef.isLeft) {
conditions.add(rexBuilder.makeCall(callOp, RexInputRef.of(newLeftPos, newLeftOutput), new RexInputRef(newLeftFieldCount + newRightPos, newRightOutput.get(newRightPos).getType())));
} else {
conditions.add(rexBuilder.makeCall(callOp, new RexInputRef(newLeftFieldCount + newRightPos, newRightOutput.get(newRightPos).getType()), RexInputRef.of(newLeftPos, newLeftOutput)));
// remove this cor var from output position mapping
// NOTE(review): the removal described above is not visible in this listing.
// vars that are not used in the join key.
// Shift the recorded positions by the left field count, since right fields follow left ones.
for (CorDef corDef : corDefOutputs.keySet()) {
int newPos = corDefOutputs.get(corDef) + newLeftFieldCount;
corDefOutputs.put(corDef, newPos);
// then add any cor var from the left input. Do not need to change
// output positions.
// Create the mapping between the output of the old correlation rel
// and the new join rel
final Map<Integer, Integer> mapOldToNewOutputs = new HashMap<>();
int oldLeftFieldCount = oldLeft.getRowType().getFieldCount();
int oldRightFieldCount = oldRight.getRowType().getFieldCount();
// Left input positions are not changed.
// NOTE(review): no code populating the left-side mapping is visible in this listing.
final RexNode condition = RexUtil.composeConjunction(rexBuilder, conditions, false);
RelNode newJoin = null;
// this indicates original query was either correlated EXISTS or IN
if (rel.getJoinType() == JoinRelType.SEMI || rel.getJoinType() == JoinRelType.ANTI) {
// NOTE(review): leftKeys, rightKeys and inputRels are declared but not read in this listing.
final List<Integer> leftKeys = new ArrayList<Integer>();
final List<Integer> rightKeys = new ArrayList<Integer>();
RelNode[] inputRels = new RelNode[] { leftFrame.r, rightFrame.r };
if (rel.getJoinType() == JoinRelType.ANTI) {
newJoin = HiveAntiJoin.getAntiJoin(rel.getCluster(), rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r, condition);
} else {
newJoin = HiveSemiJoin.getSemiJoin(rel.getCluster(), rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r, condition);
} else {
// Right input positions are shifted by newLeftFieldCount.
for (int i = 0; i < oldRightFieldCount; i++) {
mapOldToNewOutputs.put(i + oldLeftFieldCount, rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount);
newJoin = relBuilder.push(leftFrame.r).push(rightFrame.r).join(rel.getJoinType(), condition).build();
return register(rel, newJoin, mapOldToNewOutputs, corDefOutputs);
Example usage from the Apache Hive project.
Class HiveRelDecorrelator, method decorrelateInputWithValueGenerator.
/**
 * Makes the correlated variables referenced by {@code rel} available on its single input.
 *
 * <p>For a {@link Filter}, the method first tries to resolve every referenced correlation,
 * either from the definitions the input frame already exposes or from an equivalent found in
 * the filter condition; if all are resolved, the input is re-registered as is. Otherwise the
 * input is joined (INNER, condition TRUE) with a value generator built by
 * {@code createValueGenerator}.
 *
 * <p>NOTE(review): this listing appears to have lost lines. Closing braces are absent and
 * some comments are cut mid-sentence. Restore from the upstream file before compiling.
 *
 * @param rel a rel with exactly one input (asserted)
 * @return the frame registered for {@code rel}'s input
 */
private Frame decorrelateInputWithValueGenerator(RelNode rel) {
// currently only handles one input input
assert rel.getInputs().size() == 1;
RelNode oldInput = rel.getInput(0);
final Frame frame = map.get(oldInput);
// Work on a copy of the frame's correlation-definition map.
final SortedMap<CorDef, Integer> corDefOutputs = new TreeMap<>(frame.corDefOutputs);
final Collection<CorRef> corVarList = cm.mapRefRelToCorRef.get(rel);
// This means that we do not need a value generator.
if (rel instanceof Filter) {
// Correlation definitions resolved without a value generator, mapped to their positions.
SortedMap<CorDef, Integer> coreMap = new TreeMap<>();
for (CorRef correlation : corVarList) {
final CorDef def = correlation.def();
// we don't need to create value generator for them.
if (corDefOutputs.containsKey(def)) {
coreMap.put(def, corDefOutputs.get(def));
// seen this before in this loop so we don't need to treat it again.
// NOTE(review): the statements that presumably skip to the next correlation after the two
// containsKey checks are not visible in this listing - confirm against upstream.
if (coreMap.containsKey(def)) {
try {
// A match is reported by throwing Util.FoundOne, which is caught just below.
findCorrelationEquivalent(correlation, ((Filter) rel).getCondition());
} catch (Util.FoundOne e) {
// we need to keep predicate kind e.g. EQUAL or NOT EQUAL
// so that later while decorrelating LogicalCorrelate appropriate join predicate
// is generated
// e.getNode() is unpacked as Pair(inputRef, Pair(operator, isLeft)) via raw Pair casts.
def.setPredicateKind((SqlOperator) ((Pair) ((Pair) e.getNode()).getValue()).getKey());
def.setIsLeft((boolean) ((Pair) ((Pair) e.getNode()).getValue()).getValue());
final Integer oldInputRef = (Integer) ((Pair) e.getNode()).getKey();
final Integer newInputRef = frame.oldToNewOutputs.get(oldInputRef);
coreMap.put(def, newInputRef);
// generator.
// Every referenced correlation was resolved: reuse the input without a value generator.
if (coreMap.size() == corVarList.size()) {
return register(oldInput, frame.r, frame.oldToNewOutputs, coreMap);
int leftInputOutputCount = frame.r.getRowType().getFieldCount();
// can directly add positions into corDefOutputs since join
// does not change the output ordering from the inputs.
RelNode valueGenRel = createValueGenerator(corVarList, leftInputOutputCount, corDefOutputs);
// Inner join on literal TRUE between the input and the value generator.
RelNode join = relBuilder.push(frame.r).push(valueGenRel).join(JoinRelType.INNER, rexBuilder.makeLiteral(true)).build();
// Filter) are in the output and in the same position.
return register(oldInput, join, frame.oldToNewOutputs, corDefOutputs);