use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode in project hive by apache.
the class HiveSortLimitPullUpConstantsRule method onMatch.
/**
 * Pulls columns that are known to be constant out from under a Sort.
 *
 * <p>The matched pattern is a parent operator (operand 0) over a Sort (operand 1). Columns of the
 * Sort input that the pulled-up predicates pin to a constant are dropped by a Project placed
 * below the Sort and re-introduced as constant expressions by a Project placed above it. Sort
 * keys that refer to dropped columns are discarded. The parent is then copied with the rewritten
 * subtree substituted for the original Sort.
 *
 * @param call the rule call carrying the matched parent and Sort
 */
@Override
public void onMatch(RelOptRuleCall call) {
  final RelNode parent = call.rel(0);
  final Sort sort = call.rel(1);
  final RelNode sortInput = sort.getInput();
  final int fieldCount = sortInput.getRowType().getFieldCount();
  if (fieldCount == 1) {
    // With a single column there is nothing to pull up: the lower Project must keep at
    // least one expression.
    return;
  }

  final RexBuilder rexBuilder = sort.getCluster().getRexBuilder();
  final RelMetadataQuery mq = call.getMetadataQuery();
  final RelOptPredicateList pulledUp = mq.getPulledUpPredicates(sortInput);
  if (pulledUp == null) {
    return;
  }

  // Keep only the constants that correspond to a plain input column reference.
  final Map<RexNode, RexNode> knownConstants =
      HiveReduceExpressionsRule.predicateConstants(RexNode.class, rexBuilder, pulledUp);
  final Map<RexNode, RexNode> constants = new HashMap<>();
  for (int idx = 0; idx < fieldCount; idx++) {
    final RexNode ref = rexBuilder.makeInputRef(sortInput, idx);
    if (knownConstants.containsKey(ref)) {
      constants.put(ref, knownConstants.get(ref));
    }
  }
  if (constants.isEmpty()) {
    // No column is constant, so there is nothing to rewrite.
    return;
  }
  if (constants.size() == fieldCount) {
    // Every column is constant; leave one in place so the lower Project is not empty.
    constants.remove(constants.keySet().iterator().next());
  }

  // Split the columns into those kept below the Sort and the full list rebuilt above it.
  final List<RelDataTypeField> inputFields = sortInput.getRowType().getFieldList();
  final List<Pair<RexNode, String>> bottomProjects = new ArrayList<>();
  List<RexNode> topProjects = new ArrayList<>();
  final List<String> topNames = new ArrayList<>();
  for (int idx = 0; idx < fieldCount; idx++) {
    final RexNode ref = rexBuilder.makeInputRef(sortInput, idx);
    final String name = inputFields.get(idx).getName();
    topNames.add(name);
    if (constants.containsKey(ref)) {
      topProjects.add(constants.get(ref));
    } else {
      bottomProjects.add(Pair.<RexNode, String>of(ref, name));
      topProjects.add(ref);
    }
  }

  // Re-target the sort keys to the narrowed row; keys on constant columns are dropped.
  final Mappings.TargetMapping mapping =
      RelOptUtil.permutation(Pair.left(bottomProjects), sortInput.getRowType()).inverse();
  final List<RelFieldCollation> remappedCollations = new ArrayList<>();
  for (RelFieldCollation collation : sort.getCollation().getFieldCollations()) {
    final int target = mapping.getTargetOpt(collation.getFieldIndex());
    if (target >= 0) {
      remappedCollations.add(collation.copy(target));
    }
  }

  // The top Project references the narrowed row, so shift its input refs accordingly.
  topProjects = ImmutableList.copyOf(RexUtil.apply(mapping, topProjects));

  // Assemble Project -> Sort -> Project on top of the original Sort input.
  final RelBuilder relBuilder = call.builder();
  relBuilder.push(sortInput);
  relBuilder.project(Pair.left(bottomProjects), Pair.right(bottomProjects));
  final ImmutableList<RexNode> sortKeys = relBuilder.fields(RelCollations.of(remappedCollations));
  final int offsetValue = sort.offset == null ? -1 : RexLiteral.intValue(sort.offset);
  final int fetchValue = sort.fetch == null ? -1 : RexLiteral.intValue(sort.fetch);
  relBuilder.sortLimit(offsetValue, fetchValue, sortKeys);
  // The top Project restores the original columns; convert() then fixes field nullability.
  relBuilder.project(topProjects, topNames);
  relBuilder.convert(sort.getRowType(), false);

  // Swap the rewritten subtree in for the Sort among the parent's inputs.
  final List<RelNode> newInputs = new ArrayList<>();
  for (RelNode child : parent.getInputs()) {
    if (((HepRelVertex) child).getCurrentRel().equals(sort)) {
      newInputs.add(relBuilder.build());
    } else {
      newInputs.add(child);
    }
  }
  call.transformTo(parent.copy(parent.getTraitSet(), newInputs));
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode in project hive by apache.
the class HiveSubQueryRemoveRule method apply.
/**
 * Rewrites a sub-query expression into joins on the given builder and returns the expression
 * that replaces the sub-query in the enclosing operator.
 *
 * <p>Handled kinds:
 * <ul>
 * <li>{@code SCALAR_QUERY}: optionally guards the sub-query with an {@code sq_count_check}
 * filter, then joins it to the outer query. Returns either a field reference into the join
 * output or, when {@code isCorrScalarAgg} is set, a CASE expression that substitutes a default
 * value for rows without a match.</li>
 * <li>{@code IN} / {@code EXISTS}: joins the outer query with the (distinct) sub-query rows and,
 * depending on {@code logic}, with a count aggregate over the sub-query. Returns the literal
 * {@code true} when {@code logic} is {@code TRUE}, otherwise a CASE expression.</li>
 * </ul>
 *
 * @param e the sub-query expression to rewrite
 * @param variablesSet correlation ids handed to every join that is created
 * @param logic the logic the caller requires; selects which joins and CASE branches are emitted
 * @param builder builder holding the outer query; the sub-query is pushed onto and joined with it
 * @param inputCount passed to {@code field(builder, inputCount, offset)} for the scalar result
 * @param offset field offset into the join output; incremented locally as extra columns are added
 * @param isCorrScalarAgg selects the indicator-column rewrite for scalar queries and the
 *        {@code sq_count_check} guard for IN
 * @param hasNoWindowingAndNoGby when true the scalar {@code sq_count_check} guard is skipped
 * @return the expression replacing the sub-query
 * @throws AssertionError if the sub-query kind is not SCALAR_QUERY, IN or EXISTS
 */
protected RexNode apply(RexSubQuery e, Set<CorrelationId> variablesSet, RelOptUtil.Logic logic, HiveSubQRemoveRelBuilder builder, int inputCount, int offset, boolean isCorrScalarAgg, boolean hasNoWindowingAndNoGby) {
switch(e.getKind()) {
case SCALAR_QUERY:
// Guard with sq_count_check only when hasNoWindowingAndNoGby is false; otherwise the check
// is skipped since it is guaranteed to produce at most one row
if (!hasNoWindowingAndNoGby) {
final List<RexNode> parentQueryFields = new ArrayList<>();
if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
// we want to have project after join since sq_count_check's count() expression wouldn't
// be needed further up
parentQueryFields.addAll(builder.fields());
}
builder.push(e.rel);
// returns single row/column
builder.aggregate(builder.groupKey(), builder.count(false, "cnt"));
SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION);
// we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer
// ends up getting rid of Project since it is not used further up the tree
builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, builder.call(countCheck, builder.field("cnt")), builder.literal(1)));
// Correlated sub-queries are attached with a LEFT join, uncorrelated ones with an INNER join
if (!variablesSet.isEmpty()) {
builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
} else {
builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
}
if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
// Project back to the outer query's fields, dropping the count column
builder.project(parentQueryFields);
} else {
// The count column stays in the join output, so the scalar result sits one position later
offset++;
}
}
if (isCorrScalarAgg) {
// Transformation :
// Outer Query Left Join (inner query) on correlated predicate
// and preserve rows only from left side.
builder.push(e.rel);
final List<RexNode> parentQueryFields = new ArrayList<>();
parentQueryFields.addAll(builder.fields());
// id is appended since there could be multiple scalar subqueries and FILTER
// is created using field name
String indicator = "alwaysTrue" + e.rel.getId();
parentQueryFields.add(builder.alias(builder.literal(true), indicator));
builder.project(parentQueryFields);
builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
// Builds CASE WHEN <indicator> IS NULL THEN <default> ELSE <sub-query value> END
final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
RexNode literal;
if (isAggZeroOnEmpty(e)) {
// since count has a return type of BIG INT we need to make a literal of type big int
// relbuilder's literal doesn't allow this
literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0));
} else {
literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e));
}
operands.add((builder.isNull(builder.field(indicator))), literal);
// The indicator is the last field of the join output; the field just before it is used as
// the sub-query value
operands.add(field(builder, 1, builder.fields().size() - 2));
return builder.call(SqlStdOperatorTable.CASE, operands.build());
}
// Transformation is to left join for correlated predicates and inner join otherwise,
// but do a count on inner side before that to make sure it generates at most 1 row.
// NOTE(review): the join of the sub-query itself below is always a LEFT join; the
// LEFT/INNER choice described above is the one made for the count check earlier.
builder.push(e.rel);
builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
return field(builder, inputCount, offset);
case IN:
case EXISTS:
// Most general case, where the left and right keys might have nulls, and
// caller requires 3-valued logic return.
//
// select e.deptno, e.deptno in (select deptno from emp)
//
// becomes
//
// select e.deptno,
// case
// when ct.c = 0 then false
// when dt.i is not null then true
// when e.deptno is null then null
// when ct.ck < ct.c then null
// else false
// end
// from e
// left join (
// (select count(*) as c, count(deptno) as ck from emp) as ct
// cross join (select distinct deptno, true as i from emp)) as dt
// on e.deptno = dt.deptno
//
// If keys are not null we can remove "ct" and simplify to
//
// select e.deptno,
// case
// when dt.i is not null then true
// else false
// end
// from e
// left join (select distinct deptno, true as i from emp) as dt
// on e.deptno = dt.deptno
//
// We could further simplify to
//
// select e.deptno,
// dt.i is not null
// from e
// left join (select distinct deptno, true as i from emp) as dt
// on e.deptno = dt.deptno
//
// but have not yet.
//
// If the logic is TRUE we can just kill the record if the condition
// evaluates to FALSE or UNKNOWN. Thus the query simplifies to an inner
// join:
//
// select e.deptno,
// true
// from e
// inner join (select distinct deptno from emp) as dt
// on e.deptno = dt.deptno
//
builder.push(e.rel);
final List<RexNode> fields = new ArrayList<>();
// Only IN has a case in this switch; for EXISTS nothing runs and "fields" stays empty
switch(e.getKind()) {
case IN:
fields.addAll(builder.fields());
// NOTE(review): the comment below appears to be the tail of a longer, truncated explanation
// of why the count check is added when isCorrScalarAgg is set.
// will produce wrong results (because we further rewrite such queries into JOIN)
if (isCorrScalarAgg) {
// returns single row/column
builder.aggregate(builder.groupKey(), builder.count(false, "cnt_in"));
if (!variablesSet.isEmpty()) {
builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
} else {
builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
}
SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION);
// we create FILTER (sq_count_check(count()) > 0) instead of PROJECT
// because RelFieldTrimmer ends up getting rid of Project
// since it is not used further up the tree
builder.filter(builder.call(SqlStdOperatorTable.GREATER_THAN, // true here indicates that sq_count_check is for IN/NOT IN subqueries
builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)), builder.literal(0)));
// The cnt_in column is now part of the join output; the sub-query is pushed again because
// the aggregate above consumed the previous copy
offset = offset + 1;
builder.push(e.rel);
}
}
// First, the cross join
switch(logic) {
case TRUE_FALSE_UNKNOWN:
case UNKNOWN_AS_TRUE:
// null keys we do not need to generate count(*), count(c)
// For EXISTS the logic is downgraded to TRUE_FALSE and the "ct" aggregate is skipped
if (e.getKind() == SqlKind.EXISTS) {
logic = RelOptUtil.Logic.TRUE_FALSE;
break;
}
builder.aggregate(builder.groupKey(), builder.count(false, "c"), builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", builder.fields()));
builder.as("ct");
if (!variablesSet.isEmpty()) {
// builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
} else {
builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
}
// "c" and "ck" add two columns to the join output; push the sub-query again for "dt"
offset += 2;
builder.push(e.rel);
break;
}
// Now the left join
switch(logic) {
case TRUE:
if (fields.isEmpty()) {
builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId()));
if (!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
// since this is rewriting into semijoin
break;
} else {
builder.aggregate(builder.groupKey(0));
}
} else {
if (!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
// since this is rewriting into semijoin
break;
} else {
builder.aggregate(builder.groupKey(fields));
}
}
break;
default:
// Append the indicator column "i<id>" and keep only distinct rows
fields.add(builder.alias(builder.literal(true), "i" + e.rel.getId()));
builder.project(fields);
builder.distinct();
}
builder.as("dt");
// Join condition: each sub-query operand equals the matching "dt" field, shifted by offset
final List<RexNode> conditions = new ArrayList<>();
for (Pair<RexNode, RexNode> pair : Pair.zip(e.getOperands(), builder.fields())) {
conditions.add(builder.equals(pair.left, RexUtil.shift(pair.right, offset)));
}
switch(logic) {
case TRUE:
// Rows without a match are simply dropped, so the sub-query is replaced by literal true
builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true);
return builder.literal(true);
}
builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);
// Collect IS NULL tests for the nullable operands; used for the "key is null" CASE branch
final List<RexNode> keyIsNulls = new ArrayList<>();
for (RexNode operand : e.getOperands()) {
if (operand.getType().isNullable()) {
keyIsNulls.add(builder.isNull(operand));
}
}
// Assemble the CASE operands as (condition, value) pairs followed by the ELSE value
final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
switch(logic) {
case TRUE_FALSE_UNKNOWN:
case UNKNOWN_AS_TRUE:
operands.add(builder.equals(builder.field("ct", "c"), builder.literal(0)), builder.literal(false));
// now that we are using LEFT OUTER JOIN to join inner count, count(*)
// with outer table, we wouldn't be able to tell if count is zero
// for inner table since inner join with correlated values will get rid
// of all values where join cond is not true (i.e where actual inner table
// will produce zero result). To handle this case we need to check both
// count is zero or count is null
operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false));
break;
}
operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), builder.literal(true));
if (!keyIsNulls.isEmpty()) {
// Calcite creates null literal with Null type here but
// because HIVE doesn't support null type it is appropriately typed boolean
operands.add(builder.or(keyIsNulls), e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN));
// we are creating filter here so should not be returning NULL.
// Not sure why Calcite return NULL
}
// Value used when ck < c: NULL for TRUE_FALSE_UNKNOWN, true for UNKNOWN_AS_TRUE
RexNode b = builder.literal(true);
switch(logic) {
case TRUE_FALSE_UNKNOWN:
b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN);
// fall through
case UNKNOWN_AS_TRUE:
operands.add(builder.call(SqlStdOperatorTable.LESS_THAN, builder.field("ct", "ck"), builder.field("ct", "c")), b);
break;
}
// ELSE branch of the CASE
operands.add(builder.literal(false));
return builder.call(SqlStdOperatorTable.CASE, operands.build());
default:
throw new AssertionError(e.getKind());
}
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode in project hive by apache.
the class HiveRelMdPredicates method getPredicates.
/**
 * Infers predicates for a Union.
 *
 * <p>A predicate whose string form occurs in every input is emitted as-is. The remaining
 * predicates are gathered into one conjunction per input, and the disjunction of those
 * conjunctions is added to the result unless it is always true. If any input has no pulled-up
 * predicates, nothing can be inferred and the empty list is returned.
 *
 * @param union the Union whose predicates are inferred
 * @param mq metadata query used to fetch the pulled-up predicates of each input
 * @return the predicates that hold on the output of the Union
 */
public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) {
  final RexBuilder rexBuilder = union.getCluster().getRexBuilder();
  final List<RelNode> unionInputs = union.getInputs();

  // Predicates seen in every input processed so far, keyed by their string form.
  Map<String, RexNode> commonPreds = new HashMap<>();
  // One conjunction per processed input, holding that input's non-shared predicates.
  final List<RexNode> residualPerInput = new ArrayList<>();

  for (int inputIdx = 0; inputIdx < unionInputs.size(); inputIdx++) {
    final RelOptPredicateList inputInfo = mq.getPulledUpPredicates(unionInputs.get(inputIdx));
    if (inputInfo.pulledUpPredicates.isEmpty()) {
      return RelOptPredicateList.EMPTY;
    }

    final Map<String, RexNode> stillCommon = new HashMap<>();
    final List<RexNode> onlyHere = new ArrayList<>();
    for (RexNode pred : inputInfo.pulledUpPredicates) {
      final String key = pred.toString();
      // Everything from the first input starts out as common; later inputs must match.
      if (inputIdx == 0 || commonPreds.containsKey(key)) {
        stillCommon.put(key, pred);
      } else {
        onlyHere.add(pred);
      }
    }

    // Record the residual conjunction for this input.
    residualPerInput.add(RexUtil.composeConjunction(rexBuilder, onlyHere, false));

    // A predicate shared by all earlier inputs but absent here is demoted: it is AND-ed into
    // the residual of each earlier input.
    for (Entry<String, RexNode> previous : commonPreds.entrySet()) {
      if (stillCommon.containsKey(previous.getKey())) {
        continue;
      }
      for (int earlier = 0; earlier < inputIdx; earlier++) {
        final RexNode widened = RexUtil.composeConjunction(rexBuilder,
            Lists.newArrayList(residualPerInput.get(earlier), previous.getValue()), false);
        residualPerInput.set(earlier, widened);
      }
    }

    commonPreds = stillCommon;
  }

  final List<RexNode> result = new ArrayList<>(commonPreds.values());
  final RexNode residualDisjunction = RexUtil.composeDisjunction(rexBuilder, residualPerInput, false);
  if (!residualDisjunction.isAlwaysTrue()) {
    result.add(residualDisjunction);
  }
  return RelOptPredicateList.of(rexBuilder, result);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode in project hive by apache.
the class HiveOpConverter method translateJoin.
/**
 * Translates a join rel into a Hive {@code JoinOperator}.
 *
 * <p>Each input is converted first and its first operator is tagged with the input's position.
 * Join keys are taken from the inputs (which are cast to {@code HiveSortExchange}), and the join
 * filters of the rel are converted into filter expressions.
 *
 * @param joinRel the join to translate; must be a HiveJoin, HiveMultiJoin or HiveSemiJoin
 * @return the operator attributes wrapping the generated join operator
 * @throws SemanticException if {@code joinRel} is not one of the supported join types
 */
private OpAttr translateJoin(RelNode joinRel) throws SemanticException {
  // 0. Bookkeeping required by the Hive join optimization
  final String[] baseSrc = new String[joinRel.getInputs().size()];
  final String tabAlias = getHiveDerivedTableAlias();

  // 1. Convert every input, remembering its first operator and its table alias
  final OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
  final List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
  for (int pos = 0; pos < inputs.length; pos++) {
    final OpAttr converted = dispatch(joinRel.getInput(pos));
    inputs[pos] = converted;
    children.add(converted.inputs.get(0));
    baseSrc[pos] = converted.tabAlias;
  }

  // 2. Tag each child reduce sink with its position
  int nextTag = 0;
  for (Operator<?> child : children) {
    ((ReduceSinkOperator) child).getConf().setTag(nextTag);
    nextTag++;
  }

  // 3. Virtual columns: those of the first input, plus the shifted ones of the other inputs
  //    unless this is a plain semi join
  final Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
  newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
  final boolean includeOtherInputs = joinRel instanceof HiveMultiJoin || !(joinRel instanceof SemiJoin);
  if (includeOtherInputs) {
    int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
    for (int pos = 1; pos < inputs.length; pos++) {
      newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[pos].vcolsInCalcite, shift));
      shift += inputs[pos].inputs.get(0).getSchema().getSignature().size();
    }
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + " with row type: [" + joinRel.getRowType() + "]");
  }

  // 4. Join key expressions come from the HiveSortExchange under each input
  final ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
  for (int pos = 0; pos < inputs.length; pos++) {
    final HiveSortExchange exchange = (HiveSortExchange) joinRel.getInput(pos);
    joinExpressions[pos] = exchange.getJoinExpressions();
  }

  // 5. Remaining join predicate info: conditions that are not part of the join key are
  //    carried as filters
  final List<RexNode> joinFilters;
  if (joinRel instanceof HiveJoin) {
    joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
  } else if (joinRel instanceof HiveMultiJoin) {
    joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
  } else if (joinRel instanceof HiveSemiJoin) {
    joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
  } else {
    throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
  }
  final List<List<ExprNodeDesc>> filterExpressions = new ArrayList<>();
  for (RexNode joinFilter : joinFilters) {
    final List<ExprNodeDesc> converted = new ArrayList<ExprNodeDesc>();
    if (joinFilter != null) {
      for (RexNode conjunct : RelOptUtil.conjunctions(joinFilter)) {
        converted.add(convertToExprNode(conjunct, joinRel, null, newVcolsInCalcite));
      }
    }
    filterExpressions.add(converted);
  }

  // 6. Build the join operator and 7. wrap it up
  final JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
  return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode in project hive by apache.
the class HiveSubQRemoveRelBuilder method call.
/**
 * Creates a call to a scalar operator.
 *
 * <p>The return type is derived from the operator and its operands via the cluster's
 * {@code RexBuilder}.
 *
 * @param operator the operator to call
 * @param operands the operands of the call
 * @return the call expression
 * @throws IllegalArgumentException if no return type can be derived for the call
 */
public RexNode call(SqlOperator operator, RexNode... operands) {
  final RexBuilder rexBuilder = cluster.getRexBuilder();
  final List<RexNode> args = ImmutableList.copyOf(operands);
  final RelDataType returnType = rexBuilder.deriveReturnType(operator, args);
  if (returnType != null) {
    return rexBuilder.makeCall(returnType, operator, args);
  }
  throw new IllegalArgumentException("cannot derive type: " + operator + "; operands: " + Lists.transform(args, FN_TYPE));
}
Aggregations