use of org.apache.calcite.rex.RexNode in project hive by apache.
the class HiveRelFieldTrimmer method trimFields.
/**
* Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
* {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
*/
public TrimResult trimFields(HiveMultiJoin join, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
final int fieldCount = join.getRowType().getFieldCount();
final RexNode conditionExpr = join.getCondition();
final List<RexNode> joinFilters = join.getJoinFilters();
// Add in fields used in the condition.
final Set<RelDataTypeField> combinedInputExtraFields = new LinkedHashSet<RelDataTypeField>(extraFields);
RelOptUtil.InputFinder inputFinder = new RelOptUtil.InputFinder(combinedInputExtraFields);
inputFinder.inputBitSet.addAll(fieldsUsed);
conditionExpr.accept(inputFinder);
final ImmutableBitSet fieldsUsedPlus = inputFinder.inputBitSet.build();
int inputStartPos = 0;
int changeCount = 0;
int newFieldCount = 0;
List<RelNode> newInputs = new ArrayList<RelNode>();
List<Mapping> inputMappings = new ArrayList<Mapping>();
for (RelNode input : join.getInputs()) {
final RelDataType inputRowType = input.getRowType();
final int inputFieldCount = inputRowType.getFieldCount();
// Compute required mapping.
ImmutableBitSet.Builder inputFieldsUsed = ImmutableBitSet.builder();
for (int bit : fieldsUsedPlus) {
if (bit >= inputStartPos && bit < inputStartPos + inputFieldCount) {
inputFieldsUsed.set(bit - inputStartPos);
}
}
Set<RelDataTypeField> inputExtraFields = Collections.<RelDataTypeField>emptySet();
TrimResult trimResult = trimChild(join, input, inputFieldsUsed.build(), inputExtraFields);
newInputs.add(trimResult.left);
if (trimResult.left != input) {
++changeCount;
}
final Mapping inputMapping = trimResult.right;
inputMappings.add(inputMapping);
// Move offset to point to start of next input.
inputStartPos += inputFieldCount;
newFieldCount += inputMapping.getTargetCount();
}
Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, newFieldCount);
int offset = 0;
int newOffset = 0;
for (int i = 0; i < inputMappings.size(); i++) {
Mapping inputMapping = inputMappings.get(i);
for (IntPair pair : inputMapping) {
mapping.set(pair.source + offset, pair.target + newOffset);
}
offset += inputMapping.getSourceCount();
newOffset += inputMapping.getTargetCount();
}
if (changeCount == 0 && mapping.isIdentity()) {
return new TrimResult(join, Mappings.createIdentity(fieldCount));
}
// Build new join.
final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(mapping, newInputs.toArray(new RelNode[newInputs.size()]));
RexNode newConditionExpr = conditionExpr.accept(shuttle);
List<RexNode> newJoinFilters = Lists.newArrayList();
for (RexNode joinFilter : joinFilters) {
newJoinFilters.add(joinFilter.accept(shuttle));
}
final RelDataType newRowType = RelOptUtil.permute(join.getCluster().getTypeFactory(), join.getRowType(), mapping);
final RelNode newJoin = new HiveMultiJoin(join.getCluster(), newInputs, newConditionExpr, newRowType, join.getJoinInputs(), join.getJoinTypes(), newJoinFilters);
return new TrimResult(newJoin, mapping);
}
use of org.apache.calcite.rex.RexNode in project hive by apache.
the class HiveSemiJoinRule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
LOG.debug("Matched HiveSemiJoinRule");
final Project project = call.rel(0);
final Join join = call.rel(1);
final RelNode left = call.rel(2);
final Aggregate aggregate = call.rel(3);
final RelOptCluster cluster = join.getCluster();
final RexBuilder rexBuilder = cluster.getRexBuilder();
final ImmutableBitSet bits = RelOptUtil.InputFinder.bits(project.getProjects(), null);
final ImmutableBitSet rightBits = ImmutableBitSet.range(left.getRowType().getFieldCount(), join.getRowType().getFieldCount());
if (bits.intersects(rightBits)) {
return;
}
final JoinInfo joinInfo = join.analyzeCondition();
if (!joinInfo.rightSet().equals(ImmutableBitSet.range(aggregate.getGroupCount()))) {
// By the way, neither a super-set nor a sub-set would work.
return;
}
if (join.getJoinType() == JoinRelType.LEFT) {
// since for LEFT join we are only interested in rows from LEFT we can get rid of right side
call.transformTo(call.builder().push(left).project(project.getProjects(), project.getRowType().getFieldNames()).build());
return;
}
if (join.getJoinType() != JoinRelType.INNER) {
return;
}
if (!joinInfo.isEqui()) {
return;
}
LOG.debug("All conditions matched for HiveSemiJoinRule. Going to apply transformation.");
final List<Integer> newRightKeyBuilder = Lists.newArrayList();
final List<Integer> aggregateKeys = aggregate.getGroupSet().asList();
for (int key : joinInfo.rightKeys) {
newRightKeyBuilder.add(aggregateKeys.get(key));
}
final ImmutableIntList newRightKeys = ImmutableIntList.copyOf(newRightKeyBuilder);
final RelNode newRight = aggregate.getInput();
final RexNode newCondition = RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight, newRightKeys, rexBuilder);
RelNode semi = null;
// is not expected further down the pipeline. see jira for more details
if (aggregate.getInput() instanceof HepRelVertex && ((HepRelVertex) aggregate.getInput()).getCurrentRel() instanceof Join) {
Join rightJoin = (Join) (((HepRelVertex) aggregate.getInput()).getCurrentRel());
List<RexNode> projects = new ArrayList<>();
for (int i = 0; i < rightJoin.getRowType().getFieldCount(); i++) {
projects.add(rexBuilder.makeInputRef(rightJoin, i));
}
RelNode topProject = call.builder().push(rightJoin).project(projects, rightJoin.getRowType().getFieldNames(), true).build();
semi = call.builder().push(left).push(topProject).semiJoin(newCondition).build();
} else {
semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build();
}
call.transformTo(call.builder().push(semi).project(project.getProjects(), project.getRowType().getFieldNames()).build());
}
use of org.apache.calcite.rex.RexNode in project hive by apache.
the class HivePointLookupOptimizerRule method onMatch.
public void onMatch(RelOptRuleCall call) {
final Filter filter = call.rel(0);
final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
// 1. We try to transform possible candidates
RexTransformIntoInClause transformIntoInClause = new RexTransformIntoInClause(rexBuilder, filter, minNumORClauses);
RexNode newCondition = transformIntoInClause.apply(condition);
// 2. We merge IN expressions
RexMergeInClause mergeInClause = new RexMergeInClause(rexBuilder);
newCondition = mergeInClause.apply(newCondition);
// 3. If we could not transform anything, we bail out
if (newCondition.toString().equals(condition.toString())) {
return;
}
// 4. We create the filter with the new condition
RelNode newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newCondition);
call.transformTo(newFilter);
}
use of org.apache.calcite.rex.RexNode in project hive by apache.
the class HivePreFilteringRule method extractCommonOperands.
private static List<RexNode> extractCommonOperands(RexBuilder rexBuilder, RexNode condition, int maxCNFNodeCount) {
assert condition.getKind() == SqlKind.OR;
Multimap<String, RexNode> reductionCondition = LinkedHashMultimap.create();
// Data structure to control whether a certain reference is present in every
// operand
Set<String> refsInAllOperands = null;
// 1. We extract the information necessary to create the predicate for the
// new filter; currently we support comparison functions, in and between
ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
for (int i = 0; i < operands.size(); i++) {
final RexNode operand = operands.get(i);
final RexNode operandCNF = RexUtil.toCnf(rexBuilder, maxCNFNodeCount, operand);
final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);
Set<String> refsInCurrentOperand = Sets.newHashSet();
for (RexNode conjunction : conjunctions) {
// We do not know what it is, we bail out for safety
if (!(conjunction instanceof RexCall) || !HiveCalciteUtil.isDeterministic(conjunction)) {
return new ArrayList<>();
}
RexCall conjCall = (RexCall) conjunction;
RexNode ref = null;
if (COMPARISON.contains(conjCall.getOperator().getKind())) {
if (conjCall.operands.get(0) instanceof RexInputRef && conjCall.operands.get(1) instanceof RexLiteral) {
ref = conjCall.operands.get(0);
} else if (conjCall.operands.get(1) instanceof RexInputRef && conjCall.operands.get(0) instanceof RexLiteral) {
ref = conjCall.operands.get(1);
} else {
// We do not know what it is, we bail out for safety
return new ArrayList<>();
}
} else if (conjCall.getOperator().getKind().equals(SqlKind.IN)) {
ref = conjCall.operands.get(0);
} else if (conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) {
ref = conjCall.operands.get(1);
} else {
// We do not know what it is, we bail out for safety
return new ArrayList<>();
}
String stringRef = ref.toString();
reductionCondition.put(stringRef, conjCall);
refsInCurrentOperand.add(stringRef);
}
// Updates the references that are present in every operand up till now
if (i == 0) {
refsInAllOperands = refsInCurrentOperand;
} else {
refsInAllOperands = Sets.intersection(refsInAllOperands, refsInCurrentOperand);
}
// bail out
if (refsInAllOperands.isEmpty()) {
return new ArrayList<>();
}
}
// 2. We gather the common factors and return them
List<RexNode> commonOperands = new ArrayList<>();
for (String ref : refsInAllOperands) {
commonOperands.add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false));
}
return commonOperands;
}
use of org.apache.calcite.rex.RexNode in project hive by apache.
the class HivePreFilteringRule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
final Filter filter = call.rel(0);
// 0. Register that we have visited this operator in this rule
HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
if (registry != null) {
registry.registerVisited(this, filter);
}
final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
// 1. Recompose filter possibly by pulling out common elements from DNF
// expressions
RexNode topFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
// 2. We extract possible candidates to be pushed down
List<RexNode> operandsToPushDown = new ArrayList<>();
List<RexNode> deterministicExprs = new ArrayList<>();
List<RexNode> nonDeterministicExprs = new ArrayList<>();
switch(topFilterCondition.getKind()) {
case AND:
ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) topFilterCondition).getOperands());
Set<String> operandsToPushDownDigest = new HashSet<String>();
List<RexNode> extractedCommonOperands = null;
for (RexNode operand : operands) {
if (operand.getKind() == SqlKind.OR) {
extractedCommonOperands = extractCommonOperands(rexBuilder, operand, maxCNFNodeCount);
for (RexNode extractedExpr : extractedCommonOperands) {
if (operandsToPushDownDigest.add(extractedExpr.toString())) {
operandsToPushDown.add(extractedExpr);
}
}
}
// elements of DNF/CNF & extract more deterministic pieces out.
if (HiveCalciteUtil.isDeterministic(operand)) {
deterministicExprs.add(operand);
} else {
nonDeterministicExprs.add(operand);
}
}
// NOTE: Hive by convention doesn't pushdown non deterministic expressions
if (nonDeterministicExprs.size() > 0) {
for (RexNode expr : deterministicExprs) {
if (!operandsToPushDownDigest.contains(expr.toString())) {
operandsToPushDown.add(expr);
operandsToPushDownDigest.add(expr.toString());
}
}
topFilterCondition = RexUtil.pullFactors(rexBuilder, RexUtil.composeConjunction(rexBuilder, nonDeterministicExprs, false));
}
break;
case OR:
operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition, maxCNFNodeCount);
break;
default:
return;
}
// 2. If we did not generate anything for the new predicate, we bail out
if (operandsToPushDown.isEmpty()) {
return;
}
// 3. If the new conjuncts are already present in the plan, we bail out
final List<RexNode> newConjuncts = HiveCalciteUtil.getPredsNotPushedAlready(filter.getInput(), operandsToPushDown);
RexNode newPredicate = RexUtil.composeConjunction(rexBuilder, newConjuncts, false);
if (newPredicate.isAlwaysTrue()) {
return;
}
// 4. Otherwise, we create a new condition
final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder, newPredicate);
// 5. We create the new filter that might be pushed down
RelNode newChildFilter = filterFactory.createFilter(filter.getInput(), newChildFilterCondition);
RelNode newTopFilter = filterFactory.createFilter(newChildFilter, topFilterCondition);
// 6. We register both so we do not fire the rule on them again
if (registry != null) {
registry.registerVisited(this, newChildFilter);
registry.registerVisited(this, newTopFilter);
}
call.transformTo(newTopFilter);
}
Aggregations