use of org.apache.calcite.rel.core.Join in project hive by apache.
the class HiveSemiJoinRule method perform.
protected void perform(RelOptRuleCall call, ImmutableBitSet topRefs, RelNode topOperator, Join join, RelNode left, Aggregate aggregate) {
LOG.debug("Matched HiveSemiJoinRule");
final RelOptCluster cluster = join.getCluster();
final RexBuilder rexBuilder = cluster.getRexBuilder();
final ImmutableBitSet rightBits = ImmutableBitSet.range(left.getRowType().getFieldCount(), join.getRowType().getFieldCount());
if (topRefs.intersects(rightBits)) {
return;
}
final JoinInfo joinInfo = join.analyzeCondition();
if (!joinInfo.rightSet().equals(ImmutableBitSet.range(aggregate.getGroupCount()))) {
// By the way, neither a super-set nor a sub-set would work.
return;
}
if (join.getJoinType() == JoinRelType.LEFT) {
// since for LEFT join we are only interested in rows from LEFT we can get rid of right side
call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(left)));
return;
}
if (join.getJoinType() != JoinRelType.INNER) {
return;
}
if (!joinInfo.isEqui()) {
return;
}
LOG.debug("All conditions matched for HiveSemiJoinRule. Going to apply transformation.");
final List<Integer> newRightKeyBuilder = Lists.newArrayList();
final List<Integer> aggregateKeys = aggregate.getGroupSet().asList();
for (int key : joinInfo.rightKeys) {
newRightKeyBuilder.add(aggregateKeys.get(key));
}
final ImmutableIntList newRightKeys = ImmutableIntList.copyOf(newRightKeyBuilder);
final RelNode newRight = aggregate.getInput();
final RexNode newCondition = RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight, newRightKeys, rexBuilder);
RelNode semi = null;
// is not expected further down the pipeline. see jira for more details
if (aggregate.getInput() instanceof HepRelVertex && ((HepRelVertex) aggregate.getInput()).getCurrentRel() instanceof Join) {
Join rightJoin = (Join) (((HepRelVertex) aggregate.getInput()).getCurrentRel());
List<RexNode> projects = new ArrayList<>();
for (int i = 0; i < rightJoin.getRowType().getFieldCount(); i++) {
projects.add(rexBuilder.makeInputRef(rightJoin, i));
}
RelNode topProject = call.builder().push(rightJoin).project(projects, rightJoin.getRowType().getFieldNames(), true).build();
semi = call.builder().push(left).push(topProject).semiJoin(newCondition).build();
} else {
semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build();
}
call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(semi)));
}
use of org.apache.calcite.rel.core.Join in project hive by apache.
the class HiveOpConverter method genJoin.
private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
// 1. Extract join type
JoinCondDesc[] joinCondns;
boolean semiJoin;
boolean noOuterJoin;
if (join instanceof HiveMultiJoin) {
HiveMultiJoin hmj = (HiveMultiJoin) join;
joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
}
semiJoin = false;
noOuterJoin = !hmj.isOuterJoin();
} else {
joinCondns = new JoinCondDesc[1];
semiJoin = join instanceof SemiJoin;
JoinType joinType;
if (semiJoin) {
joinType = JoinType.LEFTSEMI;
} else {
joinType = extractJoinType((Join) join);
}
joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
}
// 2. We create the join aux structures
ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
Operator<?>[] childOps = new Operator[children.size()];
Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
int outputPos = 0;
for (int pos = 0; pos < children.size(); pos++) {
// 2.1. Backtracking from RS
ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
if (inputRS.getNumParent() != 1) {
throw new SemanticException("RS should have single parent");
}
Operator<?> parent = inputRS.getParentOperators().get(0);
ReduceSinkDesc rsDesc = inputRS.getConf();
int[] index = inputRS.getValueIndex();
Byte tag = (byte) rsDesc.getTag();
// 2.1.1. If semijoin...
if (semiJoin && pos != 0) {
exprMap.put(tag, new ArrayList<ExprNodeDesc>());
childOps[pos] = inputRS;
continue;
}
posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
List<String> valColNames = rsDesc.getOutputValueColumnNames();
Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
for (int i = 0; i < index.length; i++) {
ColumnInfo info = new ColumnInfo(parentColumns.get(i));
info.setInternalName(outputColumnNames.get(outputPos));
info.setTabAlias(tabAlias);
outputColumns.add(info);
reversedExprs.put(outputColumnNames.get(outputPos), tag);
outputPos++;
}
exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
colExprMap.putAll(descriptors);
childOps[pos] = inputRS;
}
// 3. We populate the filters and filterMap structure needed in the join descriptor
List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
int[][] filterMap = new int[children.size()][];
for (int i = 0; i < children.size(); i++) {
filtersPerInput.add(new ArrayList<ExprNodeDesc>());
}
// 3. We populate the filters structure
for (int i = 0; i < filterExpressions.size(); i++) {
int leftPos = joinCondns[i].getLeft();
int rightPos = joinCondns[i].getRight();
for (ExprNodeDesc expr : filterExpressions.get(i)) {
// We need to update the exprNode, as currently
// they refer to columns in the output of the join;
// they should refer to the columns output by the RS
int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
if (inputPos == -1) {
inputPos = leftPos;
}
filtersPerInput.get(inputPos).add(expr);
if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
if (inputPos == leftPos) {
updateFilterMap(filterMap, leftPos, rightPos);
} else {
updateFilterMap(filterMap, rightPos, leftPos);
}
}
}
}
for (int pos = 0; pos < children.size(); pos++) {
ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
ReduceSinkDesc rsDesc = inputRS.getConf();
Byte tag = (byte) rsDesc.getTag();
filters.put(tag, filtersPerInput.get(pos));
}
// 4. We create the join operator with its descriptor
JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions, null);
desc.setReversedExprs(reversedExprs);
desc.setFilterMap(filterMap);
JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
joinOp.setColumnExprMap(colExprMap);
joinOp.setPosToAliasMap(posToAliasMap);
joinOp.getConf().setBaseSrc(baseSrc);
if (LOG.isDebugEnabled()) {
LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
}
return joinOp;
}
use of org.apache.calcite.rel.core.Join in project hive by apache.
the class HiveJoinCommuteRule method onMatch.
public void onMatch(final RelOptRuleCall call) {
Project topProject = call.rel(0);
Join join = call.rel(1);
// 1. We check if it is a permutation project. If it is
// not, or this is the identity, the rule will do nothing
final Permutation topPermutation = topProject.getPermutation();
if (topPermutation == null) {
return;
}
if (topPermutation.isIdentity()) {
return;
}
// 2. We swap the join
final RelNode swapped = JoinCommuteRule.swap(join, true);
if (swapped == null) {
return;
}
// bail out.
if (swapped instanceof Join) {
return;
}
// 4. We check if it is a permutation project. If it is
// not, or this is the identity, the rule will do nothing
final Project bottomProject = (Project) swapped;
final Permutation bottomPermutation = bottomProject.getPermutation();
if (bottomPermutation == null) {
return;
}
if (bottomPermutation.isIdentity()) {
return;
}
// 5. If the product of the topPermutation and bottomPermutation yields
// the identity, then we can swap the join and remove the project on
// top.
final Permutation product = topPermutation.product(bottomPermutation);
if (!product.isIdentity()) {
return;
}
// 6. Return the new join as a replacement
final Join swappedJoin = (Join) bottomProject.getInput(0);
call.transformTo(swappedJoin);
}
use of org.apache.calcite.rel.core.Join in project hive by apache.
the class HiveJoinPushTransitivePredicatesRule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
Join join = call.rel(0);
RelOptPredicateList preds = call.getMetadataQuery().getPulledUpPredicates(join);
HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
assert registry != null;
RexBuilder rB = join.getCluster().getRexBuilder();
RelNode lChild = join.getLeft();
RelNode rChild = join.getRight();
Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
List<RexNode> leftPreds = getValidPreds(join.getCluster(), lChild, leftPushedPredicates, preds.leftInferredPredicates, lChild.getRowType());
Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
List<RexNode> rightPreds = getValidPreds(join.getCluster(), rChild, rightPushedPredicates, preds.rightInferredPredicates, rChild.getRowType());
RexNode newLeftPredicate = RexUtil.composeConjunction(rB, leftPreds, false);
RexNode newRightPredicate = RexUtil.composeConjunction(rB, rightPreds, false);
if (newLeftPredicate.isAlwaysTrue() && newRightPredicate.isAlwaysTrue()) {
return;
}
if (!newLeftPredicate.isAlwaysTrue()) {
RelNode curr = lChild;
lChild = filterFactory.createFilter(lChild, newLeftPredicate.accept(new RexReplacer(lChild)));
call.getPlanner().onCopy(curr, lChild);
}
if (!newRightPredicate.isAlwaysTrue()) {
RelNode curr = rChild;
rChild = filterFactory.createFilter(rChild, newRightPredicate.accept(new RexReplacer(rChild)));
call.getPlanner().onCopy(curr, rChild);
}
RelNode newRel = join.copy(join.getTraitSet(), join.getCondition(), lChild, rChild, join.getJoinType(), join.isSemiJoinDone());
call.getPlanner().onCopy(join, newRel);
// Register information about pushed predicates
registry.getPushedPredicates(newRel, 0).addAll(leftPushedPredicates);
registry.getPushedPredicates(newRel, 1).addAll(rightPushedPredicates);
call.transformTo(newRel);
}
use of org.apache.calcite.rel.core.Join in project calcite by apache.
the class MutableRels method toMutable.
public static MutableRel toMutable(RelNode rel) {
if (rel instanceof HepRelVertex) {
return toMutable(((HepRelVertex) rel).getCurrentRel());
}
if (rel instanceof RelSubset) {
return toMutable(Util.first(((RelSubset) rel).getBest(), ((RelSubset) rel).getOriginal()));
}
if (rel instanceof TableScan) {
return MutableScan.of((TableScan) rel);
}
if (rel instanceof Values) {
return MutableValues.of((Values) rel);
}
if (rel instanceof Project) {
final Project project = (Project) rel;
final MutableRel input = toMutable(project.getInput());
return MutableProject.of(input, project.getProjects(), project.getRowType().getFieldNames());
}
if (rel instanceof Filter) {
final Filter filter = (Filter) rel;
final MutableRel input = toMutable(filter.getInput());
return MutableFilter.of(input, filter.getCondition());
}
if (rel instanceof Aggregate) {
final Aggregate aggregate = (Aggregate) rel;
final MutableRel input = toMutable(aggregate.getInput());
return MutableAggregate.of(input, aggregate.getGroupSet(), aggregate.getGroupSets(), aggregate.getAggCallList());
}
if (rel instanceof Sort) {
final Sort sort = (Sort) rel;
final MutableRel input = toMutable(sort.getInput());
return MutableSort.of(input, sort.getCollation(), sort.offset, sort.fetch);
}
if (rel instanceof Calc) {
final Calc calc = (Calc) rel;
final MutableRel input = toMutable(calc.getInput());
return MutableCalc.of(input, calc.getProgram());
}
if (rel instanceof Exchange) {
final Exchange exchange = (Exchange) rel;
final MutableRel input = toMutable(exchange.getInput());
return MutableExchange.of(input, exchange.getDistribution());
}
if (rel instanceof Collect) {
final Collect collect = (Collect) rel;
final MutableRel input = toMutable(collect.getInput());
return MutableCollect.of(collect.getRowType(), input, collect.getFieldName());
}
if (rel instanceof Uncollect) {
final Uncollect uncollect = (Uncollect) rel;
final MutableRel input = toMutable(uncollect.getInput());
return MutableUncollect.of(uncollect.getRowType(), input, uncollect.withOrdinality);
}
if (rel instanceof Window) {
final Window window = (Window) rel;
final MutableRel input = toMutable(window.getInput());
return MutableWindow.of(window.getRowType(), input, window.groups, window.getConstants());
}
if (rel instanceof TableModify) {
final TableModify modify = (TableModify) rel;
final MutableRel input = toMutable(modify.getInput());
return MutableTableModify.of(modify.getRowType(), input, modify.getTable(), modify.getCatalogReader(), modify.getOperation(), modify.getUpdateColumnList(), modify.getSourceExpressionList(), modify.isFlattened());
}
if (rel instanceof Sample) {
final Sample sample = (Sample) rel;
final MutableRel input = toMutable(sample.getInput());
return MutableSample.of(input, sample.getSamplingParameters());
}
if (rel instanceof TableFunctionScan) {
final TableFunctionScan tableFunctionScan = (TableFunctionScan) rel;
final List<MutableRel> inputs = toMutables(tableFunctionScan.getInputs());
return MutableTableFunctionScan.of(tableFunctionScan.getCluster(), tableFunctionScan.getRowType(), inputs, tableFunctionScan.getCall(), tableFunctionScan.getElementType(), tableFunctionScan.getColumnMappings());
}
// is a sub-class of Join.
if (rel instanceof SemiJoin) {
final SemiJoin semiJoin = (SemiJoin) rel;
final MutableRel left = toMutable(semiJoin.getLeft());
final MutableRel right = toMutable(semiJoin.getRight());
return MutableSemiJoin.of(semiJoin.getRowType(), left, right, semiJoin.getCondition(), semiJoin.getLeftKeys(), semiJoin.getRightKeys());
}
if (rel instanceof Join) {
final Join join = (Join) rel;
final MutableRel left = toMutable(join.getLeft());
final MutableRel right = toMutable(join.getRight());
return MutableJoin.of(join.getRowType(), left, right, join.getCondition(), join.getJoinType(), join.getVariablesSet());
}
if (rel instanceof Correlate) {
final Correlate correlate = (Correlate) rel;
final MutableRel left = toMutable(correlate.getLeft());
final MutableRel right = toMutable(correlate.getRight());
return MutableCorrelate.of(correlate.getRowType(), left, right, correlate.getCorrelationId(), correlate.getRequiredColumns(), correlate.getJoinType());
}
if (rel instanceof Union) {
final Union union = (Union) rel;
final List<MutableRel> inputs = toMutables(union.getInputs());
return MutableUnion.of(union.getRowType(), inputs, union.all);
}
if (rel instanceof Minus) {
final Minus minus = (Minus) rel;
final List<MutableRel> inputs = toMutables(minus.getInputs());
return MutableMinus.of(minus.getRowType(), inputs, minus.all);
}
if (rel instanceof Intersect) {
final Intersect intersect = (Intersect) rel;
final List<MutableRel> inputs = toMutables(intersect.getInputs());
return MutableIntersect.of(intersect.getRowType(), inputs, intersect.all);
}
throw new RuntimeException("cannot translate " + rel + " to MutableRel");
}
Aggregations